Initial import of InnoDB-plugin 1.0.1 source tree

2025-11-27 05:41:41 +03:00 · 2008-11-30 22:10:29 -08:00
commit d6c7789c34
336 changed files with 203154 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,64 @@
 # Copyright (C) 2006 MySQL AB
 # 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; version 2 of the License.
 # 
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 # 
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 # Build script for the InnoDB storage engine library ("innobase").
 #
 # Debug builds additionally define SAFEMALLOC and SAFE_MUTEX, for both the
 # C++ and the C compiler.
 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 # Preprocessor definitions added to every compile in this directory.
 # NOTE(review): _WIN32 is defined unconditionally, so this script presumably
 # is only ever used for Windows builds -- confirm before reusing it elsewhere.
 ADD_DEFINITIONS(-DMYSQL_SERVER -D_WIN32 -D_LIB)
 # Header search path: server-wide headers and zlib, InnoDB's own include/
 # and handler/ directories, then the sql, regex and yaSSL headers.
 INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
                     ${CMAKE_SOURCE_DIR}/storage/innobase/include
                     ${CMAKE_SOURCE_DIR}/storage/innobase/handler
                     ${CMAKE_SOURCE_DIR}/sql
                     ${CMAKE_SOURCE_DIR}/regex
                     ${CMAKE_SOURCE_DIR}/extra/yassl/include)
 # Every C and C++ source file compiled into the library, grouped by
 # subdirectory. Makefile.am carries the same list in libinnobase_a_SOURCES;
 # a file added to or removed from one list must be mirrored in the other.
 SET(INNOBASE_SOURCES  btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c 
 					 buf/buf0buddy.c buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c 
 					 data/data0data.c data/data0type.c 
 					 dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c 
 					 dyn/dyn0dyn.c 
 					 eval/eval0eval.c eval/eval0proc.c 
 					 fil/fil0fil.c 
 					 fsp/fsp0fsp.c
 					 fut/fut0fut.c fut/fut0lst.c 
 					 ha/ha0ha.c ha/hash0hash.c ha/ha0storage.c 
 					 ibuf/ibuf0ibuf.c 
 					 pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c 
 					 lock/lock0lock.c lock/lock0iter.c 
 					 log/log0log.c log/log0recv.c 
 					 mach/mach0data.c 
 					 mem/mem0mem.c mem/mem0pool.c 
 					 mtr/mtr0log.c mtr/mtr0mtr.c 
 					 os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c 
 					 page/page0cur.c page/page0page.c page/page0zip.c 
 					 que/que0que.c 
 					 handler/ha_innodb.cc handler/handler0alter.cc handler/i_s.cc handler/mysql_addons.cc 
 					 read/read0read.c 
 					 rem/rem0cmp.c rem/rem0rec.c
 					 row/row0ext.c row/row0ins.c row/row0merge.c row/row0mysql.c 
 					 row/row0purge.c row/row0row.c row/row0sel.c row/row0uins.c 
 					 row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c 
 					 srv/srv0que.c srv/srv0srv.c srv/srv0start.c 
 					 sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c 
 					 thr/thr0loc.c 
 					 trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c 
 					 trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c 
 					 usr/usr0sess.c 
 					 ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c)
 # Unless SOURCE_SUBLIBS is set, build the "innobase" library from the list
 # above and make it depend on the GenError target.
 IF(NOT SOURCE_SUBLIBS)
  ADD_LIBRARY(innobase ${INNOBASE_SOURCES})
  ADD_DEPENDENCIES(innobase GenError)
 ENDIF(NOT SOURCE_SUBLIBS)
--- a/352
+++ b/352
@@ -0,0 +1,352 @@
                      GNU GENERAL PUBLIC LICENSE
                         Version 2, June 1991
     Copyright (C) 1989, 1991 Free Software Foundation, Inc.
     59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
     Everyone is permitted to copy and distribute verbatim copies
     of this license document, but changing it is not allowed.
 Preamble
 ========
 The licenses for most software are designed to take away your freedom
 to share and change it.  By contrast, the GNU General Public License is
 intended to guarantee your freedom to share and change free
 software--to make sure the software is free for all its users.  This
 General Public License applies to most of the Free Software
 Foundation's software and to any other program whose authors commit to
 using it.  (Some other Free Software Foundation software is covered by
 the GNU Library General Public License instead.)  You can apply it to
 your programs, too.
 When we speak of free software, we are referring to freedom, not price.
 Our General Public Licenses are designed to make sure that you have
 the freedom to distribute copies of free software (and charge for this
 service if you wish), that you receive source code or can get it if you
 want it, that you can change the software or use pieces of it in new
 free programs; and that you know you can do these things.
 To protect your rights, we need to make restrictions that forbid anyone
 to deny you these rights or to ask you to surrender the rights.  These
 restrictions translate to certain responsibilities for you if you
 distribute copies of the software, or if you modify it.
 For example, if you distribute copies of such a program, whether gratis
 or for a fee, you must give the recipients all the rights that you
 have.  You must make sure that they, too, receive or can get the source
 code.  And you must show them these terms so they know their rights.
 We protect your rights with two steps: (1) copyright the software, and
 (2) offer you this license which gives you legal permission to copy,
 distribute and/or modify the software.
 Also, for each author's protection and ours, we want to make certain
 that everyone understands that there is no warranty for this free
 software.  If the software is modified by someone else and passed on, we
 want its recipients to know that what they have is not the original, so
 that any problems introduced by others will not reflect on the original
 authors' reputations.
 Finally, any free program is threatened constantly by software patents.
 We wish to avoid the danger that redistributors of a free program will
 individually obtain patent licenses, in effect making the program
 proprietary.  To prevent this, we have made it clear that any patent
 must be licensed for everyone's free use or not licensed at all.
 The precise terms and conditions for copying, distribution and
 modification follow.
                      GNU GENERAL PUBLIC LICENSE
    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
  0. This License applies to any program or other work which contains a
     notice placed by the copyright holder saying it may be distributed
     under the terms of this General Public License.  The "Program",
     below, refers to any such program or work, and a "work based on
     the Program" means either the Program or any derivative work under
     copyright law: that is to say, a work containing the Program or a
     portion of it, either verbatim or with modifications and/or
     translated into another language.  (Hereinafter, translation is
     included without limitation in the term "modification".)  Each
     licensee is addressed as "you".
     Activities other than copying, distribution and modification are
     not covered by this License; they are outside its scope.  The act
     of running the Program is not restricted, and the output from the
     Program is covered only if its contents constitute a work based on
     the Program (independent of having been made by running the
     Program).  Whether that is true depends on what the Program does.
  1. You may copy and distribute verbatim copies of the Program's
     source code as you receive it, in any medium, provided that you
     conspicuously and appropriately publish on each copy an appropriate
     copyright notice and disclaimer of warranty; keep intact all the
     notices that refer to this License and to the absence of any
     warranty; and give any other recipients of the Program a copy of
     this License along with the Program.
     You may charge a fee for the physical act of transferring a copy,
     and you may at your option offer warranty protection in exchange
     for a fee.
  2. You may modify your copy or copies of the Program or any portion
     of it, thus forming a work based on the Program, and copy and
     distribute such modifications or work under the terms of Section 1
     above, provided that you also meet all of these conditions:
       a. You must cause the modified files to carry prominent notices
          stating that you changed the files and the date of any change.
       b. You must cause any work that you distribute or publish, that
          in whole or in part contains or is derived from the Program
          or any part thereof, to be licensed as a whole at no charge
          to all third parties under the terms of this License.
       c. If the modified program normally reads commands interactively
          when run, you must cause it, when started running for such
          interactive use in the most ordinary way, to print or display
          an announcement including an appropriate copyright notice and
          a notice that there is no warranty (or else, saying that you
          provide a warranty) and that users may redistribute the
          program under these conditions, and telling the user how to
          view a copy of this License.  (Exception: if the Program
          itself is interactive but does not normally print such an
          announcement, your work based on the Program is not required
          to print an announcement.)
     These requirements apply to the modified work as a whole.  If
     identifiable sections of that work are not derived from the
     Program, and can be reasonably considered independent and separate
     works in themselves, then this License, and its terms, do not
     apply to those sections when you distribute them as separate
     works.  But when you distribute the same sections as part of a
     whole which is a work based on the Program, the distribution of
     the whole must be on the terms of this License, whose permissions
     for other licensees extend to the entire whole, and thus to each
     and every part regardless of who wrote it.
     Thus, it is not the intent of this section to claim rights or
     contest your rights to work written entirely by you; rather, the
     intent is to exercise the right to control the distribution of
     derivative or collective works based on the Program.
     In addition, mere aggregation of another work not based on the
     Program with the Program (or with a work based on the Program) on
     a volume of a storage or distribution medium does not bring the
     other work under the scope of this License.
  3. You may copy and distribute the Program (or a work based on it,
     under Section 2) in object code or executable form under the terms
     of Sections 1 and 2 above provided that you also do one of the
     following:
       a. Accompany it with the complete corresponding machine-readable
          source code, which must be distributed under the terms of
          Sections 1 and 2 above on a medium customarily used for
          software interchange; or,
       b. Accompany it with a written offer, valid for at least three
          years, to give any third-party, for a charge no more than your
          cost of physically performing source distribution, a complete
          machine-readable copy of the corresponding source code, to be
          distributed under the terms of Sections 1 and 2 above on a
          medium customarily used for software interchange; or,
       c. Accompany it with the information you received as to the offer
          to distribute corresponding source code.  (This alternative is
          allowed only for noncommercial distribution and only if you
          received the program in object code or executable form with
          such an offer, in accord with Subsection b above.)
     The source code for a work means the preferred form of the work for
     making modifications to it.  For an executable work, complete
     source code means all the source code for all modules it contains,
     plus any associated interface definition files, plus the scripts
     used to control compilation and installation of the executable.
     However, as a special exception, the source code distributed need
     not include anything that is normally distributed (in either
     source or binary form) with the major components (compiler,
     kernel, and so on) of the operating system on which the executable
     runs, unless that component itself accompanies the executable.
     If distribution of executable or object code is made by offering
     access to copy from a designated place, then offering equivalent
     access to copy the source code from the same place counts as
     distribution of the source code, even though third parties are not
     compelled to copy the source along with the object code.
  4. You may not copy, modify, sublicense, or distribute the Program
     except as expressly provided under this License.  Any attempt
     otherwise to copy, modify, sublicense or distribute the Program is
     void, and will automatically terminate your rights under this
     License.  However, parties who have received copies, or rights,
     from you under this License will not have their licenses
     terminated so long as such parties remain in full compliance.
  5. You are not required to accept this License, since you have not
     signed it.  However, nothing else grants you permission to modify
     or distribute the Program or its derivative works.  These actions
     are prohibited by law if you do not accept this License.
     Therefore, by modifying or distributing the Program (or any work
     based on the Program), you indicate your acceptance of this
     License to do so, and all its terms and conditions for copying,
     distributing or modifying the Program or works based on it.
  6. Each time you redistribute the Program (or any work based on the
     Program), the recipient automatically receives a license from the
     original licensor to copy, distribute or modify the Program
     subject to these terms and conditions.  You may not impose any
     further restrictions on the recipients' exercise of the rights
     granted herein.  You are not responsible for enforcing compliance
     by third parties to this License.
  7. If, as a consequence of a court judgment or allegation of patent
     infringement or for any other reason (not limited to patent
     issues), conditions are imposed on you (whether by court order,
     agreement or otherwise) that contradict the conditions of this
     License, they do not excuse you from the conditions of this
     License.  If you cannot distribute so as to satisfy simultaneously
     your obligations under this License and any other pertinent
     obligations, then as a consequence you may not distribute the
     Program at all.  For example, if a patent license would not permit
     royalty-free redistribution of the Program by all those who
     receive copies directly or indirectly through you, then the only
     way you could satisfy both it and this License would be to refrain
     entirely from distribution of the Program.
     If any portion of this section is held invalid or unenforceable
     under any particular circumstance, the balance of the section is
     intended to apply and the section as a whole is intended to apply
     in other circumstances.
     It is not the purpose of this section to induce you to infringe any
     patents or other property right claims or to contest validity of
     any such claims; this section has the sole purpose of protecting
     the integrity of the free software distribution system, which is
     implemented by public license practices.  Many people have made
     generous contributions to the wide range of software distributed
     through that system in reliance on consistent application of that
     system; it is up to the author/donor to decide if he or she is
     willing to distribute software through any other system and a
     licensee cannot impose that choice.
     This section is intended to make thoroughly clear what is believed
     to be a consequence of the rest of this License.
  8. If the distribution and/or use of the Program is restricted in
     certain countries either by patents or by copyrighted interfaces,
     the original copyright holder who places the Program under this
     License may add an explicit geographical distribution limitation
     excluding those countries, so that distribution is permitted only
     in or among countries not thus excluded.  In such case, this
     License incorporates the limitation as if written in the body of
     this License.
  9. The Free Software Foundation may publish revised and/or new
     versions of the General Public License from time to time.  Such
     new versions will be similar in spirit to the present version, but
     may differ in detail to address new problems or concerns.
     Each version is given a distinguishing version number.  If the
     Program specifies a version number of this License which applies
     to it and "any later version", you have the option of following
     the terms and conditions either of that version or of any later
     version published by the Free Software Foundation.  If the Program
     does not specify a version number of this License, you may choose
     any version ever published by the Free Software Foundation.
 10. If you wish to incorporate parts of the Program into other free
     programs whose distribution conditions are different, write to the
     author to ask for permission.  For software which is copyrighted
     by the Free Software Foundation, write to the Free Software
     Foundation; we sometimes make exceptions for this.  Our decision
     will be guided by the two goals of preserving the free status of
     all derivatives of our free software and of promoting the sharing
     and reuse of software generally.
                                NO WARRANTY
 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
     WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
     LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
     HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
     WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT
     NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS TO THE
     QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
     PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY
     SERVICING, REPAIR OR CORRECTION.
 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
     WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY
     MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE
     LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
     INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
     INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
     DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU
     OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY
     OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
     ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
                      END OF TERMS AND CONDITIONS
 How to Apply These Terms to Your New Programs
 =============================================
 If you develop a new program, and you want it to be of the greatest
 possible use to the public, the best way to achieve this is to make it
 free software which everyone can redistribute and change under these
 terms.
 To do so, attach the following notices to the program.  It is safest to
 attach them to the start of each source file to most effectively convey
 the exclusion of warranty; and each file should have at least the
 "copyright" line and a pointer to where the full notice is found.
     ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.
     Copyright (C) YYYY  NAME OF AUTHOR
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation; either version 2 of the License, or
     (at your option) any later version.
     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.
     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 Also add information on how to contact you by electronic and paper mail.
 If the program is interactive, make it output a short notice like this
 when it starts in an interactive mode:
     Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR
     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
     This is free software, and you are welcome to redistribute it
     under certain conditions; type `show c' for details.
 The hypothetical commands `show w' and `show c' should show the
 appropriate parts of the General Public License.  Of course, the
 commands you use may be called something other than `show w' and `show
 c'; they could even be mouse-clicks or menu items--whatever suits your
 program.
 You should also get your employer (if you work as a programmer) or your
 school, if any, to sign a "copyright disclaimer" for the program, if
 necessary.  Here is a sample; alter the names:
     Yoyodyne, Inc., hereby disclaims all copyright interest in the program
     `Gnomovision' (which makes passes at compilers) written by James Hacker.
     SIGNATURE OF TY COON, 1 April 1989
     Ty Coon, President of Vice
 This General Public License does not permit incorporating your program
 into proprietary programs.  If your program is a subroutine library,
 you may consider it more useful to permit linking proprietary
 applications with the library.  If this is what you want to do, use the
 GNU Library General Public License instead of this License.
--- a/70
+++ b/70
@@ -0,0 +1,70 @@
 2008-05-06	The InnoDB Team
 	* handler/ha_innodb.cc, include/srv0srv.h, include/sync0sync.h,
 	include/trx0sys.h, mysql-test/innodb-zip.result,
 	mysql-test/innodb-zip.test, srv/srv0srv.c, srv/srv0start.c,
 	sync/sync0sync.c, trx/trx0sys.c:
 	Implement the system tablespace tagging
 	* handler/ha_innodb.cc, handler/i_s.cc, include/univ.i,
 	srv/srv0start.c:
 	Add InnoDB version in INFORMATION_SCHEMA.PLUGINS.PLUGIN_VERSION,
 	in the startup message and in a server variable innodb_version.
 	* sync/sync0sync.c:
 	Fix a bug in the sync debug code where a lock with level
 	SYNC_LEVEL_VARYING would cause an assertion failure when a thread
 	tried to release it.
 2008-04-30	The InnoDB Team
 	* Makefile.am:
 	Fix Bug#36434 ha_innodb.so is installed in the wrong directory
 	* handler/ha_innodb.cc:
 	Merge change from MySQL (Fix Bug#35406 5.1-opt crashes on select from
 	I_S.REFERENTIAL_CONSTRAINTS):
 	ChangeSet@1.2563, 2008-03-18 19:42:04+04:00, gluh@mysql.com +1 -0
 	* scripts/install_innodb_plugins.sql:
 	Added
 	* mysql-test/innodb.result:
 	Merge change from MySQL (this fixes the failing innodb test):
 	ChangeSet@1.1810.3601.4, 2008-02-07 02:33:21+04:00
 	* row/row0sel.c:
 	Fix Bug#35226 RBR event crashes slave
 	* handler/ha_innodb.cc:
 	Change the fix for Bug#32440 to show bytes instead of kilobytes in
 	INFORMATION_SCHEMA.TABLES.DATA_FREE
 	* handler/ha_innodb.cc, mysql-test/innodb.result,
 	mysql-test/innodb.test:
 	Fix Bug#29507 TRUNCATE shows to many rows effected
 	* handler/ha_innodb.cc, mysql-test/innodb.result,
 	mysql-test/innodb.test:
 	Fix Bug#35537 Innodb doesn't increment handler_update and
 	handler_delete
 2008-04-29	The InnoDB Team
 	* handler/i_s.cc, include/srv0start.h, srv/srv0start.c: 
 	Fix Bug#36310 InnoDB plugin crash
 2008-04-23	The InnoDB Team
 	* mysql-test/innodb_bug36169.result, mysql-test/innodb_bug36169.test,
 	row/row0mysql.c:
 	Fix Bug#36169 create innodb compressed table with too large row size
 	crashed
 	* (outside the source tree):
 	Fix Bug#36222 New InnoDB plugin 1.0 has wrong MKDIR_P defined in
 	Makefile.in
 2008-04-15	The InnoDB Team
 	InnoDB Plugin 1.0.0 released
--- a/Makefile.am
+++ b/Makefile.am
@@ -0,0 +1,195 @@
 # Copyright (C) 2001, 2004, 2006 MySQL AB & Innobase Oy
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; version 2 of the License.
 # 
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 # 
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 # Process this file with automake to create Makefile.in
 # Directory variables derived from the configure-time installation paths.
 MYSQLDATAdir=		$(localstatedir)
 MYSQLSHAREdir=		$(pkgdatadir)
 MYSQLBASEdir=		$(prefix)
 MYSQLLIBdir=		$(pkglibdir)
 # Plugin directory; it is the -rpath of ha_innodb.la and, through the
 # pkgplugin_ prefix of pkgplugin_LTLIBRARIES, its install location.
 pkgplugindir=		$(pkglibdir)/plugin
 # Header search path: server headers from the source and build trees, the
 # regex headers, InnoDB's own include/ directory, the sql directory, and
 # this directory itself.
 INCLUDES=		-I$(top_srcdir)/include -I$(top_builddir)/include \
 			-I$(top_srcdir)/regex \
 			-I$(top_srcdir)/storage/innobase/include \
 			-I$(top_srcdir)/sql \
 			-I$(srcdir)
 # Preprocessor definitions are taken verbatim from configure's @DEFS@.
 DEFS=			@DEFS@
 # Headers (*.h) and inline-function files (*.ic, univ.i) that belong to the
 # source distribution but are not installed.
 # NOTE(review): mem/mem0dbg.c is listed here and not in
 # libinnobase_a_SOURCES -- presumably it is #included by another source file
 # rather than compiled on its own; confirm before moving it.
 noinst_HEADERS=		include/btr0btr.h include/btr0btr.ic		\
 			include/btr0cur.h include/btr0cur.ic		\
 			include/btr0pcur.h include/btr0pcur.ic		\
 			include/btr0sea.h include/btr0sea.ic		\
 			include/btr0types.h include/buf0buddy.h		\
 			include/buf0buddy.ic include/buf0buf.h		\
 			include/buf0buf.ic include/buf0flu.h		\
 			include/buf0flu.ic include/buf0lru.h		\
 			include/buf0lru.ic include/buf0rea.h		\
 			include/buf0types.h include/data0data.h		\
 			include/data0data.ic include/data0type.h	\
 			include/data0type.ic include/data0types.h	\
 			include/db0err.h include/dict0boot.h		\
 			include/dict0boot.ic include/dict0crea.h	\
 			include/dict0crea.ic include/dict0dict.h	\
 			include/dict0dict.ic include/dict0load.h	\
 			include/dict0load.ic include/dict0mem.h		\
 			include/dict0mem.ic include/dict0types.h	\
 			include/dyn0dyn.h include/dyn0dyn.ic		\
 			include/eval0eval.h include/eval0eval.ic	\
 			include/eval0proc.h include/eval0proc.ic	\
 			include/fil0fil.h include/fsp0fsp.h		\
 			include/fsp0fsp.ic include/fut0fut.h		\
 			include/fut0fut.ic include/fut0lst.h		\
 			include/fut0lst.ic include/ha0ha.h		\
 			include/ha0ha.ic				\
 			include/ha0storage.h				\
 			include/ha0storage.ic				\
 			include/hash0hash.h				\
 			include/hash0hash.ic include/ibuf0ibuf.h	\
 			include/ibuf0ibuf.ic include/ibuf0types.h	\
 			include/lock0iter.h				\
 			include/lock0lock.h include/lock0lock.ic	\
 			include/lock0priv.h include/lock0priv.ic	\
 			include/lock0types.h include/log0log.h		\
 			include/log0log.ic include/log0recv.h		\
 			include/log0recv.ic include/mach0data.h		\
 			include/mach0data.ic include/mem0dbg.h		\
 			include/mem0dbg.ic mem/mem0dbg.c		\
 			include/mem0mem.h include/mem0mem.ic		\
 			include/mem0pool.h include/mem0pool.ic		\
 			include/mtr0log.h include/mtr0log.ic		\
 			include/mtr0mtr.h include/mtr0mtr.ic		\
 			include/mtr0types.h				\
 			include/mysql_addons.h				\
 			include/os0file.h				\
 			include/os0proc.h include/os0proc.ic		\
 			include/os0sync.h include/os0sync.ic		\
 			include/os0thread.h include/os0thread.ic	\
 			include/page0cur.h include/page0cur.ic		\
 			include/page0page.h include/page0page.ic	\
 			include/page0zip.h include/page0zip.ic		\
 			include/page0types.h include/pars0grm.h		\
 			include/pars0opt.h include/pars0opt.ic		\
 			include/pars0pars.h include/pars0pars.ic	\
 			include/pars0sym.h include/pars0sym.ic		\
 			include/pars0types.h include/que0que.h		\
 			include/que0que.ic include/que0types.h		\
 			include/read0read.h include/read0read.ic	\
 			include/read0types.h include/rem0cmp.h		\
 			include/rem0cmp.ic include/rem0rec.h		\
 			include/rem0rec.ic include/rem0types.h		\
 			include/row0ext.h include/row0ext.ic		\
 			include/row0ins.h include/row0ins.ic		\
 			include/row0merge.h				\
 			include/row0mysql.h include/row0mysql.ic	\
 			include/row0purge.h include/row0purge.ic	\
 			include/row0row.h include/row0row.ic		\
 			include/row0sel.h include/row0sel.ic		\
 			include/row0types.h include/row0uins.h		\
 			include/row0uins.ic include/row0umod.h		\
 			include/row0umod.ic include/row0undo.h		\
 			include/row0undo.ic include/row0upd.h		\
 			include/row0upd.ic include/row0vers.h		\
 			include/row0vers.ic include/srv0que.h		\
 			include/srv0srv.h include/srv0srv.ic		\
 			include/srv0start.h include/sync0arr.h		\
 			include/sync0arr.ic include/sync0rw.h		\
 			include/sync0rw.ic include/sync0sync.h		\
 			include/sync0sync.ic include/sync0types.h	\
 			include/thr0loc.h include/thr0loc.ic		\
 			include/trx0i_s.h				\
 			include/trx0purge.h include/trx0purge.ic	\
 			include/trx0rec.h include/trx0rec.ic		\
 			include/trx0roll.h include/trx0roll.ic		\
 			include/trx0rseg.h include/trx0rseg.ic		\
 			include/trx0sys.h include/trx0sys.ic		\
 			include/trx0trx.h include/trx0trx.ic		\
 			include/trx0types.h include/trx0undo.h		\
 			include/trx0undo.ic include/trx0xa.h		\
 			include/univ.i include/usr0sess.h		\
 			include/usr0sess.ic include/usr0types.h		\
 			include/ut0byte.h include/ut0byte.ic		\
 			include/ut0dbg.h include/ut0lst.h		\
 			include/ut0mem.h include/ut0mem.ic		\
 			include/ut0rnd.h include/ut0rnd.ic		\
 			include/ut0sort.h include/ut0ut.h		\
 			include/ut0ut.ic include/ut0vec.h		\
 			include/ut0vec.ic include/ut0list.h		\
 			include/ut0list.ic include/ut0wqueue.h		\
 			include/ha_prototypes.h handler/ha_innodb.h	\
 			include/handler0alter.h				\
 			handler/i_s.h
 # Static build of the storage engine. libinnobase.a is only declared in
 # EXTRA_LIBRARIES; what actually gets built is whatever configure
 # substitutes for @plugin_innobase_static_target@ in noinst_LIBRARIES.
 EXTRA_LIBRARIES=	libinnobase.a
 noinst_LIBRARIES=	@plugin_innobase_static_target@
 # Every C and C++ source file of the engine. CMakeLists.txt carries the
 # same list in INNOBASE_SOURCES; keep the two in sync.
 libinnobase_a_SOURCES=	btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c	\
 			btr/btr0sea.c buf/buf0buddy.c			\
 			buf/buf0buf.c buf/buf0flu.c			\
 			buf/buf0lru.c buf/buf0rea.c data/data0data.c	\
 			data/data0type.c dict/dict0boot.c		\
 			dict/dict0crea.c dict/dict0dict.c		\
 			dict/dict0load.c dict/dict0mem.c dyn/dyn0dyn.c	\
 			eval/eval0eval.c eval/eval0proc.c		\
 			fil/fil0fil.c fsp/fsp0fsp.c fut/fut0fut.c	\
 			fut/fut0lst.c ha/ha0ha.c			\
 			ha/ha0storage.c					\
 			ha/hash0hash.c					\
 			ibuf/ibuf0ibuf.c lock/lock0iter.c		\
 			lock/lock0lock.c				\
 			log/log0log.c log/log0recv.c mach/mach0data.c	\
 			mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c	\
 			mtr/mtr0mtr.c os/os0file.c os/os0proc.c		\
 			os/os0sync.c os/os0thread.c page/page0cur.c	\
 			page/page0page.c page/page0zip.c		\
 			pars/lexyy.c pars/pars0grm.c			\
 			pars/pars0opt.c pars/pars0pars.c		\
 			pars/pars0sym.c que/que0que.c read/read0read.c	\
 			rem/rem0cmp.c rem/rem0rec.c row/row0ext.c	\
 			row/row0ins.c row/row0merge.c			\
 			row/row0mysql.c row/row0purge.c row/row0row.c	\
 			row/row0sel.c row/row0uins.c row/row0umod.c	\
 			row/row0undo.c row/row0upd.c row/row0vers.c	\
 			srv/srv0que.c srv/srv0srv.c srv/srv0start.c	\
 			sync/sync0arr.c sync/sync0rw.c			\
 			sync/sync0sync.c thr/thr0loc.c			\
 			trx/trx0i_s.c					\
 			trx/trx0purge.c					\
 			trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c	\
 			trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c	\
 			usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c		\
 			ut/ut0list.c ut/ut0mem.c ut/ut0rnd.c		\
 			ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c		\
 			handler/ha_innodb.cc handler/handler0alter.cc	\
 			handler/i_s.cc					\
 			handler/mysql_addons.cc
 # The C++ sources are compiled with the same flags as the C sources
 # (AM_CFLAGS is used for both).
 libinnobase_a_CXXFLAGS=	$(AM_CFLAGS)
 libinnobase_a_CFLAGS=	$(AM_CFLAGS)
 # Dynamically loadable build of the storage engine (ha_innodb.la). As with
 # the static library, the module is only declared here; configure selects
 # it through @plugin_innobase_shared_target@.
 EXTRA_LTLIBRARIES=	ha_innodb.la
 pkgplugin_LTLIBRARIES=	@plugin_innobase_shared_target@
 # Linked as a libtool module with the plugin directory as its run path.
 ha_innodb_la_LDFLAGS=	-module -rpath $(pkgplugindir)
 # Same flags as the static build plus -DMYSQL_DYNAMIC_PLUGIN, for both the
 # C++ and the C sources.
 ha_innodb_la_CXXFLAGS=	$(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
 ha_innodb_la_CFLAGS=	$(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
 # The module is built from exactly the sources of the static library.
 ha_innodb_la_SOURCES=	$(libinnobase_a_SOURCES)
 # Additional files shipped in the source distribution: the CMake build
 # script, plug.in, and the parser/lexer inputs (pars0grm.y, pars0lex.l)
 # together with the make_bison.sh and make_flex.sh scripts.
 EXTRA_DIST=		CMakeLists.txt plug.in \
 			pars/make_bison.sh pars/make_flex.sh \
 			pars/pars0grm.y pars/pars0lex.l
 # Don't update the files from bitkeeper: this recipe-less pattern rule
 # cancels make's built-in rule that extracts files from SCCS/s.* history
 # files.
 %::SCCS/s.%
--- a/Makefile.in
+++ b/Makefile.in
--- a/26
+++ b/26
@@ -0,0 +1,26 @@
 This is the source of the InnoDB Plugin 1.0.1 for MySQL 5.1
 ===========================================================
 Instructions for compiling the plugin:
 --------------------------------------
 1. Get the latest MySQL 5.1 sources from
   http://dev.mysql.com/downloads/mysql/5.1.html#source
 2. Replace the contents of the mysql-5.1.N/storage/innobase/ directory
   with the contents of this directory.
 3. Optional (only necessary if you are going to run tests from the
   mysql-test suite): cd into the innobase directory and run ./setup.sh
 4. Compile MySQL as usual.
 5. Enjoy!
 See the online documentation for more detailed instructions:
 http://www.innodb.com/doc/innodb_plugin-1.0/innodb-plugin-installation.html
 For more information about InnoDB visit
 http://www.innodb.com
 Thank you for using the InnoDB plugin!
--- a/btr/btr0btr.c
+++ b/btr/btr0btr.c
--- a/btr/btr0cur.c
+++ b/btr/btr0cur.c
--- a/btr/btr0pcur.c
+++ b/btr/btr0pcur.c
@@ -0,0 +1,567 @@
 /******************************************************
 The index tree persistent cursor
 (c) 1996 Innobase Oy
 Created 2/23/1996 Heikki Tuuri
 *******************************************************/
 #include "btr0pcur.h"
 #ifdef UNIV_NONINL
 #include "btr0pcur.ic"
 #endif
 #include "ut0byte.h"
 #include "rem0cmp.h"
 #include "trx0trx.h"
 /******************************************************************
 Creates a persistent cursor object in dynamically allocated memory and
 initializes it. The caller owns the returned object; see
 btr_pcur_free_for_mysql() for releasing it. */
 UNIV_INTERN
 btr_pcur_t*
 btr_pcur_create_for_mysql(void)
 /*============================*/
 				/* out, own: newly allocated cursor */
 {
 	btr_pcur_t*	new_cursor = mem_alloc(sizeof(*new_cursor));
 	/* The new cursor is not attached to any index yet. */
 	new_cursor->btr_cur.index = NULL;
 	btr_pcur_init(new_cursor);
 	return(new_cursor);
 }
 /******************************************************************
 Destroys a persistent cursor object: releases the stored record buffer,
 if there is one, resets the position fields of the cursor and finally
 frees the cursor object itself. */
 UNIV_INTERN
 void
 btr_pcur_free_for_mysql(
 /*====================*/
 	btr_pcur_t*	cursor)	/* in, own: persistent cursor */
 {
 	if (cursor->old_rec_buf) {
 		mem_free(cursor->old_rec_buf);
 		cursor->old_rec_buf = NULL;
 	}
 	/* Reset the position information before the memory is released. */
 	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
 	cursor->latch_mode = BTR_NO_LATCHES;
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 	cursor->old_n_fields = 0;
 	cursor->old_rec = NULL;
 	cursor->btr_cur.page_cur.rec = NULL;
 	mem_free(cursor);
 }
 /******************************************************************
 Stores the position of the cursor so that it can be restored later. The
 position is remembered by copying an initial segment of the record the
 cursor is positioned on, before, or after into the cursor data structure.
 If the page holds no records, only a flag is set that tells whether the
 cursor is before the first or after the last record in an EMPTY tree.
 NOTE that the page where the cursor is positioned must not be empty if
 the index tree is not totally empty! */
 UNIV_INTERN
 void
 btr_pcur_store_position(
 /*====================*/
 	btr_pcur_t*	cursor, /* in: persistent cursor */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	buf_block_t*	block;
 	dict_index_t*	tree_index;
 	page_t*		page;
 	rec_t*		pos_rec;
 	ulint		pos_offs;
 	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	block = btr_pcur_get_block(cursor);
 	tree_index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
 	pos_rec = page_cur_get_rec(btr_pcur_get_page_cur(cursor));
 	page = page_align(pos_rec);
 	pos_offs = page_offset(pos_rec);
 	/* The block must be S- or X-fixed in the memo of this mtr. */
 	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
 	      || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 	ut_a(cursor->latch_mode != BTR_NO_LATCHES);
 	if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
 		/* A page without records is only allowed in an empty index
 		tree, so the page must have no siblings. In this case the
 		modify_clock is not stored; restoring the position always
 		does a search. */
 		ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
 		ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
 		cursor->rel_pos = page_rec_is_supremum_low(pos_offs)
 			? BTR_PCUR_AFTER_LAST_IN_TREE
 			: BTR_PCUR_BEFORE_FIRST_IN_TREE;
 		cursor->old_stored = BTR_PCUR_OLD_STORED;
 		return;
 	}
 	if (page_rec_is_infimum_low(pos_offs)) {
 		/* On the infimum: remember the successor record and that
 		the cursor was before it. */
 		pos_rec = page_rec_get_next(pos_rec);
 		cursor->rel_pos = BTR_PCUR_BEFORE;
 	} else if (page_rec_is_supremum_low(pos_offs)) {
 		/* On the supremum: remember the predecessor record and
 		that the cursor was after it. */
 		pos_rec = page_rec_get_prev(pos_rec);
 		cursor->rel_pos = BTR_PCUR_AFTER;
 	} else {
 		cursor->rel_pos = BTR_PCUR_ON;
 	}
 	cursor->old_rec = dict_index_copy_rec_order_prefix(
 		tree_index, pos_rec, &cursor->old_n_fields,
 		&cursor->old_rec_buf, &cursor->buf_size);
 	cursor->old_stored = BTR_PCUR_OLD_STORED;
 	/* Remember the block and its modify clock as of store time. */
 	cursor->block_when_stored = block;
 	cursor->modify_clock = buf_block_get_modify_clock(block);
 }
 /******************************************************************
 Copies the stored position of one persistent cursor to another. A record
 buffer previously owned by the receiving cursor is freed, and the receiver
 gets its own private copy of the record buffer of the donor. */
 UNIV_INTERN
 void
 btr_pcur_copy_stored_position(
 /*==========================*/
 	btr_pcur_t*	pcur_receive,	/* in: pcur which will receive the
 					position info */
 	btr_pcur_t*	pcur_donate)	/* in: pcur from which the info is
 					copied */
 {
 	/* Release the old buffer of the receiver before its pointer is
 	overwritten by the structure copy below. */
 	if (pcur_receive->old_rec_buf != NULL) {
 		mem_free(pcur_receive->old_rec_buf);
 	}
 	ut_memcpy(pcur_receive, pcur_donate, sizeof(*pcur_receive));
 	if (pcur_donate->old_rec_buf != NULL) {
 		/* The structure copy made the receiver point to the buffer
 		of the donor: give the receiver a buffer of its own and let
 		old_rec point to the same offset within the new buffer. */
 		pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size);
 		ut_memcpy(pcur_receive->old_rec_buf,
 			  pcur_donate->old_rec_buf,
 			  pcur_donate->buf_size);
 		pcur_receive->old_rec
 			= pcur_receive->old_rec_buf
 			+ (pcur_donate->old_rec - pcur_donate->old_rec_buf);
 	}
 	pcur_receive->old_n_fields = pcur_donate->old_n_fields;
 }
 /******************************************************************
 Restores the stored position of a persistent cursor bufferfixing the page and
 obtaining the specified latches. If the cursor position was saved when the
 (1) cursor was positioned on a user record: this function restores the position
 to the last record LESS OR EQUAL to the stored record;
 (2) cursor was positioned on a page infimum record: restores the position to
 the last record LESS than the user record which was the successor of the page
 infimum;
 (3) cursor was positioned on the page supremum: restores to the first record
 GREATER than the user record which was the predecessor of the supremum.
 (4) cursor was positioned before the first or after the last in an empty tree:
 restores to before first or after the last in the tree.
 For the latch modes BTR_SEARCH_LEAF and BTR_MODIFY_LEAF an optimistic
 restoration on the remembered block is tried first; if that is not possible,
 the cursor is opened anew with a search on the stored record prefix. */
 UNIV_INTERN
 ibool
 btr_pcur_restore_position(
 /*======================*/
 					/* out: TRUE if the cursor position
 					was stored when it was on a user record
 					and it can be restored on a user record
 					whose ordering fields are identical to
 					the ones of the original user record */
 	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor,		/* in: detached persistent cursor */
 	mtr_t*		mtr)		/* in: mtr */
 {
 	dict_index_t*	index;
 	dtuple_t*	tuple;
 	ulint		mode;
 	ulint		old_mode;
 	mem_heap_t*	heap;
 	index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
 	/* A cursor without a stored position, or one that is in neither the
 	"was positioned" nor the "is positioned" state, cannot be restored:
 	print diagnostics and fail with ut_error. */
 	if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
 	    || UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
 			     && cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
 		ut_print_buf(stderr, cursor, sizeof(btr_pcur_t));
 		if (cursor->trx_if_known) {
 			trx_print(stderr, cursor->trx_if_known, 0);
 		}
 		ut_error;
 	}
 	if (UNIV_UNLIKELY
 	    (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
 	     || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
 		/* In these cases we do not try an optimistic restoration,
 		but always do a search */
 		btr_cur_open_at_index_side(
 			cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
 			index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);
 		cursor->block_when_stored = btr_pcur_get_block(cursor);
 		return(FALSE);
 	}
 	/* From here on a copy of a record prefix must have been stored. */
 	ut_a(cursor->old_rec);
 	ut_a(cursor->old_n_fields);
 	if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
 	    || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
 		/* Try optimistic restoration */
 		/* NOTE(review): buf_page_optimistic_get() is passed the
 		block and the modify clock remembered at store time;
 		presumably it succeeds only if the block is unchanged since
 		then - confirm against its definition. */
 		if (UNIV_LIKELY(buf_page_optimistic_get(
 					latch_mode,
 					cursor->block_when_stored,
 					cursor->modify_clock, mtr))) {
 			cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 #ifdef UNIV_SYNC_DEBUG
 			buf_block_dbg_add_level(btr_pcur_get_block(cursor),
 						SYNC_TREE_NODE);
 #endif /* UNIV_SYNC_DEBUG */
 			if (cursor->rel_pos == BTR_PCUR_ON) {
 #ifdef UNIV_DEBUG
 				const rec_t*	rec;
 				const ulint*	offsets1;
 				const ulint*	offsets2;
 #endif /* UNIV_DEBUG */
 				cursor->latch_mode = latch_mode;
 #ifdef UNIV_DEBUG
 				/* Debug check: the record under the cursor
 				must compare equal to the stored prefix on
 				the first old_n_fields fields. */
 				rec = btr_pcur_get_rec(cursor);
 				heap = mem_heap_create(256);
 				offsets1 = rec_get_offsets(
 					cursor->old_rec, index, NULL,
 					cursor->old_n_fields, &heap);
 				offsets2 = rec_get_offsets(
 					rec, index, NULL,
 					cursor->old_n_fields, &heap);
 				ut_ad(!cmp_rec_rec(cursor->old_rec,
 						   rec, offsets1, offsets2,
 						   index));
 				mem_heap_free(heap);
 #endif /* UNIV_DEBUG */
 				return(TRUE);
 			}
 			/* The optimistic get succeeded, but the stored
 			position was before or after a record, not on it. */
 			return(FALSE);
 		}
 	}
 	/* If optimistic restoration did not succeed, open the cursor anew */
 	heap = mem_heap_create(256);
 	tuple = dict_index_build_data_tuple(index, cursor->old_rec,
 					    cursor->old_n_fields, heap);
 	/* Save the old search mode of the cursor */
 	old_mode = cursor->search_mode;
 	/* Choose the search mode matching cases (1)-(3) of the function
 	comment. */
 	if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) {
 		mode = PAGE_CUR_LE;
 	} else if (cursor->rel_pos == BTR_PCUR_AFTER) {
 		mode = PAGE_CUR_G;
 	} else {
 		ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
 		mode = PAGE_CUR_L;
 	}
 	btr_pcur_open_with_no_init(index, tuple, mode, latch_mode,
 				   cursor, 0, mtr);
 	/* Restore the old search mode */
 	cursor->search_mode = old_mode;
 	/* The restoration is exact only if the cursor was stored on a user
 	record and the search ended on a user record that compares equal
 	to the stored tuple. */
 	if (cursor->rel_pos == BTR_PCUR_ON
 	    && btr_pcur_is_on_user_rec(cursor)
 	    && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
 				   rec_get_offsets(
 					   btr_pcur_get_rec(cursor), index,
 					   NULL, ULINT_UNDEFINED, &heap))) {
 		/* We have to store the NEW value for the modify clock, since
 		the cursor can now be on a different page! But we can retain
 		the value of old_rec */
 		cursor->block_when_stored = btr_pcur_get_block(cursor);
 		cursor->modify_clock = buf_block_get_modify_clock(
 			cursor->block_when_stored);
 		cursor->old_stored = BTR_PCUR_OLD_STORED;
 		mem_heap_free(heap);
 		return(TRUE);
 	}
 	/* The tuple, which was built in the heap, is not used any more. */
 	mem_heap_free(heap);
 	/* We have to store new position information, modify_clock etc.,
 	to the cursor because it can now be on a different page, the record
 	under it may have been removed, etc. */
 	btr_pcur_store_position(cursor, mtr);
 	return(FALSE);
 }
 /******************************************************************
 If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF,
 releases the page latch and bufferfix reserved by the cursor, and marks
 the cursor as detached (BTR_PCUR_WAS_POSITIONED, BTR_NO_LATCHES).
 NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
 made by the current mini-transaction to the data protected by the
 cursor latch, as then the latch must not be released until mtr_commit. */
 UNIV_INTERN
 void
 btr_pcur_release_leaf(
 /*==================*/
 	btr_pcur_t*	cursor, /* in: persistent cursor */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	btr_leaf_page_release(btr_pcur_get_block(cursor),
 			      cursor->latch_mode, mtr);
 	/* The cursor does not hold a latch on the page any more. */
 	cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
 	cursor->latch_mode = BTR_NO_LATCHES;
 }
 /*************************************************************
 Moves the persistent cursor to the first record on the next page: latches
 the next page in the latch mode of the cursor, releases the latch on the
 current page and bufferunfixes it, and positions the cursor before the
 first record of the next page. Note that there must not be modifications
 on the current page, as then the x-latch can be released only in
 mtr_commit. */
 UNIV_INTERN
 void
 btr_pcur_move_to_next_page(
 /*=======================*/
 	btr_pcur_t*	cursor,	/* in: persistent cursor; must be on the
 				last record of the current page */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	buf_block_t*	cur_block;
 	page_t*		page;
 	ulint		next_page_no;
 	buf_block_t*	next_block;
 	page_t*		next_page;
 	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	ut_ad(btr_pcur_is_after_last_on_page(cursor));
 	/* A previously stored position is not valid after the move. */
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 	cur_block = btr_pcur_get_block(cursor);
 	page = btr_pcur_get_page(cursor);
 	next_page_no = btr_page_get_next(page, mtr);
 	ut_ad(next_page_no != FIL_NULL);
 	/* Latch the next page before letting go of the current one. */
 	next_block = btr_block_get(buf_block_get_space(cur_block),
 				   buf_block_get_zip_size(cur_block),
 				   next_page_no, cursor->latch_mode, mtr);
 	next_page = buf_block_get_frame(next_block);
 #ifdef UNIV_BTR_DEBUG
 	ut_a(page_is_comp(next_page) == page_is_comp(page));
 	ut_a(btr_page_get_prev(next_page, mtr)
 	     == buf_block_get_page_no(btr_pcur_get_block(cursor)));
 #endif /* UNIV_BTR_DEBUG */
 	next_block->check_index_page_at_flush = TRUE;
 	btr_leaf_page_release(cur_block, cursor->latch_mode, mtr);
 	page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor));
 	page_check_dir(next_page);
 }
 /*************************************************************
 Moves the persistent cursor backward if it is on the first record of the page.
 Commits mtr. Note that to prevent a possible deadlock, the operation
 first stores the position of the cursor, commits mtr, acquires the necessary
 latches and restores the cursor position again before returning. The
 alphabetical position of the cursor is guaranteed to be sensible on
 return, but it may happen that the cursor is not positioned on the last
 record of any page, because the structure of the tree may have changed
 during the time when the cursor had no latches. On return the latch mode
 of the cursor is the one it had on entry and no position is stored. */
 UNIV_INTERN
 void
 btr_pcur_move_backward_from_page(
 /*=============================*/
 	btr_pcur_t*	cursor,	/* in: persistent cursor, must be on the first
 				record of the current page */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	page_t*		page;
 	buf_block_t*	prev_block;
 	ulint		latch_mode;
 	ulint		latch_mode2;
 	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	ut_ad(btr_pcur_is_before_first_on_page(cursor));
 	ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));
 	latch_mode = cursor->latch_mode;
 	/* Restore the position with the latch mode variant that also
 	latches the previous page. */
 	if (latch_mode == BTR_SEARCH_LEAF) {
 		latch_mode2 = BTR_SEARCH_PREV;
 	} else if (latch_mode == BTR_MODIFY_LEAF) {
 		latch_mode2 = BTR_MODIFY_PREV;
 	} else {
 		latch_mode2 = 0; /* To eliminate compiler warning */
 		ut_error;
 	}
 	btr_pcur_store_position(cursor, mtr);
 	mtr_commit(mtr);
 	mtr_start(mtr);
 	btr_pcur_restore_position(latch_mode2, cursor, mtr);
 	page = btr_pcur_get_page(cursor);
 	/* If the page has no left sibling, there is no previous page to
 	move to or to release: leave the cursor where it was restored. */
 	if (btr_page_get_prev(page, mtr) != FIL_NULL) {
 		prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
 		if (btr_pcur_is_before_first_on_page(cursor)) {
 			/* Still on the page infimum: release the current
 			page and move to the end of the previous page. */
 			btr_leaf_page_release(btr_pcur_get_block(cursor),
 					      latch_mode, mtr);
 			page_cur_set_after_last(
 				prev_block, btr_pcur_get_page_cur(cursor));
 		} else {
 			/* The repositioned cursor did not end on an infimum
 			record on a page. Cursor repositioning acquired a
 			latch also on the previous page, but we do not need
 			the latch: release it. */
 			btr_leaf_page_release(prev_block, latch_mode, mtr);
 		}
 	}
 	cursor->latch_mode = latch_mode;
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 }
 /*************************************************************
 Moves the persistent cursor one record backward in the tree. Within a page
 this is a plain step to the previous record; on the first position of a
 page the cursor is moved backward from the page with
 btr_pcur_move_backward_from_page(). If no records are left, the cursor
 stays 'before first in tree'. */
 UNIV_INTERN
 ibool
 btr_pcur_move_to_prev(
 /*==================*/
 				/* out: TRUE if the cursor was not before first
 				in tree */
 	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
 				function may release the page latch */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 	if (!btr_pcur_is_before_first_on_page(cursor)) {
 		/* The common case: step backward within the page. */
 		btr_pcur_move_to_prev_on_page(cursor);
 		return(TRUE);
 	}
 	if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
 		/* Nothing precedes the cursor position. */
 		return(FALSE);
 	}
 	btr_pcur_move_backward_from_page(cursor, mtr);
 	return(TRUE);
 }
 /******************************************************************
 Opens a persistent cursor on a user record. If mode is PAGE_CUR_G or
 PAGE_CUR_GE, the cursor is opened and, if it ends up after the last record
 on its page, moved forward to the next user record. The modes PAGE_CUR_L
 and PAGE_CUR_LE are not implemented yet: they end in ut_error after the
 cursor has been opened. The latching mode must be BTR_SEARCH_LEAF or
 BTR_MODIFY_LEAF. */
 UNIV_INTERN
 void
 btr_pcur_open_on_user_rec(
 /*======================*/
 	dict_index_t*	index,		/* in: index */
 	const dtuple_t*	tuple,		/* in: tuple on which search done */
 	ulint		mode,		/* in: PAGE_CUR_L, ... */
 	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
 					BTR_MODIFY_LEAF */
 	btr_pcur_t*	cursor,		/* in: memory buffer for persistent
 					cursor */
 	mtr_t*		mtr)		/* in: mtr */
 {
 	btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
 	if ((mode != PAGE_CUR_GE) && (mode != PAGE_CUR_G)) {
 		ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L));
 		/* Not implemented yet */
 		ut_error;
 	} else if (btr_pcur_is_after_last_on_page(cursor)) {
 		/* The search ended on the page supremum: go on to the
 		next user record. */
 		btr_pcur_move_to_next_user_rec(cursor, mtr);
 	}
 }
--- a/btr/btr0sea.c
+++ b/btr/btr0sea.c
--- a/buf/buf0buddy.c
+++ b/buf/buf0buddy.c
@@ -0,0 +1,664 @@
 /******************************************************
 Binary buddy allocator for compressed pages
 (c) 2006 Innobase Oy
 Created December 2006 by Marko Makela
 *******************************************************/
 #define THIS_MODULE
 #include "buf0buddy.h"
 #ifdef UNIV_NONINL
 # include "buf0buddy.ic"
 #endif
 #undef THIS_MODULE
 #include "buf0buf.h"
 #include "buf0lru.h"
 #include "buf0flu.h"
 #include "page0zip.h"
 /* Statistic counters */
 #ifdef UNIV_DEBUG
 /** Number of frames allocated from the buffer pool to the buddy system.
 Protected by buf_pool_mutex. Exists in UNIV_DEBUG builds only; it is
 incremented in buf_buddy_block_register() and decremented in
 buf_buddy_block_free(). */
 static ulint buf_buddy_n_frames;
 #endif /* UNIV_DEBUG */
 /** Statistics of the buddy system, indexed by block size, that is, by
 the index of buf_pool->zip_free[]. The array has one extra element
 because the index BUF_BUDDY_SIZES, which stands for a whole buffer frame,
 is also used (see buf_buddy_alloc_low() and buf_buddy_free_low()).
 Protected by buf_pool_mutex. */
 UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
 /**************************************************************************
 Computes the address of the buddy of a compressed page frame. The two
 buddies of a pair differ only in the address bit whose value is size:
 the block that has the bit set is the upper one of the pair. */
 UNIV_INLINE
 byte*
 buf_buddy_get(
 /*==========*/
 			/* out: the buddy relative of page */
 	byte*	page,	/* in: compressed page */
 	ulint	size)	/* in: page size in bytes */
 {
 	ut_ad(ut_is_2pow(size));
 	ut_ad(size >= BUF_BUDDY_LOW);
 	ut_ad(size < BUF_BUDDY_HIGH);
 	ut_ad(!ut_align_offset(page, size));
 	/* Upper block: the buddy is below. Lower block: it is above. */
 	return((((ulint) page) & size) ? page - size : page + size);
 }
 /**************************************************************************
 Puts a block to the head of the buddy free list of its size class. In
 UNIV_DEBUG_VALGRIND builds the previous list head is made accessible for
 the duration of the list operation, and both it and the added block are
 marked free afterwards. */
 UNIV_INLINE
 void
 buf_buddy_add_to_free(
 /*==================*/
 	buf_page_t*	bpage,	/* in,own: block to be freed */
 	ulint		i)	/* in: index of buf_pool->zip_free[] */
 {
 #ifdef UNIV_DEBUG_VALGRIND
 	buf_page_t*	old_head = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
 	if (old_head != NULL) {
 		/* Presumably UT_LIST_ADD_FIRST accesses the old head of
 		the list, so it must not be marked free during the call. */
 		UNIV_MEM_VALID(old_head, BUF_BUDDY_LOW << i);
 	}
 #endif /* UNIV_DEBUG_VALGRIND */
 	ut_ad(buf_pool->zip_free[i].start != bpage);
 	UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
 #ifdef UNIV_DEBUG_VALGRIND
 	if (old_head != NULL) {
 		UNIV_MEM_FREE(old_head, BUF_BUDDY_LOW << i);
 	}
 	UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
 #endif /* UNIV_DEBUG_VALGRIND */
 }
 /**************************************************************************
 Takes a block out of the buddy free list of its size class. In
 UNIV_DEBUG_VALGRIND builds the list neighbours of the block are made
 accessible for the duration of the list operation and are marked free
 again afterwards. */
 UNIV_INLINE
 void
 buf_buddy_remove_from_free(
 /*=======================*/
 	buf_page_t*	bpage,	/* in: block to be removed */
 	ulint		i)	/* in: index of buf_pool->zip_free[] */
 {
 #ifdef UNIV_DEBUG_VALGRIND
 	buf_page_t*	prev = UT_LIST_GET_PREV(list, bpage);
 	buf_page_t*	next = UT_LIST_GET_NEXT(list, bpage);
 	if (prev != NULL) {
 		UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
 	}
 	if (next != NULL) {
 		UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
 	}
 	/* The neighbours in a free list must be free blocks as well. */
 	ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
 	ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
 #endif /* UNIV_DEBUG_VALGRIND */
 	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
 	UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
 #ifdef UNIV_DEBUG_VALGRIND
 	if (prev != NULL) {
 		UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
 	}
 	if (next != NULL) {
 		UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
 	}
 #endif /* UNIV_DEBUG_VALGRIND */
 }
 /**************************************************************************
 Try to allocate a block from buf_pool->zip_free[]. If the free list of the
 requested size is empty, a block of the next bigger size is allocated
 recursively and split in two: the lower half is returned and the upper
 half is added to the free list of the requested size. The caller must
 hold buf_pool_mutex. */
 static
 void*
 buf_buddy_alloc_zip(
 /*================*/
 			/* out: allocated block, or NULL
 			if buf_pool->zip_free[] was empty */
 	ulint	i)	/* in: index of buf_pool->zip_free[] */
 {
 	buf_page_t*	bpage;
 	ut_ad(buf_pool_mutex_own());
 	ut_a(i < BUF_BUDDY_SIZES);
 #if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
 	/* Valgrind would complain about accessing free memory. */
 	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]);
 #endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
 	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
 	if (bpage) {
 		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
 		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
 		buf_buddy_remove_from_free(bpage, i);
 	} else if (i + 1 < BUF_BUDDY_SIZES) {
 		/* Attempt to split. */
 		bpage = buf_buddy_alloc_zip(i + 1);
 		if (bpage) {
 			/* The upper half of the bigger block is not needed:
 			mark it free and put it to the free list. */
 			buf_page_t*	buddy = (buf_page_t*)
 				(((char*) bpage) + (BUF_BUDDY_LOW << i));
 			ut_ad(!buf_pool_contains_zip(buddy));
 			ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
 			buddy->state = BUF_BLOCK_ZIP_FREE;
 			buf_buddy_add_to_free(buddy, i);
 		}
 	}
 	if (bpage) {
 		/* In debug builds, fill the block with a pattern. */
 		ut_d(memset(bpage, ~i, BUF_BUDDY_LOW << i));
 		/* Declare the block allocated for memory checking. The
 		size of a block in slot i is BUF_BUDDY_LOW << i bytes;
 		BUF_BUDDY_SIZES is the number of size classes, not a size
 		in bytes. Nothing is declared if nothing was allocated. */
 		UNIV_MEM_ALLOC(bpage, BUF_BUDDY_LOW << i);
 	}
 	return(bpage);
 }
 /**************************************************************************
 Deallocate a buffer frame of UNIV_PAGE_SIZE: look up the block descriptor
 of the frame in buf_pool->zip_hash, remove it from the hash table and
 return the block to the buffer pool with
 buf_LRU_block_free_non_file_page(). The caller must hold buf_pool_mutex
 and must not hold buf_pool_zip_mutex. */
 static
 void
 buf_buddy_block_free(
 /*=================*/
 	void*	buf)	/* in: buffer frame to deallocate */
 {
 	const ulint	fold	= BUF_POOL_ZIP_FOLD_PTR(buf);
 	buf_page_t*	bpage;
 	buf_block_t*	block;
 	ut_ad(buf_pool_mutex_own());
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 	ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
 	/* Find the descriptor whose frame is buf. The entries of zip_hash
 	are treated as buf_block_t objects here. */
 	HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
 		    ((buf_block_t*) bpage)->frame == buf);
 	/* The frame must have been registered to the buddy system. */
 	ut_a(bpage);
 	ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
 	ut_ad(!bpage->in_page_hash);
 	ut_ad(bpage->in_zip_hash);
 	ut_d(bpage->in_zip_hash = FALSE);
 	HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
 	/* In debug builds, clear the contents of the frame. */
 	ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
 	UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
 	block = (buf_block_t*) bpage;
 	/* The block is freed while holding its own mutex. */
 	mutex_enter(&block->mutex);
 	buf_LRU_block_free_non_file_page(block);
 	mutex_exit(&block->mutex);
 	/* Debug-only accounting of the frames owned by the buddy system. */
 	ut_ad(buf_buddy_n_frames > 0);
 	ut_d(buf_buddy_n_frames--);
 }
 /**************************************************************************
 Allocate a buffer block to the buddy allocator: set the state of the block
 to BUF_BLOCK_MEMORY and insert its descriptor to buf_pool->zip_hash, where
 buf_buddy_block_free() looks it up by the frame address. The caller must
 hold buf_pool_mutex and must not hold buf_pool_zip_mutex. */
 static
 void
 buf_buddy_block_register(
 /*=====================*/
 	buf_block_t*	block)	/* in: buffer frame to allocate */
 {
 	const ulint	fold = BUF_POOL_ZIP_FOLD(block);
 	ut_ad(buf_pool_mutex_own());
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 	buf_block_set_state(block, BUF_BLOCK_MEMORY);
 	/* The block must have a frame aligned to UNIV_PAGE_SIZE. */
 	ut_a(block->frame);
 	ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));
 	/* The block must not be in either hash table yet. */
 	ut_ad(!block->page.in_page_hash);
 	ut_ad(!block->page.in_zip_hash);
 	ut_d(block->page.in_zip_hash = TRUE);
 	HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
 	/* Debug-only accounting of the frames owned by the buddy system. */
 	ut_d(buf_buddy_n_frames++);
 }
 /**************************************************************************
 Carves a block of size class i out of a bigger free object of size class
 j. The object is halved repeatedly; each time the upper half is put to
 the free list of its size class, until the lower part that remains is of
 the requested size. */
 static
 void*
 buf_buddy_alloc_from(
 /*=================*/
 				/* out: allocated block */
 	void*		buf,	/* in: a block that is free to use */
 	ulint		i,	/* in: index of buf_pool->zip_free[] */
 	ulint		j)	/* in: size of buf as an index
 				of buf_pool->zip_free[] */
 {
 	ulint	offs	= BUF_BUDDY_LOW << j;
 	ut_ad(j <= BUF_BUDDY_SIZES);
 	ut_ad(j >= i);
 	ut_ad(!ut_align_offset(buf, offs));
 	/* Add the unused parts of the block to the free lists. */
 	while (j > i) {
 		buf_page_t*	upper_half;
 		/* Go one size class down; offs is now the size of one
 		half and also the offset of the upper half within buf. */
 		j--;
 		offs >>= 1;
 		upper_half = (buf_page_t*) ((byte*) buf + offs);
 		ut_d(memset(upper_half, j, BUF_BUDDY_LOW << j));
 		upper_half->state = BUF_BLOCK_ZIP_FREE;
 #if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
 		/* Valgrind would complain about accessing free memory. */
 		UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[j]);
 #endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
 		buf_buddy_add_to_free(upper_half, j);
 	}
 	return(buf);
 }
 /**************************************************************************
 Allocate a block. The buddy free lists are tried first (when i denotes a
 size smaller than a whole frame), then the buf_pool->free list and, if
 lru != NULL, finally the replacement of a page in the buffer pool. The
 thread calling this function must hold buf_pool_mutex and must not hold
 buf_pool_zip_mutex or any block->mutex. The buf_pool_mutex may only be
 released and reacquired if lru != NULL. */
 UNIV_INTERN
 void*
 buf_buddy_alloc_low(
 /*================*/
 			/* out: allocated block,
 			possibly NULL if lru==NULL */
 	ulint	i,	/* in: index of buf_pool->zip_free[],
 			or BUF_BUDDY_SIZES */
 	ibool*	lru)	/* in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
 			and buf_pool_mutex was temporarily released,
 			or NULL if the LRU list should not be used */
 {
 	buf_block_t*	block = NULL;
 	ut_ad(buf_pool_mutex_own());
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 	if (i < BUF_BUDDY_SIZES) {
 		/* Try to allocate from the buddy system. */
 		block = buf_buddy_alloc_zip(i);
 	}
 	if (!block) {
 		/* A whole frame is needed. Try allocating from the
 		buf_pool->free list. */
 		block = buf_LRU_get_free_only();
 		if (!block) {
 			if (!lru) {
 				return(NULL);
 			}
 			/* Try replacing an uncompressed page in the buffer
 			pool. */
 			buf_pool_mutex_exit();
 			block = buf_LRU_get_free_block(0);
 			*lru = TRUE;
 			buf_pool_mutex_enter();
 		}
 		/* Hand the frame over to the buddy system and carve the
 		requested block out of it. */
 		buf_buddy_block_register(block);
 		block = buf_buddy_alloc_from(block->frame, i,
 					     BUF_BUDDY_SIZES);
 	}
 	buf_buddy_stat[i].used++;
 	return(block);
 }
 /**************************************************************************
 Try to relocate the control block of a compressed page. Only blocks in
 the state BUF_BLOCK_ZIP_PAGE can be relocated, and only if
 buf_page_can_relocate() permits it while buf_pool_zip_mutex is held. The
 caller must hold buf_pool_mutex. */
 static
 ibool
 buf_buddy_relocate_block(
 /*=====================*/
 				/* out: TRUE if relocated */
 	buf_page_t*	bpage,	/* in: block to relocate */
 	buf_page_t*	dpage)	/* in: free block to relocate to */
 {
 	buf_page_t*	prev_clean;
 	ibool		relocated = FALSE;
 	ut_ad(buf_pool_mutex_own());
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_ZIP_FREE:
 	case BUF_BLOCK_NOT_USED:
 	case BUF_BLOCK_READY_FOR_USE:
 	case BUF_BLOCK_FILE_PAGE:
 	case BUF_BLOCK_MEMORY:
 	case BUF_BLOCK_REMOVE_HASH:
 		ut_error;
 	case BUF_BLOCK_ZIP_DIRTY:
 		/* Cannot relocate dirty pages. */
 		return(FALSE);
 	case BUF_BLOCK_ZIP_PAGE:
 		break;
 	}
 	mutex_enter(&buf_pool_zip_mutex);
 	if (buf_page_can_relocate(bpage)) {
 		buf_relocate(bpage, dpage);
 		ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
 		/* relocate buf_pool->zip_clean: take dpage out of the
 		list and put it back to the same position */
 		prev_clean = UT_LIST_GET_PREV(list, dpage);
 		UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
 		if (prev_clean) {
 			UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean,
 					     prev_clean, dpage);
 		} else {
 			UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
 		}
 		relocated = TRUE;
 	}
 	mutex_exit(&buf_pool_zip_mutex);
 	return(relocated);
 }
 /**************************************************************************
 Try to relocate a block. A block of PAGE_ZIP_MIN_SIZE or bigger is treated
 as a compressed page and is copied to dst if its control block is found in
 the page hash and permits relocation. A block in the size class of
 buf_page_t is treated as a control block and is handed to
 buf_buddy_relocate_block(). Blocks of other sizes are not relocated. On
 success the relocation statistics of the size class are updated. The
 caller must hold buf_pool_mutex and must not hold buf_pool_zip_mutex. */
 static
 ibool
 buf_buddy_relocate(
 /*===============*/
 			/* out: TRUE if relocated */
 	void*	src,	/* in: block to relocate */
 	void*	dst,	/* in: free block to relocate to */
 	ulint	i)	/* in: index of buf_pool->zip_free[] */
 {
 	buf_page_t*	bpage;
 	const ulint	size	= BUF_BUDDY_LOW << i;
 	/* Start time, for the relocated_usec statistics */
 	ullint		usec	= ut_time_us(NULL);
 	ut_ad(buf_pool_mutex_own());
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 	ut_ad(!ut_align_offset(src, size));
 	ut_ad(!ut_align_offset(dst, size));
 	UNIV_MEM_ASSERT_W(dst, size);
 	/* We assume that all memory from buf_buddy_alloc()
 	is used for either compressed pages or buf_page_t
 	objects covering compressed pages. */
 	/* We look inside the allocated objects returned by
 	buf_buddy_alloc() and assume that anything of
 	PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
 	a valid space_id and page_no in the page header.  Should the
 	fields be invalid, we will be unable to relocate the block.
 	We also assume that anything that fits sizeof(buf_page_t)
 	actually is a properly initialized buf_page_t object. */
 	if (size >= PAGE_ZIP_MIN_SIZE) {
 		/* This is a compressed page. */
 		mutex_t*	mutex;
 		/* The src block may be split into smaller blocks,
 		some of which may be free.  Thus, the
 		mach_read_from_4() calls below may attempt to read
 		from free memory.  The memory is "owned" by the buddy
 		allocator (and it has been allocated from the buffer
 		pool), so there is nothing wrong about this.  The
 		mach_read_from_4() calls here will only trigger bogus
 		Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
 		bpage = buf_page_hash_get(
 			mach_read_from_4((const byte*) src
 					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID),
 			mach_read_from_4((const byte*) src
 					 + FIL_PAGE_OFFSET));
 		if (!bpage || bpage->zip.data != src) {
 			/* The block has probably been freshly
 			allocated by buf_LRU_get_free_block() but not
 			added to buf_pool->page_hash yet.  Obviously,
 			it cannot be relocated. */
 			return(FALSE);
 		}
 		if (page_zip_get_size(&bpage->zip) != size) {
 			/* The block is of different size.  We would
 			have to relocate all blocks covered by src.
 			For the sake of simplicity, give up. */
 			ut_ad(page_zip_get_size(&bpage->zip) < size);
 			return(FALSE);
 		}
 		/* The block must have been allocated, but it may
 		contain uninitialized data. */
 		UNIV_MEM_ASSERT_W(src, size);
 		/* The relocation check and the copy are done while holding
 		the mutex returned by buf_page_get_mutex(). */
 		mutex = buf_page_get_mutex(bpage);
 		mutex_enter(mutex);
 		if (buf_page_can_relocate(bpage)) {
 			/* Relocate the compressed page. */
 			ut_a(bpage->zip.data == src);
 			memcpy(dst, src, size);
 			bpage->zip.data = dst;
 			mutex_exit(mutex);
 			/* NOTE: the buf_page_t branch below also jumps to
 			this label; no mutex is held here on either path. */
 success:
 			UNIV_MEM_INVALID(src, size);
 			{
 				buf_buddy_stat_t*	buddy_stat
 					= &buf_buddy_stat[i];
 				buddy_stat->relocated++;
 				buddy_stat->relocated_usec
 					+= ut_time_us(NULL) - usec;
 			}
 			return(TRUE);
 		}
 		mutex_exit(mutex);
 	} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
 		/* This must be a buf_page_t object. */
 		UNIV_MEM_ASSERT_RW(src, size);
 		if (buf_buddy_relocate_block(src, dst)) {
 			goto success;
 		}
 	}
 	/* The block could not be relocated. */
 	return(FALSE);
 }
 /**************************************************************************
 Deallocate a block.  The block is merged with its buddy for as long as the
 buddy is free, or can be made free by relocating its contents to another
 free block of the same size.  Whatever remains is added to
 buf_pool->zip_free[], or, once it has grown to slot BUF_BUDDY_SIZES, it is
 passed to buf_buddy_block_free().  Debug builds assert that the caller
 owns the buffer pool mutex and does not own buf_pool_zip_mutex. */
 UNIV_INTERN
 void
 buf_buddy_free_low(
 /*===============*/
 	void*	buf,	/* in: block to be freed, must not be
 			pointed to by the buffer pool */
 	ulint	i)	/* in: index of buf_pool->zip_free[] */
 {
 	buf_page_t*	bpage;
 	buf_page_t*	buddy;
 	ut_ad(buf_pool_mutex_own());
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 	ut_ad(i <= BUF_BUDDY_SIZES);
 	ut_ad(buf_buddy_stat[i].used > 0);
 	/* The usage counter is decremented once, for the slot the
 	block was originally freed from; recombination below does not
 	touch it again. */
 	buf_buddy_stat[i].used--;
 recombine:
 	/* Each pass through this label handles a block of
 	BUF_BUDDY_LOW << i bytes; i grows by one per successful merge. */
 	UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
 	/* Debug builds only: mark the block free before inspecting it. */
 	ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
 	if (i == BUF_BUDDY_SIZES) {
 		/* Largest slot reached: there is nothing left to
 		merge with. */
 		buf_buddy_block_free(buf);
 		return;
 	}
 	ut_ad(i < BUF_BUDDY_SIZES);
 	ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
 	ut_ad(!buf_pool_contains_zip(buf));
 	/* Try to combine adjacent blocks. */
 	buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
 #ifndef UNIV_DEBUG_VALGRIND
 	/* Valgrind would complain about accessing free memory. */
 	if (buddy->state != BUF_BLOCK_ZIP_FREE) {
 		goto buddy_nonfree;
 	}
 	/* The field buddy->state can only be trusted for free blocks.
 	If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
 	it is in the free list. */
 #endif /* !UNIV_DEBUG_VALGRIND */
 	/* Search the free list of this size for the buddy. */
 	for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
 		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
 		ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
 		if (bpage == buddy) {
 buddy_free:
 			/* The buddy is free: recombine */
 			buf_buddy_remove_from_free(bpage, i);
 buddy_free2:
 			/* This label is also entered directly when the
 			buddy has just been vacated by a relocation; in
 			that case it is not on the free list, so the
 			removal above is skipped. */
 			ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
 			ut_ad(!buf_pool_contains_zip(buddy));
 			/* Continue with the merged block, which is twice
 			as large and starts at the aligned-down address. */
 			i++;
 			buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
 			goto recombine;
 		}
 		/* The block being freed must not already be on the
 		free list. */
 		ut_a(bpage != buf);
 		{
 			/* Fetch the successor before bpage is marked as
 			freed memory again. */
 			buf_page_t*	next = UT_LIST_GET_NEXT(list, bpage);
 			UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
 			bpage = next;
 		}
 	}
 #ifndef UNIV_DEBUG_VALGRIND
 buddy_nonfree:
 	/* Valgrind would complain about accessing free memory. */
 	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]));
 #endif /* !UNIV_DEBUG_VALGRIND */
 	/* The buddy is not free. Is there a free block of this size? */
 	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
 	if (bpage) {
 		/* Remove the block from the free list, because a successful
 		buf_buddy_relocate() will overwrite bpage->list. */
 		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
 		buf_buddy_remove_from_free(bpage, i);
 		/* Try to relocate the buddy of buf to the free block. */
 		if (buf_buddy_relocate(buddy, bpage, i)) {
 			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
 			goto buddy_free2;
 		}
 		/* The relocation failed: bpage is still unused, so it
 		goes back on the free list. */
 		buf_buddy_add_to_free(bpage, i);
 		/* Try to relocate the buddy of the free block to buf. */
 		buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
 						    BUF_BUDDY_LOW << i);
 #if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
 		{
 			const buf_page_t* b;
 			/* The buddy must not be (completely) free, because
 			we always recombine adjacent free blocks.
 			(Parts of the buddy can be free in
 			buf_pool->zip_free[j] with j < i.)*/
 			for (b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
 			     b; b = UT_LIST_GET_NEXT(list, b)) {
 				ut_a(b != buddy);
 			}
 		}
 #endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
 		if (buf_buddy_relocate(buddy, buf, i)) {
 			/* The block being freed now holds the relocated
 			data.  Continue with the free block bpage and its
 			vacated buddy instead; bpage is on the free list
 			(re-added above), so buddy_free removes it. */
 			buf = bpage;
 			UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
 			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
 			goto buddy_free;
 		}
 	}
 	/* Free the block to the buddy list. */
 	bpage = buf;
 #ifdef UNIV_DEBUG
 	if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
 		/* This area has most likely been allocated for at
 		least one compressed-only block descriptor.  Check
 		that there are no live objects in the area.  This is
 		not a complete check: it may yield false positives as
 		well as false negatives.  Also, due to buddy blocks
 		being recombined, it is possible (although unlikely)
 		that this branch is never reached. */
 		char* c;
 # ifndef UNIV_DEBUG_VALGRIND
 		/* Valgrind would complain about accessing
 		uninitialized memory.  Besides, Valgrind performs a
 		more exhaustive check, at every memory access. */
 		const buf_page_t* b = buf;
 		const buf_page_t* const b_end = (buf_page_t*)
 			((char*) b + (BUF_BUDDY_LOW << i));
 		for (; b < b_end; b++) {
 			/* Avoid false positives (and cause false
 			negatives) by checking for b->space < 1000. */
 			if ((b->state == BUF_BLOCK_ZIP_PAGE
 			     || b->state == BUF_BLOCK_ZIP_DIRTY)
 			    && b->space > 0 && b->space < 1000) {
 				/* Only a diagnostic message is printed;
 				the block is freed regardless. */
 				fprintf(stderr,
 					"buddy dirty %p %u (%u,%u) %p,%lu\n",
 					(void*) b,
 					b->state, b->space, b->offset,
 					buf, i);
 			}
 		}
 # endif /* !UNIV_DEBUG_VALGRIND */
 		/* Scramble the block.  This should make any pointers
 		invalid and trigger a segmentation violation.  Because
 		the scrambling can be reversed, it may be possible to
 		track down the object pointing to the freed data by
 		dereferencing the unscrambled bpage->LRU or
 		bpage->list pointers. */
 		for (c = (char*) buf + (BUF_BUDDY_LOW << i);
 		     c-- > (char*) buf; ) {
 			*c = ~*c ^ i;
 		}
 	} else {
 		/* Fill large blocks with a constant pattern. */
 		memset(bpage, i, BUF_BUDDY_LOW << i);
 	}
 #endif /* UNIV_DEBUG */
 	/* Set the state after the debug scrambling/filling above, which
 	overwrites the whole block. */
 	bpage->state = BUF_BLOCK_ZIP_FREE;
 	buf_buddy_add_to_free(bpage, i);
 }
--- a/buf/buf0buf.c
+++ b/buf/buf0buf.c
--- a/buf/buf0flu.c
+++ b/buf/buf0flu.c
--- a/buf/buf0lru.c
+++ b/buf/buf0lru.c
--- a/buf/buf0rea.c
+++ b/buf/buf0rea.c
@@ -0,0 +1,793 @@
 /******************************************************
 The database buffer read
 (c) 1995 Innobase Oy
 Created 11/5/1995 Heikki Tuuri
 *******************************************************/
 #include "buf0rea.h"
 #include "fil0fil.h"
 #include "mtr0mtr.h"
 #include "buf0buf.h"
 #include "buf0flu.h"
 #include "buf0lru.h"
 #include "ibuf0ibuf.h"
 #include "log0recv.h"
 #include "trx0sys.h"
 #include "os0file.h"
 #include "srv0start.h"
 /* Counters defined outside this file.  In this file, srv_read_ahead_rnd
 is incremented once per random read-ahead batch, srv_read_ahead_seq once
 per linear read-ahead batch, and srv_buf_pool_reads is increased by the
 return value of the page read issued in buf_read_page(). */
 extern ulint srv_read_ahead_rnd;
 extern ulint srv_read_ahead_seq;
 extern ulint srv_buf_pool_reads;
 /* The size in blocks of the area where the random read-ahead algorithm counts
 the accessed pages when deciding whether to read-ahead */
 #define	BUF_READ_AHEAD_RANDOM_AREA	BUF_READ_AHEAD_AREA
 /* There must be at least this many pages in buf_pool in the area to start
 a random read-ahead.  NOTE: the expansion reads a variable named
 buf_read_ahead_random_area, which must be in scope wherever this macro
 is used. */
 #define BUF_READ_AHEAD_RANDOM_THRESHOLD	(5 + buf_read_ahead_random_area / 8)
 /* The linear read-ahead area size */
 #define	BUF_READ_AHEAD_LINEAR_AREA	BUF_READ_AHEAD_AREA
 /* The linear read-ahead threshold.  NOTE: this is deliberately left
 without parentheses.  It is meant to be written as the right-hand operand
 of a multiplication, x * LINEAR_AREA_THRESHOLD_COEF, which expands to
 x * 5 / 8 in integer arithmetic.  With parentheses, 5 / 8 would be
 evaluated first and yield 0. */
 #define LINEAR_AREA_THRESHOLD_COEF	5 / 8
 /* If the number of pending reads exceeds buf_pool->curr_size divided by
 the number below, then read-ahead is not done: this is to prevent flooding
 the buffer pool with i/o-fixed buffer blocks */
 #define BUF_READ_AHEAD_PEND_LIMIT	2
 /************************************************************************
 Low-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there, in which case does nothing.
 Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
 flag is cleared and the x-lock released by an i/o-handler thread.  For a
 synchronous read, the i/o completion routine is instead called from this
 function before it returns. */
 static
 ulint
 buf_read_page_low(
 /*==============*/
 			/* out: 1 if a read request was queued, 0 if the page
 			already resided in buf_pool, or if the page is in
 			the doublewrite buffer blocks in which case it is never
 			read into the pool, or if the tablespace does not
 			exist or is being dropped */
 	ulint*	err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
 			trying to read from a non-existent tablespace, or a
 			tablespace which is just now being dropped */
 	ibool	sync,	/* in: TRUE if synchronous aio is desired */
 	ulint	mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ...,
 			ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
 			at read-ahead functions) */
 	ulint	space,	/* in: space id */
 	ulint	zip_size,/* in: compressed page size, or 0 */
 	ibool	unzip,	/* in: TRUE=request uncompressed page */
 	ib_int64_t tablespace_version, /* in: if the space memory object has
 			this timestamp different from what we are giving here,
 			treat the tablespace as dropped; this is a timestamp we
 			use to stop dangling page reads from a tablespace
 			which we have DISCARDed + IMPORTed back */
 	ulint	offset)	/* in: page number */
 {
 	/* The wake-later bit travels with the i/o request, not with
 	the read mode, so it is split off here. */
 	const ulint	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
 	buf_page_t*	bpage;
 	void*		read_buf;
 	ulint		read_len;
 	*err = DB_SUCCESS;
 	mode &= ~OS_AIO_SIMULATED_WAKE_LATER;
 	if (trx_doublewrite && space == TRX_SYS_SPACE) {
 		/* Pages inside either doublewrite block are never
 		read into the pool. */
 		const ulint	dw1 = trx_doublewrite->block1;
 		const ulint	dw2 = trx_doublewrite->block2;
 		if ((offset >= dw1
 		     && offset < dw1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
 		    || (offset >= dw2
 			&& offset < dw2 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)) {
 			ut_print_timestamp(stderr);
 			fprintf(stderr,
 				"  InnoDB: Warning: trying to read"
 				" doublewrite buffer page %lu\n",
 				(ulong) offset);
 			return(0);
 		}
 	}
 	if (ibuf_bitmap_page(zip_size, offset)
 	    || trx_sys_hdr_page(space, offset)) {
 		/* Trx sys header is so low in the latching order that we play
 		safe and do not leave the i/o-completion to an asynchronous
 		i/o-thread. Ibuf bitmap pages must always be read with
 		synchronous i/o, to make sure they do not get involved in
 		thread deadlocks. */
 		sync = TRUE;
 	}
 	/* The following call will also check if the tablespace does not exist
 	or is being dropped; if we succeed in initing the page in the buffer
 	pool for read, then DISCARD cannot proceed until the read has
 	completed */
 	bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
 				       tablespace_version, offset);
 	if (bpage == NULL) {
 		/* No read is issued; *err is whatever
 		buf_page_init_for_read() left in it. */
 		return(0);
 	}
 #ifdef UNIV_DEBUG
 	if (buf_debug_prints) {
 		fprintf(stderr,
 			"Posting read request for page %lu, sync %lu\n",
 			(ulong) offset,
 			(ulong) sync);
 	}
 #endif
 	ut_ad(buf_page_in_file(bpage));
 	/* Choose the destination buffer and the length of the read: the
 	compressed frame for a compressed tablespace, otherwise the
 	uncompressed frame of the block. */
 	if (zip_size) {
 		read_len = zip_size;
 		read_buf = bpage->zip.data;
 	} else {
 		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
 		read_len = UNIV_PAGE_SIZE;
 		read_buf = ((buf_block_t*) bpage)->frame;
 	}
 	*err = fil_io(OS_FILE_READ | wake_later,
 		      sync, space, zip_size, offset, 0, read_len,
 		      read_buf, bpage);
 	ut_a(*err == DB_SUCCESS);
 	if (sync) {
 		/* The i/o is already completed when we arrive from
 		fil_read */
 		buf_page_io_complete(bpage);
 	}
 	return(1);
 }
 /************************************************************************
 Applies a random read-ahead in buf_pool if there are at least a threshold
 value of accessed pages from the random read-ahead area. Does not read any
 page, not even the one at the position (space, offset), if the read-ahead
 mechanism is not activated. NOTE 1: the calling thread may own latches on
 pages: to avoid deadlocks this function must be written such that it cannot
 end up waiting for these latches! NOTE 2: the calling thread must want
 access to the page given: this rule is set to prevent unintended read-aheads
 performed by ibuf routines, a situation which could result in a deadlock if
 the OS does not support asynchronous i/o. */
 static
 ulint
 buf_read_ahead_random(
 /*==================*/
 			/* out: number of page read requests issued; NOTE
 			that if we read ibuf pages, it may happen that
 			the page at the given page number does not get
 			read even if we return a value > 0! */
 	ulint	space,	/* in: space id */
 	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
 	ulint	offset)	/* in: page number of a page which the current thread
 			wants to access */
 {
 	ib_int64_t	tablespace_version;
 	/* This name is referenced by BUF_READ_AHEAD_RANDOM_THRESHOLD
 	and must not be changed. */
 	ulint		buf_read_ahead_random_area;
 	ulint		recent_limit;
 	ulint		n_recent	= 0;
 	ulint		n_queued	= 0;
 	ulint		ibuf_mode;
 	ulint		low;
 	ulint		high;
 	ulint		page_no;
 	ulint		err;
 	/* No read-ahead during early startup, to avoid thread deadlocks;
 	none for an ibuf bitmap page or the trx sys header either, as that
 	could break the ibuf page access order. */
 	if (srv_startup_is_before_trx_rollback_phase
 	    || ibuf_bitmap_page(zip_size, offset)
 	    || trx_sys_hdr_page(space, offset)) {
 		return(0);
 	}
 	/* Remember the tablespace version before we ask the tablespace size
 	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
 	do not try to read outside the bounds of the tablespace! */
 	tablespace_version = fil_space_get_version(space);
 	buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
 	/* [low, high) is the area-aligned range that contains offset,
 	cut off at the end of the tablespace. */
 	low  = offset - offset % buf_read_ahead_random_area;
 	high = low + buf_read_ahead_random_area;
 	if (high > fil_space_get_size(space)) {
 		high = fil_space_get_size(space);
 	}
 	/* Get the minimum LRU_position field value for an initial segment
 	of the LRU list, to determine which blocks have recently been added
 	to the start of the list. */
 	recent_limit = buf_LRU_get_recent_limit();
 	buf_pool_mutex_enter();
 	if (buf_pool->n_pend_reads
 	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
 		/* Too many reads are pending already. */
 		buf_pool_mutex_exit();
 		return(0);
 	}
 	/* Count how many blocks in the area have been recently accessed,
 	that is, reside near the start of the LRU list.  Counting stops
 	as soon as the threshold is reached. */
 	for (page_no = low; page_no < high; page_no++) {
 		const buf_page_t*	bpage
 			= buf_page_hash_get(space, page_no);
 		if (bpage == NULL
 		    || !buf_page_is_accessed(bpage)
 		    || buf_page_get_LRU_position(bpage) <= recent_limit) {
 			continue;
 		}
 		n_recent++;
 		if (n_recent >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
 			break;
 		}
 	}
 	buf_pool_mutex_exit();
 	if (n_recent < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
 		/* Do nothing */
 		return(0);
 	}
 	/* Read all the suitable blocks within the area */
 	ibuf_mode = ibuf_inside()
 		? BUF_READ_IBUF_PAGES_ONLY
 		: BUF_READ_ANY_PAGE;
 	for (page_no = low; page_no < high; page_no++) {
 		if (ibuf_bitmap_page(zip_size, page_no)) {
 			continue;
 		}
 		/* It is only sensible to do read-ahead in the non-sync aio
 		mode: hence FALSE as the sync parameter */
 		n_queued += buf_read_page_low(
 			&err, FALSE,
 			ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
 			space, zip_size, FALSE,
 			tablespace_version, page_no);
 		if (err == DB_TABLESPACE_DELETED) {
 			ut_print_timestamp(stderr);
 			fprintf(stderr,
 				"  InnoDB: Warning: in random"
 				" readahead trying to access\n"
 				"InnoDB: tablespace %lu page %lu,\n"
 				"InnoDB: but the tablespace does not"
 				" exist or is just being dropped.\n",
 				(ulong) space, (ulong) page_no);
 		}
 	}
 	/* In simulated aio we wake the aio handler threads only after
 	queuing all aio requests, in native aio the following call does
 	nothing: */
 	os_aio_simulated_wake_handler_threads();
 #ifdef UNIV_DEBUG
 	if (buf_debug_prints && (n_queued > 0)) {
 		fprintf(stderr,
 			"Random read-ahead space %lu offset %lu pages %lu\n",
 			(ulong) space, (ulong) offset,
 			(ulong) n_queued);
 	}
 #endif /* UNIV_DEBUG */
 	++srv_read_ahead_rnd;
 	return(n_queued);
 }
 /************************************************************************
 High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread. Does a random read-ahead if it seems
 sensible. */
 UNIV_INTERN
 ulint
 buf_read_page(
 /*==========*/
 			/* out: number of page read requests issued: this can
 			be > 1 if read-ahead occurred */
 	ulint	space,	/* in: space id */
 	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
 	ulint	offset)	/* in: page number */
 {
 	ulint		err;
 	ulint		n_ahead;
 	ulint		n_page;
 	/* The version is sampled before the read-ahead attempt and
 	passed to the read of the page itself. */
 	const ib_int64_t	tablespace_version
 		= fil_space_get_version(space);
 	n_ahead = buf_read_ahead_random(space, zip_size, offset);
 	/* We do the i/o in the synchronous aio mode to save thread
 	switches: hence TRUE */
 	n_page = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
 				   zip_size, FALSE,
 				   tablespace_version, offset);
 	/* Only the read of the requested page is added to this counter;
 	the read-ahead requests are not. */
 	srv_buf_pool_reads += n_page;
 	if (err == DB_TABLESPACE_DELETED) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
 			"  InnoDB: Error: trying to access"
 			" tablespace %lu page no. %lu,\n"
 			"InnoDB: but the tablespace does not exist"
 			" or is just being dropped.\n",
 			(ulong) space, (ulong) offset);
 	}
 	/* Flush pages from the end of the LRU list if necessary */
 	buf_flush_free_margin();
 	/* Increment number of I/O operations used for LRU policy. */
 	buf_LRU_stat_inc_io();
 	return(n_ahead + n_page);
 }
 /************************************************************************
 Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
 Does not read any page if the read-ahead mechanism is not activated. Note
 that the algorithm looks at the 'natural' adjacent successor and
 predecessor of the page, which on the leaf level of a B-tree are the next
 and previous page in the chain of leaves. To know these, the page specified
 in (space, offset) must already be present in the buf_pool. Thus, the
 natural way to use this function is to call it when a page in the buf_pool
 is accessed the first time, calling this function just after it has been
 bufferfixed.
 NOTE 1: as this function looks at the natural predecessor and successor
 fields on the page, what happens, if these are not initialized to any
 sensible value? No problem, before applying read-ahead we check that the
 area to read is within the span of the space, if not, read-ahead is not
 applied. An uninitialized value may result in a useless read operation, but
 only very improbably.
 NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
 function must be written such that it cannot end up waiting for these
 latches!
 NOTE 3: the calling thread must want access to the page given: this rule is
 set to prevent unintended read-aheads performed by ibuf routines, a situation
 which could result in a deadlock if the OS does not support asynchronous io. */
 UNIV_INTERN
 ulint
 buf_read_ahead_linear(
 /*==================*/
 			/* out: number of page read requests issued */
 	ulint	space,	/* in: space id */
 	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
 	ulint	offset)	/* in: page number of a page; NOTE: the current thread
 			must want access to this page (see NOTE 3 above) */
 {
 	ib_int64_t	tablespace_version;
 	buf_page_t*	bpage;
 	buf_frame_t*	frame;
 	buf_page_t*	pred_bpage	= NULL;
 	ulint		pred_offset;
 	ulint		succ_offset;
 	ulint		count;
 	int		asc_or_desc;
 	ulint		new_offset;
 	ulint		fail_count;
 	ulint		ibuf_mode;
 	ulint		low, high;
 	ulint		err;
 	ulint		i;
 	const ulint	buf_read_ahead_linear_area
 		= BUF_READ_AHEAD_LINEAR_AREA;
 	if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
 		/* No read-ahead to avoid thread deadlocks */
 		return(0);
 	}
 	/* [low, high) is the area-aligned range that contains offset */
 	low  = (offset / buf_read_ahead_linear_area)
 		* buf_read_ahead_linear_area;
 	high = (offset / buf_read_ahead_linear_area + 1)
 		* buf_read_ahead_linear_area;
 	if ((offset != low) && (offset != high - 1)) {
 		/* This is not a border page of the area: return */
 		return(0);
 	}
 	if (ibuf_bitmap_page(zip_size, offset)
 	    || trx_sys_hdr_page(space, offset)) {
 		/* If it is an ibuf bitmap page or trx sys hdr, we do
 		no read-ahead, as that could break the ibuf page access
 		order */
 		return(0);
 	}
 	/* Remember the tablespace version before we ask the tablespace size
 	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
 	do not try to read outside the bounds of the tablespace! */
 	tablespace_version = fil_space_get_version(space);
 	buf_pool_mutex_enter();
 	if (high > fil_space_get_size(space)) {
 		buf_pool_mutex_exit();
 		/* The area is not whole, return */
 		return(0);
 	}
 	if (buf_pool->n_pend_reads
 	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
 		/* Too many reads are pending already */
 		buf_pool_mutex_exit();
 		return(0);
 	}
 	/* Check that almost all pages in the area have been accessed; if
 	offset == low, the accesses must be in a descending order, otherwise,
 	in an ascending order. */
 	asc_or_desc = 1;
 	if (offset == low) {
 		asc_or_desc = -1;
 	}
 	fail_count = 0;
 	for (i = low; i < high; i++) {
 		bpage = buf_page_hash_get(space, i);
 		if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
 			/* Not accessed */
 			fail_count++;
 			continue;
 		}
 		if (pred_bpage
 		    && (ut_ulint_cmp(
 				buf_page_get_LRU_position(bpage),
 				buf_page_get_LRU_position(pred_bpage))
 			!= asc_or_desc)) {
 			/* Accesses not in the right order */
 			fail_count++;
 		}
 		/* Remember every accessed page as the predecessor of the
 		next one.  This used to be done only inside the branch
 		above, which itself requires pred_bpage != NULL; as
 		pred_bpage starts out as NULL, it was never set and the
 		order check never ran. */
 		pred_bpage = bpage;
 	}
 	/* LINEAR_AREA_THRESHOLD_COEF expands to 5 / 8 without parentheses,
 	so the right-hand side is (area * 5) / 8. */
 	if (fail_count > buf_read_ahead_linear_area
 	    * LINEAR_AREA_THRESHOLD_COEF) {
 		/* Too many failures: return */
 		buf_pool_mutex_exit();
 		return(0);
 	}
 	/* If we got this far, we know that enough pages in the area have
 	been accessed in the right order: linear read-ahead can be sensible */
 	bpage = buf_page_hash_get(space, offset);
 	if (bpage == NULL) {
 		buf_pool_mutex_exit();
 		return(0);
 	}
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_ZIP_PAGE:
 		frame = bpage->zip.data;
 		break;
 	case BUF_BLOCK_FILE_PAGE:
 		frame = ((buf_block_t*) bpage)->frame;
 		break;
 	default:
 		ut_error;
 		break;
 	}
 	/* Read the natural predecessor and successor page addresses from
 	the page; NOTE that because the calling thread may have an x-latch
 	on the page, we do not acquire an s-latch on the page, this is to
 	prevent deadlocks. Even if we read values which are nonsense, the
 	algorithm will work. */
 	pred_offset = fil_page_get_prev(frame);
 	succ_offset = fil_page_get_next(frame);
 	buf_pool_mutex_exit();
 	if ((offset == low) && (succ_offset == offset + 1)) {
 		/* This is ok, we can continue */
 		new_offset = pred_offset;
 	} else if ((offset == high - 1) && (pred_offset == offset - 1)) {
 		/* This is ok, we can continue */
 		new_offset = succ_offset;
 	} else {
 		/* Successor or predecessor not in the right order */
 		return(0);
 	}
 	/* From here on, [low, high) is the area that contains new_offset,
 	that is, the area to be read ahead */
 	low  = (new_offset / buf_read_ahead_linear_area)
 		* buf_read_ahead_linear_area;
 	high = (new_offset / buf_read_ahead_linear_area + 1)
 		* buf_read_ahead_linear_area;
 	if ((new_offset != low) && (new_offset != high - 1)) {
 		/* This is not a border page of the area: return */
 		return(0);
 	}
 	if (high > fil_space_get_size(space)) {
 		/* The area is not whole, return */
 		return(0);
 	}
 	/* If we got this far, read-ahead can be sensible: do it */
 	if (ibuf_inside()) {
 		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
 	} else {
 		ibuf_mode = BUF_READ_ANY_PAGE;
 	}
 	count = 0;
 	/* Since Windows XP seems to schedule the i/o handler thread
 	very eagerly, and consequently it does not wait for the
 	full read batch to be posted, we use special heuristics here */
 	os_aio_simulated_put_read_threads_to_sleep();
 	for (i = low; i < high; i++) {
 		/* It is only sensible to do read-ahead in the non-sync
 		aio mode: hence FALSE as the sync parameter */
 		if (!ibuf_bitmap_page(zip_size, i)) {
 			count += buf_read_page_low(
 				&err, FALSE,
 				ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
 				space, zip_size, FALSE, tablespace_version, i);
 			if (err == DB_TABLESPACE_DELETED) {
 				ut_print_timestamp(stderr);
 				fprintf(stderr,
 					"  InnoDB: Warning: in"
 					" linear readahead trying to access\n"
 					"InnoDB: tablespace %lu page %lu,\n"
 					"InnoDB: but the tablespace does not"
 					" exist or is just being dropped.\n",
 					(ulong) space, (ulong) i);
 			}
 		}
 	}
 	/* In simulated aio we wake the aio handler threads only after
 	queuing all aio requests, in native aio the following call does
 	nothing: */
 	os_aio_simulated_wake_handler_threads();
 	/* Flush pages from the end of the LRU list if necessary */
 	buf_flush_free_margin();
 #ifdef UNIV_DEBUG
 	if (buf_debug_prints && (count > 0)) {
 		fprintf(stderr,
 			"LINEAR read-ahead space %lu offset %lu pages %lu\n",
 			(ulong) space, (ulong) offset, (ulong) count);
 	}
 #endif /* UNIV_DEBUG */
 	/* Read ahead is considered one I/O operation for the purpose of
 	LRU policy decision. */
 	buf_LRU_stat_inc_io();
 	++srv_read_ahead_seq;
 	return(count);
 }
 /************************************************************************
 Issues read requests for pages which the ibuf module wants to read in, in
 order to contract the insert buffer tree. Technically, this function is like
 a read-ahead function.  For a page whose tablespace has been deleted or is
 being deleted, no read is left pending; the insert buffer entries of the
 page are passed to ibuf_merge_or_delete_for_page() instead. */
 UNIV_INTERN
 void
 buf_read_ibuf_merge_pages(
 /*======================*/
 	ibool		sync,		/* in: TRUE if the caller
 					wants this function to wait
 					for the highest address page
 					to get read in, before this
 					function returns */
 	const ulint*	space_ids,	/* in: array of space ids */
 	const ib_int64_t* space_versions,/* in: the spaces must have
 					this version number
 					(timestamp), otherwise we
 					discard the read; we use this
 					to cancel reads if DISCARD +
 					IMPORT may have changed the
 					tablespace size */
 	const ulint*	page_nos,	/* in: array of page numbers
 					to read, with the highest page
 					number the last in the
 					array */
 	ulint		n_stored)	/* in: number of elements
 					in the arrays */
 {
 	ulint	idx;
 	ut_ad(!ibuf_inside());
 #ifdef UNIV_IBUF_DEBUG
 	ut_a(n_stored < UNIV_PAGE_SIZE);
 #endif
 	/* Wait, polling every 0.5 seconds, until the number of pending
 	reads has dropped to the read-ahead limit. */
 	for (;;) {
 		if (buf_pool->n_pend_reads
 		    <= buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
 			break;
 		}
 		os_thread_sleep(500000);
 	}
 	for (idx = 0; idx < n_stored; idx++) {
 		const ulint	zip_size
 			= fil_space_get_zip_size(space_ids[idx]);
 		/* Assume the tablespace is gone unless a read is
 		attempted; buf_read_page_low() resets err to DB_SUCCESS
 		before doing anything else. */
 		ulint		err = DB_TABLESPACE_DELETED;
 		if (!UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
 			/* Only the last page of the batch is read
 			synchronously, and only if the caller asked
 			for that. */
 			buf_read_page_low(&err,
 					  sync && (idx + 1 == n_stored),
 					  BUF_READ_ANY_PAGE, space_ids[idx],
 					  zip_size, TRUE,
 					  space_versions[idx],
 					  page_nos[idx]);
 		}
 		if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
 			/* We have deleted or are deleting the single-table
 			tablespace: remove the entries for that page */
 			ibuf_merge_or_delete_for_page(NULL, space_ids[idx],
 						      page_nos[idx],
 						      zip_size, FALSE);
 		}
 	}
 	os_aio_simulated_wake_handler_threads();
 	/* Flush pages from the end of the LRU list if necessary */
 	buf_flush_free_margin();
 #ifdef UNIV_DEBUG
 	if (buf_debug_prints) {
 		fprintf(stderr,
 			"Ibuf merge read-ahead space %lu pages %lu\n",
 			(ulong) space_ids[0], (ulong) n_stored);
 	}
 #endif /* UNIV_DEBUG */
 }
 /************************************************************************
 Issues read requests for pages which recovery wants to read in.  Before
 each request, the function waits until the number of pending reads is
 below recv_n_pool_free_frames / 2.  If the tablespace cannot be found,
 the function returns without issuing any read. */
 UNIV_INTERN
 void
 buf_read_recv_pages(
 /*================*/
 	ibool		sync,		/* in: TRUE if the caller
 					wants this function to wait
 					for the highest address page
 					to get read in, before this
 					function returns */
 	ulint		space,		/* in: space id */
 	ulint		zip_size,	/* in: compressed page size in
 					bytes, or 0; NOTE: the value
 					passed in is not used, it is
 					replaced by the result of
 					fil_space_get_zip_size() */
 	const ulint*	page_nos,	/* in: array of page numbers
 					to read, with the highest page
 					number the last in the
 					array */
 	ulint		n_stored)	/* in: number of page numbers
 					in the array */
 {
 	ib_int64_t	tablespace_version;
 	ulint		count;
 	ulint		err;
 	ulint		i;
 	zip_size = fil_space_get_zip_size(space);
 	if (zip_size == ULINT_UNDEFINED) {
 		/* The tablespace is not known (buf_read_ibuf_merge_pages()
 		treats this value as "tablespace deleted").  Do not pass
 		the sentinel on as if it were a page size: there is
 		nothing to read. */
 		return;
 	}
 	tablespace_version = fil_space_get_version(space);
 	for (i = 0; i < n_stored; i++) {
 		count = 0;
 		os_aio_print_debug = FALSE;
 		/* Throttle: wait in steps of 0.5 seconds until enough of
 		the pending reads have completed. */
 		while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
 			os_aio_simulated_wake_handler_threads();
 			os_thread_sleep(500000);
 			count++;
 			if (count > 100) {
 				/* More than 100 sleeps of 0.5 seconds:
 				report it, and keep reporting on every
 				further round of the loop. */
 				fprintf(stderr,
 					"InnoDB: Error: InnoDB has waited for"
 					" 50 seconds for pending\n"
 					"InnoDB: reads to the buffer pool to"
 					" be finished.\n"
 					"InnoDB: Number of pending reads %lu,"
 					" pending pread calls %lu\n",
 					(ulong) buf_pool->n_pend_reads,
 					(ulong)os_file_n_pending_preads);
 				os_aio_print_debug = TRUE;
 			}
 		}
 		os_aio_print_debug = FALSE;
 		/* Only the last page is read synchronously, and only if
 		the caller asked for it; all other reads are queued and
 		the i/o handler threads are woken up after the loop. */
 		if ((i + 1 == n_stored) && sync) {
 			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
 					  zip_size, TRUE, tablespace_version,
 					  page_nos[i]);
 		} else {
 			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
 					  | OS_AIO_SIMULATED_WAKE_LATER,
 					  space, zip_size, TRUE,
 					  tablespace_version, page_nos[i]);
 		}
 	}
 	os_aio_simulated_wake_handler_threads();
 	/* Flush pages from the end of the LRU list if necessary */
 	buf_flush_free_margin();
 #ifdef UNIV_DEBUG
 	if (buf_debug_prints) {
 		fprintf(stderr,
 			"Recovery applies read-ahead pages %lu\n",
 			(ulong) n_stored);
 	}
 #endif /* UNIV_DEBUG */
 }
--- a/9
+++ b/9
@@ -0,0 +1,9 @@
 #! /bin/sh
 # Build wrapper: sources SETUP.sh and FINISH.sh from the directory this
 # script lives in, setting the compiler and configure options in between.
 # "$0" is quoted so that a script path containing spaces still resolves.
 path=`dirname "$0"`
 . "$path/SETUP.sh"
 # Presumably consumed by FINISH.sh -- verify there.
 extra_flags="$pentium_cflags $fast_cflags -g"
 extra_configs="$pentium_configs $static_link --with-plugins=innobase"
 . "$path/FINISH.sh"
--- a/9
+++ b/9
@@ -0,0 +1,9 @@
 #! /bin/sh
 # Debug build wrapper: sources SETUP.sh and FINISH.sh from the directory
 # this script lives in, setting the compiler and configure options in
 # between.  "$0" and "$@" are quoted so that a script path or an argument
 # containing spaces is passed on unchanged.
 path=`dirname "$0"`
 . "$path/SETUP.sh" "$@" --with-debug=full
 # Presumably consumed by FINISH.sh -- verify there.
 extra_flags="$pentium_cflags $debug_cflags"
 extra_configs="$pentium_configs $debug_configs --with-plugins=innobase"
 . "$path/FINISH.sh"
--- a/data/data0data.c
+++ b/data/data0data.c
@@ -0,0 +1,737 @@
 /************************************************************************
 SQL data field and tuple
 (c) 1994-1996 Innobase Oy
 Created 5/30/1994 Heikki Tuuri
 *************************************************************************/
 #include "data0data.h"
 #ifdef UNIV_NONINL
 #include "data0data.ic"
 #endif
 #include "rem0rec.h"
 #include "rem0cmp.h"
 #include "page0page.h"
 #include "page0zip.h"
 #include "dict0dict.h"
 #include "btr0cur.h"
 #include <ctype.h>
 #ifdef UNIV_DEBUG
 /* data pointers of tuple fields are initialized to point here
 for error checking */
 UNIV_INTERN byte	data_error;
 /* this is used to fool the compiler in dtuple_validate */
 UNIV_INTERN ulint	data_dummy;
 #endif /* UNIV_DEBUG */
 /*************************************************************************
 Compares the length and the raw bytes of a data field against a given
 length and buffer. */
 UNIV_INTERN
 ibool
 dfield_data_is_binary_equal(
 /*========================*/
 				/* out: TRUE if both the length and
 				the content match */
 	const dfield_t*	field,	/* in: field */
 	ulint		len,	/* in: data length or UNIV_SQL_NULL */
 	const byte*	data)	/* in: data */
 {
 	ibool	is_equal;
 	if (dfield_get_len(field) != len) {
 		/* Different lengths can never match */
 		is_equal = FALSE;
 	} else if (len == UNIV_SQL_NULL) {
 		/* Both lengths are UNIV_SQL_NULL: no bytes to compare */
 		is_equal = TRUE;
 	} else if (memcmp(dfield_get_data(field), data, len) == 0) {
 		is_equal = TRUE;
 	} else {
 		is_equal = FALSE;
 	}
 	return(is_equal);
 }
 /****************************************************************
 Orders two data tuples. Tuples with a different number of fields are
 ordered by field count alone; otherwise the fields are compared pairwise
 with cmp_dfield_dfield() and the first nonzero result decides. */
 UNIV_INTERN
 int
 dtuple_coll_cmp(
 /*============*/
 				/* out: 1, 0 , -1 if tuple1 is greater, equal,
 				less, respectively, than tuple2 */
 	const dtuple_t*	tuple1,	/* in: tuple 1 */
 	const dtuple_t*	tuple2)	/* in: tuple 2 */
 {
 	ulint	n_fields1;
 	ulint	n_fields2;
 	ulint	pos;
 	int	result	= 0;
 	ut_ad(tuple1 && tuple2);
 	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
 	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
 	ut_ad(dtuple_check_typed(tuple1));
 	ut_ad(dtuple_check_typed(tuple2));
 	n_fields1 = dtuple_get_n_fields(tuple1);
 	n_fields2 = dtuple_get_n_fields(tuple2);
 	if (n_fields1 < n_fields2) {
 		return(-1);
 	}
 	if (n_fields1 > n_fields2) {
 		return(1);
 	}
 	/* Same field count: stop at the first field that differs */
 	for (pos = 0; result == 0 && pos < n_fields1; pos++) {
 		result = cmp_dfield_dfield(dtuple_get_nth_field(tuple1, pos),
 					   dtuple_get_nth_field(tuple2, pos));
 	}
 	return(result);
 }
 /*************************************************************************
 Overrides the number of fields of a tuple. The field count is normally
 fixed by dtuple_create; this lets the caller lower it afterwards. The
 number of fields used in comparisons is set to the same value. */
 UNIV_INTERN
 void
 dtuple_set_n_fields(
 /*================*/
 	dtuple_t*	tuple,		/* in: tuple */
 	ulint		n_fields)	/* in: number of fields */
 {
 	ut_ad(tuple);
 	/* Keep the comparison field count in step with the total count */
 	tuple->n_fields_cmp = n_fields;
 	tuple->n_fields = n_fields;
 }
 /**************************************************************
 Checks that a data field is typed. */
 static
 ibool
 dfield_check_typed_no_assert(
 /*=========================*/
 				/* out: TRUE if ok */
 	const dfield_t*	field)	/* in: data field */
 {
 	if (dfield_get_type(field)->mtype > DATA_MYSQL
 	    || dfield_get_type(field)->mtype < DATA_VARCHAR) {
 		fprintf(stderr,
 			"InnoDB: Error: data field type %lu, len %lu\n",
 			(ulong) dfield_get_type(field)->mtype,
 			(ulong) dfield_get_len(field));
 		return(FALSE);
 	}
 	return(TRUE);
 }
 /**************************************************************
 Checks that a tuple has at most REC_MAX_N_FIELDS fields and that each
 field passes dfield_check_typed_no_assert(). On the first failure the
 tuple contents are dumped to stderr; no assertion is raised. */
 UNIV_INTERN
 ibool
 dtuple_check_typed_no_assert(
 /*=========================*/
 				/* out: TRUE if ok */
 	const dtuple_t*	tuple)	/* in: tuple */
 {
 	ulint	n_fields	= dtuple_get_n_fields(tuple);
 	ulint	i;
 	ibool	ok		= TRUE;
 	if (n_fields > REC_MAX_N_FIELDS) {
 		fprintf(stderr,
 			"InnoDB: Error: index entry has %lu fields\n",
 			(ulong) n_fields);
 		ok = FALSE;
 	}
 	/* The per-field checks are skipped when the count is already bad */
 	for (i = 0; ok && i < n_fields; i++) {
 		ok = dfield_check_typed_no_assert(
 			dtuple_get_nth_field(tuple, i));
 	}
 	if (!ok) {
 		fputs("InnoDB: Tuple contents: ", stderr);
 		dtuple_print(stderr, tuple);
 		putc('\n', stderr);
 	}
 	return(ok);
 }
 /**************************************************************
 Checks that the main type code of a data field lies within
 [DATA_VARCHAR, DATA_MYSQL]. If it does not, a diagnostic is written to
 stderr and ut_error is raised. */
 UNIV_INTERN
 ibool
 dfield_check_typed(
 /*===============*/
 				/* out: TRUE if ok */
 	const dfield_t*	field)	/* in: data field */
 {
 	ulint	mtype	= dfield_get_type(field)->mtype;
 	if (mtype < DATA_VARCHAR || mtype > DATA_MYSQL) {
 		fprintf(stderr,
 			"InnoDB: Error: data field type %lu, len %lu\n",
 			(ulong) mtype,
 			(ulong) dfield_get_len(field));
 		ut_error;
 	}
 	return(TRUE);
 }
 /**************************************************************
 Runs dfield_check_typed() on every field of a tuple and asserts that
 each call succeeds. */
 UNIV_INTERN
 ibool
 dtuple_check_typed(
 /*===============*/
 				/* out: TRUE if ok */
 	const dtuple_t*	tuple)	/* in: tuple */
 {
 	ulint	i	= 0;
 	while (i < dtuple_get_n_fields(tuple)) {
 		const dfield_t*	field	= dtuple_get_nth_field(tuple, i);
 		ut_a(dfield_check_typed(field));
 		i++;
 	}
 	return(TRUE);
 }
 #ifdef UNIV_DEBUG
 /**************************************************************
 Debug-only consistency check of a complete tuple, i.e., one where all
 fields have been set. Every byte of every non-NULL field is read, and
 the field types are verified with dtuple_check_typed(). */
 UNIV_INTERN
 ibool
 dtuple_validate(
 /*============*/
 				/* out: TRUE if ok */
 	const dtuple_t*	tuple)	/* in: tuple */
 {
 	ulint	n_fields;
 	ulint	i;
 	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
 	n_fields = dtuple_get_n_fields(tuple);
 	/* Touch all the data of each field so that a bad pointer
 	traps here */
 	for (i = 0; i < n_fields; i++) {
 		const dfield_t*	field	= dtuple_get_nth_field(tuple, i);
 		ulint		len	= dfield_get_len(field);
 		const byte*	ptr;
 		const byte*	end;
 		if (dfield_is_null(field)) {
 			continue;
 		}
 		ptr = dfield_get_data(field);
 		UNIV_MEM_ASSERT_RW(ptr, len);
 		end = ptr + len;
 		while (ptr < end) {
 			/* Accumulating into a global keeps the compiler
 			from optimizing the reads away */
 			data_dummy += *ptr;
 			ptr++;
 		}
 	}
 	ut_a(dtuple_check_typed(tuple));
 	return(TRUE);
 }
 #endif /* UNIV_DEBUG */
 /*****************************************************************
 Pretty prints a dfield value to stderr according to its data type.
 Only DATA_CHAR, DATA_VARCHAR and 4-byte DATA_INT values are supported;
 any other main type raises ut_error. */
 UNIV_INTERN
 void
 dfield_print(
 /*=========*/
 	const dfield_t*	dfield)	/* in: dfield */
 {
 	ulint		len	= dfield_get_len(dfield);
 	const byte*	data	= dfield_get_data(dfield);
 	const byte*	end;
 	if (dfield_is_null(dfield)) {
 		fputs("NULL", stderr);
 		return;
 	}
 	switch (dtype_get_mtype(dfield_get_type(dfield))) {
 	case DATA_CHAR:
 	case DATA_VARCHAR:
 		end = data + len;
 		while (data < end) {
 			int	c = *data++;
 			/* Non-printable bytes are shown as blanks */
 			if (!isprint(c)) {
 				c = ' ';
 			}
 			putc(c, stderr);
 		}
 		if (dfield_is_ext(dfield)) {
 			fputs("(external)", stderr);
 		}
 		break;
 	case DATA_INT:
 		ut_a(len == 4); /* only works for 32-bit integers */
 		fprintf(stderr, "%d", (int)mach_read_from_4(data));
 		break;
 	default:
 		ut_error;
 	}
 }
 /*****************************************************************
 Pretty prints a dfield value according to its data type. Also the hex string
 is printed if a string contains non-printable characters. */
 UNIV_INTERN
 void
 dfield_print_also_hex(
 /*==================*/
 	const dfield_t*	dfield)	/* in: dfield */
 {
 	const byte*	data;
 	ulint		len;
 	ulint		prtype;
 	ulint		i;
 	ibool		print_also_hex;
 	len = dfield_get_len(dfield);
 	data = dfield_get_data(dfield);
 	if (dfield_is_null(dfield)) {
 		fputs("NULL", stderr);
 		return;
 	}
 	prtype = dtype_get_prtype(dfield_get_type(dfield));
 	switch (dtype_get_mtype(dfield_get_type(dfield))) {
 		dulint	id;
 	case DATA_INT:
 		switch (len) {
 			ulint	val;
 		case 1:
 			val = mach_read_from_1(data);
 			if (!(prtype & DATA_UNSIGNED)) {
 				val &= ~0x80;
 				fprintf(stderr, "%ld", (long) val);
 			} else {
 				fprintf(stderr, "%lu", (ulong) val);
 			}
 			break;
 		case 2:
 			val = mach_read_from_2(data);
 			if (!(prtype & DATA_UNSIGNED)) {
 				val &= ~0x8000;
 				fprintf(stderr, "%ld", (long) val);
 			} else {
 				fprintf(stderr, "%lu", (ulong) val);
 			}
 			break;
 		case 3:
 			val = mach_read_from_3(data);
 			if (!(prtype & DATA_UNSIGNED)) {
 				val &= ~0x800000;
 				fprintf(stderr, "%ld", (long) val);
 			} else {
 				fprintf(stderr, "%lu", (ulong) val);
 			}
 			break;
 		case 4:
 			val = mach_read_from_4(data);
 			if (!(prtype & DATA_UNSIGNED)) {
 				val &= ~0x80000000;
 				fprintf(stderr, "%ld", (long) val);
 			} else {
 				fprintf(stderr, "%lu", (ulong) val);
 			}
 			break;
 		case 6:
 			id = mach_read_from_6(data);
 			fprintf(stderr, "{%lu %lu}",
 				ut_dulint_get_high(id),
 				ut_dulint_get_low(id));
 			break;
 		case 7:
 			id = mach_read_from_7(data);
 			fprintf(stderr, "{%lu %lu}",
 				ut_dulint_get_high(id),
 				ut_dulint_get_low(id));
 			break;
 		case 8:
 			id = mach_read_from_8(data);
 			fprintf(stderr, "{%lu %lu}",
 				ut_dulint_get_high(id),
 				ut_dulint_get_low(id));
 			break;
 		default:
 			goto print_hex;
 		}
 		break;
 	case DATA_SYS:
 		switch (prtype & DATA_SYS_PRTYPE_MASK) {
 		case DATA_TRX_ID:
 			id = mach_read_from_6(data);
 			fprintf(stderr, "trx_id " TRX_ID_FMT,
 				TRX_ID_PREP_PRINTF(id));
 			break;
 		case DATA_ROLL_PTR:
 			id = mach_read_from_7(data);
 			fprintf(stderr, "roll_ptr {%lu %lu}",
 				ut_dulint_get_high(id), ut_dulint_get_low(id));
 			break;
 		case DATA_ROW_ID:
 			id = mach_read_from_6(data);
 			fprintf(stderr, "row_id {%lu %lu}",
 				ut_dulint_get_high(id), ut_dulint_get_low(id));
 			break;
 		default:
 			id = mach_dulint_read_compressed(data);
 			fprintf(stderr, "mix_id {%lu %lu}",
 				ut_dulint_get_high(id), ut_dulint_get_low(id));
 		}
 		break;
 	case DATA_CHAR:
 	case DATA_VARCHAR:
 		print_also_hex = FALSE;
 		for (i = 0; i < len; i++) {
 			int c = *data++;
 			if (!isprint(c)) {
 				print_also_hex = TRUE;
 				fprintf(stderr, "\\x%02x", (unsigned char) c);
 			} else {
 				putc(c, stderr);
 			}
 		}
 		if (dfield_is_ext(dfield)) {
 			fputs("(external)", stderr);
 		}
 		if (!print_also_hex) {
 			break;
 		}
 		data = dfield_get_data(dfield);
 		/* fall through */
 	case DATA_BINARY:
 	default:
 print_hex:
 		fputs(" Hex: ",stderr);
 		for (i = 0; i < len; i++) {
 			fprintf(stderr, "%02lx", (ulint) *data++);
 		}
 		if (dfield_is_ext(dfield)) {
 			fputs("(external)", stderr);
 		}
 	}
 }
 /*****************************************************************
 Prints the raw bytes of a dfield with ut_print_buf. At most the first
 1000 bytes are printed; when the value is longer, its total length
 (and whether it is externally stored) is appended. */
 static
 void
 dfield_print_raw(
 /*=============*/
 	FILE*		f,		/* in: output stream */
 	const dfield_t*	dfield)		/* in: dfield */
 {
 	ulint	len;
 	ulint	print_len;
 	if (dfield_is_null(dfield)) {
 		fputs(" SQL NULL", f);
 		return;
 	}
 	len = dfield_get_len(dfield);
 	print_len = (len > 1000) ? 1000 : len;
 	ut_print_buf(f, dfield_get_data(dfield), print_len);
 	if (print_len != len) {
 		/* The output was truncated: report the real size */
 		fprintf(f, "(total %lu bytes%s)",
 			(ulong) len,
 			dfield_is_ext(dfield) ? ", external" : "");
 	}
 }
 /**************************************************************
 The following function prints the contents of a tuple. */
 UNIV_INTERN
 void
 dtuple_print(
 /*=========*/
 	FILE*		f,	/* in: output stream */
 	const dtuple_t*	tuple)	/* in: tuple */
 {
 	ulint		n_fields;
 	ulint		i;
 	n_fields = dtuple_get_n_fields(tuple);
 	fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);
 	for (i = 0; i < n_fields; i++) {
 		fprintf(f, " %lu:", (ulong) i);
 		dfield_print_raw(f, dtuple_get_nth_field(tuple, i));
 		putc(';', f);
 	}
 	putc('\n', f);
 	ut_ad(dtuple_validate(tuple));
 }
 /******************************************************************
 Shortens an index entry that is too big to be stored as such: the tails
 of the longest variable-length fields are moved, one field at a time,
 to a big record vector until the entry no longer needs external
 storage. Only fields after the first dict_index_get_n_unique_in_tree()
 fields are considered, so the fields that determine the insertion
 place of the entry are left intact. */
 UNIV_INTERN
 big_rec_t*
 dtuple_convert_big_rec(
 /*===================*/
 				/* out, own: created big record vector,
 				NULL if we are not able to shorten
 				the entry enough, i.e., if there are
 				too many fixed-length or short fields
 				in entry or the index is clustered */
 	dict_index_t*	index,	/* in: index */
 	dtuple_t*	entry,	/* in/out: index entry */
 	ulint*		n_ext)	/* in/out: number of
 				externally stored columns */
 {
 	mem_heap_t*	heap;
 	big_rec_t*	vector;
 	dfield_t*	dfield;
 	dict_field_t*	ifield;
 	ulint		size;
 	ulint		fields_size;
 	ulint		n_fields	= 0;
 	ulint		local_len;
 	ulint		local_prefix_len;
 	if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
 		return(NULL);
 	}
 	/* Every shortened column keeps the BLOB pointer locally. Table
 	formats before DICT_TF_FORMAT_ZIP (up to MySQL 5.1) additionally
 	keep a 768-byte prefix of the column locally. */
 	local_len = BTR_EXTERN_FIELD_REF_SIZE;
 	if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
 		local_len += DICT_MAX_INDEX_COL_LEN;
 	}
 	ut_a(dtuple_check_typed_no_assert(entry));
 	size = rec_get_converted_size(index, entry, *n_ext);
 	if (UNIV_UNLIKELY(size > 1000000000)) {
 		fprintf(stderr,
 			"InnoDB: Warning: tuple size very big: %lu\n",
 			(ulong) size);
 		fputs("InnoDB: Tuple contents: ", stderr);
 		dtuple_print(stderr, entry);
 		putc('\n', stderr);
 	}
 	/* The vector, its field array and the local parts of the
 	shortened columns are all allocated from one heap, which is
 	owned by the vector */
 	fields_size = dtuple_get_n_fields(entry) * sizeof(big_rec_field_t);
 	heap = mem_heap_create(size + fields_size + 1000);
 	vector = mem_heap_alloc(heap, sizeof(big_rec_t));
 	vector->heap = heap;
 	vector->fields = mem_heap_alloc(heap, fields_size);
 	/* On each round, pick the variable-length field whose external
 	storage saves the most space, and shorten it */
 	while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
 							     *n_ext),
 				      dict_table_is_comp(index->table),
 				      dict_table_zip_size(index->table))) {
 		ulint			i;
 		ulint			longest		= 0;
 		ulint			longest_i	= ULINT_MAX;
 		byte*			data;
 		big_rec_field_t*	b;
 		for (i = dict_index_get_n_unique_in_tree(index);
 		     i < dtuple_get_n_fields(entry); i++) {
 			ulint	field_len;
 			dfield = dtuple_get_nth_field(entry, i);
 			ifield = dict_index_get_nth_field(index, i);
 			/* Fixed-length, NULL and already externally
 			stored columns are not candidates */
 			if (ifield->fixed_len
 			    || dfield_is_null(dfield)
 			    || dfield_is_ext(dfield)) {
 				continue;
 			}
 			/* Neither are columns too short to be worth it */
 			field_len = dfield_get_len(dfield);
 			if (field_len <= local_len
 			    || field_len <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
 				continue;
 			}
 			/* Remember the first field with the biggest
 			savings seen so far */
 			if (field_len - local_len > longest) {
 				longest = field_len - local_len;
 				longest_i = i;
 			}
 		}
 		if (longest == 0) {
 			/* Cannot shorten more */
 			/* NOTE(review): fields shortened on earlier rounds
 			still point to memory of the heap freed here --
 			confirm that callers do not use entry after a NULL
 			return. */
 			mem_heap_free(heap);
 			return(NULL);
 		}
 		/* Move data from field longest_i to big rec vector.
 		We store the first bytes locally to the record. Then
 		we can calculate all ordering fields in all indexes
 		from locally stored data. */
 		dfield = dtuple_get_nth_field(entry, longest_i);
 		ifield = dict_index_get_nth_field(index, longest_i);
 		local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;
 		b = vector->fields + n_fields;
 		b->field_no = longest_i;
 		b->len = dfield_get_len(dfield) - local_prefix_len;
 		b->data = (char*) dfield_get_data(dfield) + local_prefix_len;
 		/* Build the locally stored part of the column: the
 		prefix followed by a zero-filled extern field reference
 		(BLOB pointer) */
 		data = mem_heap_alloc(heap, local_len);
 		memcpy(data, dfield_get_data(dfield), local_prefix_len);
 		memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
 #if 0
 		/* The following would fail the Valgrind checks in
 		page_cur_insert_rec_low() and page_cur_insert_rec_zip().
 		The BLOB pointers in the record will be initialized after
 		the record and the BLOBs have been written. */
 		UNIV_MEM_ALLOC(data + local_prefix_len,
 			       BTR_EXTERN_FIELD_REF_SIZE);
 #endif
 		dfield_set_data(dfield, data, local_len);
 		dfield_set_ext(dfield);
 		n_fields++;
 		(*n_ext)++;
 		ut_ad(n_fields < dtuple_get_n_fields(entry));
 	}
 	vector->n_fields = n_fields;
 	return(vector);
 }
 /******************************************************************
 Undoes dtuple_convert_big_rec: each field listed in vector is pointed
 back at its full data, and the heap of the vector is freed. The vector
 must have been created from entry with dtuple_convert_big_rec, because
 the full data is located by stepping back from the address stored in
 the vector by the length of the locally stored prefix. */
 UNIV_INTERN
 void
 dtuple_convert_back_big_rec(
 /*========================*/
 	dict_index_t*	index __attribute__((unused)),	/* in: index */
 	dtuple_t*	entry,	/* in: entry whose data was put to vector */
 	big_rec_t*	vector)	/* in, own: big rec vector; it is
 				freed in this function */
 {
 	ulint	i;
 	for (i = 0; i < vector->n_fields; i++) {
 		big_rec_field_t*	b	= vector->fields + i;
 		dfield_t*		dfield;
 		ulint			local_len;
 		dfield = dtuple_get_nth_field(entry, b->field_no);
 		local_len = dfield_get_len(dfield);
 		ut_ad(dfield_is_ext(dfield));
 		ut_ad(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 		/* What remains after removing the BLOB pointer is the
 		length of the locally stored prefix */
 		local_len -= BTR_EXTERN_FIELD_REF_SIZE;
 		ut_ad(local_len <= DICT_MAX_INDEX_COL_LEN);
 		dfield_set_data(dfield,
 				(char*) b->data - local_len,
 				b->len + local_len);
 	}
 	mem_heap_free(vector->heap);
 }
--- a/data/data0type.c
+++ b/data/data0type.c
@@ -0,0 +1,284 @@
 /******************************************************
 Data types
 (c) 1996 Innobase Oy
 Created 1/16/1996 Heikki Tuuri
 *******************************************************/
 #include "data0type.h"
 #ifdef UNIV_NONINL
 #include "data0type.ic"
 #endif
 /**********************************************************************
 Declaration only: per the note below, this prototype mirrors a function
 of ha_innodb.cc.
 This function is used to find the storage length in bytes of the first n
 characters for prefix indexes using a multibyte character set. The function
 finds charset information and returns length of prefix_len characters in the
 index field in bytes.
 NOTE: the prototype of this function is copied from ha_innodb.cc! If you
 change the function there, you MUST also change the prototype here! */
 UNIV_INTERN
 ulint
 innobase_get_at_most_n_mbchars(
 /*===========================*/
 				/* out: number of bytes occupied by the first
 				n characters */
 	ulint charset_id,	/* in: character set id */
 	ulint prefix_len,	/* in: prefix length in bytes of the index
 				(this has to be divided by mbmaxlen to get the
 				number of CHARACTERS n in the prefix) */
 	ulint data_len,		/* in: length of the string in bytes */
 	const char* str);	/* in: character string */
 /* At the database startup we store the default-charset collation number of
 this MySQL installation to this global variable. If we have < 4.1.2 format
 column definitions, or records in the insert buffer, we use this
 charset-collation code for them. */
 UNIV_INTERN ulint	data_mysql_default_charset_coll;
 /*************************************************************************
 Determine how many bytes the first n characters of the given string occupy.
 If the string is shorter than n characters, returns the number of bytes
 the characters in the string occupy. When mbminlen differs from mbmaxlen
 the work is delegated to innobase_get_at_most_n_mbchars(); otherwise
 the result is simply the smaller of prefix_len and data_len. */
 UNIV_INTERN
 ulint
 dtype_get_at_most_n_mbchars(
 /*========================*/
 					/* out: length of the prefix,
 					in bytes */
 	ulint		prtype,		/* in: precise type */
 	ulint		mbminlen,	/* in: minimum length of a
 					multi-byte character */
 	ulint		mbmaxlen,	/* in: maximum length of a
 					multi-byte character */
 	ulint		prefix_len,	/* in: length of the requested
 					prefix, in characters, multiplied by
 					dtype_get_mbmaxlen(dtype) */
 	ulint		data_len,	/* in: length of str (in bytes) */
 	const char*	str)		/* in: the string whose prefix
 					length is being determined */
 {
 #ifndef UNIV_HOTBACKUP
 	ut_a(data_len != UNIV_SQL_NULL);
 	ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen));
 	if (mbminlen != mbmaxlen) {
 		/* Variable-width character set: the byte length depends
 		on the actual characters in str */
 		ut_a(!(prefix_len % mbmaxlen));
 		return(innobase_get_at_most_n_mbchars(
 			dtype_get_charset_coll(prtype),
 			prefix_len, data_len, str));
 	}
 	if (prefix_len < data_len) {
 		return(prefix_len);
 	}
 	return(data_len);
 #else /* UNIV_HOTBACKUP */
 	/* This function depends on MySQL code that is not included in
 	InnoDB Hot Backup builds.  Besides, this function should never
 	be called in InnoDB Hot Backup. */
 	ut_error;
 	/* Not expected to be reached; present so that this non-void
 	function does not fall off its end in this configuration */
 	return(0);
 #endif /* UNIV_HOTBACKUP */
 }
 /*************************************************************************
 Tells whether a main type code denotes a string type: any code up to
 and including DATA_BLOB, or DATA_MYSQL, or DATA_VARMYSQL. A BLOB thus
 counts as a string type. */
 UNIV_INTERN
 ibool
 dtype_is_string_type(
 /*=================*/
 			/* out: TRUE if string type */
 	ulint	mtype)	/* in: InnoDB main data type code: DATA_CHAR, ... */
 {
 	ibool	is_string	= FALSE;
 	if (mtype <= DATA_BLOB) {
 		is_string = TRUE;
 	} else if (mtype == DATA_MYSQL) {
 		is_string = TRUE;
 	} else if (mtype == DATA_VARMYSQL) {
 		is_string = TRUE;
 	}
 	return(is_string);
 }
 /*************************************************************************
 Tells whether a type is a binary string type: DATA_FIXBINARY,
 DATA_BINARY, or a DATA_BLOB whose precise type has DATA_BINARY_TYPE set.
 Note that for tables created with < 4.0.14, we do not know if a DATA_BLOB
 column is a BLOB or a TEXT column. For those DATA_BLOB columns this
 function currently returns FALSE. */
 UNIV_INTERN
 ibool
 dtype_is_binary_string_type(
 /*========================*/
 			/* out: TRUE if binary string type */
 	ulint	mtype,	/* in: main data type */
 	ulint	prtype)	/* in: precise type */
 {
 	switch (mtype) {
 	case DATA_FIXBINARY:
 	case DATA_BINARY:
 		return(TRUE);
 	case DATA_BLOB:
 		/* A BLOB is binary only if it is flagged as such */
 		return((prtype & DATA_BINARY_TYPE) ? TRUE : FALSE);
 	default:
 		return(FALSE);
 	}
 }
 /*************************************************************************
 Tells whether a type is a non-binary string type, i.e., one for which
 dtype_is_string_type is TRUE and dtype_is_binary_string_type is FALSE.
 Note that for tables created with < 4.0.14, we do not know if a DATA_BLOB
 column is a BLOB or a TEXT column. For those DATA_BLOB columns this
 function currently returns TRUE. */
 UNIV_INTERN
 ibool
 dtype_is_non_binary_string_type(
 /*============================*/
 			/* out: TRUE if non-binary string type */
 	ulint	mtype,	/* in: main data type */
 	ulint	prtype)	/* in: precise type */
 {
 	if (dtype_is_string_type(mtype) != TRUE) {
 		/* Not a string type at all */
 		return(FALSE);
 	}
 	if (dtype_is_binary_string_type(mtype, prtype) != FALSE) {
 		/* A string type, but a binary one */
 		return(FALSE);
 	}
 	return(TRUE);
 }
 /*************************************************************************
 Combines a < 4.1.2 format precise type with a charset-collation code:
 the charset-collation code is placed above the low 16 bits, which hold
 the old precise type. */
 UNIV_INTERN
 ulint
 dtype_form_prtype(
 /*==============*/
 				/* out: the combined precise type */
 	ulint	old_prtype,	/* in: the MySQL type code and the flags
 				DATA_BINARY_TYPE etc. */
 	ulint	charset_coll)	/* in: MySQL charset-collation code */
 {
 	ulint	coll_bits;
 	ut_a(old_prtype < 256 * 256);
 	ut_a(charset_coll < 256);
 	coll_bits = charset_coll << 16;
 	return(coll_bits + old_prtype);
 }
 /*************************************************************************
 Validates a data type structure. Every check is a ut_a() assertion, so
 the function either returns TRUE or does not pass the failing check. */
 UNIV_INTERN
 ibool
 dtype_validate(
 /*===========*/
 				/* out: TRUE if ok */
 	const dtype_t*	type)	/* in: type struct to validate */
 {
 	ut_a(type);
 	/* The main type must lie within [DATA_VARCHAR, DATA_MYSQL] */
 	ut_a(type->mtype >= DATA_VARCHAR);
 	ut_a(type->mtype <= DATA_MYSQL);
 	if (type->mtype == DATA_SYS) {
 		/* For a system column, the masked precise type must be
 		below DATA_N_SYS_COLS */
 		ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
 	}
 	/* The minimum character length may not exceed the maximum */
 	ut_a(type->mbminlen <= type->mbmaxlen);
 	return(TRUE);
 }
 /*************************************************************************
 Prints a data type structure to stderr: the name of the main type (or
 its numeric code if it has no name here), then either the precise type
 or the type flags, and finally the length. */
 UNIV_INTERN
 void
 dtype_print(
 /*========*/
 	const dtype_t*	type)	/* in: type */
 {
 	const char*	mtype_name	= NULL;
 	ulint		mtype;
 	ulint		prtype;
 	ulint		len;
 	ut_a(type);
 	mtype = type->mtype;
 	prtype = type->prtype;
 	len = type->len;
 	switch (mtype) {
 	case DATA_VARCHAR:
 		mtype_name = "DATA_VARCHAR";
 		break;
 	case DATA_CHAR:
 		mtype_name = "DATA_CHAR";
 		break;
 	case DATA_BINARY:
 		mtype_name = "DATA_BINARY";
 		break;
 	case DATA_FIXBINARY:
 		mtype_name = "DATA_FIXBINARY";
 		break;
 	case DATA_BLOB:
 		mtype_name = "DATA_BLOB";
 		break;
 	case DATA_INT:
 		mtype_name = "DATA_INT";
 		break;
 	case DATA_MYSQL:
 		mtype_name = "DATA_MYSQL";
 		break;
 	case DATA_SYS:
 		mtype_name = "DATA_SYS";
 		break;
 	default:
 		break;
 	}
 	if (mtype_name != NULL) {
 		fputs(mtype_name, stderr);
 	} else {
 		/* No symbolic name: print the numeric code */
 		fprintf(stderr, "type %lu", (ulong) mtype);
 	}
 	if (mtype != DATA_SYS
 	    && mtype != DATA_VARCHAR
 	    && mtype != DATA_CHAR) {
 		/* Other main types: list the flags that are set */
 		if (prtype & DATA_UNSIGNED) {
 			fputs(" DATA_UNSIGNED", stderr);
 		}
 		if (prtype & DATA_BINARY_TYPE) {
 			fputs(" DATA_BINARY_TYPE", stderr);
 		}
 		if (prtype & DATA_NOT_NULL) {
 			fputs(" DATA_NOT_NULL", stderr);
 		}
 	} else {
 		/* DATA_SYS, DATA_VARCHAR, DATA_CHAR: print the precise
 		type; for the system column types the printed length is
 		replaced by the length of that system column */
 		putc(' ', stderr);
 		if (prtype == DATA_ROW_ID) {
 			fputs("DATA_ROW_ID", stderr);
 			len = DATA_ROW_ID_LEN;
 		} else if (prtype == DATA_ROLL_PTR) {
 			fputs("DATA_ROLL_PTR", stderr);
 			len = DATA_ROLL_PTR_LEN;
 		} else if (prtype == DATA_TRX_ID) {
 			fputs("DATA_TRX_ID", stderr);
 			len = DATA_TRX_ID_LEN;
 		} else if (prtype == DATA_ENGLISH) {
 			fputs("DATA_ENGLISH", stderr);
 		} else {
 			fprintf(stderr, "prtype %lu", (ulong) prtype);
 		}
 	}
 	fprintf(stderr, " len %lu", (ulong) len);
 }
--- a/dict/dict0boot.c
+++ b/dict/dict0boot.c
@@ -0,0 +1,441 @@
 /******************************************************
 Data dictionary creation and booting
 (c) 1996 Innobase Oy
 Created 4/18/1996 Heikki Tuuri
 *******************************************************/
 #include "dict0boot.h"
 #ifdef UNIV_NONINL
 #include "dict0boot.ic"
 #endif
 #include "dict0crea.h"
 #include "btr0btr.h"
 #include "dict0load.h"
 #include "dict0load.h"
 #include "trx0trx.h"
 #include "srv0srv.h"
 #include "ibuf0ibuf.h"
 #include "buf0flu.h"
 #include "log0recv.h"
 #include "os0file.h"
 /**************************************************************************
 Fetches the page DICT_HDR_PAGE_NO of the space DICT_HDR_SPACE with an
 x-latch and returns a pointer to the dictionary header, which is at
 offset DICT_HDR within the page frame. */
 UNIV_INTERN
 dict_hdr_t*
 dict_hdr_get(
 /*=========*/
 			/* out: pointer to the dictionary header,
 			page x-latched */
 	mtr_t*	mtr)	/* in: mtr */
 {
 	buf_block_t*	block;
 	block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO,
 			     RW_X_LATCH, mtr);
 #ifdef UNIV_SYNC_DEBUG
 	buf_block_dbg_add_level(block, SYNC_DICT_HEADER);
 #endif /* UNIV_SYNC_DEBUG */
 	return(buf_block_get_frame(block) + DICT_HDR);
 }
 /**************************************************************************
 Returns a new table or index id: the counter stored at the offset type
 of the dictionary header is incremented by one within a
 mini-transaction of its own, and the incremented value is returned. */
 UNIV_INTERN
 dulint
 dict_hdr_get_new_id(
 /*================*/
 			/* out: the new id */
 	ulint	type)	/* in: DICT_HDR_ROW_ID, ... */
 {
 	mtr_t		mtr;
 	dict_hdr_t*	counter;
 	dulint		new_id;
 	ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID));
 	mtr_start(&mtr);
 	/* type is the offset of the counter within the header */
 	counter = dict_hdr_get(&mtr) + type;
 	new_id = ut_dulint_add(mtr_read_dulint(counter, &mtr), 1);
 	mlog_write_dulint(counter, new_id, &mtr);
 	mtr_commit(&mtr);
 	return(new_id);
 }
 /**************************************************************************
 Writes dict_sys->row_id, the current value of the row id counter, to
 the DICT_HDR_ROW_ID field of the dictionary header page, within a
 mini-transaction of its own. The caller must own dict_sys->mutex. */
 UNIV_INTERN
 void
 dict_hdr_flush_row_id(void)
 /*=======================*/
 {
 	mtr_t		mtr;
 	dict_hdr_t*	row_id_field;
 	dulint		row_id;
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 	row_id = dict_sys->row_id;
 	mtr_start(&mtr);
 	row_id_field = dict_hdr_get(&mtr) + DICT_HDR_ROW_ID;
 	mlog_write_dulint(row_id_field, row_id, &mtr);
 	mtr_commit(&mtr);
 }
 /*********************************************************************
 Creates the file page for the dictionary header. This function is
 called only at the database creation. It creates the file segment of
 the header, initializes the id counters, and creates five B-tree
 roots, recording the root page number of each in the header. */
 static
 ibool
 dict_hdr_create(
 /*============*/
 			/* out: TRUE on success, FALSE if the creation
 			of some B-tree root failed */
 	mtr_t*	mtr)	/* in: mtr */
 {
 	buf_block_t*	block;
 	dict_hdr_t*	dict_header;
 	ulint		root_page_no;
 	ut_ad(mtr);
 	/* Create the dictionary header file block in a new, allocated file
 	segment in the system tablespace */
 	block = fseg_create(DICT_HDR_SPACE, 0,
 			    DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
 	/* The new block must be the page that dict_hdr_get() fetches */
 	ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block));
 	dict_header = dict_hdr_get(mtr);
 	/* Start counting row, table, index, and tree ids from
 	DICT_HDR_FIRST_ID */
 	mlog_write_dulint(dict_header + DICT_HDR_ROW_ID,
 			  ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
 	mlog_write_dulint(dict_header + DICT_HDR_TABLE_ID,
 			  ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
 	mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID,
 			  ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
 	/* Obsolete, but we must initialize it anyway. Note that the value
 	written is DICT_HDR_FIRST_ID, like for the counters above. */
 	mlog_write_dulint(dict_header + DICT_HDR_MIX_ID,
 			  ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
 	/* Create the B-tree roots for the indexes of the basic
 	system tables. Each step below creates one root and stores its
 	page number in the header; the function gives up at the first
 	creation that returns FIL_NULL. */
 	/*--------------------------*/
 	/* Clustered index with id DICT_TABLES_ID */
 	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
 				  DICT_HDR_SPACE, 0, DICT_TABLES_ID,
 				  srv_sys->dummy_ind1, mtr);
 	if (root_page_no == FIL_NULL) {
 		return(FALSE);
 	}
 	mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
 			 MLOG_4BYTES, mtr);
 	/*--------------------------*/
 	/* Unique, non-clustered index with id DICT_TABLE_IDS_ID */
 	root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0,
 				  DICT_TABLE_IDS_ID,
 				  srv_sys->dummy_ind1, mtr);
 	if (root_page_no == FIL_NULL) {
 		return(FALSE);
 	}
 	mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
 			 MLOG_4BYTES, mtr);
 	/*--------------------------*/
 	/* Clustered index with id DICT_COLUMNS_ID */
 	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
 				  DICT_HDR_SPACE, 0, DICT_COLUMNS_ID,
 				  srv_sys->dummy_ind1, mtr);
 	if (root_page_no == FIL_NULL) {
 		return(FALSE);
 	}
 	mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
 			 MLOG_4BYTES, mtr);
 	/*--------------------------*/
 	/* Clustered index with id DICT_INDEXES_ID */
 	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
 				  DICT_HDR_SPACE, 0, DICT_INDEXES_ID,
 				  srv_sys->dummy_ind1, mtr);
 	if (root_page_no == FIL_NULL) {
 		return(FALSE);
 	}
 	mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
 			 MLOG_4BYTES, mtr);
 	/*--------------------------*/
 	/* Clustered index with id DICT_FIELDS_ID */
 	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
 				  DICT_HDR_SPACE, 0, DICT_FIELDS_ID,
 				  srv_sys->dummy_ind1, mtr);
 	if (root_page_no == FIL_NULL) {
 		return(FALSE);
 	}
 	mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no,
 			 MLOG_4BYTES, mtr);
 	/*--------------------------*/
 	return(TRUE);
 }
 /*********************************************************************
 Initializes the data dictionary memory structures when the database is
 started. This function is also called when the data dictionary is created.
 It calls dict_init(), advances the in-memory row id counter past the value
 stored in the dictionary header, and then registers hard-coded definitions
 of the four basic system tables (SYS_TABLES, SYS_COLUMNS, SYS_INDEXES,
 SYS_FIELDS) and of their indexes in the dictionary cache. Failure to add
 any of the indexes is treated as fatal (ut_a). */
 UNIV_INTERN
 void
 dict_boot(void)
 /*===========*/
 {
 	dict_table_t*	table;
 	dict_index_t*	index;
 	dict_hdr_t*	dict_hdr;
 	mem_heap_t*	heap;
 	mtr_t		mtr;
 	ulint		error;
 	mtr_start(&mtr);
 	/* Create the hash tables etc. */
 	dict_init();
 	/* Temporary heap handed to dict_mem_table_add_col() and
 	dict_table_add_to_cache(); it is emptied after each table and
 	freed after the last one */
 	heap = mem_heap_create(450);
 	/* The dictionary mutex is held until the very end of this
 	function */
 	mutex_enter(&(dict_sys->mutex));
 	/* Get the dictionary header */
 	dict_hdr = dict_hdr_get(&mtr);
 	/* Because we only write new row ids to disk-based data structure
 	(dictionary header) when it is divisible by
 	DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
 	the latest value of the row id counter. Therefore we advance
 	the counter at the database startup to avoid overlapping values.
 	Note that when a user after database startup first time asks for
 	a new row id, then because the counter is now divisible by
 	..._MARGIN, it will immediately be updated to the disk-based
 	header. */
 	dict_sys->row_id = ut_dulint_add(
 		ut_dulint_align_up(mtr_read_dulint(dict_hdr + DICT_HDR_ROW_ID,
 						   &mtr),
 				   DICT_HDR_ROW_ID_WRITE_MARGIN),
 		DICT_HDR_ROW_ID_WRITE_MARGIN);
 	/* Insert into the dictionary cache the descriptions of the basic
 	system tables */
 	/*-------------------------*/
 	/* SYS_TABLES: 8 columns, indexes CLUST_IND (NAME) and
 	ID_IND (ID) */
 	table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0);
 	dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
 	/* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
 	dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
 	/* TYPE is either DICT_TABLE_ORDINARY, or (TYPE & DICT_TF_COMPACT)
 	and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */
 	dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
 	table->id = DICT_TABLES_ID;
 	dict_table_add_to_cache(table, heap);
 	dict_sys->sys_tables = table;
 	mem_heap_empty(heap);
 	index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
 				      DICT_HDR_SPACE,
 				      DICT_UNIQUE | DICT_CLUSTERED, 1);
 	dict_mem_index_add_field(index, "NAME", 0);
 	index->id = DICT_TABLES_ID;
 	/* The last argument is the 4-byte value stored in the dictionary
 	header at DICT_HDR_TABLES; presumably the root page number of the
 	index tree, as dict_hdr_create() stores root page numbers in the
 	header -- confirm against dict_index_add_to_cache() */
 	error = dict_index_add_to_cache(table, index,
 					mtr_read_ulint(dict_hdr
 						       + DICT_HDR_TABLES,
 						       MLOG_4BYTES, &mtr));
 	ut_a(error == DB_SUCCESS);
 	/*-------------------------*/
 	/* Second index on SYS_TABLES: unique, non-clustered, on ID */
 	index = dict_mem_index_create("SYS_TABLES", "ID_IND",
 				      DICT_HDR_SPACE, DICT_UNIQUE, 1);
 	dict_mem_index_add_field(index, "ID", 0);
 	index->id = DICT_TABLE_IDS_ID;
 	error = dict_index_add_to_cache(table, index,
 					mtr_read_ulint(dict_hdr
 						       + DICT_HDR_TABLE_IDS,
 						       MLOG_4BYTES, &mtr));
 	ut_a(error == DB_SUCCESS);
 	/*-------------------------*/
 	/* SYS_COLUMNS: 7 columns, clustered index on (TABLE_ID, POS) */
 	table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
 	dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4);
 	table->id = DICT_COLUMNS_ID;
 	dict_table_add_to_cache(table, heap);
 	dict_sys->sys_columns = table;
 	mem_heap_empty(heap);
 	index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
 				      DICT_HDR_SPACE,
 				      DICT_UNIQUE | DICT_CLUSTERED, 2);
 	dict_mem_index_add_field(index, "TABLE_ID", 0);
 	dict_mem_index_add_field(index, "POS", 0);
 	index->id = DICT_COLUMNS_ID;
 	error = dict_index_add_to_cache(table, index,
 					mtr_read_ulint(dict_hdr
 						       + DICT_HDR_COLUMNS,
 						       MLOG_4BYTES, &mtr));
 	ut_a(error == DB_SUCCESS);
 	/*-------------------------*/
 	/* SYS_INDEXES: 7 columns, clustered index on (TABLE_ID, ID).
 	The column order is checked at compile time below */
 	table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
 	dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
 	/* The '+ 2' below comes from the 2 system fields */
 #if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
 #error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
 #endif
 #if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2
 #error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2"
 #endif
 #if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
 #error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
 #endif
 	table->id = DICT_INDEXES_ID;
 	dict_table_add_to_cache(table, heap);
 	dict_sys->sys_indexes = table;
 	mem_heap_empty(heap);
 	index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
 				      DICT_HDR_SPACE,
 				      DICT_UNIQUE | DICT_CLUSTERED, 2);
 	dict_mem_index_add_field(index, "TABLE_ID", 0);
 	dict_mem_index_add_field(index, "ID", 0);
 	index->id = DICT_INDEXES_ID;
 	error = dict_index_add_to_cache(table, index,
 					mtr_read_ulint(dict_hdr
 						       + DICT_HDR_INDEXES,
 						       MLOG_4BYTES, &mtr));
 	ut_a(error == DB_SUCCESS);
 	/*-------------------------*/
 	/* SYS_FIELDS: 3 columns, clustered index on (INDEX_ID, POS) */
 	table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
 	dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0);
 	table->id = DICT_FIELDS_ID;
 	dict_table_add_to_cache(table, heap);
 	dict_sys->sys_fields = table;
 	/* This was the last table: the temporary heap is not used after
 	this point, so it is freed rather than emptied */
 	mem_heap_free(heap);
 	index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
 				      DICT_HDR_SPACE,
 				      DICT_UNIQUE | DICT_CLUSTERED, 2);
 	dict_mem_index_add_field(index, "INDEX_ID", 0);
 	dict_mem_index_add_field(index, "POS", 0);
 	index->id = DICT_FIELDS_ID;
 	error = dict_index_add_to_cache(table, index,
 					mtr_read_ulint(dict_hdr
 						       + DICT_HDR_FIELDS,
 						       MLOG_4BYTES, &mtr));
 	ut_a(error == DB_SUCCESS);
 	/* All reads from the dictionary header are done: commit the
 	mini-transaction before the calls below */
 	mtr_commit(&mtr);
 	/*-------------------------*/
 	/* Initialize the insert buffer table and index for each tablespace */
 	ibuf_init_at_db_start();
 	/* Load definitions of other indexes on system tables */
 	dict_load_sys_table(dict_sys->sys_tables);
 	dict_load_sys_table(dict_sys->sys_columns);
 	dict_load_sys_table(dict_sys->sys_indexes);
 	dict_load_sys_table(dict_sys->sys_fields);
 	mutex_exit(&(dict_sys->mutex));
 }
 /*********************************************************************
 Inserts the basic system table data into themselves in the database
 creation. Currently a placeholder: the body is empty, so calling it has
 no effect. It is called from dict_create() after dict_boot(). */
 static
 void
 dict_insert_initial_data(void)
 /*==========================*/
 {
 	/* Does nothing yet */
 }
 /*********************************************************************
 Builds the data dictionary when a new database is created: writes the
 dictionary header inside a mini-transaction of its own, then boots the
 in-memory dictionary and finally runs the initial-data hook. */
 UNIV_INTERN
 void
 dict_create(void)
 /*=============*/
 {
 	mtr_t	hdr_mtr;
 	/* The header is created and committed first; dict_boot() reads
 	it back through a separate mini-transaction */
 	mtr_start(&hdr_mtr);
 	dict_hdr_create(&hdr_mtr);
 	mtr_commit(&hdr_mtr);
 	dict_boot();
 	dict_insert_initial_data();
 }
--- a/dict/dict0crea.c
+++ b/dict/dict0crea.c
--- a/dict/dict0dict.c
+++ b/dict/dict0dict.c
--- a/dict/dict0load.c
+++ b/dict/dict0load.c
--- a/dict/dict0mem.c
+++ b/dict/dict0mem.c
@@ -0,0 +1,291 @@
 /**********************************************************************
 Data dictionary memory object creation
 (c) 1996 Innobase Oy
 Created 1/8/1996 Heikki Tuuri
 ***********************************************************************/
 #include "dict0mem.h"
 #ifdef UNIV_NONINL
 #include "dict0mem.ic"
 #endif
 #include "rem0rec.h"
 #include "data0type.h"
 #include "mach0data.h"
 #include "dict0dict.h"
 #include "que0que.h"
 #include "pars0pars.h"
 #include "lock0lock.h"
 /* Initial size passed to mem_heap_create() for the private memory heap
 of a new table or index object (see dict_mem_table_create() and
 dict_mem_index_create()) */
 #define	DICT_HEAP_SIZE		100
 /**************************************************************************
 Creates a table memory object. The object, a copy of its name, its column
 array and its auto-increment lock struct are all allocated from a heap
 created here and stored in table->heap, so dict_mem_table_free() releases
 everything at once. Room is reserved for n_cols user columns plus
 DATA_N_SYS_COLS system columns; the columns themselves are defined later
 with dict_mem_table_add_col(). */
 UNIV_INTERN
 dict_table_t*
 dict_mem_table_create(
 /*==================*/
 				/* out, own: table object */
 	const char*	name,	/* in: table name */
 	ulint		space,	/* in: space where the clustered index of
 				the table is placed; this parameter is
 				ignored if the table is made a member of
 				a cluster */
 	ulint		n_cols,	/* in: number of columns */
 	ulint		flags)	/* in: table flags */
 {
 	dict_table_t*	table;
 	mem_heap_t*	heap;
 	ut_ad(name);
 	/* No flag bit at or above DICT_TF_BITS may be set. The mask is
 	built in ulint: shifting the signed int ~0 (i.e. -1) to the left
 	is undefined behaviour in C. */
 	ut_a(!(flags & (~(ulint) 0 << DICT_TF_BITS)));
 	heap = mem_heap_create(DICT_HEAP_SIZE);
 	/* Zero-filled allocation: every member not set below starts
 	out as 0 / NULL */
 	table = mem_heap_zalloc(heap, sizeof(dict_table_t));
 	table->heap = heap;
 	table->flags = (unsigned int) flags;
 	/* The name is copied; the caller keeps ownership of 'name' */
 	table->name = mem_heap_strdup(heap, name);
 	table->space = (unsigned int) space;
 	table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS);
 	table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
 				     * sizeof(dict_col_t));
 	table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size());
 	mutex_create(&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
 	/* The actual increment value will be set by MySQL, we simply
 	default to 1 here.*/
 	table->autoinc_increment = 1;
 #ifdef UNIV_DEBUG
 	table->magic_n = DICT_TABLE_MAGIC_N;
 #endif /* UNIV_DEBUG */
 	return(table);
 }
 /********************************************************************
 Releases a table memory object: destroys its autoinc mutex and then frees
 the heap stored in table->heap. dict_mem_table_create() allocates the
 table object itself from that heap, so 'table' must not be used after
 this call. */
 UNIV_INTERN
 void
 dict_mem_table_free(
 /*================*/
 	dict_table_t*	table)		/* in: table */
 {
 	mem_heap_t*	table_heap;
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 	table_heap = table->heap;
 	mutex_free(&table->autoinc_mutex);
 	mem_heap_free(table_heap);
 }
 /********************************************************************
 Append 'name' to 'col_names' (@see dict_table_t::col_names). The column
 names are kept as one packed buffer of consecutive NUL-terminated strings.
 A new buffer holding the old strings followed by 'name' is allocated from
 'heap'; the old buffer is neither modified nor freed. */
 static
 const char*
 dict_add_col_name(
 /*==============*/
 					/* out: new column names array */
 	const char*	col_names,	/* in: existing column names, or
 					NULL */
 	ulint		cols,		/* in: number of existing columns */
 	const char*	name,		/* in: new column name */
 	mem_heap_t*	heap)		/* in: heap */
 {
 	const char*	end = col_names;
 	ulint		prefix_len = 0;
 	ulint		name_len;
 	char*		buf;
 	ut_ad(!cols == !col_names);
 	if (col_names != NULL) {
 		ulint	left;
 		/* Step over the 'cols' existing strings to find where
 		the packed buffer ends */
 		for (left = cols; left > 0; left--) {
 			end += strlen(end) + 1;
 		}
 		prefix_len = end - col_names;
 	}
 	/* The terminating NUL of the new name is copied as well */
 	name_len = strlen(name) + 1;
 	buf = mem_heap_alloc(heap, prefix_len + name_len);
 	if (prefix_len != 0) {
 		memcpy(buf, col_names, prefix_len);
 	}
 	memcpy(buf + prefix_len, name, name_len);
 	return(buf);
 }
 /**************************************************************************
 Adds a column definition to a table. The column takes the next free
 position (table->n_def, which is incremented). When a name is given it is
 appended to the packed table->col_names buffer; that buffer lives in the
 temporary heap until the last column (n_def == n_cols) is added, at which
 point it is allocated from table->heap instead. */
 UNIV_INTERN
 void
 dict_mem_table_add_col(
 /*===================*/
 	dict_table_t*	table,	/* in: table */
 	mem_heap_t*	heap,	/* in: temporary memory heap, or NULL */
 	const char*	name,	/* in: column name, or NULL */
 	ulint		mtype,	/* in: main datatype */
 	ulint		prtype,	/* in: precise type */
 	ulint		len)	/* in: precision (stored in col->len) */
 {
 	dict_col_t*	col;
 	ulint		min_mb;
 	ulint		max_mb;
 	ulint		pos;
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 	ut_ad(!heap == !name);
 	/* Position of the new column; n_def then counts it too */
 	pos = table->n_def;
 	table->n_def++;
 	if (name != NULL) {
 		if (UNIV_UNLIKELY(table->n_def == table->n_cols)) {
 			/* Last column: the final name buffer must
 			outlive the temporary heap */
 			heap = table->heap;
 		}
 		if (UNIV_LIKELY(pos) && UNIV_UNLIKELY(!table->col_names)) {
 			/* All preceding column names are empty:
 			represent them by zero bytes */
 			table->col_names = mem_heap_zalloc(heap,
 							   table->n_def);
 		}
 		table->col_names = dict_add_col_name(table->col_names,
 						     pos, name, heap);
 	}
 	col = dict_table_get_nth_col(table, pos);
 	col->ind = (unsigned int) pos;
 	col->ord_part = 0;
 	col->mtype = (unsigned int) mtype;
 	col->prtype = (unsigned int) prtype;
 	col->len = (unsigned int) len;
 	/* Derive the per-character byte length bounds from the type */
 	dtype_get_mblen(mtype, prtype, &min_mb, &max_mb);
 	col->mbminlen = (unsigned int) min_mb;
 	col->mbmaxlen = (unsigned int) max_mb;
 }
 /**************************************************************************
 Creates an index memory object. The object, a copy of the index name and
 the field array are allocated from a heap created here and stored in
 index->heap. NOTE: the table name is not copied; only the pointer is
 stored in index->table_name. */
 UNIV_INTERN
 dict_index_t*
 dict_mem_index_create(
 /*==================*/
 					/* out, own: index object */
 	const char*	table_name,	/* in: table name */
 	const char*	index_name,	/* in: index name */
 	ulint		space,		/* in: space where the index tree is
 					placed, ignored if the index is of
 					the clustered type */
 	ulint		type,		/* in: DICT_UNIQUE,
 					DICT_CLUSTERED, ... ORed */
 	ulint		n_fields)	/* in: number of fields */
 {
 	mem_heap_t*	index_heap;
 	dict_index_t*	new_index;
 	ut_ad(table_name && index_name);
 	index_heap = mem_heap_create(DICT_HEAP_SIZE);
 	new_index = mem_heap_zalloc(index_heap, sizeof(dict_index_t));
 	new_index->heap = index_heap;
 	new_index->name = mem_heap_strdup(index_heap, index_name);
 	/* The '1 +' below prevents allocation
 	of an empty mem block */
 	new_index->fields = mem_heap_alloc(index_heap,
 					   1 + n_fields
 					   * sizeof(dict_field_t));
 	new_index->table_name = table_name;
 	new_index->type = type;
 	new_index->space = (unsigned int) space;
 	new_index->n_fields = (unsigned int) n_fields;
 #ifdef UNIV_DEBUG
 	new_index->magic_n = DICT_INDEX_MAGIC_N;
 #endif /* UNIV_DEBUG */
 	return(new_index);
 }
 /**************************************************************************
 Creates and initializes a foreign constraint memory object. The struct is
 zero-filled and allocated from a heap created here; the heap is stored in
 the struct's 'heap' member. */
 UNIV_INTERN
 dict_foreign_t*
 dict_mem_foreign_create(void)
 /*=========================*/
 				/* out, own: foreign constraint struct */
 {
 	mem_heap_t*	own_heap;
 	dict_foreign_t*	constraint;
 	own_heap = mem_heap_create(100);
 	constraint = mem_heap_zalloc(own_heap, sizeof(dict_foreign_t));
 	constraint->heap = own_heap;
 	return(constraint);
 }
 /**************************************************************************
 Adds a field definition to an index. NOTE: does not take a copy
 of the column name if the field is a column. The memory occupied
 by the column name may be released only after publishing the index.
 The field is placed in the next free slot and index->n_def is
 incremented. */
 UNIV_INTERN
 void
 dict_mem_index_add_field(
 /*=====================*/
 	dict_index_t*	index,		/* in: index */
 	const char*	name,		/* in: column name */
 	ulint		prefix_len)	/* in: 0 or the column prefix length
 					in a MySQL index like
 					INDEX (textcol(25)) */
 {
 	dict_field_t*	slot;
 	ulint		pos;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	/* n_def is bumped before the slot is looked up, so that the new
 	position already counts as defined */
 	pos = index->n_def++;
 	slot = dict_index_get_nth_field(index, pos);
 	slot->prefix_len = (unsigned int) prefix_len;
 	slot->name = name;
 }
 /**************************************************************************
 Frees an index memory object by freeing the heap stored in index->heap.
 dict_mem_index_create() allocates the index object itself from that heap,
 so 'index' must not be used after this call. */
 UNIV_INTERN
 void
 dict_mem_index_free(
 /*================*/
 	dict_index_t*	index)	/* in: index */
 {
 	mem_heap_t*	index_heap;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	index_heap = index->heap;
 	mem_heap_free(index_heap);
 }
--- a/dyn/dyn0dyn.c
+++ b/dyn/dyn0dyn.c
@@ -0,0 +1,48 @@
 /******************************************************
 The dynamically allocated array
 (c) 1996 Innobase Oy
 Created 2/5/1996 Heikki Tuuri
 *******************************************************/
 #include "dyn0dyn.h"
 #ifdef UNIV_NONINL
 #include "dyn0dyn.ic"
 #endif
 /****************************************************************
 Adds a new block to a dyn array. On the first call (arr->heap == NULL)
 the block list is initialized with 'arr' itself as its first element and
 the heap for further blocks is created. The current last block is then
 flagged as full and a fresh, empty block is appended to the list. */
 UNIV_INTERN
 dyn_block_t*
 dyn_array_add_block(
 /*================*/
 				/* out: created block */
 	dyn_array_t*	arr)	/* in: dyn array */
 {
 	dyn_block_t*	last;
 	dyn_block_t*	fresh;
 	ut_ad(arr);
 	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
 	if (!arr->heap) {
 		UT_LIST_INIT(arr->base);
 		UT_LIST_ADD_FIRST(list, arr->base, arr);
 		arr->heap = mem_heap_create(sizeof(dyn_block_t));
 	}
 	/* Mark the old last block as full */
 	last = dyn_array_get_last_block(arr);
 	last->used |= DYN_BLOCK_FULL_FLAG;
 	fresh = mem_heap_alloc(arr->heap, sizeof(dyn_block_t));
 	fresh->used = 0;
 	UT_LIST_ADD_LAST(list, arr->base, fresh);
 	return(fresh);
 }
--- a/eval/eval0eval.c
+++ b/eval/eval0eval.c
@@ -0,0 +1,835 @@
 /******************************************************
 SQL evaluator: evaluates simple data structures, like expressions, in
 a query graph
 (c) 1997 Innobase Oy
 Created 12/29/1997 Heikki Tuuri
 *******************************************************/
 #include "eval0eval.h"
 #ifdef UNIV_NONINL
 #include "eval0eval.ic"
 #endif
 #include "data0data.h"
 #include "row0sel.h"
 /* The RND function seed */
 static ulint	eval_rnd	= 128367121;
 /* Dummy address used when we should allocate a buffer of size 0 in
 eval_node_alloc_val_buf() */
 static byte	eval_dummy;
 /*********************************************************************
 Allocate a buffer from global dynamic memory for a value of a que_node.
 NOTE that this memory must be explicitly freed when the query graph is
 freed. If the node already has an allocated buffer, that buffer is freed
 here. NOTE that this is the only function where dynamic memory should be
 allocated for a query node val field. A request for size 0 allocates
 nothing: the value then points to the static eval_dummy byte. */
 UNIV_INTERN
 byte*
 eval_node_alloc_val_buf(
 /*====================*/
 				/* out: pointer to allocated buffer */
 	que_node_t*	node,	/* in: query graph node; sets the val field
 				data field to point to the new buffer, and
 				len field equal to size */
 	ulint		size)	/* in: buffer size */
 {
 	dfield_t*	val;
 	byte*		old_buf;
 	byte*		new_buf;
 	ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
 	      || que_node_get_type(node) == QUE_NODE_FUNC);
 	val = que_node_get_val(node);
 	old_buf = dfield_get_data(val);
 	/* The dummy byte is static storage and must never be freed */
 	if (old_buf != NULL && old_buf != &eval_dummy) {
 		mem_free(old_buf);
 	}
 	if (size > 0) {
 		new_buf = mem_alloc(size);
 	} else {
 		new_buf = &eval_dummy;
 	}
 	que_node_set_val_buf_size(node, size);
 	dfield_set_data(val, new_buf, size);
 	return(new_buf);
 }
 /*********************************************************************
 Free the buffer from global dynamic memory for a value of a que_node,
 if it has been allocated in the above function. The freeing for pushed
 column values is done in sel_col_prefetch_buf_free. Nothing is freed when
 the recorded buffer size of the node is 0. */
 UNIV_INTERN
 void
 eval_node_free_val_buf(
 /*===================*/
 	que_node_t*	node)	/* in: query graph node */
 {
 	byte*	data;
 	ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
 	      || que_node_get_type(node) == QUE_NODE_FUNC);
 	data = dfield_get_data(que_node_get_val(node));
 	if (que_node_get_val_buf_size(node) == 0) {
 		/* No dynamically allocated buffer is attached */
 		return;
 	}
 	ut_a(data);
 	mem_free(data);
 }
 /*********************************************************************
 Evaluates a comparison node. The values of the two arguments are compared
 with cmp_dfield_dfield() and its result (tested against -1, 0 and 1) is
 mapped to a boolean according to the operator stored in the node. The
 boolean is also stored as the value of the node. */
 UNIV_INTERN
 ibool
 eval_cmp(
 /*=====*/
 					/* out: the result of the comparison */
 	func_node_t*	cmp_node)	/* in: comparison node */
 {
 	que_node_t*	lhs;
 	que_node_t*	rhs;
 	int		res;
 	int		func;
 	ibool		val;
 	ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC);
 	lhs = cmp_node->args;
 	rhs = que_node_get_next(lhs);
 	res = cmp_dfield_dfield(que_node_get_val(lhs),
 				que_node_get_val(rhs));
 	func = cmp_node->func;
 	switch (func) {
 	case '=':
 		val = (res == 0) ? TRUE : FALSE;
 		break;
 	case '<':
 		val = (res == -1) ? TRUE : FALSE;
 		break;
 	case PARS_LE_TOKEN:
 		val = (res == 1) ? FALSE : TRUE;
 		break;
 	case PARS_NE_TOKEN:
 		val = (res == 0) ? FALSE : TRUE;
 		break;
 	case PARS_GE_TOKEN:
 		val = (res == -1) ? FALSE : TRUE;
 		break;
 	default:
 		/* The only remaining comparison operator */
 		ut_ad(func == '>');
 		val = (res == 1) ? TRUE : FALSE;
 		break;
 	}
 	eval_node_set_ibool_val(cmp_node, val);
 	return(val);
 }
 /*********************************************************************
 Evaluates a logical operation node (AND, OR or NOT) from the boolean
 values of its arguments and stores the result as the value of the node.
 Any other operator is a fatal error. */
 UNIV_INLINE
 void
 eval_logical(
 /*=========*/
 	func_node_t*	logical_node)	/* in: logical operation node */
 {
 	que_node_t*	first;
 	que_node_t*	second;
 	ibool		lhs;
 	ibool		rhs = 0;	/* remove warning */
 	ibool		result = 0;	/* remove warning */
 	ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC);
 	first = logical_node->args;
 	/* There is no second argument if the operator is 'NOT' */
 	second = que_node_get_next(first);
 	lhs = eval_node_get_ibool_val(first);
 	if (second != NULL) {
 		rhs = eval_node_get_ibool_val(second);
 	}
 	switch (logical_node->func) {
 	case PARS_AND_TOKEN:
 		result = lhs & rhs;
 		break;
 	case PARS_OR_TOKEN:
 		result = lhs | rhs;
 		break;
 	case PARS_NOT_TOKEN:
 		result = TRUE - lhs;
 		break;
 	default:
 		ut_error;
 	}
 	eval_node_set_ibool_val(logical_node, result);
 }
 /*********************************************************************
 Evaluates an arithmetic operation node ('+', '-', '*', '/' or unary '-')
 on the integer values of its arguments and stores the result as the
 integer value of the node. No check is made for a zero divisor. */
 UNIV_INLINE
 void
 eval_arith(
 /*=======*/
 	func_node_t*	arith_node)	/* in: arithmetic operation node */
 {
 	que_node_t*	first;
 	que_node_t*	second;
 	lint		lhs;
 	lint		rhs = 0;	/* remove warning */
 	lint		result;
 	int		func;
 	ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC);
 	first = arith_node->args;
 	/* There is no second argument if the operator is unary '-' */
 	second = que_node_get_next(first);
 	lhs = eval_node_get_int_val(first);
 	if (second != NULL) {
 		rhs = eval_node_get_int_val(second);
 	}
 	func = arith_node->func;
 	switch (func) {
 	case '+':
 		result = lhs + rhs;
 		break;
 	case '-':
 		/* Binary minus when a second argument exists,
 		negation otherwise */
 		result = (second != NULL) ? lhs - rhs : -lhs;
 		break;
 	case '*':
 		result = lhs * rhs;
 		break;
 	default:
 		ut_ad(func == '/');
 		result = lhs / rhs;
 		break;
 	}
 	eval_node_set_int_val(arith_node, result);
 }
 /*********************************************************************
 Evaluates an aggregate operation node. The running total is kept as the
 integer value of the node itself: COUNT adds 1 to it, SUM adds the integer
 value of the argument. */
 UNIV_INLINE
 void
 eval_aggregate(
 /*===========*/
 	func_node_t*	node)	/* in: aggregate operation node */
 {
 	lint	total;
 	int	func;
 	ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
 	total = eval_node_get_int_val(node);
 	func = node->func;
 	if (func != PARS_COUNT_TOKEN) {
 		ut_ad(func == PARS_SUM_TOKEN);
 		total += eval_node_get_int_val(node->args);
 	} else {
 		total++;
 	}
 	eval_node_set_int_val(node, total);
 }
 /*********************************************************************
 Evaluates a predefined function node where the function is not relevant
 in benchmarks: PRINTF, ASSERT, RND and RND_STR. Any other function is a
 fatal error. */
 static
 void
 eval_predefined_2(
 /*==============*/
 	func_node_t*	func_node)	/* in: predefined function node */
 {
 	que_node_t*	arg;
 	que_node_t*	arg1;
 	que_node_t*	arg2 = NULL;
 	lint		int_val;
 	byte*		data;
 	ulint		len1;
 	ulint		len2;
 	ulint		i;
 	ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
 	arg1 = func_node->args;
 	if (arg1 != NULL) {
 		arg2 = que_node_get_next(arg1);
 	}
 	switch (func_node->func) {
 	case PARS_PRINTF_TOKEN:
 		/* Print every argument, then end the line */
 		for (arg = arg1; arg != NULL; arg = que_node_get_next(arg)) {
 			dfield_print(que_node_get_val(arg));
 		}
 		putc('\n', stderr);
 		break;
 	case PARS_ASSERT_TOKEN:
 		if (!eval_node_get_ibool_val(arg1)) {
 			fputs("SQL assertion fails in a stored procedure!\n",
 			      stderr);
 		}
 		ut_a(eval_node_get_ibool_val(arg1));
 		/* This function, or more precisely, a debug procedure,
 		returns no value */
 		break;
 	case PARS_RND_TOKEN:
 		/* Value in the closed range [len1, len2], derived from
 		the current seed */
 		len1 = (ulint)eval_node_get_int_val(arg1);
 		len2 = (ulint)eval_node_get_int_val(arg2);
 		ut_ad(len2 >= len1);
 		int_val = (lint) len1;
 		if (len2 > len1) {
 			int_val = (lint) (len1
 					  + (eval_rnd % (len2 - len1 + 1)));
 		}
 		eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
 		eval_node_set_int_val(func_node, int_val);
 		break;
 	case PARS_RND_STR_TOKEN:
 		/* String of len1 bytes, each one of 'a', 'b' or 'c' */
 		len1 = (ulint)eval_node_get_int_val(arg1);
 		data = eval_node_ensure_val_buf(func_node, len1);
 		i = 0;
 		while (i < len1) {
 			data[i] = (byte)(97 + (eval_rnd % 3));
 			eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
 			i++;
 		}
 		break;
 	default:
 		ut_error;
 	}
 }
 /*********************************************************************
 Evaluates a notfound-function node. The single argument is a symbol node:
 either the literal "SQL", which refers to the last select node of the
 query graph, or a cursor name, which refers to the select node the cursor
 was defined with. The value of the function node is set to TRUE if that
 select node is in the state SEL_NODE_NO_MORE_ROWS, and to FALSE
 otherwise. */
 UNIV_INLINE
 void
 eval_notfound(
 /*==========*/
 	func_node_t*	func_node)	/* in: function node */
 {
 	sym_node_t*	cursor;
 	sel_node_t*	sel_node;
 	ibool		ibool_val;
 	ut_ad(func_node->func == PARS_NOTFOUND_TOKEN);
 	/* Only the first argument is consulted; the former local 'arg2'
 	was assigned but never read and has been removed */
 	cursor = func_node->args;
 	ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL);
 	if (cursor->token_type == SYM_LIT) {
 		ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)),
 				"SQL", 3) == 0);
 		sel_node = cursor->sym_table->query_graph->last_sel_node;
 	} else {
 		sel_node = cursor->alias->cursor_def;
 	}
 	if (sel_node->state == SEL_NODE_NO_MORE_ROWS) {
 		ibool_val = TRUE;
 	} else {
 		ibool_val = FALSE;
 	}
 	eval_node_set_ibool_val(func_node, ibool_val);
 }
 /*********************************************************************
 Evaluates a substr-function node. The arguments are the string, the
 start offset and the length. NOTE: no data is copied and no bounds are
 checked; the value of the function node is made to point into the buffer
 of the first argument. */
 UNIV_INLINE
 void
 eval_substr(
 /*========*/
 	func_node_t*	func_node)	/* in: function node */
 {
 	que_node_t*	str_arg;
 	que_node_t*	pos_arg;
 	que_node_t*	len_arg;
 	byte*		base;
 	ulint		offset;
 	ulint		count;
 	str_arg = func_node->args;
 	pos_arg = que_node_get_next(str_arg);
 	ut_ad(func_node->func == PARS_SUBSTR_TOKEN);
 	len_arg = que_node_get_next(pos_arg);
 	base = dfield_get_data(que_node_get_val(str_arg));
 	offset = (ulint)eval_node_get_int_val(pos_arg);
 	count = (ulint)eval_node_get_int_val(len_arg);
 	dfield_set_data(que_node_get_val(func_node), base + offset, count);
 }
 /*********************************************************************
 Evaluates a replstr-procedure node. The arguments are: the destination
 string, the source string, the offset in the destination, and the number
 of bytes to copy. The bytes are copied from the start of the source into
 the destination buffer in place. It is a fatal error if the destination
 is shorter than offset + count or the source is shorter than count. */
 static
 void
 eval_replstr(
 /*=========*/
 	func_node_t*	func_node)	/* in: function node */
 {
 	que_node_t*	arg1;
 	que_node_t*	src_arg;
 	que_node_t*	pos_arg;
 	que_node_t*	cnt_arg;
 	byte*		dst;
 	byte*		src;
 	ulint		offset;
 	ulint		count;
 	arg1 = func_node->args;
 	src_arg = que_node_get_next(arg1);
 	ut_ad(que_node_get_type(arg1) == QUE_NODE_SYMBOL);
 	pos_arg = que_node_get_next(src_arg);
 	cnt_arg = que_node_get_next(pos_arg);
 	dst = dfield_get_data(que_node_get_val(arg1));
 	src = dfield_get_data(que_node_get_val(src_arg));
 	offset = (ulint)eval_node_get_int_val(pos_arg);
 	count = (ulint)eval_node_get_int_val(cnt_arg);
 	/* Both buffers must be large enough for the copy */
 	if (!((dfield_get_len(que_node_get_val(arg1)) >= offset + count)
 	      && (dfield_get_len(que_node_get_val(src_arg)) >= count))) {
 		ut_error;
 	}
 	ut_memcpy(dst + offset, src, count);
 }
 /*********************************************************************
 Evaluates an instr-function node: searches the first argument for the
 first occurrence of the second argument. The value of the node is set to
 the 1-based position of the match, or to 0 if there is no match. An empty
 search string is a fatal error. */
 static
 void
 eval_instr(
 /*=======*/
 	func_node_t*	func_node)	/* in: function node */
 {
 	que_node_t*	hay_arg;
 	que_node_t*	needle_arg;
 	dfield_t*	hay_field;
 	dfield_t*	needle_field;
 	byte*		hay;
 	byte*		needle;
 	byte		first_char;
 	ulint		hay_len;
 	ulint		needle_len;
 	ulint		start;
 	ulint		matched;
 	lint		int_val = 0;
 	hay_arg = func_node->args;
 	needle_arg = que_node_get_next(hay_arg);
 	hay_field = que_node_get_val(hay_arg);
 	needle_field = que_node_get_val(needle_arg);
 	hay = dfield_get_data(hay_field);
 	needle = dfield_get_data(needle_field);
 	hay_len = dfield_get_len(hay_field);
 	needle_len = dfield_get_len(needle_field);
 	if (needle_len == 0) {
 		ut_error;
 	}
 	first_char = needle[0];
 	for (start = 0; start < hay_len; start++) {
 		if (hay[start] != first_char) {
 			continue;
 		}
 		if (start + needle_len > hay_len) {
 			/* Too little left for a complete match, here
 			or at any later position */
 			break;
 		}
 		/* One character matched so far; extend the match */
 		matched = 1;
 		while (matched < needle_len
 		       && hay[start + matched] == needle[matched]) {
 			matched++;
 		}
 		if (matched == needle_len) {
 			int_val = start + 1;
 			break;
 		}
 	}
 	eval_node_set_int_val(func_node, int_val);
 }
 /*********************************************************************
 Evaluates a binary-to-number function node. The argument must be at most
 4 bytes long (otherwise a fatal error). A shorter value is copied
 right-aligned into the first 4 bytes of a zeroed local variable, i.e.
 padded with zero bytes on the left; the resulting 4 bytes are then
 copied into the value buffer of the node. */
 UNIV_INLINE
 void
 eval_binary_to_number(
 /*==================*/
 	func_node_t*	func_node)	/* in: function node */
 {
 	dfield_t*	src_field;
 	byte*		src;
 	byte*		four_bytes;
 	ulint		src_len;
 	ulint		padded;
 	src_field = que_node_get_val(func_node->args);
 	src = dfield_get_data(src_field);
 	src_len = dfield_get_len(src_field);
 	if (src_len > 4) {
 		ut_error;
 	}
 	if (src_len != 4) {
 		padded = 0;
 		four_bytes = (byte*)&padded;
 		ut_memcpy(four_bytes + (4 - src_len), src, src_len);
 	} else {
 		/* Already 4 bytes: use the argument data as is */
 		four_bytes = src;
 	}
 	eval_node_copy_and_alloc_val(func_node, four_bytes, 4);
 }
 /*********************************************************************
 Evaluates a concat-function node: the values of all arguments are copied,
 in argument order, into the value buffer of the node. The argument list
 is walked twice: first to sum up the lengths, then to copy the data. */
 static
 void
 eval_concat(
 /*========*/
 	func_node_t*	func_node)	/* in: function node */
 {
 	que_node_t*	cur;
 	dfield_t*	piece;
 	byte*		out;
 	ulint		total = 0;
 	ulint		offset = 0;
 	ulint		piece_len;
 	/* Pass 1: total length of the result */
 	for (cur = func_node->args; cur != NULL;
 	     cur = que_node_get_next(cur)) {
 		total += dfield_get_len(que_node_get_val(cur));
 	}
 	out = eval_node_ensure_val_buf(func_node, total);
 	/* Pass 2: copy the pieces one after another */
 	for (cur = func_node->args; cur != NULL;
 	     cur = que_node_get_next(cur)) {
 		piece = que_node_get_val(cur);
 		piece_len = dfield_get_len(piece);
 		ut_memcpy(out + offset, dfield_get_data(piece), piece_len);
 		offset += piece_len;
 	}
 }
 /*********************************************************************
 Evaluates a predefined function node. If the first argument is an integer,
 this function looks at the second argument which is the integer length in
 bytes, and converts the integer to a VARCHAR.
 If the first argument is of some other type, this function converts it to
 BINARY. NOTE: in both cases no data is copied; the value of the node is
 made to point into the buffer of the first argument. For an integer the
 last len1 bytes of its first 4 bytes are used, and len1 > 4 is a fatal
 error. */
 UNIV_INLINE
 void
 eval_to_binary(
 /*===========*/
 	func_node_t*	func_node)	/* in: function node */
 {
 	que_node_t*	src_arg;
 	que_node_t*	len_arg;
 	byte*		src;
 	ulint		n_bytes;
 	src_arg = func_node->args;
 	src = dfield_get_data(que_node_get_val(src_arg));
 	if (dtype_get_mtype(que_node_get_data_type(src_arg)) == DATA_INT) {
 		len_arg = que_node_get_next(src_arg);
 		n_bytes = (ulint)eval_node_get_int_val(len_arg);
 		if (n_bytes > 4) {
 			ut_error;
 		}
 		dfield_set_data(que_node_get_val(func_node),
 				src + (4 - n_bytes), n_bytes);
 	} else {
 		/* Not an integer: pass the data through unchanged */
 		n_bytes = dfield_get_len(que_node_get_val(src_arg));
 		dfield_set_data(que_node_get_val(func_node), src, n_bytes);
 	}
 }
 /*********************************************************************
 Evaluates a predefined function node. LENGTH, TO_CHAR, TO_NUMBER and
 SYSDATE are handled here; every other function is passed on to
 eval_predefined_2(). TO_CHAR stores a string in the value buffer of the
 node, the other three store an integer value. */
 UNIV_INLINE
 void
 eval_predefined(
 /*============*/
 	func_node_t*	func_node)	/* in: function node */
 {
 	que_node_t*	arg1;
 	lint		int_val;
 	byte*		data;
 	int		func;
 	func = func_node->func;
 	arg1 = func_node->args;
 	if (func == PARS_LENGTH_TOKEN) {
 		int_val = (lint)dfield_get_len(que_node_get_val(arg1));
 	} else if (func == PARS_TO_CHAR_TOKEN) {
 		/* Convert number to character string as a
 		signed decimal integer. */
 		ulint	magnitude;
 		ulint	rest;
 		int	int_len;
 		int	pos;
 		int_val = eval_node_get_int_val(arg1);
 		/* Compute the absolute value in unsigned arithmetic:
 		negating the most negative lint as a signed value
 		would overflow, which is undefined behaviour. */
 		if (int_val < 0) {
 			magnitude = (ulint) 0 - (ulint) int_val;
 		} else {
 			magnitude = (ulint) int_val;
 		}
 		/* Determine the length of the string: one byte for a
 		minus sign if needed, plus one byte per decimal digit
 		(the number 0 occupies 1 byte). */
 		int_len = (int_val < 0) ? 1 : 0;
 		rest = magnitude;
 		do {
 			int_len++;
 			rest /= 10;
 		} while (rest > 0);
 		/* allocate the string */
 		data = eval_node_ensure_val_buf(func_node, int_len + 1);
 		/* add terminating NUL character */
 		data[int_len] = 0;
 		if (int_val < 0) {
 			data[0] = '-'; /* preceding minus sign */
 		}
 		/* convert the number, least significant digit first,
 		filling the buffer from the end */
 		pos = int_len;
 		rest = magnitude;
 		do {
 			data[--pos] = (byte) ('0' + (byte)(rest % 10));
 			rest /= 10;
 		} while (rest > 0);
 		/* The terminating NUL is not counted in the length */
 		dfield_set_len(que_node_get_val(func_node), int_len);
 		return;
 	} else if (func == PARS_TO_NUMBER_TOKEN) {
 		/* NOTE(review): atoi() needs NUL-terminated input and
 		reports no errors; presumably the argument data is
 		always NUL-terminated -- confirm against the callers */
 		int_val = atoi((char*)
 			       dfield_get_data(que_node_get_val(arg1)));
 	} else if (func == PARS_SYSDATE_TOKEN) {
 		int_val = (lint)ut_time();
 	} else {
 		eval_predefined_2(func_node);
 		return;
 	}
 	eval_node_set_int_val(func_node, int_val);
 }
 /*********************************************************************
 Evaluates a function node. All arguments are evaluated first, in list
 order; then the node is dispatched on its function class and, for
 predefined functions, on the function token. */
 UNIV_INTERN
 void
 eval_func(
 /*======*/
 	func_node_t*	func_node)	/* in: function node */
 {
 	que_node_t*	arg;
 	ulint		class;
 	ulint		func;
 	ibool		null_ok;
 	ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
 	class = func_node->class;
 	func = func_node->func;
 	/* The functions are not defined for SQL null argument
 	values, except for comparisons, notfound and printf */
 	null_ok = (class == PARS_FUNC_CMP)
 		|| (func == PARS_NOTFOUND_TOKEN)
 		|| (func == PARS_PRINTF_TOKEN);
 	/* Evaluate first the argument list */
 	for (arg = func_node->args; arg != NULL;
 	     arg = que_node_get_next(arg)) {
 		eval_exp(arg);
 		if (dfield_is_null(que_node_get_val(arg)) && !null_ok) {
 			ut_error;
 		}
 	}
 	switch (class) {
 	case PARS_FUNC_CMP:
 		eval_cmp(func_node);
 		break;
 	case PARS_FUNC_ARITH:
 		eval_arith(func_node);
 		break;
 	case PARS_FUNC_AGGREGATE:
 		eval_aggregate(func_node);
 		break;
 	case PARS_FUNC_PREDEFINED:
 		switch (func) {
 		case PARS_NOTFOUND_TOKEN:
 			eval_notfound(func_node);
 			break;
 		case PARS_SUBSTR_TOKEN:
 			eval_substr(func_node);
 			break;
 		case PARS_REPLSTR_TOKEN:
 			eval_replstr(func_node);
 			break;
 		case PARS_INSTR_TOKEN:
 			eval_instr(func_node);
 			break;
 		case PARS_BINARY_TO_NUMBER_TOKEN:
 			eval_binary_to_number(func_node);
 			break;
 		case PARS_CONCAT_TOKEN:
 			eval_concat(func_node);
 			break;
 		case PARS_TO_BINARY_TOKEN:
 			eval_to_binary(func_node);
 			break;
 		default:
 			eval_predefined(func_node);
 			break;
 		}
 		break;
 	default:
 		ut_ad(class == PARS_FUNC_LOGICAL);
 		eval_logical(func_node);
 		break;
 	}
 }
--- a/eval/eval0proc.c
+++ b/eval/eval0proc.c
@@ -0,0 +1,278 @@
 /******************************************************
 Executes SQL stored procedures and their control structures
 (c) 1998 Innobase Oy
 Created 1/20/1998 Heikki Tuuri
 *******************************************************/
 #include "eval0proc.h"
 #ifdef UNIV_NONINL
 #include "eval0proc.ic"
 #endif
 /**************************************************************************
 Performs an execution step of an if-statement node. When entered from the
 parent, the conditions are evaluated and the statement list of the first
 matching branch is selected; when control returns from a branch, or when
 no branch applies, execution continues in the parent of the node. */
 UNIV_INTERN
 que_thr_t*
 if_step(
 /*====*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr)	/* in: query thread */
 {
 	if_node_t*	node;
 	elsif_node_t*	branch;
 	que_node_t*	next_node	= NULL;
 	ut_ad(thr);
 	node = thr->run_node;
 	ut_ad(que_node_get_type(node) == QUE_NODE_IF);
 	if (thr->prev_node != que_node_get_parent(node)) {
 		/* Control came back from one of the statement lists:
 		the statement just executed must have been its last */
 		ut_ad(que_node_get_next(thr->prev_node) == NULL);
 	} else {
 		/* Entered from the parent: evaluate the condition */
 		eval_exp(node->cond);
 		if (eval_node_get_ibool_val(node->cond)) {
 			/* TRUE: run the statement list of the IF branch */
 			next_node = node->stat_list;
 		} else if (node->else_part) {
 			next_node = node->else_part;
 		} else {
 			/* Try the ELSIF branches in order and take the
 			first one whose condition evaluates to TRUE */
 			for (branch = node->elsif_list;
 			     branch != NULL;
 			     branch = que_node_get_next(branch)) {
 				eval_exp(branch->cond);
 				if (eval_node_get_ibool_val(branch->cond)) {
 					next_node = branch->stat_list;
 					break;
 				}
 			}
 		}
 	}
 	/* Nothing (more) to run inside this node: return to the parent */
 	if (next_node == NULL) {
 		next_node = que_node_get_parent(node);
 	}
 	thr->run_node = next_node;
 	return(thr);
 }
 /**************************************************************************
 Performs an execution step of a while-statement node. The loop condition
 is evaluated on every entry: if it holds, control passes to the first
 statement of the loop body, otherwise to the parent of the node. */
 UNIV_INTERN
 que_thr_t*
 while_step(
 /*=======*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr)	/* in: query thread */
 {
 	while_node_t*	node;
 	ut_ad(thr);
 	node = thr->run_node;
 	ut_ad(que_node_get_type(node) == QUE_NODE_WHILE);
 	ut_ad((thr->prev_node == que_node_get_parent(node))
 	      || (que_node_get_next(thr->prev_node) == NULL));
 	eval_exp(node->cond);
 	/* A TRUE condition starts the body from its first statement;
 	a FALSE one hands control back to the enclosing node */
 	thr->run_node = eval_node_get_ibool_val(node->cond)
 		? node->stat_list
 		: que_node_get_parent(node);
 	return(thr);
 }
 /**************************************************************************
 Performs an execution step of an assignment statement node: evaluates the
 right-hand side, copies its value to the alias of the target variable and
 continues in the parent of the node. */
 UNIV_INTERN
 que_thr_t*
 assign_step(
 /*========*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr)	/* in: query thread */
 {
 	assign_node_t*	node;
 	que_node_t*	rhs;
 	ut_ad(thr);
 	node = thr->run_node;
 	ut_ad(que_node_get_type(node) == QUE_NODE_ASSIGNMENT);
 	/* Compute the value first, then store it in the variable */
 	rhs = node->val;
 	eval_exp(rhs);
 	eval_node_copy_val(node->var->alias, rhs);
 	thr->run_node = que_node_get_parent(node);
 	return(thr);
 }
 /**************************************************************************
 Performs an execution step of a for-loop node. On entry from the parent
 the loop limits are evaluated and the loop variable starts at the lower
 limit; on return from the last body statement the loop variable is
 incremented by one. The body is run while the loop variable does not
 exceed the end value. */
 UNIV_INTERN
 que_thr_t*
 for_step(
 /*=====*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr)	/* in: query thread */
 {
 	for_node_t*	node;
 	que_node_t*	parent;
 	lint		loop_var_value;
 	ut_ad(thr);
 	node = thr->run_node;
 	ut_ad(que_node_get_type(node) == QUE_NODE_FOR);
 	parent = que_node_get_parent(node);
 	if (thr->prev_node == parent) {
 		/* First entry: evaluate both limits and remember the
 		end value in the node for the later iterations */
 		eval_exp(node->loop_start_limit);
 		eval_exp(node->loop_end_limit);
 		loop_var_value = eval_node_get_int_val(node->loop_start_limit);
 		node->loop_end_value
 			= (int) eval_node_get_int_val(node->loop_end_limit);
 	} else {
 		/* Returning from the body: continue with the statement
 		that follows the one just executed, if there is one */
 		thr->run_node = que_node_get_next(thr->prev_node);
 		if (thr->run_node != NULL) {
 			return(thr);
 		}
 		/* The body is finished: advance the loop variable */
 		loop_var_value = 1 + eval_node_get_int_val(node->loop_var);
 	}
 	if (loop_var_value <= node->loop_end_value) {
 		/* One more round: publish the new value of the loop
 		variable and restart the body */
 		eval_node_set_int_val(node->loop_var, loop_var_value);
 		thr->run_node = node->stat_list;
 		return(thr);
 	}
 	/* Enough loops done */
 	thr->run_node = parent;
 	return(thr);
 }
 /**************************************************************************
 Performs an execution step of an exit statement node: leaves the loop
 that contains the EXIT statement by continuing in the parent of that
 loop node. */
 UNIV_INTERN
 que_thr_t*
 exit_step(
 /*======*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr)	/* in: query thread */
 {
 	que_node_t*	loop_node;
 	exit_node_t*	node;
 	ut_ad(thr);
 	node = thr->run_node;
 	ut_ad(que_node_get_type(node) == QUE_NODE_EXIT);
 	/* A loop terminates by making its parent the next node to run,
 	so locate the loop that encloses this statement */
 	loop_node = que_node_get_containing_loop_node(node);
 	/* Fails if the EXIT statement is not inside any loop */
 	ut_a(loop_node);
 	thr->run_node = que_node_get_parent(loop_node);
 	return(thr);
 }
 /**************************************************************************
 Performs an execution step of a return-statement node: walks up the
 parent chain to the enclosing procedure node and continues in the parent
 of that procedure node. */
 UNIV_INTERN
 que_thr_t*
 return_step(
 /*========*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr)	/* in: query thread */
 {
 	return_node_t*	node;
 	que_node_t*	parent;
 	ut_ad(thr);
 	node = thr->run_node;
 	ut_ad(que_node_get_type(node) == QUE_NODE_RETURN);
 	parent = node;
 	while (que_node_get_type(parent) != QUE_NODE_PROC) {
 		parent = que_node_get_parent(parent);
 		/* Check each ancestor before the loop condition inspects
 		it: a RETURN statement that is not enclosed in a procedure
 		must fail this assertion instead of reading through a
 		NULL parent pointer */
 		ut_a(parent);
 	}
 	thr->run_node = que_node_get_parent(parent);
 	return(thr);
 }
--- a/fil/fil0fil.c
+++ b/fil/fil0fil.c
--- a/fsp/fsp0fsp.c
+++ b/fsp/fsp0fsp.c
--- a/fut/fut0fut.c
+++ b/fut/fut0fut.c
@@ -0,0 +1,14 @@
 /**********************************************************************
 File-based utilities
 (c) 1995 Innobase Oy
 Created 12/13/1995 Heikki Tuuri
 ***********************************************************************/
 #include "fut0fut.h"
 #ifdef UNIV_NONINL
 #include "fut0fut.ic"
 #endif
--- a/fut/fut0lst.c
+++ b/fut/fut0lst.c
@@ -0,0 +1,513 @@
 /**********************************************************************
 File-based list utilities
 (c) 1995 Innobase Oy
 Created 11/28/1995 Heikki Tuuri
 ***********************************************************************/
 #include "fut0lst.h"
 #ifdef UNIV_NONINL
 #include "fut0lst.ic"
 #endif
 #include "buf0buf.h"
 #include "page0page.h"
 /************************************************************************
 Adds a node to an empty list. The node becomes both the first and the
 last node of the list, its own prev and next links are set to
 fil_addr_null, and the length stored in the base node is written as
 len + 1. */
 static
 void
 flst_add_to_empty(
 /*==============*/
 	flst_base_node_t*	base,	/* in: pointer to base node of
 					empty list */
 	flst_node_t*		node,	/* in: node to add */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	ulint		space;
 	fil_addr_t	node_addr;
 	ulint		len;
 	ut_ad(mtr && base && node);
 	ut_ad(base != node);
 	/* Debug check: the mtr memo must contain the pages of both the
 	base node and the new node with MTR_MEMO_PAGE_X_FIX */
 	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
 	/* The list must really be empty; this is checked in all builds */
 	len = flst_get_len(base, mtr);
 	ut_a(len == 0);
 	/* Obtain the file address of the node; the space id returned in
 	'space' is not used further in this function */
 	buf_ptr_get_fsp_addr(node, &space, &node_addr);
 	/* Update first and last fields of base node */
 	flst_write_addr(base + FLST_FIRST, node_addr, mtr);
 	flst_write_addr(base + FLST_LAST, node_addr, mtr);
 	/* Set prev and next fields of node to add */
 	flst_write_addr(node + FLST_PREV, fil_addr_null, mtr);
 	flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr);
 	/* Update len of base node */
 	mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
 }
 /************************************************************************
 Adds a node as the last node in a list. An empty list is handled by
 flst_add_to_empty(); otherwise the node is inserted after the current
 last node with flst_insert_after(). */
 UNIV_INTERN
 void
 flst_add_last(
 /*==========*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node,	/* in: node to add */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	ulint		space;
 	ulint		zip_size;
 	ulint		len;
 	fil_addr_t	new_addr;
 	fil_addr_t	tail_addr;
 	flst_node_t*	tail_node;
 	ut_ad(mtr && base && node);
 	ut_ad(base != node);
 	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
 	len = flst_get_len(base, mtr);
 	tail_addr = flst_get_last(base, mtr);
 	buf_ptr_get_fsp_addr(node, &space, &new_addr);
 	if (len == 0) {
 		/* Nothing to link to: the node becomes the whole list */
 		flst_add_to_empty(base, node, mtr);
 		return;
 	}
 	/* Locate the current last node: if it lies on the page of the
 	new node, address it within that page, otherwise fetch its page
 	with an x-latch */
 	if (tail_addr.page != new_addr.page) {
 		zip_size = fil_space_get_zip_size(space);
 		tail_node = fut_get_ptr(space, zip_size, tail_addr,
 					RW_X_LATCH, mtr);
 	} else {
 		tail_node = page_align(node) + tail_addr.boffset;
 	}
 	flst_insert_after(base, tail_node, node, mtr);
 }
 /************************************************************************
 Adds a node as the first node in a list. An empty list is handled by
 flst_add_to_empty(); otherwise the node is inserted before the current
 first node with flst_insert_before(). */
 UNIV_INTERN
 void
 flst_add_first(
 /*===========*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node,	/* in: node to add */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	ulint		space;
 	ulint		zip_size;
 	ulint		len;
 	fil_addr_t	new_addr;
 	fil_addr_t	head_addr;
 	flst_node_t*	head_node;
 	ut_ad(mtr && base && node);
 	ut_ad(base != node);
 	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
 	len = flst_get_len(base, mtr);
 	head_addr = flst_get_first(base, mtr);
 	buf_ptr_get_fsp_addr(node, &space, &new_addr);
 	if (len == 0) {
 		/* Nothing to link to: the node becomes the whole list */
 		flst_add_to_empty(base, node, mtr);
 		return;
 	}
 	/* Locate the current first node: if it lies on the page of the
 	new node, address it within that page, otherwise fetch its page
 	with an x-latch */
 	if (head_addr.page != new_addr.page) {
 		zip_size = fil_space_get_zip_size(space);
 		head_node = fut_get_ptr(space, zip_size, head_addr,
 					RW_X_LATCH, mtr);
 	} else {
 		head_node = page_align(node) + head_addr.boffset;
 	}
 	flst_insert_before(base, node, head_node, mtr);
 }
 /************************************************************************
 Inserts a node after another in a list. node2 is linked between node1
 and the old successor of node1 (called node3 here). If node1 had no
 successor, the last field of the base node is pointed at node2 instead.
 The length stored in the base node is incremented by one. */
 UNIV_INTERN
 void
 flst_insert_after(
 /*==============*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node1,	/* in: node to insert after */
 	flst_node_t*		node2,	/* in: node to add */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	ulint		space;
 	fil_addr_t	node1_addr;
 	fil_addr_t	node2_addr;
 	flst_node_t*	node3;
 	fil_addr_t	node3_addr;
 	ulint		len;
 	ut_ad(mtr && node1 && node2 && base);
 	ut_ad(base != node1);
 	ut_ad(base != node2);
 	ut_ad(node2 != node1);
 	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mtr_memo_contains_page(mtr, node1, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
 	/* Both calls store into 'space'; the value left by the second
 	call (for node2) is the one used below to fetch node3.
 	NOTE(review): this presumably relies on all nodes of a list
 	residing in the same space -- confirm */
 	buf_ptr_get_fsp_addr(node1, &space, &node1_addr);
 	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
 	/* The old successor of node1 becomes the successor of node2 */
 	node3_addr = flst_get_next_addr(node1, mtr);
 	/* Set prev and next fields of node2 */
 	flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
 	flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
 	if (!fil_addr_is_null(node3_addr)) {
 		/* Update prev field of node3 */
 		ulint	zip_size = fil_space_get_zip_size(space);
 		node3 = fut_get_ptr(space, zip_size,
 				    node3_addr, RW_X_LATCH, mtr);
 		flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
 	} else {
 		/* node1 was last in list: update last field in base */
 		flst_write_addr(base + FLST_LAST, node2_addr, mtr);
 	}
 	/* Set next field of node1 */
 	flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
 	/* Update len of base node */
 	len = flst_get_len(base, mtr);
 	mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
 }
 /************************************************************************
 Inserts a node before another in a list. node2 is linked between the old
 predecessor of node3 (called node1 here) and node3. If node3 had no
 predecessor, the first field of the base node is pointed at node2
 instead. The length stored in the base node is incremented by one. */
 UNIV_INTERN
 void
 flst_insert_before(
 /*===============*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node2,	/* in: node to insert */
 	flst_node_t*		node3,	/* in: node to insert before */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	ulint		space;
 	flst_node_t*	node1;
 	fil_addr_t	node1_addr;
 	fil_addr_t	node2_addr;
 	fil_addr_t	node3_addr;
 	ulint		len;
 	ut_ad(mtr && node2 && node3 && base);
 	ut_ad(base != node2);
 	ut_ad(base != node3);
 	ut_ad(node2 != node3);
 	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mtr_memo_contains_page(mtr, node3, MTR_MEMO_PAGE_X_FIX));
 	/* Both calls store into 'space'; the value left by the second
 	call (for node3) is the one used below to fetch node1.
 	NOTE(review): this presumably relies on all nodes of a list
 	residing in the same space -- confirm */
 	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
 	buf_ptr_get_fsp_addr(node3, &space, &node3_addr);
 	/* The old predecessor of node3 becomes the predecessor of node2 */
 	node1_addr = flst_get_prev_addr(node3, mtr);
 	/* Set prev and next fields of node2 */
 	flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
 	flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
 	if (!fil_addr_is_null(node1_addr)) {
 		ulint	zip_size = fil_space_get_zip_size(space);
 		/* Update next field of node1 */
 		node1 = fut_get_ptr(space, zip_size, node1_addr,
 				    RW_X_LATCH, mtr);
 		flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
 	} else {
 		/* node3 was first in list: update first field in base */
 		flst_write_addr(base + FLST_FIRST, node2_addr, mtr);
 	}
 	/* Set prev field of node3 */
 	flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
 	/* Update len of base node */
 	len = flst_get_len(base, mtr);
 	mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
 }
 /************************************************************************
 Removes a node. The predecessor (node1) and the successor (node3) of the
 removed node2 are linked to each other; where node2 was the first or the
 last node, the corresponding field of the base node is updated instead.
 The length stored in the base node is decremented by one. The prev and
 next fields of node2 itself are not modified by this function. */
 UNIV_INTERN
 void
 flst_remove(
 /*========*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node2,	/* in: node to remove */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	ulint		space;
 	ulint		zip_size;
 	flst_node_t*	node1;
 	fil_addr_t	node1_addr;
 	fil_addr_t	node2_addr;
 	flst_node_t*	node3;
 	fil_addr_t	node3_addr;
 	ulint		len;
 	ut_ad(mtr && node2 && base);
 	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
 	/* Determine where node2 lives and read its two neighbour links */
 	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
 	zip_size = fil_space_get_zip_size(space);
 	node1_addr = flst_get_prev_addr(node2, mtr);
 	node3_addr = flst_get_next_addr(node2, mtr);
 	if (!fil_addr_is_null(node1_addr)) {
 		/* Update next field of node1 */
 		if (node1_addr.page == node2_addr.page) {
 			/* Same page as node2: address node1 within the
 			page frame of node2 */
 			node1 = page_align(node2) + node1_addr.boffset;
 		} else {
 			node1 = fut_get_ptr(space, zip_size,
 					    node1_addr, RW_X_LATCH, mtr);
 		}
 		ut_ad(node1 != node2);
 		flst_write_addr(node1 + FLST_NEXT, node3_addr, mtr);
 	} else {
 		/* node2 was first in list: update first field in base */
 		flst_write_addr(base + FLST_FIRST, node3_addr, mtr);
 	}
 	if (!fil_addr_is_null(node3_addr)) {
 		/* Update prev field of node3 */
 		if (node3_addr.page == node2_addr.page) {
 			/* Same page as node2: address node3 within the
 			page frame of node2 */
 			node3 = page_align(node2) + node3_addr.boffset;
 		} else {
 			node3 = fut_get_ptr(space, zip_size,
 					    node3_addr, RW_X_LATCH, mtr);
 		}
 		ut_ad(node2 != node3);
 		flst_write_addr(node3 + FLST_PREV, node1_addr, mtr);
 	} else {
 		/* node2 was last in list: update last field in base */
 		flst_write_addr(base + FLST_LAST, node1_addr, mtr);
 	}
 	/* Update len of base node */
 	len = flst_get_len(base, mtr);
 	ut_ad(len > 0);
 	mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr);
 }
 /************************************************************************
 Cuts off the tail of the list, including the node given. The caller must
 supply the number of nodes that get removed, because this function does
 not walk the tail to count them. */
 UNIV_INTERN
 void
 flst_cut_end(
 /*=========*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node2,	/* in: first node to remove */
 	ulint			n_nodes,/* in: number of nodes to remove,
 					must be >= 1 */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	ulint		space;
 	flst_node_t*	prev_node;
 	fil_addr_t	prev_addr;
 	fil_addr_t	cut_addr;
 	ulint		len;
 	ut_ad(mtr && node2 && base);
 	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(n_nodes > 0);
 	buf_ptr_get_fsp_addr(node2, &space, &cut_addr);
 	prev_addr = flst_get_prev_addr(node2, mtr);
 	if (fil_addr_is_null(prev_addr)) {
 		/* node2 was first in list: the list has no first node
 		any more */
 		flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
 	} else {
 		/* Find the predecessor, on the page of node2 or on a
 		page fetched with an x-latch, and end the chain there */
 		if (prev_addr.page != cut_addr.page) {
 			prev_node = fut_get_ptr(space,
 						fil_space_get_zip_size(space),
 						prev_addr, RW_X_LATCH, mtr);
 		} else {
 			prev_node = page_align(node2) + prev_addr.boffset;
 		}
 		flst_write_addr(prev_node + FLST_NEXT, fil_addr_null, mtr);
 	}
 	/* The predecessor (or the null address) is the new last node */
 	flst_write_addr(base + FLST_LAST, prev_addr, mtr);
 	/* Account for the removed nodes in the base node */
 	len = flst_get_len(base, mtr);
 	ut_ad(len >= n_nodes);
 	mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
 }
 /************************************************************************
 Cuts off the tail of the list, not including the given node, which
 becomes the new last node. The caller must supply the number of nodes
 that get removed, because this function does not walk the tail to count
 them. With n_nodes == 0 nothing is written. */
 UNIV_INTERN
 void
 flst_truncate_end(
 /*==============*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node2,	/* in: first node not to remove */
 	ulint			n_nodes,/* in: number of nodes to remove */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	fil_addr_t	new_last_addr;
 	ulint		len;
 	ulint		space;
 	ut_ad(mtr && node2 && base);
 	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
 	if (n_nodes != 0) {
 		buf_ptr_get_fsp_addr(node2, &space, &new_last_addr);
 		/* End the chain at node2 and make it the last node */
 		flst_write_addr(node2 + FLST_NEXT, fil_addr_null, mtr);
 		flst_write_addr(base + FLST_LAST, new_last_addr, mtr);
 		/* Account for the removed nodes in the base node */
 		len = flst_get_len(base, mtr);
 		ut_ad(len >= n_nodes);
 		mlog_write_ulint(base + FLST_LEN, len - n_nodes,
 				 MLOG_4BYTES, mtr);
 	} else {
 		/* Nothing to remove: node2 must already be the last node */
 		ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr)));
 	}
 }
 /************************************************************************
 Validates a file-based list: following exactly 'len' next links from the
 first node, and exactly 'len' prev links from the last node, must end at
 the null address in both directions. */
 UNIV_INTERN
 ibool
 flst_validate(
 /*==========*/
 					/* out: TRUE if ok */
 	const flst_base_node_t*	base,	/* in: pointer to base node of list */
 	mtr_t*			mtr1)	/* in: mtr */
 {
 	ulint			space;
 	ulint			zip_size;
 	const flst_node_t*	node;
 	fil_addr_t		node_addr;
 	fil_addr_t		base_addr;
 	ulint			len;
 	ulint			remaining;
 	mtr_t			mtr2;
 	ut_ad(base);
 	ut_ad(mtr_memo_contains_page(mtr1, base, MTR_MEMO_PAGE_X_FIX));
 	/* Two mini-transaction handles are in play: mtr1 holds the base
 	node locked so that other threads cannot modify the list, and
 	mtr2 is used for walking the list. mtr2 is committed after each
 	node, because on a long list the x-locked pages could otherwise
 	fill the buffer and cause a deadlock. */
 	/* Find out the space id */
 	buf_ptr_get_fsp_addr(base, &space, &base_addr);
 	zip_size = fil_space_get_zip_size(space);
 	len = flst_get_len(base, mtr1);
 	/* Forward pass along the next links */
 	node_addr = flst_get_first(base, mtr1);
 	remaining = len;
 	while (remaining > 0) {
 		mtr_start(&mtr2);
 		node = fut_get_ptr(space, zip_size,
 				   node_addr, RW_X_LATCH, &mtr2);
 		node_addr = flst_get_next_addr(node, &mtr2);
 		mtr_commit(&mtr2);
 		remaining--;
 	}
 	ut_a(fil_addr_is_null(node_addr));
 	/* Backward pass along the prev links */
 	node_addr = flst_get_last(base, mtr1);
 	remaining = len;
 	while (remaining > 0) {
 		mtr_start(&mtr2);
 		node = fut_get_ptr(space, zip_size,
 				   node_addr, RW_X_LATCH, &mtr2);
 		node_addr = flst_get_prev_addr(node, &mtr2);
 		mtr_commit(&mtr2);
 		remaining--;
 	}
 	ut_a(fil_addr_is_null(node_addr));
 	return(TRUE);
 }
 /************************************************************************
 Prints info of a file-based list to stderr: the location of the base
 node (space id, page number, byte offset within the page) and the list
 length stored in the base node. */
 UNIV_INTERN
 void
 flst_print(
 /*=======*/
 	const flst_base_node_t*	base,	/* in: pointer to base node of list */
 	mtr_t*			mtr)	/* in: mtr */
 {
 	const buf_frame_t*	frame;
 	ulint			len;
 	ulong			space_id;
 	ulong			page_no;
 	ulong			byte_offset;
 	ut_ad(base && mtr);
 	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
 	frame = page_align((byte*) base);
 	len = flst_get_len(base, mtr);
 	/* Collect the printed values first to keep the call readable */
 	space_id = (ulong) page_get_space_id(frame);
 	page_no = (ulong) page_get_page_no(frame);
 	byte_offset = (ulong) page_offset(base);
 	fprintf(stderr,
 		"FILE-BASED LIST:\n"
 		"Base node in space %lu page %lu byte offset %lu; len %lu\n",
 		space_id, page_no, byte_offset, (ulong) len);
 }
--- a/ha/ha0ha.c
+++ b/ha/ha0ha.c
@@ -0,0 +1,409 @@
 /************************************************************************
 The hash table with external chains
 (c) 1994-1997 Innobase Oy
 Created 8/22/1994 Heikki Tuuri
 *************************************************************************/
 #include "ha0ha.h"
 #ifdef UNIV_NONINL
 #include "ha0ha.ic"
 #endif
 #ifdef UNIV_DEBUG
 # include "buf0buf.h"
 #endif /* UNIV_DEBUG */
 #ifdef UNIV_SYNC_DEBUG
 # include "btr0sea.h"
 #endif /* UNIV_SYNC_DEBUG */
 #include "page0page.h"
 /*****************************************************************
 Creates a hash table with >= n array cells. The actual number of cells is
 chosen to be a prime number slightly bigger than n. With n_mutexes == 0
 the table gets a single memory heap; otherwise it gets the mutexes and
 one memory heap per mutex. */
 UNIV_INTERN
 hash_table_t*
 ha_create_func(
 /*===========*/
 				/* out, own: created table */
 	ulint	n,		/* in: number of array cells */
 #ifdef UNIV_SYNC_DEBUG
 	ulint	mutex_level,	/* in: level of the mutexes in the latching
 				order: this is used in the debug version */
 #endif /* UNIV_SYNC_DEBUG */
 	ulint	n_mutexes)	/* in: number of mutexes to protect the
 				hash table: must be a power of 2, or 0 */
 {
 	hash_table_t*	table;
 	ulint		heap_no;
 	table = hash_create(n);
 #ifdef UNIV_DEBUG
 	table->adaptive = TRUE;
 #endif /* UNIV_DEBUG */
 	/* Creation of a MEM_HEAP_BTR_SEARCH type heap can fail in
 	principle; it is not expected to fail here, hence the
 	assertions on the created heaps. */
 	if (n_mutexes != 0) {
 		hash_create_mutexes(table, n_mutexes, mutex_level);
 		table->heaps = mem_alloc(n_mutexes * sizeof(void*));
 		heap_no = 0;
 		while (heap_no < n_mutexes) {
 			table->heaps[heap_no]
 				= mem_heap_create_in_btr_search(4096);
 			ut_a(table->heaps[heap_no]);
 			heap_no++;
 		}
 	} else {
 		table->heap = mem_heap_create_in_btr_search(
 			ut_min(4096, MEM_MAX_ALLOC_IN_BUF));
 		ut_a(table->heap);
 	}
 	return(table);
 }
 /*****************************************************************
 Empties a hash table and frees the memory heaps: each of the
 table->n_mutexes heaps in table->heaps is freed and the node pointer of
 every cell is reset to NULL. */
 UNIV_INTERN
 void
 ha_clear(
 /*=====*/
 	hash_table_t*	table)	/* in, own: hash table */
 {
 	ulint	i;
 	ulint	n_heaps;
 	ulint	n_cells;
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
 #endif /* UNIV_SYNC_DEBUG */
 	/* Release the heaps that hold the chain nodes */
 	n_heaps = table->n_mutexes;
 	i = 0;
 	while (i < n_heaps) {
 		mem_heap_free(table->heaps[i]);
 		i++;
 	}
 	/* Detach the chains from all cells */
 	n_cells = hash_get_n_cells(table);
 	i = 0;
 	while (i < n_cells) {
 		hash_get_nth_cell(table, i)->node = NULL;
 		i++;
 	}
 }
 /*****************************************************************
 Inserts an entry into a hash table. If the chain already holds a node
 with the same fold number, that node is redirected to the new data and
 no node is allocated; otherwise a new node is appended to the end of
 the chain. */
 UNIV_INTERN
 ibool
 ha_insert_for_fold_func(
 /*====================*/
 				/* out: TRUE if succeed, FALSE if no more
 				memory could be allocated */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: folded value of data; if a node with
 				the same fold value already exists, it is
 				updated to point to the same data, and no new
 				node is created! */
 #ifdef UNIV_DEBUG
 	buf_block_t*	block,	/* in: buffer block containing the data */
 #endif /* UNIV_DEBUG */
 	void*		data)	/* in: data, must not be NULL */
 {
 	hash_cell_t*	cell;
 	ha_node_t*	node;
 	ha_node_t*	prev_node;
 	ut_ad(table && data);
 	ut_ad(block->frame == page_align(data));
 	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
 	cell = hash_get_nth_cell(table, hash_calc_hash(fold, table));
 	/* First pass: look for an existing node with this fold value */
 	for (prev_node = cell->node;
 	     prev_node != NULL;
 	     prev_node = prev_node->next) {
 		if (prev_node->fold != fold) {
 			continue;
 		}
 #ifdef UNIV_DEBUG
 		if (table->adaptive) {
 			/* Move one pointer count from the block of the
 			old data to the block of the new data */
 			buf_block_t* prev_block = prev_node->block;
 			ut_a(prev_block->frame
 			     == page_align(prev_node->data));
 			ut_a(prev_block->n_pointers > 0);
 			prev_block->n_pointers--;
 			block->n_pointers++;
 		}
 		prev_node->block = block;
 #endif /* UNIV_DEBUG */
 		prev_node->data = data;
 		return(TRUE);
 	}
 	/* No match: a new chain node is needed */
 	node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));
 	if (node == NULL) {
 		/* The heap was of the btr search type and could not
 		provide more memory at this moment: give up */
 		ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);
 		return(FALSE);
 	}
 	ha_node_set_data(node, block, data);
 #ifdef UNIV_DEBUG
 	if (table->adaptive) {
 		block->n_pointers++;
 	}
 #endif /* UNIV_DEBUG */
 	node->fold = fold;
 	node->next = NULL;
 	if (cell->node == NULL) {
 		/* Empty chain: the new node starts it */
 		cell->node = node;
 		return(TRUE);
 	}
 	/* Second pass: walk to the tail of the chain and append */
 	for (prev_node = cell->node;
 	     prev_node->next != NULL;
 	     prev_node = prev_node->next) {
 		/* no-op: only advancing to the last node */
 	}
 	prev_node->next = node;
 	return(TRUE);
 }
 /***************************************************************
 Deletes a hash node. In UNIV_DEBUG builds, for a table marked adaptive,
 the node is first checked against its block and the n_pointers counter
 of that block is decremented. The node is then removed from its chain
 with HASH_DELETE_AND_COMPACT. */
 UNIV_INTERN
 void
 ha_delete_hash_node(
 /*================*/
 	hash_table_t*	table,		/* in: hash table */
 	ha_node_t*	del_node)	/* in: node to be deleted */
 {
 #ifdef UNIV_DEBUG
 	if (table->adaptive) {
 		/* This has to be a comparison: an assignment here would
 		overwrite block->frame in debug builds and would let the
 		assertion pass for any non-NULL frame address */
 		ut_a(del_node->block->frame == page_align(del_node->data));
 		ut_a(del_node->block->n_pointers > 0);
 		del_node->block->n_pointers--;
 	}
 #endif /* UNIV_DEBUG */
 	HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
 }
 /*****************************************************************
 Deletes an entry from a hash table. The entry is looked up by fold value
 and data pointer; it is an error if no such entry exists. */
 UNIV_INTERN
 void
 ha_delete(
 /*======*/
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: folded value of data */
 	void*		data)	/* in: data, must not be NULL and must exist
 				in the hash table */
 {
 	ha_node_t*	node;
 	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
 	/* Locate the node that refers to exactly this data pointer */
 	node = ha_search_with_data(table, fold, data);
 	/* The caller guarantees that the entry is present */
 	ut_a(node);
 	ha_delete_hash_node(table, node);
 }
 /*************************************************************
 Looks for an element when we know the pointer to the data, and updates
 the pointer to data, if found. If no node with the given fold value and
 data pointer exists, nothing is changed. */
 UNIV_INTERN
 void
 ha_search_and_update_if_found_func(
 /*===============================*/
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: folded value of the searched data */
 	void*		data,	/* in: pointer to the data */
 #ifdef UNIV_DEBUG
 	buf_block_t*	new_block,/* in: block containing new_data */
 #endif
 	void*		new_data)/* in: new pointer to the data */
 {
 	ha_node_t*	node;
 	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
 	ut_ad(new_block->frame == page_align(new_data));
 	node = ha_search_with_data(table, fold, data);
 	if (node == NULL) {
 		/* Not found: nothing to update */
 		return;
 	}
 #ifdef UNIV_DEBUG
 	if (table->adaptive) {
 		/* Move one pointer count from the old block to the
 		new block */
 		ut_a(node->block->n_pointers > 0);
 		node->block->n_pointers--;
 		new_block->n_pointers++;
 	}
 	node->block = new_block;
 #endif /* UNIV_DEBUG */
 	node->data = new_data;
 }
 /*********************************************************************
 Removes from the chain determined by fold all nodes whose data pointer
 points to the page given. */
 UNIV_INTERN
 void
 ha_remove_all_nodes_to_page(
 /*========================*/
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: fold value */
 	const page_t*	page)	/* in: buffer page */
 {
 	ha_node_t*	node;
 	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
 	node = ha_chain_get_first(table, fold);
 	while (node != NULL) {
 		if (page_align(ha_node_get_data(node)) != page) {
 			/* Data on some other page: keep the node */
 			node = ha_chain_get_next(node);
 			continue;
 		}
 		ha_delete_hash_node(table, node);
 		/* The deletion may compact the heap of nodes and move
 		other nodes, so the scan is restarted from the first
 		node of the chain */
 		node = ha_chain_get_first(table, fold);
 	}
 #ifdef UNIV_DEBUG
 	/* Verify that no node pointing to the page is left */
 	for (node = ha_chain_get_first(table, fold);
 	     node != NULL;
 	     node = ha_chain_get_next(node)) {
 		ut_a(page_align(ha_node_get_data(node)) != page);
 	}
 #endif
 }
 /*****************************************************************
 Validates a given range of the cells in hash table: the fold value of
 every node must hash to the number of the cell whose chain holds the
 node. Each mismatch is reported to stderr. */
 UNIV_INTERN
 ibool
 ha_validate(
 /*========*/
 					/* out: TRUE if ok */
 	hash_table_t*	table,		/* in: hash table */
 	ulint		start_index,	/* in: start index */
 	ulint		end_index)	/* in: end index */
 {
 	ha_node_t*	node;
 	ibool		ok	= TRUE;
 	ulint		i;
 	ut_a(start_index <= end_index);
 	ut_a(start_index < hash_get_n_cells(table));
 	ut_a(end_index < hash_get_n_cells(table));
 	for (i = start_index; i <= end_index; i++) {
 		/* Walk the chain that hangs off cell i */
 		for (node = hash_get_nth_cell(table, i)->node;
 		     node != NULL;
 		     node = node->next) {
 			if (hash_calc_hash(node->fold, table) == i) {
 				continue;
 			}
 			ut_print_timestamp(stderr);
 			fprintf(stderr,
 				"InnoDB: Error: hash table node"
 				" fold value %lu does not\n"
 				"InnoDB: match the cell number %lu.\n",
 				(ulong) node->fold, (ulong) i);
 			ok = FALSE;
 		}
 	}
 	return(ok);
 }
 /*****************************************************************
 Prints info of a hash table: the number of cells, the number of cells
 that have a chain and, for a table that has a single heap and no heaps
 array, the number of buffers in the node heap. */
 UNIV_INTERN
 void
 ha_print_info(
 /*==========*/
 	FILE*		file,	/* in: file where to print */
 	hash_table_t*	table)	/* in: hash table */
 {
 	ulint		n_cells;
 	ulint		cells	= 0;
 	ulint		n_bufs;
 	ulint		i;
 	n_cells = hash_get_n_cells(table);
 	/* Count the cells that have at least one node */
 	for (i = 0; i < n_cells; i++) {
 		if (hash_get_nth_cell(table, i)->node) {
 			cells++;
 		}
 	}
 	fprintf(file,
 		"Hash table size %lu, used cells %lu",
 		(ulong) n_cells, (ulong) cells);
 	if (table->heaps != NULL || table->heap == NULL) {
 		return;
 	}
 	/* This calculation is intended for the adaptive hash index:
 	the number of buffer frames reserved by the heap */
 	n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
 	if (table->heap->free_block) {
 		n_bufs++;
 	}
 	fprintf(file, ", node heap has %lu buffer(s)\n",
 		(ulong) n_bufs);
 }
--- a/ha/ha0storage.c
+++ b/ha/ha0storage.c
@@ -0,0 +1,166 @@
 /******************************************************
 Hash storage.
 Provides a data structure that stores chunks of data in
 its own storage, avoiding duplicates.
 (c) 2007 Innobase Oy
 Created September 22, 2007 Vasil Dimov
 *******************************************************/
 #include "univ.i"
 #include "ha0storage.h"
 #include "hash0hash.h"
 #include "mem0mem.h"
 #include "ut0rnd.h"
 #ifdef UNIV_NONINL
 #include "ha0storage.ic"
 #endif
 /***********************************************************************
 Looks up a chunk of data in a storage. If a chunk with the same length
 and the same bytes is already stored, a pointer to the stored copy is
 returned, otherwise NULL is returned. */
 static
 const void*
 ha_storage_get(
 /*===========*/
 					/* out: stored copy or NULL */
 	ha_storage_t*	storage,	/* in: hash storage */
 	const void*	data,		/* in: data to check for */
 	ulint		data_len)	/* in: data length */
 {
 	ha_storage_node_t*	found;
 	ulint			key;
 	/* fold the data once up front, so that the HASH_SEARCH macro is
 	handed a plain value rather than a ut_fold_binary() call */
 	key = ut_fold_binary(data, data_len);
 	HASH_SEARCH(
 		next,			/* found->"next" */
 		storage->hash,		/* the hash table */
 		key,			/* key */
 		ha_storage_node_t*,	/* type of found->next */
 		found,			/* auxiliary variable */
 		found->data_len == data_len
 		&& memcmp(found->data, data, data_len) == 0);
 	return(found != NULL ? found->data : NULL);
 }
 /***********************************************************************
 Stores a copy of a data chunk and returns a pointer to the copy. If an
 equal chunk is already stored, the pointer to that existing copy is
 returned and nothing is allocated. Two chunks are equal if
 len1 == len2 and memcmp(data1, data2, len1) == 0. If the chunk is not
 yet stored and adding data_len bytes would make the size of the storage
 exceed "memlim", the chunk is not added and NULL is returned. Passing
 memlim == 0 means "no limit". */
 UNIV_INTERN
 const void*
 ha_storage_put_memlim(
 /*==================*/
 					/* out: pointer to the stored copy,
 					or NULL if memlim would be exceeded */
 	ha_storage_t*	storage,	/* in/out: hash storage */
 	const void*	data,		/* in: data to store */
 	ulint		data_len,	/* in: data length */
 	ulint		memlim)		/* in: memory limit to obey */
 {
 	ha_storage_node_t*	new_node;
 	byte*			payload;
 	const void*		existing;
 	ulint			key;
 	/* an equal chunk that is already stored is simply reused */
 	existing = ha_storage_get(storage, data, data_len);
 	if (existing != NULL) {
 		return(existing);
 	}
 	/* a new chunk is needed: refuse it if it would break the limit */
 	if (memlim > 0
 	    && ha_storage_get_size(storage) + data_len > memlim) {
 		return(NULL);
 	}
 	/* one allocation holds the node struct immediately followed by
 	the bytes of the chunk */
 	new_node = (ha_storage_node_t*) mem_heap_alloc(
 		storage->heap, sizeof(ha_storage_node_t) + data_len);
 	payload = (byte*) new_node + sizeof(*new_node);
 	memcpy(payload, data, data_len);
 	new_node->data = payload;
 	new_node->data_len = data_len;
 	/* fold the data once up front, so that the HASH_INSERT macro is
 	handed a plain value rather than a ut_fold_binary() call */
 	key = ut_fold_binary(data, data_len);
 	HASH_INSERT(
 		ha_storage_node_t,	/* type used in the hash chain */
 		next,			/* new_node->"next" */
 		storage->hash,		/* the hash table */
 		key,			/* key */
 		new_node);		/* add this data to the hash */
 	/* the caller must not modify the returned bytes: they are what
 	the hash table entry was computed from */
 	return(payload);
 }
 #ifdef UNIV_COMPILE_TEST_FUNCS
 /***********************************************************************
 Self-test of the hash storage, compiled only when
 UNIV_COMPILE_TEST_FUNCS is defined. Stores 256 distinct 1024-byte chunks,
 then stores the same chunks again and checks that every second put
 returns the pointer obtained from the first one. The outcome is printed
 to stderr. The storage is freed on both the success and failure path. */
 void
 test_ha_storage()
 {
 	ha_storage_t*	storage;
 	char		buf[1024];
 	int		i;
 	const void*	stored[256];
 	const void*	p;
 	storage = ha_storage_create(0, 0);
 	/* first pass: chunk number i is filled with the byte value i */
 	for (i = 0; i < 256; i++) {
 		memset(buf, i, sizeof(buf));
 		stored[i] = ha_storage_put(storage, buf, sizeof(buf));
 	}
 	/* second pass, in reverse order: every chunk is already present,
 	so each put has to hand back the pointer from the first pass */
 	for (i = 255; i >= 0; i--) {
 		memset(buf, i, sizeof(buf));
 		p = ha_storage_put(storage, buf, sizeof(buf));
 		if (p != stored[i]) {
 			fprintf(stderr, "ha_storage_put() returned %p "
 				"instead of %p, i=%d\n", p, stored[i], i);
 			break;
 		}
 	}
 	/* i went below zero only if the loop above was not left early */
 	if (i < 0) {
 		fprintf(stderr, "all ok\n");
 	}
 	ha_storage_free(storage);
 }
 #endif /* UNIV_COMPILE_TEST_FUNCS */
--- a/ha/hash0hash.c
+++ b/ha/hash0hash.c
@@ -0,0 +1,149 @@
 /******************************************************
 The simple hash table utility
 (c) 1997 Innobase Oy
 Created 5/20/1997 Heikki Tuuri
 *******************************************************/
 #include "hash0hash.h"
 #ifdef UNIV_NONINL
 #include "hash0hash.ic"
 #endif
 #include "mem0mem.h"
 /****************************************************************
 Reserves the mutex for a fold value in a hash table: enters the mutex
 that hash_get_mutex() returns for the given table and fold. The
 counterpart that releases it is hash_mutex_exit(). */
 UNIV_INTERN
 void
 hash_mutex_enter(
 /*=============*/
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold)	/* in: fold */
 {
 	mutex_enter(hash_get_mutex(table, fold));
 }
 /****************************************************************
 Releases the mutex for a fold value in a hash table: exits the mutex
 that hash_get_mutex() returns for the given table and fold. The
 counterpart that reserves it is hash_mutex_enter(). */
 UNIV_INTERN
 void
 hash_mutex_exit(
 /*============*/
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold)	/* in: fold */
 {
 	mutex_exit(hash_get_mutex(table, fold));
 }
 /****************************************************************
 Reserves every mutex in the mutex array of a hash table, going through
 the array in ascending index order. Does nothing if n_mutexes is 0. */
 UNIV_INTERN
 void
 hash_mutex_enter_all(
 /*=================*/
 	hash_table_t*	table)	/* in: hash table */
 {
 	ulint	pos	= 0;
 	while (pos < table->n_mutexes) {
 		mutex_enter(&table->mutexes[pos]);
 		pos++;
 	}
 }
 /****************************************************************
 Releases every mutex in the mutex array of a hash table, going through
 the array in ascending index order. Does nothing if n_mutexes is 0. */
 UNIV_INTERN
 void
 hash_mutex_exit_all(
 /*================*/
 	hash_table_t*	table)	/* in: hash table */
 {
 	ulint	pos	= 0;
 	while (pos < table->n_mutexes) {
 		mutex_exit(&table->mutexes[pos]);
 		pos++;
 	}
 }
 /*****************************************************************
 Creates a hash table with >= n array cells. The actual number of cells
 is the value that ut_find_prime(n) returns: a prime number slightly
 bigger than n. The new table has no mutexes and no heaps, and its cell
 array is cleared. */
 UNIV_INTERN
 hash_table_t*
 hash_create(
 /*========*/
 			/* out, own: created table */
 	ulint	n)	/* in: number of array cells */
 {
 	hash_table_t*	new_table;
 	ulint		n_cells;
 	n_cells = ut_find_prime(n);
 	new_table = mem_alloc(sizeof(*new_table));
 	new_table->array = ut_malloc(n_cells * sizeof(hash_cell_t));
 	new_table->n_cells = n_cells;
 	new_table->magic_n = HASH_TABLE_MAGIC_N;
 #ifdef UNIV_DEBUG
 	new_table->adaptive = FALSE;
 #endif /* UNIV_DEBUG */
 	/* a fresh table has neither a mutex array nor any heap */
 	new_table->mutexes = NULL;
 	new_table->n_mutexes = 0;
 	new_table->heap = NULL;
 	new_table->heaps = NULL;
 	/* Initialize the cell array; this has to come after array and
 	n_cells have been set */
 	hash_table_clear(new_table);
 	return(new_table);
 }
 /*****************************************************************
 Frees a hash table: releases the cell array with ut_free() and then the
 table struct itself with mem_free(). The table must not have a mutex
 array (table->mutexes == NULL); this is enforced with ut_a(). */
 UNIV_INTERN
 void
 hash_table_free(
 /*============*/
 	hash_table_t*	table)	/* in, own: hash table */
 {
 	ut_a(table->mutexes == NULL);
 	/* the array has to go first: it is reached through the table */
 	ut_free(table->array);
 	mem_free(table);
 }
 /*****************************************************************
 Creates a mutex array to protect a hash table: allocates n_mutexes
 mutexes with mem_alloc(), creates each of them with mutex_create() and
 stores the array and its length in the table. n_mutexes must be a
 nonzero power of 2; both conditions are enforced with ut_a(). */
 UNIV_INTERN
 void
 hash_create_mutexes_func(
 /*=====================*/
 	hash_table_t*	table,		/* in: hash table */
 #ifdef UNIV_SYNC_DEBUG
 	ulint		sync_level,	/* in: latching order level of the
 					mutexes: used in the debug version */
 #endif /* UNIV_SYNC_DEBUG */
 	ulint		n_mutexes)	/* in: number of mutexes, must be a
 					power of 2 */
 {
 	ulint	i;
 	ut_a(n_mutexes > 0);
 	ut_a(ut_is_2pow(n_mutexes));
 	table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
 	for (i = 0; i < n_mutexes; i++) {
 		/* NOTE(review): sync_level is named here unconditionally
 		although the parameter exists only under UNIV_SYNC_DEBUG;
 		presumably mutex_create() is a macro that drops the
 		argument in other builds - confirm in sync0sync.h */
 		mutex_create(table->mutexes + i, sync_level);
 	}
 	/* the length is published only after all mutexes are created */
 	table->n_mutexes = n_mutexes;
 }
--- a/handler/ha_innodb.cc
+++ b/handler/ha_innodb.cc
--- a/handler/ha_innodb.h
+++ b/handler/ha_innodb.h
@@ -0,0 +1,267 @@
 /* Copyright (C) 2000-2005 MySQL AB && Innobase Oy
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307	 USA */
 /*
  This file is based on ha_berkeley.h of MySQL distribution
  This file defines the Innodb handler: the interface between MySQL and
  Innodb
 */
 #ifdef USE_PRAGMA_INTERFACE
 #pragma interface			/* gcc class implementation */
 #endif
 /* Structure bundling a THR_LOCK, a pthread mutex, a table name with its
 length, and a use count.
 NOTE(review): presumably one instance is shared by all handler objects
 that are open on the same table - confirm in ha_innodb.cc */
 typedef struct st_innobase_share {
  THR_LOCK lock;
  pthread_mutex_t mutex;
  char *table_name;
  uint table_name_length,use_count;
 } INNOBASE_SHARE;
 /* Forward declarations of two InnoDB structs: this header only uses
 pointers to them, so the full definitions are not needed here */
 struct dict_index_struct;
 struct row_prebuilt_struct;
 typedef struct dict_index_struct dict_index_t;
 typedef struct row_prebuilt_struct row_prebuilt_t;
 /* The class defining a handle to an Innodb table. It derives from the
 MySQL handler class. Everything declared before the public: label below
 is private, because that is the default access in a class. */
 class ha_innobase: public handler
 {
 	row_prebuilt_t*	prebuilt;	/* prebuilt struct in InnoDB, used
 					to save CPU time with prebuilt data
 					structures*/
 	THD*		user_thd;	/* the thread handle of the user
 					currently using the handle; this is
 					set in external_lock function */
 	THR_LOCK_DATA	lock;
 	INNOBASE_SHARE	*share;
 	uchar*		upd_buff;	/* buffer used in updates */
 	uchar*		key_val_buff;	/* buffer used in converting
 					search key values from MySQL format
 					to Innodb format */
 	ulong		upd_and_key_val_buff_len;
 					/* the length of each of the previous
 					two buffers */
 	Table_flags	int_table_flags;
 	uint		primary_key;
 	ulong		start_of_scan;	/* this is set to 1 when we are
 					starting a table scan but have not
 					yet fetched any row, else 0 */
 	uint		last_match_mode;/* match mode of the latest search:
 					ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
 					or undefined */
 	uint		num_write_row;	/* number of write_row() calls */
 	/* Private member functions */
 	uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
                                    const uchar* record);
 	inline void update_thd(THD* thd);
 	void update_thd();
 	int change_active_index(uint keynr);
 	int general_fetch(uchar* buf, uint direction, uint match_mode);
 	int innobase_read_and_init_auto_inc(ulonglong* ret);
 	ulong innobase_autoinc_lock();
 	ulong innobase_set_max_autoinc(ulonglong auto_inc);
 	ulong innobase_reset_autoinc(ulonglong auto_inc);
 	ulong innobase_get_auto_increment(ulonglong* value);
 	dict_index_t* innobase_get_index(uint keynr);
 	/* Init values for the class: */
 public:
 	ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
 	~ha_innobase();
 	/*
 	  Get the row type from the storage engine.  If this method returns
 	  ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
 	*/
 	enum row_type get_row_type() const;
 	const char* table_type() const;
 	const char* index_type(uint key_number);
 	const char** bas_ext() const;
 	Table_flags table_flags() const;
 	ulong index_flags(uint idx, uint part, bool all_parts) const;
 	uint max_supported_keys() const;
 	uint max_supported_key_length() const;
 	uint max_supported_key_part_length() const;
 	const key_map* keys_to_use_for_scanning();
 	int open(const char *name, int mode, uint test_if_locked);
 	int close(void);
 	double scan_time();
 	double read_time(uint index, uint ranges, ha_rows rows);
 	int write_row(uchar * buf);
 	int update_row(const uchar * old_data, uchar * new_data);
 	int delete_row(const uchar * buf);
 	bool was_semi_consistent_read();
 	void try_semi_consistent_read(bool yes);
 	void unlock_row();
 #ifdef ROW_MERGE_IS_INDEX_USABLE
 	/** Check if an index can be used by this transaction.
 	* @param keynr	key number to check
 	* @return	true if available, false if the index
 	*		does not contain old records that exist
 	*		in the read view of this transaction */
 	bool is_index_available(uint keynr);
 #endif /* ROW_MERGE_IS_INDEX_USABLE */
 	int index_init(uint index, bool sorted);
 	int index_end();
 	int index_read(uchar * buf, const uchar * key,
 		uint key_len, enum ha_rkey_function find_flag);
 	int index_read_idx(uchar * buf, uint index, const uchar * key,
 			   uint key_len, enum ha_rkey_function find_flag);
 	int index_read_last(uchar * buf, const uchar * key, uint key_len);
 	int index_next(uchar * buf);
 	int index_next_same(uchar * buf, const uchar *key, uint keylen);
 	int index_prev(uchar * buf);
 	int index_first(uchar * buf);
 	int index_last(uchar * buf);
 	int rnd_init(bool scan);
 	int rnd_end();
 	int rnd_next(uchar *buf);
 	int rnd_pos(uchar * buf, uchar *pos);
 	void position(const uchar *record);
 	int info(uint);
 	int analyze(THD* thd,HA_CHECK_OPT* check_opt);
 	int optimize(THD* thd,HA_CHECK_OPT* check_opt);
 	int discard_or_import_tablespace(my_bool discard);
 	int extra(enum ha_extra_function operation);
        int reset();
 	int external_lock(THD *thd, int lock_type);
 	int transactional_table_lock(THD *thd, int lock_type);
 	int start_stmt(THD *thd, thr_lock_type lock_type);
 	/* NOTE(review): position() is also declared further up with a
 	const uchar* parameter, so two overloads exist - confirm that
 	both are intended */
 	void position(uchar *record);
 	ha_rows records_in_range(uint inx, key_range *min_key, key_range
 								*max_key);
 	ha_rows estimate_rows_upper_bound();
 	void update_create_info(HA_CREATE_INFO* create_info);
 	int create(const char *name, register TABLE *form,
 					HA_CREATE_INFO *create_info);
 	int delete_all_rows();
 	int delete_table(const char *name);
 	int rename_table(const char* from, const char* to);
 	int check(THD* thd, HA_CHECK_OPT* check_opt);
 	char* update_table_comment(const char* comment);
 	char* get_foreign_key_create_info();
 	int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
 	bool can_switch_engines();
 	uint referenced_by_foreign_key();
 	void free_foreign_key_create_info(char* str);
 	THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
 					enum thr_lock_type lock_type);
 	void init_table_handle_for_HANDLER();
        virtual void get_auto_increment(ulonglong offset, ulonglong increment,
                                        ulonglong nb_desired_values,
                                        ulonglong *first_value,
                                        ulonglong *nb_reserved_values);
 	int reset_auto_increment(ulonglong value);
 	virtual bool get_error_message(int error, String *buf);
 	uint8 table_cache_type();
 	/*
 	  ask handler about permission to cache table during query registration
 	*/
 	my_bool register_query_cache_table(THD *thd, char *table_key,
 					   uint key_length,
 					   qc_engine_callback *call_back,
 					   ulonglong *engine_data);
 	static char *get_mysql_bin_log_name();
 	static ulonglong get_mysql_bin_log_pos();
 	bool primary_key_is_clustered();
 	int cmp_ref(const uchar *ref1, const uchar *ref2);
 	/** Fast index creation (smart ALTER TABLE) @see handler0alter.cc @{ */
 	int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
 	int prepare_drop_index(TABLE *table_arg, uint *key_num,
 			       uint num_of_keys);
 	int final_drop_index(TABLE *table_arg);
 	/** @} */
 	bool check_if_incompatible_data(HA_CREATE_INFO *info,
 					uint table_changes);
 };
 /* Some accessor functions which the InnoDB plugin needs, but which
 can not be added to mysql/plugin.h as part of the public interface;
 the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */
 #ifndef INNODB_COMPATIBILITY_HOOKS
 #error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS
 #endif
 extern "C" {
 /* NOTE(review): presumably returns the character set in use by the given
 user thread - confirm against the definition in the MySQL server */
 struct charset_info_st *thd_charset(MYSQL_THD thd);
 /* NOTE(review): returns a char** for the given user thread; presumably
 the address of the thread's current query string - confirm against the
 definition in the MySQL server */
 char **thd_query(MYSQL_THD thd);
 /** Get the file name of the MySQL binlog.
 * @return the name of the binlog file
 */
 const char* mysql_bin_log_file_name(void);
 /** Get the current position of the MySQL binlog.
 * @return byte offset from the beginning of the binlog
 */
 ulonglong mysql_bin_log_file_pos(void);
 /**
  Check if a user thread is a replication slave thread
  @param thd  user thread
  @retval 0 the user thread is not a replication slave thread
  @retval 1 the user thread is a replication slave thread
 */
 int thd_slave_thread(const MYSQL_THD thd);
 /**
  Check if a user thread is running a non-transactional update
  @param thd  user thread
  @retval 0 the user thread is not running a non-transactional update
  @retval 1 the user thread is running a non-transactional update
 */
 int thd_non_transactional_update(const MYSQL_THD thd);
 /**
  Get the user thread's binary logging format
  @param thd  user thread
  @return Value to be used as index into the binlog_format_names array
 */
 int thd_binlog_format(const MYSQL_THD thd);
 /**
  Mark transaction to rollback and mark error as fatal to a sub-statement.
  @param  thd   Thread handle
  @param  all   TRUE <=> rollback main transaction.
 */
 void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
 }
 typedef struct trx_struct trx_t;
 /************************************************************************
 Converts an InnoDB error code to a MySQL error code and also tells MySQL
 about a possible transaction rollback inside InnoDB caused by a lock wait
 timeout or a deadlock. */
 extern "C"
 int
 convert_error_code_to_mysql(
 /*========================*/
 				/* out: MySQL error code */
 	int		error,	/* in: InnoDB error code */
 	ulint		flags,	/* in: InnoDB table flags, or 0 */
 	MYSQL_THD	thd);	/* in: user thread handle or NULL */
--- a/handler/handler0alter.cc
+++ b/handler/handler0alter.cc
--- a/handler/i_s.cc
+++ b/handler/i_s.cc
--- a/handler/i_s.h
+++ b/handler/i_s.h
@@ -0,0 +1,20 @@
 /******************************************************
 InnoDB INFORMATION SCHEMA tables interface to MySQL.
 (c) 2007 Innobase Oy
 Created July 18, 2007 Vasil Dimov
 *******************************************************/
 #ifndef i_s_h
 #define i_s_h
 /* Plugin descriptors of the InnoDB INFORMATION SCHEMA tables. They are
 only declared here; the definitions live outside this header. */
 extern struct st_mysql_plugin	i_s_innodb_trx;
 extern struct st_mysql_plugin	i_s_innodb_locks;
 extern struct st_mysql_plugin	i_s_innodb_lock_waits;
 extern struct st_mysql_plugin	i_s_innodb_cmp;
 extern struct st_mysql_plugin	i_s_innodb_cmp_reset;
 extern struct st_mysql_plugin	i_s_innodb_cmpmem;
 extern struct st_mysql_plugin	i_s_innodb_cmpmem_reset;
 #endif /* i_s_h */
--- a/handler/mysql_addons.cc
+++ b/handler/mysql_addons.cc
@@ -0,0 +1,38 @@
 /******************************************************
 This file contains functions that need to be added to
 MySQL code but have not been added yet.
 Whenever you add a function here submit a MySQL bug
 report (feature request) with the implementation. Then
 write the bug number in the comment before the
 function in this file.
 When MySQL commits the function it can be deleted from
 here. In a perfect world this file exists but is empty.
 (c) 2007 Innobase Oy
 Created November 07, 2007 Vasil Dimov
 *******************************************************/
 #ifndef MYSQL_SERVER
 #define MYSQL_SERVER
 #endif /* MYSQL_SERVER */
 #include <mysql_priv.h>
 #include "mysql_addons.h"
 #include "univ.i"
 /***********************************************************************
 Returns the thread_id member of a THD object that the caller hands over
 as an untyped pointer.
 http://bugs.mysql.com/30930 */
 extern "C" UNIV_INTERN
 unsigned long
 ib_thd_get_thread_id(
 /*=================*/
 				/* out: THD::thread_id */
 	const void*	thd)	/* in: THD */
 {
 	const THD*	mysql_thd = static_cast<const THD*>(thd);
 	return(static_cast<unsigned long>(mysql_thd->thread_id));
 }
--- a/ibuf/ibuf0ibuf.c
+++ b/ibuf/ibuf0ibuf.c
--- a/include/btr0btr.h
+++ b/include/btr0btr.h
@@ -0,0 +1,479 @@
 /******************************************************
 The B-tree
 (c) 1994-1996 Innobase Oy
 Created 6/2/1994 Heikki Tuuri
 *******************************************************/
 #ifndef btr0btr_h
 #define btr0btr_h
 #include "univ.i"
 #include "dict0dict.h"
 #include "data0data.h"
 #include "page0cur.h"
 #include "rem0rec.h"
 #include "mtr0mtr.h"
 #include "btr0types.h"
 /* Maximum record size which can be stored on a page, without using the
 special big record storage structure */
 #define	BTR_PAGE_MAX_REC_SIZE	(UNIV_PAGE_SIZE / 2 - 200)
 /* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as
 such; none of the tree operations avoid producing trees bigger than this. It
 is instead a "max depth that other code must work with", useful for e.g.
 fixed-size arrays that must store some information about each level in a
 tree. In other words: if a B-tree with bigger depth than this is
 encountered, it is not acceptable for it to lead to mysterious memory
 corruption, but it is acceptable for the program to die with a clear assert
 failure. */
 #define BTR_MAX_LEVELS		100
 /* Latching modes for btr_cur_search_to_nth_level(). The first three are
 aliases of the RW_* latch constants; the remaining four are plain numeric
 codes.
 NOTE(review): the numeric codes presumably must neither collide with the
 RW_* values nor reach the flag bits that get ORed into the latch mode
 (BTR_INSERT = 512 and up) - confirm before adding a new mode. */
 #define BTR_SEARCH_LEAF		RW_S_LATCH
 #define BTR_MODIFY_LEAF		RW_X_LATCH
 #define BTR_NO_LATCHES		RW_NO_LATCH
 #define	BTR_MODIFY_TREE		33
 #define	BTR_CONT_MODIFY_TREE	34
 #define	BTR_SEARCH_PREV		35
 #define	BTR_MODIFY_PREV		36
 /* If this is ORed to the latch mode, it means that the search tuple will be
 inserted to the index, at the searched position */
 #define BTR_INSERT		512
 /* This flag ORed to latch mode says that we do the search in query
 optimization */
 #define BTR_ESTIMATE		1024
 /* This flag ORed to latch mode says that we can ignore possible
 UNIQUE definition on secondary indexes when we decide if we can use the
 insert buffer to speed up inserts */
 #define BTR_IGNORE_SEC_UNIQUE	2048
 /******************************************************************
 Gets the root node of a tree and x-latches it. */
 UNIV_INTERN
 page_t*
 btr_root_get(
 /*=========*/
 				/* out: root page, x-latched */
 	dict_index_t*	index,	/* in: index tree */
 	mtr_t*		mtr);	/* in: mtr */
 /******************************************************************
 Gets a buffer page and declares its latching order level. */
 UNIV_INLINE
 buf_block_t*
 btr_block_get(
 /*==========*/
 	ulint	space,		/* in: space id */
 	ulint	zip_size,	/* in: compressed page size in bytes
 				or 0 for uncompressed pages */
 	ulint	page_no,	/* in: page number */
 	ulint	mode,		/* in: latch mode */
 	mtr_t*	mtr);		/* in: mtr */
 /******************************************************************
 Gets a buffer page and declares its latching order level. */
 UNIV_INLINE
 page_t*
 btr_page_get(
 /*=========*/
 	ulint	space,		/* in: space id */
 	ulint	zip_size,	/* in: compressed page size in bytes
 				or 0 for uncompressed pages */
 	ulint	page_no,	/* in: page number */
 	ulint	mode,		/* in: latch mode */
 	mtr_t*	mtr);		/* in: mtr */
 /******************************************************************
 Gets the index id field of a page. */
 UNIV_INLINE
 dulint
 btr_page_get_index_id(
 /*==================*/
 				/* out: index id */
 	const page_t*	page);	/* in: index page */
 /************************************************************
 Gets the node level field in an index page. */
 UNIV_INLINE
 ulint
 btr_page_get_level_low(
 /*===================*/
 				/* out: level, leaf level == 0 */
 	const page_t*	page);	/* in: index page */
 /************************************************************
 Gets the node level field in an index page. */
 UNIV_INLINE
 ulint
 btr_page_get_level(
 /*===============*/
 				/* out: level, leaf level == 0 */
 	const page_t*	page,	/* in: index page */
 	mtr_t*		mtr);	/* in: mini-transaction handle */
 /************************************************************
 Gets the next index page number. */
 UNIV_INLINE
 ulint
 btr_page_get_next(
 /*==============*/
 				/* out: next page number */
 	const page_t*	page,	/* in: index page */
 	mtr_t*		mtr);	/* in: mini-transaction handle */
 /************************************************************
 Gets the previous index page number. */
 UNIV_INLINE
 ulint
 btr_page_get_prev(
 /*==============*/
 				/* out: prev page number */
 	const page_t*	page,	/* in: index page */
 	mtr_t*		mtr);	/* in: mini-transaction handle */
 /*****************************************************************
 Gets pointer to the previous user record in the tree. It is assumed
 that the caller has appropriate latches on the page and its neighbor. */
 UNIV_INTERN
 rec_t*
 btr_get_prev_user_rec(
 /*==================*/
 			/* out: previous user record, NULL if there is none */
 	rec_t*	rec,	/* in: record on leaf level */
 	mtr_t*	mtr);	/* in: mtr holding a latch on the page, and if
 			needed, also to the previous page */
 /*****************************************************************
 Gets pointer to the next user record in the tree. It is assumed
 that the caller has appropriate latches on the page and its neighbor. */
 UNIV_INTERN
 rec_t*
 btr_get_next_user_rec(
 /*==================*/
 			/* out: next user record, NULL if there is none */
 	rec_t*	rec,	/* in: record on leaf level */
 	mtr_t*	mtr);	/* in: mtr holding a latch on the page, and if
 			needed, also to the next page */
 /******************************************************************
 Releases the latch on a leaf page and buffer-unfixes it. */
 UNIV_INLINE
 void
 btr_leaf_page_release(
 /*==================*/
 	buf_block_t*	block,		/* in: buffer block */
 	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
 					BTR_MODIFY_LEAF */
 	mtr_t*		mtr);		/* in: mtr */
 /******************************************************************
 Gets the child node file address in a node pointer. */
 UNIV_INLINE
 ulint
 btr_node_ptr_get_child_page_no(
 /*===========================*/
 				/* out: child node address */
 	const rec_t*	rec,	/* in: node pointer record */
 	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
 /****************************************************************
 Creates the root node for a new index tree. */
 UNIV_INTERN
 ulint
 btr_create(
 /*=======*/
 				/* out: page number of the created root,
 				FIL_NULL if did not succeed */
 	ulint		type,	/* in: type of the index */
 	ulint		space,	/* in: space where created */
 	ulint		zip_size,/* in: compressed page size in bytes
 				or 0 for uncompressed pages */
 	dulint		index_id,/* in: index id */
 	dict_index_t*	index,	/* in: index */
 	mtr_t*		mtr);	/* in: mini-transaction handle */
 /****************************************************************
 Frees a B-tree except the root page, which MUST be freed after this
 by calling btr_free_root. */
 UNIV_INTERN
 void
 btr_free_but_not_root(
 /*==================*/
 	ulint	space,		/* in: space where created */
 	ulint	zip_size,	/* in: compressed page size in bytes
 				or 0 for uncompressed pages */
 	ulint	root_page_no);	/* in: root page number */
 /****************************************************************
 Frees the B-tree root page. The rest of the tree MUST already have been
 freed. */
 UNIV_INTERN
 void
 btr_free_root(
 /*==========*/
 	ulint	space,		/* in: space where created */
 	ulint	zip_size,	/* in: compressed page size in bytes
 				or 0 for uncompressed pages */
 	ulint	root_page_no,	/* in: root page number */
 	mtr_t*	mtr);		/* in: a mini-transaction which has already
 				been started */
 /*****************************************************************
 Makes tree one level higher by splitting the root, and inserts
 the tuple. It is assumed that mtr contains an x-latch on the tree.
 NOTE that the operation of this function must always succeed,
 we cannot reverse it: therefore enough free disk space must be
 guaranteed to be available before this function is called. */
 UNIV_INTERN
 rec_t*
 btr_root_raise_and_insert(
 /*======================*/
 				/* out: inserted record */
 	btr_cur_t*	cursor,	/* in: cursor at which to insert: must be
 				on the root page; when the function returns,
 				the cursor is positioned on the predecessor
 				of the inserted record */
 	const dtuple_t*	tuple,	/* in: tuple to insert */
 	ulint		n_ext,	/* in: number of externally stored columns */
 	mtr_t*		mtr);	/* in: mtr */
 /*****************************************************************
 Reorganizes an index page.
 IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
 page of a non-clustered index, the caller must update the insert
 buffer free bits in the same mini-transaction in such a way that the
 modification will be redo-logged. */
 UNIV_INTERN
 ibool
 btr_page_reorganize(
 /*================*/
 				/* out: TRUE on success, FALSE on failure */
 	buf_block_t*	block,	/* in: page to be reorganized */
 	dict_index_t*	index,	/* in: record descriptor */
 	mtr_t*		mtr);	/* in: mtr */
 /*****************************************************************
 Decides if the page should be split at the convergence point of
 inserts converging to left. */
 UNIV_INTERN
 ibool
 btr_page_get_split_rec_to_left(
 /*===========================*/
 				/* out: TRUE if split recommended */
 	btr_cur_t*	cursor,	/* in: cursor at which to insert */
 	rec_t**		split_rec);/* out: if split recommended,
 				the first record on upper half page,
 				or NULL if tuple should be first */
 /*****************************************************************
 Decides if the page should be split at the convergence point of
 inserts converging to right. */
 UNIV_INTERN
 ibool
 btr_page_get_split_rec_to_right(
 /*============================*/
 				/* out: TRUE if split recommended */
 	btr_cur_t*	cursor,	/* in: cursor at which to insert */
 	rec_t**		split_rec);/* out: if split recommended,
 				the first record on upper half page,
 				or NULL if tuple should be first */
 /*****************************************************************
 Splits an index page to halves and inserts the tuple. It is assumed
 that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
 is released within this function! NOTE that the operation of this
 function must always succeed, we cannot reverse it: therefore
 enough free disk space must be guaranteed to be available before
 this function is called. */
 UNIV_INTERN
 rec_t*
 btr_page_split_and_insert(
 /*======================*/
 				/* out: inserted record; NOTE: the tree
 				x-latch is released! NOTE: 2 free disk
 				pages must be available! */
 	btr_cur_t*	cursor,	/* in: cursor at which to insert; when the
 				function returns, the cursor is positioned
 				on the predecessor of the inserted record */
 	const dtuple_t*	tuple,	/* in: tuple to insert */
 	ulint		n_ext,	/* in: number of externally stored columns */
 	mtr_t*		mtr);	/* in: mtr */
 /***********************************************************
 Inserts a data tuple to a tree on a non-leaf level. It is assumed
 that mtr holds an x-latch on the tree. */
 UNIV_INTERN
 void
 btr_insert_on_non_leaf_level(
 /*=========================*/
 	dict_index_t*	index,	/* in: index */
 	ulint		level,	/* in: level, must be > 0 */
 	dtuple_t*	tuple,	/* in: the record to be inserted */
 	mtr_t*		mtr);	/* in: mtr */
 /********************************************************************
 Sets a record as the predefined minimum record. */
 UNIV_INTERN
 void
 btr_set_min_rec_mark(
 /*=================*/
 	rec_t*	rec,	/* in/out: record */
 	mtr_t*	mtr);	/* in: mtr */
 /*****************************************************************
 Deletes on the upper level the node pointer to a page. */
 UNIV_INTERN
 void
 btr_node_ptr_delete(
 /*================*/
 	dict_index_t*	index,	/* in: index tree */
 	buf_block_t*	block,	/* in: page whose node pointer is deleted */
 	mtr_t*		mtr);	/* in: mtr */
 #ifdef UNIV_DEBUG
 /****************************************************************
 Checks that the node pointer to a page is appropriate. */
 UNIV_INTERN
 ibool
 btr_check_node_ptr(
 /*===============*/
 				/* out: TRUE */
 	dict_index_t*	index,	/* in: index tree */
 	buf_block_t*	block,	/* in: index page */
 	mtr_t*		mtr);	/* in: mtr */
 #endif /* UNIV_DEBUG */
 /*****************************************************************
 Tries to merge the page with its immediate left brother, provided that
 such a brother exists and that the node pointers to the current page and
 to the brother reside on the same page. If the left brother does not
 satisfy these conditions, the right brother is looked at instead. If the
 page is the only one on its level, the records of the page are lifted to
 the father page, thus reducing the tree height. It is assumed that mtr
 holds an x-latch on the tree and on the page. If the cursor is on the leaf
 level, mtr must also hold x-latches on the brothers, if they exist. */
 UNIV_INTERN
 ibool
 btr_compress(
 /*=========*/
 				/* out: TRUE on success */
 	btr_cur_t*	cursor,	/* in: cursor on the page to merge or lift;
 				the page must not be empty: in record delete
 				use btr_discard_page if the page would become
 				empty */
 	mtr_t*		mtr);	/* in: mtr */
 /*****************************************************************
 Discards a page from a B-tree. This is used to remove the last record from
 a B-tree page: the whole page must be removed at the same time. This cannot
 be used for the root page, which is allowed to be empty. */
 UNIV_INTERN
 void
 btr_discard_page(
 /*=============*/
 	btr_cur_t*	cursor,	/* in: cursor on the page to discard: not on
 				the root page */
 	mtr_t*		mtr);	/* in: mtr */
 /********************************************************************
 Parses the redo log record for setting an index record as the predefined
 minimum record. */
 UNIV_INTERN
 byte*
 btr_parse_set_min_rec_mark(
 /*=======================*/
 			/* out: end of log record or NULL */
 	byte*	ptr,	/* in: buffer */
 	byte*	end_ptr,/* in: buffer end */
 	ulint	comp,	/* in: nonzero=compact page format */
 	page_t*	page,	/* in: page or NULL */
 	mtr_t*	mtr);	/* in: mtr or NULL */
 /***************************************************************
 Parses a redo log record of reorganizing a page. */
 UNIV_INTERN
 byte*
 btr_parse_page_reorganize(
 /*======================*/
 				/* out: end of log record or NULL */
 	byte*		ptr,	/* in: buffer */
 	byte*		end_ptr,/* in: buffer end */
 	dict_index_t*	index,	/* in: record descriptor */
 	buf_block_t*	block,	/* in: page to be reorganized, or NULL */
 	mtr_t*		mtr);	/* in: mtr or NULL */
 /******************************************************************
 Gets the number of pages in a B-tree. */
 UNIV_INTERN
 ulint
 btr_get_size(
 /*=========*/
 				/* out: number of pages */
 	dict_index_t*	index,	/* in: index */
 	ulint		flag);	/* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
 /******************************************************************
 Allocates a new file page to be used in an index tree. NOTE: we assume
 that the caller has made the reservation for free extents! */
 UNIV_INTERN
 buf_block_t*
 btr_page_alloc(
 /*===========*/
 					/* out: new allocated block, x-latched;
 					NULL if out of space */
 	dict_index_t*	index,		/* in: index tree */
 	ulint		hint_page_no,	/* in: hint of a good page */
 	byte		file_direction,	/* in: direction where a possible
 					page split is made */
 	ulint		level,		/* in: level where the page is placed
 					in the tree */
 	mtr_t*		mtr);		/* in: mtr */
 /******************************************************************
 Frees a file page used in an index tree. NOTE: cannot free field external
 storage pages because the page must contain info on its level. */
 UNIV_INTERN
 void
 btr_page_free(
 /*==========*/
 	dict_index_t*	index,	/* in: index tree */
 	buf_block_t*	block,	/* in: block to be freed, x-latched */
 	mtr_t*		mtr);	/* in: mtr */
 /******************************************************************
 Frees a file page used in an index tree. Can also be used to free BLOB
 external storage pages, because the page level 0 can be passed explicitly
 as an argument. */
 UNIV_INTERN
 void
 btr_page_free_low(
 /*==============*/
 	dict_index_t*	index,	/* in: index tree */
 	buf_block_t*	block,	/* in: block to be freed, x-latched */
 	ulint		level,	/* in: page level */
 	mtr_t*		mtr);	/* in: mtr */
 #ifdef UNIV_BTR_PRINT
 /*****************************************************************
 Prints size info of a B-tree. */
 UNIV_INTERN
 void
 btr_print_size(
 /*===========*/
 	dict_index_t*	index);	/* in: index tree */
 /******************************************************************
 Prints directories and other info of all nodes in the index. */
 UNIV_INTERN
 void
 btr_print_index(
 /*============*/
 	dict_index_t*	index,	/* in: index */
 	ulint		width);	/* in: print this many entries from start
 				and end */
 #endif /* UNIV_BTR_PRINT */
 /****************************************************************
 Checks the size and number of fields in a record based on the definition of
 the index. */
 UNIV_INTERN
 ibool
 btr_index_rec_validate(
 /*===================*/
 						/* out: TRUE if ok */
 	const rec_t*		rec,		/* in: index record */
 	const dict_index_t*	index,		/* in: index */
 	ibool			dump_on_error);	/* in: TRUE if the function
 						should print hex dump of record
 						and page on error */
 /******************************************************************
 Checks the consistency of an index tree. */
 UNIV_INTERN
 ibool
 btr_validate_index(
 /*===============*/
 				/* out: TRUE if ok */
 	dict_index_t*	index,	/* in: index */
 	trx_t*		trx);	/* in: transaction or NULL */
 /* Values for the flag parameter of btr_get_size() */
 #define BTR_N_LEAF_PAGES	1
 #define BTR_TOTAL_SIZE		2
 #ifndef UNIV_NONINL
 #include "btr0btr.ic"
 #endif
 #endif
--- a/include/btr0btr.ic
+++ b/include/btr0btr.ic
@@ -0,0 +1,285 @@
 /******************************************************
 The B-tree
 (c) 1994-1996 Innobase Oy
 Created 6/2/1994 Heikki Tuuri
 *******************************************************/
 #include "mach0data.h"
 #include "mtr0mtr.h"
 #include "mtr0log.h"
 #include "page0zip.h"
 #define BTR_MAX_NODE_LEVEL	50	/* used in debug checking */
 /******************************************************************
 Fetches a page with buf_page_get() and, in UNIV_SYNC_DEBUG builds,
 registers the block at the SYNC_TREE_NODE latching order level unless
 the page was requested with RW_NO_LATCH. */
 UNIV_INLINE
 buf_block_t*
 btr_block_get(
 /*==========*/
 				/* out: the block returned by
 				buf_page_get() */
 	ulint	space,		/* in: space id */
 	ulint	zip_size,	/* in: compressed page size in bytes
 				or 0 for uncompressed pages */
 	ulint	page_no,	/* in: page number */
 	ulint	mode,		/* in: latch mode */
 	mtr_t*	mtr)		/* in: mtr */
 {
 	buf_block_t*	fetched;
 	fetched = buf_page_get(space, zip_size, page_no, mode, mtr);
 #ifdef UNIV_SYNC_DEBUG
 	if (mode == RW_NO_LATCH) {
 		/* No latch was taken: nothing to register */
 		return(fetched);
 	}
 	buf_block_dbg_add_level(fetched, SYNC_TREE_NODE);
 #endif
 	return(fetched);
 }
 /******************************************************************
 Gets the frame of a buffer page. The block is fetched through
 btr_block_get(), which also declares its latching order level. */
 UNIV_INLINE
 page_t*
 btr_page_get(
 /*=========*/
 				/* out: frame of the fetched block */
 	ulint	space,		/* in: space id */
 	ulint	zip_size,	/* in: compressed page size in bytes
 				or 0 for uncompressed pages */
 	ulint	page_no,	/* in: page number */
 	ulint	mode,		/* in: latch mode */
 	mtr_t*	mtr)		/* in: mtr */
 {
 	buf_block_t*	block;
 	block = btr_block_get(space, zip_size, page_no, mode, mtr);
 	return(buf_block_get_frame(block));
 }
 /******************************************************************
 Stores an index id in the PAGE_INDEX_ID field of the page header. For a
 compressed page the 8 bytes are written to the uncompressed frame and then
 handed to page_zip_write_header(); otherwise mlog_write_dulint() is used. */
 UNIV_INLINE
 void
 btr_page_set_index_id(
 /*==================*/
 	page_t*		page,	/* in/out: page whose index id is set */
 	page_zip_des_t*	page_zip,/* in: compressed page whose uncompressed
 				part will be updated, or NULL */
 	dulint		id,	/* in: index id */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	page_t*	id_field = page + (PAGE_HEADER + PAGE_INDEX_ID);
 	if (!UNIV_LIKELY_NULL(page_zip)) {
 		/* Uncompressed page */
 		mlog_write_dulint(id_field, id, mtr);
 		return;
 	}
 	mach_write_to_8(id_field, id);
 	page_zip_write_header(page_zip, id_field, 8, mtr);
 }
 /******************************************************************
 Reads the 8-byte index id stored in the PAGE_INDEX_ID field of the
 page header. */
 UNIV_INLINE
 dulint
 btr_page_get_index_id(
 /*==================*/
 				/* out: index id */
 	const page_t*	page)	/* in: index page */
 {
 	const page_t*	id_field = page + PAGE_HEADER + PAGE_INDEX_ID;
 	return(mach_read_from_8(id_field));
 }
 /************************************************************
 Reads the 2-byte node level stored in the PAGE_LEVEL field of the page
 header. Debug builds assert that the value does not exceed
 BTR_MAX_NODE_LEVEL. */
 UNIV_INLINE
 ulint
 btr_page_get_level_low(
 /*===================*/
 				/* out: level, leaf level == 0 */
 	const page_t*	page)	/* in: index page */
 {
 	const page_t*	level_field;
 	ulint		node_level;
 	ut_ad(page);
 	level_field = page + PAGE_HEADER + PAGE_LEVEL;
 	node_level = mach_read_from_2(level_field);
 	ut_ad(node_level <= BTR_MAX_NODE_LEVEL);
 	return(node_level);
 }
 /************************************************************
 Gets the node level field in an index page. The mtr is only checked
 in debug assertions; the value is read by btr_page_get_level_low(). */
 UNIV_INLINE
 ulint
 btr_page_get_level(
 /*===============*/
 				/* out: level, leaf level == 0 */
 	const page_t*	page,	/* in: index page */
 	mtr_t*		mtr __attribute__((unused)))
 				/* in: mini-transaction handle */
 {
 	ut_ad(page);
 	ut_ad(mtr);
 	return(btr_page_get_level_low(page));
 }
 /************************************************************
 Stores the node level in the PAGE_LEVEL field of the page header. For a
 compressed page the 2 bytes are written to the uncompressed frame and then
 handed to page_zip_write_header(); otherwise mlog_write_ulint() is used. */
 UNIV_INLINE
 void
 btr_page_set_level(
 /*===============*/
 	page_t*		page,	/* in/out: index page */
 	page_zip_des_t*	page_zip,/* in: compressed page whose uncompressed
 				part will be updated, or NULL */
 	ulint		level,	/* in: level, leaf level == 0 */
 	mtr_t*		mtr)	/* in: mini-transaction handle */
 {
 	page_t*	level_field;
 	ut_ad(page && mtr);
 	ut_ad(level <= BTR_MAX_NODE_LEVEL);
 	level_field = page + (PAGE_HEADER + PAGE_LEVEL);
 	if (!UNIV_LIKELY_NULL(page_zip)) {
 		/* Uncompressed page */
 		mlog_write_ulint(level_field, level, MLOG_2BYTES, mtr);
 		return;
 	}
 	mach_write_to_2(level_field, level);
 	page_zip_write_header(page_zip, level_field, 2, mtr);
 }
 /************************************************************
 Reads the 4-byte FIL_PAGE_NEXT field of an index page. Debug builds
 assert that mtr holds an s-fix or x-fix on the page. */
 UNIV_INLINE
 ulint
 btr_page_get_next(
 /*==============*/
 				/* out: next page number */
 	const page_t*	page,	/* in: index page */
 	mtr_t*		mtr __attribute__((unused)))
 				/* in: mini-transaction handle */
 {
 	ulint	next_page_no;
 	ut_ad(page && mtr);
 	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
 	      || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
 	next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
 	return(next_page_no);
 }
 /************************************************************
 Stores a page number in the FIL_PAGE_NEXT field of an index page. For a
 compressed page the 4 bytes are written to the uncompressed frame and then
 handed to page_zip_write_header(); otherwise mlog_write_ulint() is used. */
 UNIV_INLINE
 void
 btr_page_set_next(
 /*==============*/
 	page_t*		page,	/* in/out: index page */
 	page_zip_des_t*	page_zip,/* in: compressed page whose uncompressed
 				part will be updated, or NULL */
 	ulint		next,	/* in: next page number */
 	mtr_t*		mtr)	/* in: mini-transaction handle */
 {
 	page_t*	next_field;
 	ut_ad(page && mtr);
 	next_field = page + FIL_PAGE_NEXT;
 	if (!UNIV_LIKELY_NULL(page_zip)) {
 		/* Uncompressed page */
 		mlog_write_ulint(next_field, next, MLOG_4BYTES, mtr);
 		return;
 	}
 	mach_write_to_4(next_field, next);
 	page_zip_write_header(page_zip, next_field, 4, mtr);
 }
 /************************************************************
 Reads the 4-byte FIL_PAGE_PREV field of an index page. The mtr is only
 checked in a debug assertion. */
 UNIV_INLINE
 ulint
 btr_page_get_prev(
 /*==============*/
 				/* out: prev page number */
 	const page_t*	page,	/* in: index page */
 	mtr_t*	mtr __attribute__((unused))) /* in: mini-transaction handle */
 {
 	ulint	prev_page_no;
 	ut_ad(page && mtr);
 	prev_page_no = mach_read_from_4(page + FIL_PAGE_PREV);
 	return(prev_page_no);
 }
 /************************************************************
 Stores a page number in the FIL_PAGE_PREV field of an index page. For a
 compressed page the 4 bytes are written to the uncompressed frame and then
 handed to page_zip_write_header(); otherwise mlog_write_ulint() is used. */
 UNIV_INLINE
 void
 btr_page_set_prev(
 /*==============*/
 	page_t*		page,	/* in/out: index page */
 	page_zip_des_t*	page_zip,/* in: compressed page whose uncompressed
 				part will be updated, or NULL */
 	ulint		prev,	/* in: previous page number */
 	mtr_t*		mtr)	/* in: mini-transaction handle */
 {
 	page_t*	prev_field;
 	ut_ad(page && mtr);
 	prev_field = page + FIL_PAGE_PREV;
 	if (!UNIV_LIKELY_NULL(page_zip)) {
 		/* Uncompressed page */
 		mlog_write_ulint(prev_field, prev, MLOG_4BYTES, mtr);
 		return;
 	}
 	mach_write_to_4(prev_field, prev);
 	page_zip_write_header(page_zip, prev_field, 4, mtr);
 }
 /******************************************************************
 Reads the child page number from the last field of a node pointer record.
 If the number read is 0, a diagnostic message and a dump of the page are
 printed, and 0 is still returned to the caller. */
 UNIV_INLINE
 ulint
 btr_node_ptr_get_child_page_no(
 /*===========================*/
 				/* out: child node address */
 	const rec_t*	rec,	/* in: node pointer record */
 	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
 {
 	const byte*	child_field;
 	ulint		field_len;
 	ulint		last_field_no;
 	ulint		child_page_no;
 	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
 	/* The child address is in the last field */
 	last_field_no = rec_offs_n_fields(offsets) - 1;
 	child_field = rec_get_nth_field(rec, offsets, last_field_no,
 					&field_len);
 	ut_ad(field_len == 4);
 	child_page_no = mach_read_from_4(child_field);
 	if (UNIV_UNLIKELY(child_page_no == 0)) {
 		/* Report the suspicious record, but do not stop here */
 		fprintf(stderr,
 			"InnoDB: a nonsensical page number 0"
 			" in a node ptr record at offset %lu\n",
 			(ulong) page_offset(rec));
 		buf_page_print(page_align(rec), 0);
 	}
 	return(child_page_no);
 }
 /******************************************************************
 Releases the latch on a leaf page and buffer-unfixes it. The mtr memo
 entry released is the s-fix for BTR_SEARCH_LEAF and the x-fix for
 BTR_MODIFY_LEAF. */
 UNIV_INLINE
 void
 btr_leaf_page_release(
 /*==================*/
 	buf_block_t*	block,		/* in: buffer block */
 	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
 					BTR_MODIFY_LEAF */
 	mtr_t*		mtr)		/* in: mtr */
 {
 	ulint	fix_type;
 	ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
 	ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));
 	if (latch_mode == BTR_SEARCH_LEAF) {
 		fix_type = MTR_MEMO_PAGE_S_FIX;
 	} else {
 		fix_type = MTR_MEMO_PAGE_X_FIX;
 	}
 	mtr_memo_release(mtr, block, fix_type);
 }
--- a/include/btr0cur.h
+++ b/include/btr0cur.h
@@ -0,0 +1,724 @@
 /******************************************************
 The index tree cursor
 (c) 1994-1996 Innobase Oy
 Created 10/16/1994 Heikki Tuuri
 *******************************************************/
 #ifndef btr0cur_h
 #define btr0cur_h
 #include "univ.i"
 #include "dict0dict.h"
 #include "data0data.h"
 #include "page0cur.h"
 #include "btr0types.h"
 #include "que0types.h"
 #include "row0types.h"
 #include "ha0ha.h"
 /* Mode flags for btr_cur operations; these can be ORed */
 #define BTR_NO_UNDO_LOG_FLAG	1	/* do no undo logging */
 #define BTR_NO_LOCKING_FLAG	2	/* do no record lock checking */
 #define BTR_KEEP_SYS_FLAG	4	/* sys fields will be found from the
 					update vector or inserted entry */
 #define BTR_CUR_ADAPT
 #define BTR_CUR_HASH_ADAPT
 #ifdef UNIV_DEBUG
 /*************************************************************
 Returns the page cursor component of a tree cursor. In UNIV_DEBUG builds
 this is an inline function; otherwise it is a macro that evaluates to the
 address of cursor->page_cur. */
 UNIV_INLINE
 page_cur_t*
 btr_cur_get_page_cur(
 /*=================*/
 					/* out: pointer to page cursor
 					component */
 	const btr_cur_t*	cursor);/* in: tree cursor */
 #else /* UNIV_DEBUG */
 # define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
 #endif /* UNIV_DEBUG */
 /*************************************************************
 Returns the buffer block on which the tree cursor is positioned. */
 UNIV_INLINE
 buf_block_t*
 btr_cur_get_block(
 /*==============*/
 				/* out: pointer to buffer block */
 	btr_cur_t*	cursor);/* in: tree cursor */
 /*************************************************************
 Returns the record pointer of a tree cursor. */
 UNIV_INLINE
 rec_t*
 btr_cur_get_rec(
 /*============*/
 				/* out: pointer to record */
 	btr_cur_t*	cursor);/* in: tree cursor */
 /*************************************************************
 Returns the compressed page on which the tree cursor is positioned. */
 UNIV_INLINE
 page_zip_des_t*
 btr_cur_get_page_zip(
 /*=================*/
 				/* out: pointer to compressed page,
 				or NULL if the page is not compressed */
 	btr_cur_t*	cursor);/* in: tree cursor */
 /*************************************************************
 Invalidates a tree cursor by setting record pointer to NULL. */
 UNIV_INLINE
 void
 btr_cur_invalidate(
 /*===============*/
 	btr_cur_t*	cursor);/* in: tree cursor */
 /*************************************************************
 Returns the page of a tree cursor. */
 UNIV_INLINE
 page_t*
 btr_cur_get_page(
 /*=============*/
 				/* out: pointer to page */
 	btr_cur_t*	cursor);/* in: tree cursor */
 /*************************************************************
 Returns the index of a cursor. */
 UNIV_INLINE
 dict_index_t*
 btr_cur_get_index(
 /*==============*/
 				/* out: index */
 	btr_cur_t*	cursor);/* in: B-tree cursor */
 /*************************************************************
 Positions a tree cursor at a given record. */
 UNIV_INLINE
 void
 btr_cur_position(
 /*=============*/
 	dict_index_t*	index,	/* in: index */
 	rec_t*		rec,	/* in: record in tree */
 	buf_block_t*	block,	/* in: buffer block of rec */
 	btr_cur_t*	cursor);/* in: cursor */
 /************************************************************************
 Searches an index tree and positions a tree cursor on a given level.
 NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
 to node pointer page number fields on the upper levels of the tree!
 Note that if mode is PAGE_CUR_LE, which is used in inserts, then
 cursor->up_match and cursor->low_match both will have sensible values.
 If mode is PAGE_CUR_GE, then up_match will have a sensible value. */
 UNIV_INTERN
 void
 btr_cur_search_to_nth_level(
 /*========================*/
 	dict_index_t*	index,	/* in: index */
 	ulint		level,	/* in: the tree level of search */
 	const dtuple_t*	tuple,	/* in: data tuple; NOTE: n_fields_cmp in
 				tuple must be set so that it cannot get
 				compared to the node ptr page number field! */
 	ulint		mode,	/* in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be PAGE_CUR_LE,
 				not PAGE_CUR_GE, as the latter may end up on
 				the previous page of the record! Inserts
 				should always be made using PAGE_CUR_LE to
 				search the position! */
 	ulint		latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
 				BTR_INSERT and BTR_ESTIMATE;
 				cursor->left_block is used to store a pointer
 				to the left neighbor page, in the cases
 				BTR_SEARCH_PREV and BTR_MODIFY_PREV;
 				NOTE that if has_search_latch
 				is != 0, we may not have a latch set
 				on the cursor page; we assume
 				the caller uses its search latch
 				to protect the record! */
 	btr_cur_t*	cursor, /* in/out: tree cursor; the cursor page is
 				s- or x-latched, but see also above! */
 	ulint		has_search_latch,/* in: latch mode the caller
 				currently has on btr_search_latch:
 				RW_S_LATCH, or 0 */
 	mtr_t*		mtr);	/* in: mtr */
 /*********************************************************************
 Opens a cursor at either end of an index. */
 UNIV_INTERN
 void
 btr_cur_open_at_index_side(
 /*=======================*/
 	ibool		from_left,	/* in: TRUE if open to the low end,
 					FALSE if to the high end */
 	dict_index_t*	index,		/* in: index */
 	ulint		latch_mode,	/* in: latch mode */
 	btr_cur_t*	cursor,		/* in: cursor */
 	mtr_t*		mtr);		/* in: mtr */
 /**************************************************************************
 Positions a cursor at a randomly chosen position within a B-tree. */
 UNIV_INTERN
 void
 btr_cur_open_at_rnd_pos(
 /*====================*/
 	dict_index_t*	index,		/* in: index */
 	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
 	btr_cur_t*	cursor,		/* in/out: B-tree cursor */
 	mtr_t*		mtr);		/* in: mtr */
 /*****************************************************************
 Tries to perform an insert to a page in an index tree, next to cursor.
 It is assumed that mtr holds an x-latch on the page. The operation does
 not succeed if there is too little space on the page. If there is just
 one record on the page, the insert will always succeed; this is to
 prevent trying to split a page with just one record. */
 UNIV_INTERN
 ulint
 btr_cur_optimistic_insert(
 /*======================*/
 				/* out: DB_SUCCESS, DB_LOCK_WAIT,
 				DB_FAIL, or error number */
 	ulint		flags,	/* in: undo logging and locking flags: if not
 				zero, the parameter thr should be
 				specified */
 	btr_cur_t*	cursor,	/* in: cursor on page after which to insert;
 				cursor stays valid */
 	dtuple_t*	entry,	/* in/out: entry to insert */
 	rec_t**		rec,	/* out: pointer to inserted record if
 				succeed */
 	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
 				be stored externally by the caller, or
 				NULL */
 	ulint		n_ext,	/* in: number of externally stored columns */
 	que_thr_t*	thr,	/* in: query thread or NULL */
 	mtr_t*		mtr);	/* in: mtr; if this function returns
 				DB_SUCCESS on a leaf page of a secondary
 				index in a compressed tablespace, the
 				mtr must be committed before latching
 				any further pages */
 /*****************************************************************
 Performs an insert on a page of an index tree. It is assumed that mtr
 holds an x-latch on the tree and on the cursor page. If the insert is
 made on the leaf level, to avoid deadlocks, mtr must also own x-latches
 to brothers of page, if those brothers exist. */
 UNIV_INTERN
 ulint
 btr_cur_pessimistic_insert(
 /*=======================*/
 				/* out: DB_SUCCESS or error number */
 	ulint		flags,	/* in: undo logging and locking flags: if not
 				zero, the parameter thr should be
 				specified; if no undo logging is specified,
 				then the caller must have reserved enough
 				free extents in the file space so that the
 				insertion will certainly succeed */
 	btr_cur_t*	cursor,	/* in: cursor after which to insert;
 				cursor stays valid */
 	dtuple_t*	entry,	/* in/out: entry to insert */
 	rec_t**		rec,	/* out: pointer to inserted record if
 				succeed */
 	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
 				be stored externally by the caller, or
 				NULL */
 	ulint		n_ext,	/* in: number of externally stored columns */
 	que_thr_t*	thr,	/* in: query thread or NULL */
 	mtr_t*		mtr);	/* in: mtr */
 /*****************************************************************
 Updates a record when the update causes no size changes in its fields. */
 UNIV_INTERN
 ulint
 btr_cur_update_in_place(
 /*====================*/
 				/* out: DB_SUCCESS or error number */
 	ulint		flags,	/* in: undo logging and locking flags */
 	btr_cur_t*	cursor,	/* in: cursor on the record to update;
 				cursor stays valid and positioned on the
 				same record */
 	const upd_t*	update,	/* in: update vector */
 	ulint		cmpl_info,/* in: compiler info on secondary index
 				updates */
 	que_thr_t*	thr,	/* in: query thread */
 	mtr_t*		mtr);	/* in: mtr; must be committed before
 				latching any further pages */
 /*****************************************************************
 Tries to update a record on a page in an index tree. It is assumed that mtr
 holds an x-latch on the page. The operation does not succeed if there is too
 little space on the page or if the update would result in too empty a page,
 so that tree compression is recommended. */
 UNIV_INTERN
 ulint
 btr_cur_optimistic_update(
 /*======================*/
 				/* out: DB_SUCCESS, or DB_OVERFLOW if the
 				updated record does not fit, DB_UNDERFLOW
 				if the page would become too empty, or
 				DB_ZIP_OVERFLOW if there is not enough
 				space left on the compressed page */
 	ulint		flags,	/* in: undo logging and locking flags */
 	btr_cur_t*	cursor,	/* in: cursor on the record to update;
 				cursor stays valid and positioned on the
 				same record */
 	const upd_t*	update,	/* in: update vector; this must also
 				contain trx id and roll ptr fields */
 	ulint		cmpl_info,/* in: compiler info on secondary index
 				updates */
 	que_thr_t*	thr,	/* in: query thread */
 	mtr_t*		mtr);	/* in: mtr; must be committed before
 				latching any further pages */
 /*****************************************************************
 Performs an update of a record on a page of a tree. It is assumed
 that mtr holds an x-latch on the tree and on the cursor page. If the
 update is made on the leaf level, to avoid deadlocks, mtr must also
 own x-latches to brothers of page, if those brothers exist. */
 UNIV_INTERN
 ulint
 btr_cur_pessimistic_update(
 /*=======================*/
 				/* out: DB_SUCCESS or error code */
 	ulint		flags,	/* in: undo logging, locking, and rollback
 				flags */
 	btr_cur_t*	cursor,	/* in: cursor on the record to update */
 	mem_heap_t**	heap,	/* in/out: pointer to memory heap, or NULL */
 	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
 				be stored externally by the caller, or NULL */
 	const upd_t*	update,	/* in: update vector; this is also allowed
 				to contain trx id and roll ptr fields, but
 				the values in update vector have no effect */
 	ulint		cmpl_info,/* in: compiler info on secondary index
 				updates */
 	que_thr_t*	thr,	/* in: query thread */
 	mtr_t*		mtr);	/* in: mtr; must be committed before
 				latching any further pages */
 /***************************************************************
 Marks a clustered index record deleted. Writes an undo log record to
 undo log on this delete marking. Writes in the trx id field the id
 of the deleting transaction, and in the roll ptr field pointer to the
 undo log record created. */
 UNIV_INTERN
 ulint
 btr_cur_del_mark_set_clust_rec(
 /*===========================*/
 				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
 				number */
 	ulint		flags,	/* in: undo logging and locking flags */
 	btr_cur_t*	cursor,	/* in: cursor */
 	ibool		val,	/* in: value to set */
 	que_thr_t*	thr,	/* in: query thread */
 	mtr_t*		mtr);	/* in: mtr */
 /***************************************************************
 Sets a secondary index record delete mark to TRUE or FALSE. */
 UNIV_INTERN
 ulint
 btr_cur_del_mark_set_sec_rec(
 /*=========================*/
 				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
 				number */
 	ulint		flags,	/* in: locking flag */
 	btr_cur_t*	cursor,	/* in: cursor */
 	ibool		val,	/* in: value to set */
 	que_thr_t*	thr,	/* in: query thread */
 	mtr_t*		mtr);	/* in: mtr */
 /***************************************************************
 Sets a secondary index record delete mark to FALSE. This function is
 only used by the insert buffer insert merge mechanism. */
 UNIV_INTERN
 void
 btr_cur_del_unmark_for_ibuf(
 /*========================*/
 	rec_t*		rec,		/* in/out: record to delete unmark */
 	page_zip_des_t*	page_zip,	/* in/out: compressed page
 					corresponding to rec, or NULL
 					when the tablespace is
 					uncompressed */
 	mtr_t*		mtr);		/* in: mtr */
 /*****************************************************************
 Tries to compress a page of the tree if it seems useful. It is assumed
 that mtr holds an x-latch on the tree and on the cursor page. To avoid
 deadlocks, mtr must also own x-latches to brothers of page, if those
 brothers exist. NOTE: it is assumed that the caller has reserved enough
 free extents so that the compression will always succeed if done! */
 UNIV_INTERN
 ibool
 btr_cur_compress_if_useful(
 /*=======================*/
 				/* out: TRUE if compression occurred */
 	btr_cur_t*	cursor,	/* in: cursor on the page to compress;
 				cursor does not stay valid if compression
 				occurs */
 	mtr_t*		mtr);	/* in: mtr */
 /***********************************************************
 Removes the record on which the tree cursor is positioned. It is assumed
 that the mtr has an x-latch on the page where the cursor is positioned,
 but no latch on the whole tree. */
 UNIV_INTERN
 ibool
 btr_cur_optimistic_delete(
 /*======================*/
 				/* out: TRUE if success, i.e., the page
 				did not become too empty */
 	btr_cur_t*	cursor,	/* in: cursor on the record to delete;
 				cursor stays valid: if deletion succeeds,
 				on function exit it points to the successor
 				of the deleted record */
 	mtr_t*		mtr);	/* in: mtr */
 /*****************************************************************
 Removes the record on which the tree cursor is positioned. Tries
 to compress the page if its fillfactor drops below a threshold
 or if it is the only page on the level. It is assumed that mtr holds
 an x-latch on the tree and on the cursor page. To avoid deadlocks,
 mtr must also own x-latches to brothers of page, if those brothers
 exist. */
 UNIV_INTERN
 ibool
 btr_cur_pessimistic_delete(
 /*=======================*/
 				/* out: TRUE if compression occurred */
 	ulint*		err,	/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
 				the latter may occur because we may have
 				to update node pointers on upper levels,
 				and in the case of variable length keys
 				these may actually grow in size */
 	ibool		has_reserved_extents, /* in: TRUE if the
 				caller has already reserved enough free
 				extents so that he knows that the operation
 				will succeed */
 	btr_cur_t*	cursor,	/* in: cursor on the record to delete;
 				if compression does not occur, the cursor
 				stays valid: it points to successor of
 				deleted record on function exit */
 	ibool		in_rollback,/* in: TRUE if called in rollback */
 	mtr_t*		mtr);	/* in: mtr */
 /***************************************************************
 Parses a redo log record of updating a record in-place. */
 UNIV_INTERN
 byte*
 btr_cur_parse_update_in_place(
 /*==========================*/
 				/* out: end of log record or NULL */
 	byte*		ptr,	/* in: buffer */
 	byte*		end_ptr,/* in: buffer end */
 	page_t*		page,	/* in/out: page or NULL */
 	page_zip_des_t*	page_zip,/* in/out: compressed page, or NULL */
 	dict_index_t*	index);	/* in: index corresponding to page */
 /********************************************************************
 Parses the redo log record for delete marking or unmarking of a clustered
 index record. */
 UNIV_INTERN
 byte*
 btr_cur_parse_del_mark_set_clust_rec(
 /*=================================*/
 				/* out: end of log record or NULL */
 	byte*		ptr,	/* in: buffer */
 	byte*		end_ptr,/* in: buffer end */
 	page_t*		page,	/* in/out: page or NULL */
 	page_zip_des_t*	page_zip,/* in/out: compressed page, or NULL */
 	dict_index_t*	index);	/* in: index corresponding to page */
 /********************************************************************
 Parses the redo log record for delete marking or unmarking of a secondary
 index record. */
 UNIV_INTERN
 byte*
 btr_cur_parse_del_mark_set_sec_rec(
 /*===============================*/
 				/* out: end of log record or NULL */
 	byte*		ptr,	/* in: buffer */
 	byte*		end_ptr,/* in: buffer end */
 	page_t*		page,	/* in/out: page or NULL */
 	page_zip_des_t*	page_zip);/* in/out: compressed page, or NULL */
 /***********************************************************************
 Estimates the number of rows in a given index range. */
 UNIV_INTERN
 ib_int64_t
 btr_estimate_n_rows_in_range(
 /*=========================*/
 				/* out: estimated number of rows */
 	dict_index_t*	index,	/* in: index */
 	const dtuple_t*	tuple1,	/* in: range start, may also be empty tuple */
 	ulint		mode1,	/* in: search mode for range start */
 	const dtuple_t*	tuple2,	/* in: range end, may also be empty tuple */
 	ulint		mode2);	/* in: search mode for range end */
 /***********************************************************************
 Estimates the number of different key values in a given index, for
 each n-column prefix of the index where n <= dict_index_get_n_unique(index).
 The estimates are stored in the array index->stat_n_diff_key_vals. */
 UNIV_INTERN
 void
 btr_estimate_number_of_different_key_vals(
 /*======================================*/
 	dict_index_t*	index);	/* in: index */
 /***********************************************************************
 Marks not updated extern fields as not-owned by this record. The ownership
 is transferred to the updated record which is inserted elsewhere in the
 index tree. In purge only the owner of externally stored field is allowed
 to free the field. */
 UNIV_INTERN
 void
 btr_cur_mark_extern_inherited_fields(
 /*=================================*/
 	page_zip_des_t*	page_zip,/* in/out: compressed page whose uncompressed
 				part will be updated, or NULL */
 	rec_t*		rec,	/* in/out: record in a clustered index */
 	dict_index_t*	index,	/* in: index of the page */
 	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
 	const upd_t*	update,	/* in: update vector */
 	mtr_t*		mtr);	/* in: mtr, or NULL if not logged */
 /***********************************************************************
 The complement of btr_cur_mark_extern_inherited_fields(): in an update,
 the entry may inherit some externally stored fields from a record. We
 must mark them as inherited in entry, so that they are not freed in a
 rollback. */
 UNIV_INTERN
 void
 btr_cur_mark_dtuple_inherited_extern(
 /*=================================*/
 	dtuple_t*	entry,		/* in/out: updated entry to be
 					inserted to clustered index */
 	const upd_t*	update);	/* in: update vector */
 /***********************************************************************
 Marks all extern fields in a dtuple as owned by the record. */
 UNIV_INTERN
 void
 btr_cur_unmark_dtuple_extern_fields(
 /*================================*/
 	dtuple_t*	entry);		/* in/out: clustered index entry */
 /***********************************************************************
 Stores the fields in big_rec_vec to the tablespace and puts pointers to
 them in rec.  The extern flags in rec will have to be set beforehand.
 The fields are stored on pages allocated from leaf node
 file segment of the index tree. */
 UNIV_INTERN
 ulint
 btr_store_big_rec_extern_fields(
 /*============================*/
 					/* out: DB_SUCCESS or error */
 	dict_index_t*	index,		/* in: index of rec; the index tree
 					MUST be X-latched */
 	buf_block_t*	rec_block,	/* in/out: block containing rec */
 	rec_t*		rec,		/* in: record */
 	const ulint*	offsets,	/* in: rec_get_offsets(rec, index);
 					the "external storage" flags in offsets
 					will not correspond to rec when
 					this function returns */
 	big_rec_t*	big_rec_vec,	/* in: vector containing fields
 					to be stored externally */
 	mtr_t*		local_mtr);	/* in: mtr containing the latch to
 					rec and to the tree */
 /***********************************************************************
 Frees the space in an externally stored field to the file space
 management if the field in data owns the externally stored field;
 in a rollback we may have the additional condition that the field must
 not be inherited. */
 UNIV_INTERN
 void
 btr_free_externally_stored_field(
 /*=============================*/
 	dict_index_t*	index,		/* in: index of the data, the index
 					tree MUST be X-latched; if the tree
 					height is 1, then also the root page
 					must be X-latched! (this is relevant
 					in the case this function is called
 					from purge where 'data' is located on
 					an undo log page, not an index
 					page) */
 	byte*		field_ref,	/* in/out: field reference */
 	const rec_t*	rec,		/* in: record containing field_ref, for
 					page_zip_write_blob_ptr(), or NULL */
 	const ulint*	offsets,	/* in: rec_get_offsets(rec, index),
 					or NULL */
 	page_zip_des_t*	page_zip,	/* in: compressed page corresponding
 					to rec, or NULL if rec == NULL */
 	ulint		i,		/* in: field number of field_ref;
 					ignored if rec == NULL */
 	ibool		do_not_free_inherited,/* in: TRUE if called in a
 					rollback and we do not want to free
 					inherited fields */
 	mtr_t*		local_mtr);	/* in: mtr containing the latch to
 					data and an X-latch to the index
 					tree */
 /***********************************************************************
 Copies the prefix of an externally stored field of a record.  The
 clustered index record must be protected by a lock or a page latch. */
 UNIV_INTERN
 ulint
 btr_copy_externally_stored_field_prefix(
 /*====================================*/
 				/* out: the length of the copied field */
 	byte*		buf,	/* out: the field, or a prefix of it */
 	ulint		len,	/* in: length of buf, in bytes */
 	ulint		zip_size,/* in: nonzero=compressed BLOB page size,
 				zero for uncompressed BLOBs */
 	const byte*	data,	/* in: 'internally' stored part of the
 				field containing also the reference to
 				the external part; must be protected by
 				a lock or a page latch */
 	ulint		local_len);/* in: length of data, in bytes */
 /***********************************************************************
 Copies an externally stored field of a record to mem heap. */
 UNIV_INTERN
 byte*
 btr_rec_copy_externally_stored_field(
 /*=================================*/
 				/* out: the field copied to heap */
 	const rec_t*	rec,	/* in: record in a clustered index;
 				must be protected by a lock or a page latch */
 	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
 	ulint		zip_size,/* in: nonzero=compressed BLOB page size,
 				zero for uncompressed BLOBs */
 	ulint		no,	/* in: field number */
 	ulint*		len,	/* out: length of the field */
 	mem_heap_t*	heap);	/* in: mem heap */
 /***********************************************************************
 Flags the data tuple fields that are marked as extern storage in the
 update vector.  We use this function to remember which fields we must
 mark as extern storage in a record inserted for an update. */
 UNIV_INTERN
 ulint
 btr_push_update_extern_fields(
 /*==========================*/
 				/* out: number of flagged external columns */
 	dtuple_t*	tuple,	/* in/out: data tuple */
 	const upd_t*	update,	/* in: update vector */
 	mem_heap_t*	heap)	/* in: memory heap */
 	__attribute__((nonnull));
 /*######################################################################*/
 /* In the pessimistic delete, if the page data size drops below this
 limit, merging it to a neighbor is tried */
 #define BTR_CUR_PAGE_COMPRESS_LIMIT	(UNIV_PAGE_SIZE / 2)
 /* A slot in the path array. We store here info on a search path down the
 tree. Each slot contains data on a single level of the tree. The end of
 the array is marked by a slot whose nth_rec is ULINT_UNDEFINED. */
 typedef struct btr_path_struct	btr_path_t;
 struct btr_path_struct{
 	ulint	nth_rec;	/* index of the record
 				where the page cursor stopped on
 				this level (index in alphabetical
 				order); value ULINT_UNDEFINED
 				denotes array end */
 	ulint	n_recs;		/* number of records on the page
 				visited on this level */
 };
 #define BTR_PATH_ARRAY_N_SLOTS	250	/* size of path array (in slots) */
 /* The tree cursor: the definition appears here only for the compiler
 to know struct size!
 NOTE(review): several member comments below refer to hash_node, which is
 not a member of this struct as shown here; confirm whether those
 references are stale. */
 struct btr_cur_struct {
 	dict_index_t*	index;		/* index where positioned */
 	page_cur_t	page_cur;	/* page cursor */
 	buf_block_t*	left_block;	/* this field is used to store
 					a pointer to the left neighbor
 					page, in the cases
 					BTR_SEARCH_PREV and
 					BTR_MODIFY_PREV */
 	/*------------------------------*/
 	que_thr_t*	thr;		/* this field is only used when
 					btr_cur_search_... is called for an
 					index entry insertion: the calling
 					query thread is passed here to be
 					used in the insert buffer */
 	/*------------------------------*/
 	/* The following fields are used in btr_cur_search... to pass
 	information: */
 	ulint		flag;		/* BTR_CUR_HASH, BTR_CUR_HASH_FAIL,
 					BTR_CUR_BINARY, or
 					BTR_CUR_INSERT_TO_IBUF */
 	ulint		tree_height;	/* Tree height if the search is done
 					for a pessimistic insert or update
 					operation */
 	ulint		up_match;	/* If the search mode was PAGE_CUR_LE,
 					the number of matched fields to the
 					first user record to the right of
 					the cursor record after
 					btr_cur_search_...;
 					for the mode PAGE_CUR_GE, the matched
 					fields to the first user record AT THE
 					CURSOR or to the right of it;
 					NOTE that the up_match and low_match
 					values may exceed the correct values
 					for comparison to the adjacent user
 					record if that record is on a
 					different leaf page! (See the note in
 					row_ins_duplicate_key.) */
 	ulint		up_bytes;	/* number of matched bytes to the
 					right at the time cursor positioned;
 					only used internally in searches: not
 					defined after the search */
 	ulint		low_match;	/* if search mode was PAGE_CUR_LE,
 					the number of matched fields to the
 					first user record AT THE CURSOR or
 					to the left of it after
 					btr_cur_search_...;
 					NOT defined for PAGE_CUR_GE or any
 					other search modes; see also the NOTE
 					in up_match! */
 	ulint		low_bytes;	/* number of matched bytes to the
 					right at the time cursor positioned;
 					only used internally in searches: not
 					defined after the search */
 	ulint		n_fields;	/* prefix length used in a hash
 					search if hash_node != NULL */
 	ulint		n_bytes;	/* hash prefix bytes if hash_node !=
 					NULL */
 	ulint		fold;		/* fold value used in the search if
 					flag is BTR_CUR_HASH */
 	/*------------------------------*/
 	btr_path_t*	path_arr;	/* in estimating the number of
 					rows in range, we store in this array
 					information of the path through
 					the tree */
 };
 /* Values for the flag documenting the used search method */
 #define BTR_CUR_HASH		1	/* successful shortcut using the hash
 					index */
 #define BTR_CUR_HASH_FAIL	2	/* failure using hash, success using
 					binary search: the misleading hash
 					reference is stored in the field
 					hash_node, and might be necessary to
 					update */
 #define BTR_CUR_BINARY		3	/* success using the binary search */
 #define BTR_CUR_INSERT_TO_IBUF	4	/* performed the intended insert to
 					the insert buffer */
 /* If pessimistic delete fails because of lack of file space,
 there is still a good chance of success a little later: try this many times,
 and sleep this many microseconds in between */
 #define BTR_CUR_RETRY_DELETE_N_TIMES	100
 #define BTR_CUR_RETRY_SLEEP_TIME	50000
 /* The reference in a field for which data is stored on a different page.
 The reference is at the end of the 'locally' stored part of the field.
 'Locally' means storage in the index record.
 We store locally a long enough prefix of each column so that we can determine
 the ordering parts of each index record without looking into the externally
 stored part. */
 /*--------------------------------------*/
 #define BTR_EXTERN_SPACE_ID		0	/* space id where stored */
 #define BTR_EXTERN_PAGE_NO		4	/* page no where stored */
 #define BTR_EXTERN_OFFSET		8	/* offset of BLOB header
 						on that page */
 #define BTR_EXTERN_LEN			12	/* 8 bytes containing the
 						length of the externally
 						stored part of the BLOB.
 						The 2 highest bits are
 						reserved to the flags below. */
 /*--------------------------------------*/
 /* #define BTR_EXTERN_FIELD_REF_SIZE	20 // moved to btr0types.h */
 /* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte
 at lowest address) is set to 1 if this field does not 'own' the externally
 stored field; only the owner field is allowed to free the field in purge!
 If the 2nd highest bit is 1 then it means that the externally stored field
 was inherited from an earlier version of the row. In rollback we are not
 allowed to free an inherited external field. */
 #define BTR_EXTERN_OWNER_FLAG		128
 #define BTR_EXTERN_INHERITED_FLAG	64
 extern ulint	btr_cur_n_non_sea;
 extern ulint	btr_cur_n_sea;
 extern ulint	btr_cur_n_non_sea_old;
 extern ulint	btr_cur_n_sea_old;
 #ifndef UNIV_NONINL
 #include "btr0cur.ic"
 #endif
 #endif
--- a/include/btr0cur.ic
+++ b/include/btr0cur.ic
@@ -0,0 +1,184 @@
 /******************************************************
 The index tree cursor
 (c) 1994-1996 Innobase Oy
 Created 10/16/1994 Heikki Tuuri
 *******************************************************/
 #include "btr0btr.h"
 #ifdef UNIV_DEBUG
 /*************************************************************
 Gives access to the page cursor embedded in a tree cursor. The argument
 is const-qualified, but the returned pointer is not. */
 UNIV_INLINE
 page_cur_t*
 btr_cur_get_page_cur(
 /*=================*/
 					/* out: address of the page cursor
 					member of cursor */
 	const btr_cur_t*	cursor)	/* in: tree cursor */
 {
 	/* Drop the const qualifier so that a writable pointer to the
 	member can be handed out. */
 	btr_cur_t*	writable = (btr_cur_t*) cursor;

 	return(&writable->page_cur);
 }
 #endif /* UNIV_DEBUG */
 /*************************************************************
 Looks up the buffer block of the page cursor inside a tree cursor. */
 UNIV_INLINE
 buf_block_t*
 btr_cur_get_block(
 /*==============*/
 				/* out: buffer block of the page cursor */
 	btr_cur_t*	cursor)	/* in: tree cursor */
 {
 	page_cur_t*	page_cur = btr_cur_get_page_cur(cursor);

 	return(page_cur_get_block(page_cur));
 }
 /*************************************************************
 Looks up the record of the page cursor inside a tree cursor. */
 UNIV_INLINE
 rec_t*
 btr_cur_get_rec(
 /*============*/
 				/* out: record of the page cursor */
 	btr_cur_t*	cursor)	/* in: tree cursor */
 {
 	page_cur_t*	page_cur = &cursor->page_cur;

 	return(page_cur_get_rec(page_cur));
 }
 /*************************************************************
 Looks up the compressed page descriptor of the block on which the tree
 cursor is positioned. */
 UNIV_INLINE
 page_zip_des_t*
 btr_cur_get_page_zip(
 /*=================*/
 				/* out: pointer to compressed page,
 				or NULL if the page is not compressed */
 	btr_cur_t*	cursor)	/* in: tree cursor */
 {
 	buf_block_t*	block = btr_cur_get_block(cursor);

 	return(buf_block_get_page_zip(block));
 }
 /*************************************************************
 Invalidates a tree cursor by invalidating its embedded page cursor. */
 UNIV_INLINE
 void
 btr_cur_invalidate(
 /*===============*/
 	btr_cur_t*	cursor)	/* in/out: tree cursor */
 {
 	page_cur_t*	page_cur = &cursor->page_cur;

 	page_cur_invalidate(page_cur);
 }
 /*************************************************************
 Finds the page holding the record on which the tree cursor is
 positioned, by aligning the record pointer with page_align(). */
 UNIV_INLINE
 page_t*
 btr_cur_get_page(
 /*=============*/
 				/* out: pointer to page */
 	btr_cur_t*	cursor)	/* in: tree cursor */
 {
 	rec_t*	rec = page_cur_get_rec(&cursor->page_cur);

 	return(page_align(rec));
 }
 /*************************************************************
 Reads the index member of a tree cursor. */
 UNIV_INLINE
 dict_index_t*
 btr_cur_get_index(
 /*==============*/
 				/* out: index stored in the cursor */
 	btr_cur_t*	cursor)	/* in: B-tree cursor */
 {
 	dict_index_t*	index = cursor->index;

 	return(index);
 }
 /*************************************************************
 Points a tree cursor at a given record: positions the embedded page
 cursor on (rec, block) and stores the index in the cursor. */
 UNIV_INLINE
 void
 btr_cur_position(
 /*=============*/
 	dict_index_t*	index,	/* in: index */
 	rec_t*		rec,	/* in: record in tree */
 	buf_block_t*	block,	/* in: buffer block of rec */
 	btr_cur_t*	cursor)	/* out: cursor */
 {
 	page_cur_t*	page_cur = btr_cur_get_page_cur(cursor);

 	/* Debug check: rec must lie within the frame of block. */
 	ut_ad(page_align(rec) == block->frame);

 	page_cur_position(rec, block, page_cur);

 	cursor->index = index;
 }
 /*************************************************************************
 Decides whether it makes sense to compress (merge) the index page on which
 a btr cursor is placed. */
 UNIV_INLINE
 ibool
 btr_cur_compress_recommendation(
 /*============================*/
 				/* out: TRUE if compression is recommended */
 	btr_cur_t*	cursor,	/* in: btr cursor */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	page_t*		page;

 	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
 				MTR_MEMO_PAGE_X_FIX));

 	page = btr_cur_get_page(cursor);

 	if ((page_get_data_size(page) >= BTR_CUR_PAGE_COMPRESS_LIMIT)
 	    && ((btr_page_get_next(page, mtr) != FIL_NULL)
 		|| (btr_page_get_prev(page, mtr) != FIL_NULL))) {

 		/* The page is still filled at least to the predefined
 		minimum AND it has a sibling on its level: nothing to
 		recommend. */

 		return(FALSE);
 	}

 	/* The page fillfactor has dropped below the minimum OR the page
 	is alone on its level: compression is recommended unless this
 	is the root page. */

 	return(dict_index_get_page(cursor->index)
 	       != page_get_page_no(page));
 }
 /*************************************************************************
 Decides whether the record under the cursor can be deleted without making
 tree compression necessary (or, recommended). */
 UNIV_INLINE
 ibool
 btr_cur_can_delete_without_compress(
 /*================================*/
 				/* out: TRUE if can be deleted without
 				recommended compression */
 	btr_cur_t*	cursor,	/* in: btr cursor */
 	ulint		rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/
 	mtr_t*		mtr)	/* in: mtr */
 {
 	page_t*		page;

 	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
 				MTR_MEMO_PAGE_X_FIX));

 	page = btr_cur_get_page(cursor);

 	if ((page_get_data_size(page) - rec_size
 	     >= BTR_CUR_PAGE_COMPRESS_LIMIT)
 	    && ((btr_page_get_next(page, mtr) != FIL_NULL)
 		|| (btr_page_get_prev(page, mtr) != FIL_NULL))
 	    && (page_get_n_recs(page) >= 2)) {

 		/* After the delete the page stays filled to at least the
 		predefined minimum, it has a sibling on its level, and it
 		will not become empty: no compression is needed. */

 		return(TRUE);
 	}

 	/* Otherwise compression would be recommended, except on the root
 	page, where the delete can always go ahead without it. */

 	return(dict_index_get_page(cursor->index)
 	       == page_get_page_no(page));
 }
--- a/include/btr0pcur.h
+++ b/include/btr0pcur.h
@@ -0,0 +1,529 @@
 /******************************************************
 The index tree persistent cursor
 (c) 1996 Innobase Oy
 Created 2/23/1996 Heikki Tuuri
 *******************************************************/
 #ifndef btr0pcur_h
 #define btr0pcur_h
 #include "univ.i"
 #include "dict0dict.h"
 #include "data0data.h"
 #include "mtr0mtr.h"
 #include "page0cur.h"
 #include "btr0cur.h"
 #include "btr0btr.h"
 #include "btr0types.h"
 /* Relative positions for a stored cursor position */
 #define BTR_PCUR_ON			1
 #define BTR_PCUR_BEFORE			2
 #define BTR_PCUR_AFTER			3
 /* Note that if the tree is not empty, btr_pcur_store_position does not
 use the following, but only uses the above three alternatives, where the
 position is stored relative to a specific record: this makes implementation
 of a scroll cursor easier */
 #define BTR_PCUR_BEFORE_FIRST_IN_TREE	4	/* in an empty tree */
 #define BTR_PCUR_AFTER_LAST_IN_TREE	5	/* in an empty tree */
 /******************************************************************
 Allocates memory for a persistent cursor object and initializes the cursor. */
 UNIV_INTERN
 btr_pcur_t*
 btr_pcur_create_for_mysql(void);
 /*============================*/
 				/* out, own: persistent cursor */
 /******************************************************************
 Frees the memory for a persistent cursor object. */
 UNIV_INTERN
 void
 btr_pcur_free_for_mysql(
 /*====================*/
 	btr_pcur_t*	cursor);	/* in, own: persistent cursor */
 /******************************************************************
 Copies the stored position of a pcur to another pcur. */
 UNIV_INTERN
 void
 btr_pcur_copy_stored_position(
 /*==========================*/
 	btr_pcur_t*	pcur_receive,	/* in: pcur which will receive the
 					position info */
 	btr_pcur_t*	pcur_donate);	/* in: pcur from which the info is
 					copied */
 /******************************************************************
 Sets the old_rec_buf field to NULL. */
 UNIV_INLINE
 void
 btr_pcur_init(
 /*==========*/
 	btr_pcur_t*	pcur);	/* in: persistent cursor */
 /******************************************************************
 Initializes and opens a persistent cursor to an index tree. It should be
 closed with btr_pcur_close. */
 UNIV_INLINE
 void
 btr_pcur_open(
 /*==========*/
 	dict_index_t*	index,	/* in: index */
 	const dtuple_t*	tuple,	/* in: tuple on which search done */
 	ulint		mode,	/* in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
 				may end up on the previous page from the
 				record! */
 	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
 	mtr_t*		mtr);	/* in: mtr */
 /******************************************************************
 Opens a persistent cursor to an index tree without initializing the
 cursor. */
 UNIV_INLINE
 void
 btr_pcur_open_with_no_init(
 /*=======================*/
 	dict_index_t*	index,	/* in: index */
 	const dtuple_t*	tuple,	/* in: tuple on which search done */
 	ulint		mode,	/* in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
 				may end up on the previous page of the
 				record! */
 	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ...;
 				NOTE that if has_search_latch != 0 then
 				we maybe do not acquire a latch on the cursor
 				page, but assume that the caller uses his
 				btr search latch to protect the record! */
 	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
 	ulint		has_search_latch,/* in: latch mode the caller
 				currently has on btr_search_latch:
 				RW_S_LATCH, or 0 */
 	mtr_t*		mtr);	/* in: mtr */
 /*********************************************************************
 Opens a persistent cursor at either end of an index. */
 UNIV_INLINE
 void
 btr_pcur_open_at_index_side(
 /*========================*/
 	ibool		from_left,	/* in: TRUE if open to the low end,
 					FALSE if to the high end */
 	dict_index_t*	index,		/* in: index */
 	ulint		latch_mode,	/* in: latch mode */
 	btr_pcur_t*	pcur,		/* in: cursor */
 	ibool		do_init,	/* in: TRUE if should be initialized */
 	mtr_t*		mtr);		/* in: mtr */
 /******************************************************************
 Gets the up_match value for a pcur after a search. */
 UNIV_INLINE
 ulint
 btr_pcur_get_up_match(
 /*==================*/
 				/* out: number of matched fields at the cursor
 				or to the right if search mode was PAGE_CUR_GE,
 				otherwise undefined */
 	btr_pcur_t*	cursor); /* in: memory buffer for persistent cursor */
 /******************************************************************
 Gets the low_match value for a pcur after a search. */
 UNIV_INLINE
 ulint
 btr_pcur_get_low_match(
 /*===================*/
 				/* out: number of matched fields at the cursor
 				or to the left if search mode was PAGE_CUR_LE,
 				otherwise undefined */
 	btr_pcur_t*	cursor); /* in: memory buffer for persistent cursor */
 /******************************************************************
 If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
 user record satisfying the search condition, in the case PAGE_CUR_L or
 PAGE_CUR_LE, on the last user record. If no such user record exists, then
 in the first case sets the cursor after last in tree, and in the latter case
 before first in tree. The latching mode must be BTR_SEARCH_LEAF or
 BTR_MODIFY_LEAF. */
 UNIV_INTERN
 void
 btr_pcur_open_on_user_rec(
 /*======================*/
 	dict_index_t*	index,		/* in: index */
 	const dtuple_t*	tuple,		/* in: tuple on which search done */
 	ulint		mode,		/* in: PAGE_CUR_L, ... */
 	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
 					BTR_MODIFY_LEAF */
 	btr_pcur_t*	cursor,		/* in: memory buffer for persistent
 					cursor */
 	mtr_t*		mtr);		/* in: mtr */
 /**************************************************************************
 Positions a cursor at a randomly chosen position within a B-tree. */
 UNIV_INLINE
 void
 btr_pcur_open_at_rnd_pos(
 /*=====================*/
 	dict_index_t*	index,		/* in: index */
 	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor,		/* in/out: B-tree pcur */
 	mtr_t*		mtr);		/* in: mtr */
 /******************************************************************
 Frees the possible old_rec_buf buffer of a persistent cursor and sets the
 latch mode of the persistent cursor to BTR_NO_LATCHES. */
 UNIV_INLINE
 void
 btr_pcur_close(
 /*===========*/
 	btr_pcur_t*	cursor);	/* in: persistent cursor */
 /******************************************************************
 The position of the cursor is stored by taking an initial segment of the
 record the cursor is positioned on, before, or after, and copying it to the
 cursor data structure, or just setting a flag if the cursor is before the
 first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
 page where the cursor is positioned must not be empty if the index tree is
 not totally empty! */
 UNIV_INTERN
 void
 btr_pcur_store_position(
 /*====================*/
 	btr_pcur_t*	cursor, /* in: persistent cursor */
 	mtr_t*		mtr);	/* in: mtr */
 /******************************************************************
 Restores the stored position of a persistent cursor bufferfixing the page and
 obtaining the specified latches. If the cursor position was saved when the
 (1) cursor was positioned on a user record: this function restores the position
 to the last record LESS OR EQUAL to the stored record;
 (2) cursor was positioned on a page infimum record: restores the position to
 the last record LESS than the user record which was the successor of the page
 infimum;
 (3) cursor was positioned on the page supremum: restores to the first record
 GREATER than the user record which was the predecessor of the supremum.
 (4) cursor was positioned before the first or after the last in an empty tree:
 restores to before first or after the last in the tree. */
 UNIV_INTERN
 ibool
 btr_pcur_restore_position(
 /*======================*/
 					/* out: TRUE if the cursor position
 					was stored when it was on a user record
 					and it can be restored on a user record
 					whose ordering fields are identical to
 					the ones of the original user record */
 	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor,		/* in: detached persistent cursor */
 	mtr_t*		mtr);		/* in: mtr */
 /******************************************************************
 If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF,
 releases the page latch and bufferfix reserved by the cursor.
 NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
 made by the current mini-transaction to the data protected by the
 cursor latch, as then the latch must not be released until mtr_commit. */
 UNIV_INTERN
 void
 btr_pcur_release_leaf(
 /*==================*/
 	btr_pcur_t*	cursor, /* in: persistent cursor */
 	mtr_t*		mtr);	/* in: mtr */
 /*************************************************************
 Gets the rel_pos field for a cursor whose position has been stored. */
 UNIV_INLINE
 ulint
 btr_pcur_get_rel_pos(
 /*=================*/
 					/* out: BTR_PCUR_ON, ... */
 	const btr_pcur_t*	cursor);/* in: persistent cursor */
 /*************************************************************
 Sets the mtr field for a pcur. */
 UNIV_INLINE
 void
 btr_pcur_set_mtr(
 /*=============*/
 	btr_pcur_t*	cursor,	/* in: persistent cursor */
 	mtr_t*		mtr);	/* in, own: mtr */
 /*************************************************************
 Gets the mtr field for a pcur. */
 UNIV_INLINE
 mtr_t*
 btr_pcur_get_mtr(
 /*=============*/
 				/* out: mtr */
 	btr_pcur_t*	cursor);	/* in: persistent cursor */
 /******************************************************************
 Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
 that is, the cursor becomes detached. If there have been modifications
 to the page where pcur is positioned, this can be used instead of
 btr_pcur_release_leaf. Function btr_pcur_store_position should be used
 before calling this, if restoration of cursor is wanted later. */
 UNIV_INLINE
 void
 btr_pcur_commit(
 /*============*/
 	btr_pcur_t*	pcur);	/* in: persistent cursor */
 /******************************************************************
 Differs from btr_pcur_commit in that we can specify the mtr to commit. */
 UNIV_INLINE
 void
 btr_pcur_commit_specify_mtr(
 /*========================*/
 	btr_pcur_t*	pcur,	/* in: persistent cursor */
 	mtr_t*		mtr);	/* in: mtr to commit */
 /******************************************************************
 Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
 UNIV_INLINE
 ibool
 btr_pcur_is_detached(
 /*=================*/
 				/* out: TRUE if detached */
 	btr_pcur_t*	pcur);	/* in: persistent cursor */
 /*************************************************************
 Moves the persistent cursor to the next record in the tree. If no records are
 left, the cursor stays 'after last in tree'. */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next(
 /*==================*/
 				/* out: TRUE if the cursor was not after last
 				in tree */
 	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
 				function may release the page latch */
 	mtr_t*		mtr);	/* in: mtr */
 /*************************************************************
 Moves the persistent cursor to the previous record in the tree. If no records
 are left, the cursor stays 'before first in tree'. */
 UNIV_INTERN
 ibool
 btr_pcur_move_to_prev(
 /*==================*/
 				/* out: TRUE if the cursor was not before first
 				in tree */
 	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
 				function may release the page latch */
 	mtr_t*		mtr);	/* in: mtr */
 /*************************************************************
 Moves the persistent cursor to the last record on the same page. */
 UNIV_INLINE
 void
 btr_pcur_move_to_last_on_page(
 /*==========================*/
 	btr_pcur_t*	cursor,	/* in: persistent cursor */
 	mtr_t*		mtr);	/* in: mtr */
 /*************************************************************
 Moves the persistent cursor to the next user record in the tree. If no user
 records are left, the cursor ends up 'after last in tree'. */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next_user_rec(
 /*===========================*/
 				/* out: TRUE if the cursor moved forward,
 				ending on a user record */
 	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
 				function may release the page latch */
 	mtr_t*		mtr);	/* in: mtr */
 /*************************************************************
 Moves the persistent cursor to the first record on the next page.
 Releases the latch on the current page, and bufferunfixes it.
 Note that there must not be modifications on the current page,
 as then the x-latch can be released only in mtr_commit. */
 UNIV_INTERN
 void
 btr_pcur_move_to_next_page(
 /*=======================*/
 	btr_pcur_t*	cursor,	/* in: persistent cursor; must be on the
 				last record of the current page */
 	mtr_t*		mtr);	/* in: mtr */
 /*************************************************************
 Moves the persistent cursor backward if it is on the first record
 of the page. Releases the latch on the current page, and bufferunfixes
 it. Note that to prevent a possible deadlock, the operation first
 stores the position of the cursor, releases the leaf latch, acquires
 necessary latches and restores the cursor position again before returning.
 The alphabetical position of the cursor is guaranteed to be sensible
 on return, but it may happen that the cursor is not positioned on the
 last record of any page, because the structure of the tree may have
 changed while the cursor had no latches. */
 UNIV_INTERN
 void
 btr_pcur_move_backward_from_page(
 /*=============================*/
 	btr_pcur_t*	cursor,	/* in: persistent cursor, must be on the
 				first record of the current page */
 	mtr_t*		mtr);	/* in: mtr */
 #ifdef UNIV_DEBUG
 /*************************************************************
 Returns the btr cursor component of a persistent cursor. */
 UNIV_INLINE
 btr_cur_t*
 btr_pcur_get_btr_cur(
 /*=================*/
 						/* out: pointer to
 						btr cursor component */
 	const btr_pcur_t*	cursor);	/* in: persistent cursor */
 /*************************************************************
 Returns the page cursor component of a persistent cursor. */
 UNIV_INLINE
 page_cur_t*
 btr_pcur_get_page_cur(
 /*==================*/
 						/* out: pointer to
 						page cursor component */
 	const btr_pcur_t*	cursor);	/* in: persistent cursor */
 #else /* UNIV_DEBUG */
 # define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur)
 # define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur)
 #endif /* UNIV_DEBUG */
 /*************************************************************
 Returns the page of a persistent cursor. */
 UNIV_INLINE
 page_t*
 btr_pcur_get_page(
 /*==============*/
 				/* out: pointer to the page */
 	btr_pcur_t*	cursor);/* in: persistent cursor */
 /*************************************************************
 Returns the buffer block of a persistent cursor. */
 UNIV_INLINE
 buf_block_t*
 btr_pcur_get_block(
 /*===============*/
 				/* out: pointer to the block */
 	btr_pcur_t*	cursor);/* in: persistent cursor */
 /*************************************************************
 Returns the record of a persistent cursor. */
 UNIV_INLINE
 rec_t*
 btr_pcur_get_rec(
 /*=============*/
 				/* out: pointer to the record */
 	btr_pcur_t*	cursor);/* in: persistent cursor */
 /*************************************************************
 Checks if the persistent cursor is on a user record. */
 UNIV_INLINE
 ibool
 btr_pcur_is_on_user_rec(
 /*====================*/
 					/* out: FALSE if the cursor is before
 					the first or after the last user
 					record on the page, else TRUE */
 	const btr_pcur_t*	cursor);/* in: persistent cursor */
 /*************************************************************
 Checks if the persistent cursor is after the last user record on
 a page. */
 UNIV_INLINE
 ibool
 btr_pcur_is_after_last_on_page(
 /*===========================*/
 					/* out: result of
 					page_cur_is_after_last() on the page
 					cursor component */
 	const btr_pcur_t*	cursor);/* in: persistent cursor */
 /*************************************************************
 Checks if the persistent cursor is before the first user record on
 a page. */
 UNIV_INLINE
 ibool
 btr_pcur_is_before_first_on_page(
 /*=============================*/
 					/* out: result of
 					page_cur_is_before_first() on the page
 					cursor component */
 	const btr_pcur_t*	cursor);/* in: persistent cursor */
 /*************************************************************
 Checks if the persistent cursor is before the first user record in
 the index tree. */
 UNIV_INLINE
 ibool
 btr_pcur_is_before_first_in_tree(
 /*=============================*/
 				/* out: FALSE if the cursor page has a
 				previous page (btr_page_get_prev() is not
 				FIL_NULL); otherwise whether the page cursor
 				is before the first record */
 	btr_pcur_t*	cursor,	/* in: persistent cursor */
 	mtr_t*		mtr);	/* in: mtr */
 /*************************************************************
 Checks if the persistent cursor is after the last user record in
 the index tree. */
 UNIV_INLINE
 ibool
 btr_pcur_is_after_last_in_tree(
 /*===========================*/
 				/* out: FALSE if the cursor page has a
 				next page (btr_page_get_next() is not
 				FIL_NULL); otherwise whether the page cursor
 				is after the last record */
 	btr_pcur_t*	cursor,	/* in: persistent cursor */
 	mtr_t*		mtr);	/* in: mtr */
 /*************************************************************
 Moves the persistent cursor to the next record on the same page.
 The inline definition also resets old_stored to
 BTR_PCUR_OLD_NOT_STORED. */
 UNIV_INLINE
 void
 btr_pcur_move_to_next_on_page(
 /*==========================*/
 	btr_pcur_t*	cursor);/* in/out: persistent cursor */
 /*************************************************************
 Moves the persistent cursor to the previous record on the same page.
 The inline definition also resets old_stored to
 BTR_PCUR_OLD_NOT_STORED. */
 UNIV_INLINE
 void
 btr_pcur_move_to_prev_on_page(
 /*==========================*/
 	btr_pcur_t*	cursor);/* in/out: persistent cursor */
 /* The persistent B-tree cursor structure. This is used mainly for SQL
 selects, updates, and deletes. It wraps a plain B-tree cursor and adds
 the state needed to store a position and to detach from the page. */
 struct btr_pcur_struct{
 	btr_cur_t	btr_cur;	/* a B-tree cursor */
 	ulint		latch_mode;	/* see the TODO note on
 					BTR_PCUR_IS_POSITIONED below!
 					BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
 					BTR_MODIFY_TREE, or BTR_NO_LATCHES,
 					depending on the latching state of
 					the page and tree where the cursor is
 					positioned; the last value means that
 					the cursor is not currently positioned:
 					we say then that the cursor is
 					detached; it can be restored to
 					attached if the old position was
 					stored in old_rec */
 	ulint		old_stored;	/* BTR_PCUR_OLD_STORED
 					or BTR_PCUR_OLD_NOT_STORED */
 	rec_t*		old_rec;	/* if cursor position is stored,
 					contains an initial segment of the
 					latest record cursor was positioned
 					either on, before, or after */
 	ulint		old_n_fields;	/* number of fields in old_rec */
 	ulint		rel_pos;	/* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
 					BTR_PCUR_AFTER, depending on whether
 					cursor was on, before, or after the
 					old_rec record */
 	buf_block_t*	block_when_stored;/* buffer block when the position was
 					stored */
 	ib_uint64_t	modify_clock;	/* the modify clock value of the
 					buffer block when the cursor position
 					was stored */
 	ulint		pos_state;	/* see the TODO note on
 					BTR_PCUR_IS_POSITIONED below!
 					BTR_PCUR_IS_POSITIONED,
 					BTR_PCUR_WAS_POSITIONED,
 					BTR_PCUR_NOT_POSITIONED */
 	ulint		search_mode;	/* PAGE_CUR_G, ... */
 	trx_t*		trx_if_known;	/* the transaction, if we know it;
 					otherwise this field is not defined;
 					can ONLY BE USED in error prints in
 					fatal assertion failures! */
 	/*-----------------------------*/
 	/* NOTE that the following fields may possess dynamically allocated
 	memory which should be freed if not needed anymore! */
 	mtr_t*		mtr;		/* NULL, or this field may contain
 					a mini-transaction which holds the
 					latch on the cursor page; accessed
 					through btr_pcur_set_mtr() and
 					btr_pcur_get_mtr(), committed by
 					btr_pcur_commit() */
 	byte*		old_rec_buf;	/* NULL, or a dynamically allocated
 					buffer for old_rec; released with
 					mem_free() in btr_pcur_close() */
 	ulint		buf_size;	/* old_rec_buf size if old_rec_buf
 					is not NULL */
 };
 /* Values of the pos_state field of a persistent cursor. The open
 functions set BTR_PCUR_IS_POSITIONED; btr_pcur_commit(),
 btr_pcur_commit_specify_mtr() and btr_pcur_detach() set
 BTR_PCUR_WAS_POSITIONED; btr_pcur_close() sets
 BTR_PCUR_NOT_POSITIONED. */
 #define BTR_PCUR_IS_POSITIONED	1997660512	/* TODO: currently, the state
 						can be BTR_PCUR_IS_POSITIONED,
 						though it really should be
 						BTR_PCUR_WAS_POSITIONED,
 						because we have no obligation
 						to commit the cursor with
 						mtr; similarly latch_mode may
 						be out of date. This can
 						lead to problems if btr_pcur
 						is not used the right way;
 						all current code should be
 						ok. */
 #define BTR_PCUR_WAS_POSITIONED	1187549791
 #define BTR_PCUR_NOT_POSITIONED 1328997689
 /* Values of the old_stored field of a persistent cursor. */
 #define BTR_PCUR_OLD_STORED	908467085
 #define BTR_PCUR_OLD_NOT_STORED	122766467
 #ifndef UNIV_NONINL
 #include "btr0pcur.ic"
 #endif
 #endif
--- a/include/btr0pcur.ic
+++ b/include/btr0pcur.ic
@@ -0,0 +1,640 @@
 /******************************************************
 The index tree persistent cursor
 (c) 1996 Innobase Oy
 Created 2/23/1996 Heikki Tuuri
 *******************************************************/
 /*************************************************************
 Reads the relative position (rel_pos) that was recorded when the
 position of the cursor was stored. */
 UNIV_INLINE
 ulint
 btr_pcur_get_rel_pos(
 /*=================*/
 					/* out: BTR_PCUR_ON, BTR_PCUR_BEFORE,
 					or BTR_PCUR_AFTER */
 	const btr_pcur_t*	cursor)	/* in: persistent cursor whose
 					position has been stored */
 {
 	ut_ad(cursor);
 	/* A stored position implies a saved record and the
 	"old stored" marker. */
 	ut_ad(cursor->old_rec);
 	ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
 	/* The cursor must be, or must have been, positioned. */
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED
 	      || cursor->pos_state == BTR_PCUR_WAS_POSITIONED);
 	return(cursor->rel_pos);
 }
 /*************************************************************
 Stores a mini-transaction pointer in the mtr field of a pcur. */
 UNIV_INLINE
 void
 btr_pcur_set_mtr(
 /*=============*/
 	btr_pcur_t*	cursor,	/* in/out: persistent cursor whose mtr
 				field is overwritten */
 	mtr_t*		mtr)	/* in, own: mtr to store */
 {
 	ut_ad(cursor);
 	/* Any previously stored pointer is simply replaced. */
 	cursor->mtr = mtr;
 }
 /*************************************************************
 Reads the mtr field of a pcur. */
 UNIV_INLINE
 mtr_t*
 btr_pcur_get_mtr(
 /*=============*/
 				/* out: the stored mtr pointer */
 	btr_pcur_t*	cursor)	/* in: persistent cursor */
 {
 	mtr_t*	stored_mtr;
 	ut_ad(cursor);
 	stored_mtr = cursor->mtr;
 	return(stored_mtr);
 }
 #ifdef UNIV_DEBUG
 /*************************************************************
 Gives access to the btr cursor embedded in a persistent cursor. */
 UNIV_INLINE
 btr_cur_t*
 btr_pcur_get_btr_cur(
 /*=================*/
 					/* out: pointer to the btr
 					cursor component */
 	const btr_pcur_t*	cursor)	/* in: persistent cursor */
 {
 	/* The cast drops the const qualifier inherited from the
 	argument, so the caller receives a writable pointer. */
 	return((btr_cur_t*) &cursor->btr_cur);
 }
 /*************************************************************
 Gives access to the page cursor that belongs to the btr cursor of a
 persistent cursor. */
 UNIV_INLINE
 page_cur_t*
 btr_pcur_get_page_cur(
 /*==================*/
 					/* out: pointer to the page
 					cursor component */
 	const btr_pcur_t*	cursor)	/* in: persistent cursor */
 {
 	/* Resolve the btr cursor first, then ask it for its page
 	cursor. */
 	return(btr_cur_get_page_cur(
 		       btr_pcur_get_btr_cur(cursor)));
 }
 #endif /* UNIV_DEBUG */
 /*************************************************************
 Gets the page on which a persistent cursor is positioned. */
 UNIV_INLINE
 page_t*
 btr_pcur_get_page(
 /*==============*/
 				/* out: pointer to the page */
 	btr_pcur_t*	cursor)	/* in: persistent cursor, which must
 				be in state BTR_PCUR_IS_POSITIONED */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	/* The page is obtained from the embedded btr cursor. */
 	return(btr_cur_get_page(
 		       btr_pcur_get_btr_cur(cursor)));
 }
 /*************************************************************
 Gets the buffer block on which a persistent cursor is positioned. */
 UNIV_INLINE
 buf_block_t*
 btr_pcur_get_block(
 /*===============*/
 				/* out: pointer to the block */
 	btr_pcur_t*	cursor)	/* in: persistent cursor, which must
 				be in state BTR_PCUR_IS_POSITIONED */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	/* The block is obtained from the embedded btr cursor. */
 	return(btr_cur_get_block(
 		       btr_pcur_get_btr_cur(cursor)));
 }
 /*************************************************************
 Gets the record on which a persistent cursor is positioned. */
 UNIV_INLINE
 rec_t*
 btr_pcur_get_rec(
 /*=============*/
 				/* out: pointer to the record */
 	btr_pcur_t*	cursor)	/* in: persistent cursor, which must
 				be positioned and not detached */
 {
 	/* Both conditions are checked: the cursor is positioned and
 	its latch mode is not BTR_NO_LATCHES. */
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	return(btr_cur_get_rec(
 		       btr_pcur_get_btr_cur(cursor)));
 }
 /******************************************************************
 Reads the up_match value that a search left in the btr cursor of
 a pcur. */
 UNIV_INLINE
 ulint
 btr_pcur_get_up_match(
 /*==================*/
 				/* out: number of matched fields at the cursor
 				or to the right if search mode was PAGE_CUR_GE,
 				otherwise undefined */
 	btr_pcur_t*	cursor) /* in: memory buffer for persistent cursor */
 {
 	btr_cur_t*	tree_cur = btr_pcur_get_btr_cur(cursor);
 	/* The cursor must be, or must have been, positioned. */
 	ut_ad((cursor->pos_state == BTR_PCUR_IS_POSITIONED)
 	      || (cursor->pos_state == BTR_PCUR_WAS_POSITIONED));
 	/* ULINT_UNDEFINED would mean that no value is available. */
 	ut_ad(tree_cur->up_match != ULINT_UNDEFINED);
 	return(tree_cur->up_match);
 }
 /******************************************************************
 Reads the low_match value that a search left in the btr cursor of
 a pcur. */
 UNIV_INLINE
 ulint
 btr_pcur_get_low_match(
 /*===================*/
 				/* out: number of matched fields at the cursor
 				or to the right if search mode was PAGE_CUR_LE,
 				otherwise undefined */
 	btr_pcur_t*	cursor) /* in: memory buffer for persistent cursor */
 {
 	btr_cur_t*	tree_cur = btr_pcur_get_btr_cur(cursor);
 	/* The cursor must be, or must have been, positioned. */
 	ut_ad((cursor->pos_state == BTR_PCUR_IS_POSITIONED)
 	      || (cursor->pos_state == BTR_PCUR_WAS_POSITIONED));
 	/* ULINT_UNDEFINED would mean that no value is available. */
 	ut_ad(tree_cur->low_match != ULINT_UNDEFINED);
 	return(tree_cur->low_match);
 }
 /*************************************************************
 Tests whether the persistent cursor stands after the last user record
 on its page. */
 UNIV_INLINE
 ibool
 btr_pcur_is_after_last_on_page(
 /*===========================*/
 					/* out: result of
 					page_cur_is_after_last() */
 	const btr_pcur_t*	cursor)	/* in: persistent cursor, which
 					must be positioned and not
 					detached */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	/* The decision is delegated to the page cursor component. */
 	return(page_cur_is_after_last(
 		       btr_pcur_get_page_cur(cursor)));
 }
 /*************************************************************
 Tests whether the persistent cursor stands before the first user
 record on its page. */
 UNIV_INLINE
 ibool
 btr_pcur_is_before_first_on_page(
 /*=============================*/
 					/* out: result of
 					page_cur_is_before_first() */
 	const btr_pcur_t*	cursor)	/* in: persistent cursor, which
 					must be positioned and not
 					detached */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	/* The decision is delegated to the page cursor component. */
 	return(page_cur_is_before_first(
 		       btr_pcur_get_page_cur(cursor)));
 }
 /*************************************************************
 Tests whether the persistent cursor stands on a user record. */
 UNIV_INLINE
 ibool
 btr_pcur_is_on_user_rec(
 /*====================*/
 					/* out: TRUE if on a user
 					record, else FALSE */
 	const btr_pcur_t*	cursor)	/* in: persistent cursor, which
 					must be positioned and not
 					detached */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	/* On a user record means: neither before the first nor after
 	the last user record on the page. */
 	return((!btr_pcur_is_before_first_on_page(cursor)
 		&& !btr_pcur_is_after_last_on_page(cursor))
 	       ? TRUE : FALSE);
 }
 /*************************************************************
 Tests whether the persistent cursor stands before the first user
 record of the whole index tree. */
 UNIV_INLINE
 ibool
 btr_pcur_is_before_first_in_tree(
 /*=============================*/
 				/* out: FALSE if the cursor page has a
 				previous page; otherwise whether the cursor
 				is before the first record on its page */
 	btr_pcur_t*	cursor,	/* in: persistent cursor */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	/* Only a page without a predecessor can hold the start of
 	the tree. */
 	if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) == FIL_NULL) {
 		return(page_cur_is_before_first(
 			       btr_pcur_get_page_cur(cursor)));
 	}
 	return(FALSE);
 }
 /*************************************************************
 Tests whether the persistent cursor stands after the last user record
 of the whole index tree. */
 UNIV_INLINE
 ibool
 btr_pcur_is_after_last_in_tree(
 /*===========================*/
 				/* out: FALSE if the cursor page has a
 				next page; otherwise whether the cursor is
 				after the last record on its page */
 	btr_pcur_t*	cursor,	/* in: persistent cursor */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	/* Only a page without a successor can hold the end of the
 	tree. */
 	if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) == FIL_NULL) {
 		return(page_cur_is_after_last(
 			       btr_pcur_get_page_cur(cursor)));
 	}
 	return(FALSE);
 }
 /*************************************************************
 Advances the persistent cursor by one record within its page. */
 UNIV_INLINE
 void
 btr_pcur_move_to_next_on_page(
 /*==========================*/
 	btr_pcur_t*	cursor)	/* in/out: persistent cursor, which
 				must be positioned and not detached */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	page_cur_move_to_next(
 		btr_pcur_get_page_cur(cursor));
 	/* The cursor has moved, so the stored position is marked as
 	not stored. */
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 }
 /*************************************************************
 Steps the persistent cursor back by one record within its page. */
 UNIV_INLINE
 void
 btr_pcur_move_to_prev_on_page(
 /*==========================*/
 	btr_pcur_t*	cursor)	/* in/out: persistent cursor, which
 				must be positioned and not detached */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	page_cur_move_to_prev(
 		btr_pcur_get_page_cur(cursor));
 	/* The cursor has moved, so the stored position is marked as
 	not stored. */
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 }
 /*************************************************************
 Moves the persistent cursor to the end of the same page. The page
 cursor is positioned with page_cur_set_after_last(), that is, after
 the last record on the page. */
 UNIV_INLINE
 void
 btr_pcur_move_to_last_on_page(
 /*==========================*/
 	btr_pcur_t*	cursor,	/* in/out: persistent cursor */
 	mtr_t*		mtr)	/* in: mtr; not used */
 {
 	/* mtr is part of the interface only. */
 	UT_NOT_USED(mtr);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	page_cur_set_after_last(
 		btr_pcur_get_block(cursor),
 		btr_pcur_get_page_cur(cursor));
 	/* The cursor has moved, so the stored position is marked as
 	not stored. */
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 }
 /*************************************************************
 Advances the persistent cursor until it stands on a user record. If the
 tree has no further user records, the cursor is left 'after last in
 tree'. */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next_user_rec(
 /*===========================*/
 				/* out: TRUE if the cursor moved forward,
 				ending on a user record */
 	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
 				function may release the page latch */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 	/* Take one step at a time, within the page or over to the next
 	page, until a user record is reached or the tree ends. */
 	for (;;) {
 		if (!btr_pcur_is_after_last_on_page(cursor)) {
 			btr_pcur_move_to_next_on_page(cursor);
 		} else if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
 			return(FALSE);
 		} else {
 			btr_pcur_move_to_next_page(cursor, mtr);
 		}
 		if (btr_pcur_is_on_user_rec(cursor)) {
 			return(TRUE);
 		}
 	}
 }
 /*************************************************************
 Advances the persistent cursor by one record in the tree. If the cursor
 is already 'after last in tree', it stays there. */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next(
 /*==================*/
 				/* out: TRUE if the cursor was not after last
 				in tree */
 	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
 				function may release the page latch */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 	if (!btr_pcur_is_after_last_on_page(cursor)) {
 		/* Common case: the next record is on this page. */
 		btr_pcur_move_to_next_on_page(cursor);
 	} else if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
 		/* Nothing follows in the whole tree. */
 		return(FALSE);
 	} else {
 		btr_pcur_move_to_next_page(cursor, mtr);
 	}
 	return(TRUE);
 }
 /******************************************************************
 Detaches the cursor by committing the mtr stored in the pcur and setting
 the pcur latch mode to BTR_NO_LATCHES. If there have been modifications
 to the page where pcur is positioned, this can be used instead of
 btr_pcur_release_leaf. If the cursor is to be restored later,
 btr_pcur_store_position should be called before this function. */
 UNIV_INLINE
 void
 btr_pcur_commit(
 /*============*/
 	btr_pcur_t*	pcur)	/* in/out: persistent cursor, which must
 				be in state BTR_PCUR_IS_POSITIONED */
 {
 	/* Enforced with ut_a(), unlike the ut_ad() checks used by the
 	accessors. */
 	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
 	pcur->latch_mode = BTR_NO_LATCHES;
 	/* The committed mtr is the one stored in the cursor itself. */
 	mtr_commit(pcur->mtr);
 	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
 }
 /******************************************************************
 Same as btr_pcur_commit, except that the caller supplies the mtr to
 commit instead of the one stored in the pcur. */
 UNIV_INLINE
 void
 btr_pcur_commit_specify_mtr(
 /*========================*/
 	btr_pcur_t*	pcur,	/* in/out: persistent cursor, which must
 				be in state BTR_PCUR_IS_POSITIONED */
 	mtr_t*		mtr)	/* in: mtr to commit */
 {
 	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
 	pcur->latch_mode = BTR_NO_LATCHES;
 	/* pcur->mtr is not consulted here. */
 	mtr_commit(mtr);
 	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
 }
 /******************************************************************
 Marks the pcur as detached: the latch mode becomes BTR_NO_LATCHES and
 the position state becomes BTR_PCUR_WAS_POSITIONED. No mtr is
 committed. */
 UNIV_INLINE
 void
 btr_pcur_detach(
 /*============*/
 	btr_pcur_t*	pcur)	/* in/out: persistent cursor, which must
 				be in state BTR_PCUR_IS_POSITIONED */
 {
 	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
 	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
 	pcur->latch_mode = BTR_NO_LATCHES;
 }
 /******************************************************************
 Tells whether a cursor is detached, which is defined as its latch mode
 being BTR_NO_LATCHES. */
 UNIV_INLINE
 ibool
 btr_pcur_is_detached(
 /*=================*/
 				/* out: TRUE if detached, else FALSE */
 	btr_pcur_t*	pcur)	/* in: persistent cursor */
 {
 	/* Only latch_mode is consulted; pos_state plays no part. */
 	return((pcur->latch_mode == BTR_NO_LATCHES) ? TRUE : FALSE);
 }
 /******************************************************************
 Resets the stored-position fields of a persistent cursor: old_rec and
 old_rec_buf are set to NULL and old_stored to BTR_PCUR_OLD_NOT_STORED.
 Nothing is freed here. */
 UNIV_INLINE
 void
 btr_pcur_init(
 /*==========*/
 	btr_pcur_t*	pcur)	/* in/out: persistent cursor */
 {
 	pcur->old_rec = NULL;
 	pcur->old_rec_buf = NULL;
 	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
 }
 /******************************************************************
 Initializes a persistent cursor and positions it in an index tree by
 searching for a tuple. The cursor should be closed with
 btr_pcur_close. */
 UNIV_INLINE
 void
 btr_pcur_open(
 /*==========*/
 	dict_index_t*	index,	/* in: index */
 	const dtuple_t*	tuple,	/* in: tuple on which search done */
 	ulint		mode,	/* in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
 				may end up on the previous page from the
 				record! */
 	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	btr_cur_t*	tree_cursor;
 	/* Reset the stored-position fields, then record how the
 	cursor is being opened. */
 	btr_pcur_init(cursor);
 	cursor->search_mode = mode;
 	cursor->latch_mode = latch_mode;
 	/* Level 0 is searched, with no search latch held (the
 	has_search_latch argument is 0). */
 	tree_cursor = btr_pcur_get_btr_cur(cursor);
 	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
 				    tree_cursor, 0, mtr);
 	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 	cursor->trx_if_known = NULL;
 }
 /******************************************************************
 Opens a persistent cursor to an index tree without initializing the
 cursor: btr_pcur_init is not called, so old_rec and old_rec_buf keep
 their values. */
 UNIV_INLINE
 void
 btr_pcur_open_with_no_init(
 /*=======================*/
 	dict_index_t*	index,	/* in: index */
 	const dtuple_t*	tuple,	/* in: tuple on which search done */
 	ulint		mode,	/* in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
 				may end up on the previous page of the
 				record! */
 	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ...;
 				NOTE that if has_search_latch != 0 then
 				we maybe do not acquire a latch on the cursor
 				page, but assume that the caller uses his
 				btr search latch to protect the record! */
 	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
 	ulint		has_search_latch,/* in: latch mode the caller
 				currently has on btr_search_latch:
 				RW_S_LATCH, or 0 */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	btr_cur_t*	tree_cursor;
 	/* Record how the cursor is being opened. */
 	cursor->search_mode = mode;
 	cursor->latch_mode = latch_mode;
 	/* Level 0 is searched; the caller's search latch state is
 	passed through. */
 	tree_cursor = btr_pcur_get_btr_cur(cursor);
 	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
 				    tree_cursor, has_search_latch, mtr);
 	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 	cursor->trx_if_known = NULL;
 }
 /*********************************************************************
 Opens a persistent cursor at the low or the high end of an index. */
 UNIV_INLINE
 void
 btr_pcur_open_at_index_side(
 /*========================*/
 	ibool		from_left,	/* in: TRUE if open to the low end,
 					FALSE if to the high end */
 	dict_index_t*	index,		/* in: index */
 	ulint		latch_mode,	/* in: latch mode */
 	btr_pcur_t*	pcur,		/* in/out: cursor */
 	ibool		do_init,	/* in: TRUE if btr_pcur_init should
 					be called on the cursor first */
 	mtr_t*		mtr)		/* in: mtr */
 {
 	pcur->latch_mode = latch_mode;
 	/* The search mode is PAGE_CUR_G for the low end and
 	PAGE_CUR_L for the high end. */
 	pcur->search_mode = from_left ? PAGE_CUR_G : PAGE_CUR_L;
 	if (do_init) {
 		btr_pcur_init(pcur);
 	}
 	btr_cur_open_at_index_side(from_left, index, latch_mode,
 				   btr_pcur_get_btr_cur(pcur), mtr);
 	pcur->pos_state = BTR_PCUR_IS_POSITIONED;
 	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
 	pcur->trx_if_known = NULL;
 }
 /**************************************************************************
 Initializes a persistent cursor and positions it at a randomly chosen
 place within a B-tree. */
 UNIV_INLINE
 void
 btr_pcur_open_at_rnd_pos(
 /*=====================*/
 	dict_index_t*	index,		/* in: index */
 	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor,		/* in/out: B-tree pcur */
 	mtr_t*		mtr)		/* in: mtr */
 {
 	/* Reset the stored-position fields; the search mode is always
 	PAGE_CUR_G here. */
 	btr_pcur_init(cursor);
 	cursor->search_mode = PAGE_CUR_G;
 	cursor->latch_mode = latch_mode;
 	btr_cur_open_at_rnd_pos(index, latch_mode,
 				btr_pcur_get_btr_cur(cursor), mtr);
 	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 	cursor->trx_if_known = NULL;
 }
 /******************************************************************
 Closes a persistent cursor: frees old_rec_buf if one was allocated,
 clears the page cursor and the stored position, and sets the latch mode
 to BTR_NO_LATCHES and the state to BTR_PCUR_NOT_POSITIONED. */
 UNIV_INLINE
 void
 btr_pcur_close(
 /*===========*/
 	btr_pcur_t*	cursor)	/* in/out: persistent cursor */
 {
 	/* Release the buffer that may back old_rec. */
 	if (cursor->old_rec_buf != NULL) {
 		mem_free(cursor->old_rec_buf);
 		cursor->old_rec_buf = NULL;
 	}
 	/* old_rec is cleared whether or not a buffer existed. */
 	cursor->old_rec = NULL;
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 	/* The page cursor no longer points at any record or block. */
 	cursor->btr_cur.page_cur.rec = NULL;
 	cursor->btr_cur.page_cur.block = NULL;
 	cursor->latch_mode = BTR_NO_LATCHES;
 	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
 	cursor->trx_if_known = NULL;
 }
--- a/include/btr0sea.h
+++ b/include/btr0sea.h
@@ -0,0 +1,265 @@
 /************************************************************************
 The index tree adaptive search
 (c) 1996 Innobase Oy
 Created 2/17/1996 Heikki Tuuri
 *************************************************************************/
 #ifndef btr0sea_h
 #define btr0sea_h
 #include "univ.i"
 #include "rem0rec.h"
 #include "dict0dict.h"
 #include "btr0types.h"
 #include "mtr0mtr.h"
 #include "ha0ha.h"
 /*********************************************************************
 Creates and initializes the adaptive search system at a database start. */
 UNIV_INTERN
 void
 btr_search_sys_create(
 /*==================*/
 	ulint	hash_size);	/* in: hash index hash table size */
 /************************************************************************
 Disable the adaptive hash search system and empty the index. */
 UNIV_INTERN
 void
 btr_search_disable(void);
 /*====================*/
 /************************************************************************
 Enable the adaptive hash search system. */
 UNIV_INTERN
 void
 btr_search_enable(void);
 /*====================*/
 /************************************************************************
 Returns search info for an index. The inline definition returns the
 search_info member of the index.
 NOTE(review): the inline definition takes no mutex itself, so confirm
 that the "search mutex reserved" remark below is still accurate. */
 UNIV_INLINE
 btr_search_t*
 btr_search_get_info(
 /*================*/
 				/* out: search info; search mutex reserved */
 	dict_index_t*	index);	/* in: index */
 /*********************************************************************
 Creates and initializes a search info struct. */
 UNIV_INTERN
 btr_search_t*
 btr_search_info_create(
 /*===================*/
 				/* out, own: search info struct */
 	mem_heap_t*	heap);	/* in: heap where created */
 /*************************************************************************
 Updates the search info. The inline definition increments hash_analysis
 and calls btr_search_info_update_slow() only once the counter has
 reached BTR_SEARCH_HASH_ANALYSIS. */
 UNIV_INLINE
 void
 btr_search_info_update(
 /*===================*/
 	dict_index_t*	index,	/* in: index of the cursor */
 	btr_cur_t*	cursor);/* in: cursor which was just positioned */
 /**********************************************************************
 Tries to guess the right search position based on the hash search info
 of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
 and the function returns TRUE, then cursor->up_match and cursor->low_match
 both have sensible values. */
 UNIV_INTERN
 ibool
 btr_search_guess_on_hash(
 /*=====================*/
 					/* out: TRUE if succeeded */
 	dict_index_t*	index,		/* in: index */
 	btr_search_t*	info,		/* in: index search info */
 	const dtuple_t*	tuple,		/* in: logical record */
 	ulint		mode,		/* in: PAGE_CUR_L, ... */
 	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
 	btr_cur_t*	cursor,		/* out: tree cursor */
 	ulint		has_search_latch,/* in: latch mode the caller
 					currently has on btr_search_latch:
 					RW_S_LATCH, RW_X_LATCH, or 0 */
 	mtr_t*		mtr);		/* in: mtr */
 /************************************************************************
 Moves or deletes hash entries for moved records. If new_page is already hashed,
 then the hash index for page, if any, is dropped. If new_page is not hashed,
 and page is hashed, then a new hash index is built to new_page with the same
 parameters as page (this often happens when a page is split). */
 UNIV_INTERN
 void
 btr_search_move_or_delete_hash_entries(
 /*===================================*/
 	buf_block_t*	new_block,	/* in: records are copied
 					to this page */
 	buf_block_t*	block,		/* in: index page from which
 					records were copied, and the
 					copied records will be deleted
 					from this page */
 	dict_index_t*	index);		/* in: record descriptor */
 /************************************************************************
 Drops a page hash index. */
 UNIV_INTERN
 void
 btr_search_drop_page_hash_index(
 /*============================*/
 	buf_block_t*	block);	/* in: block containing index page,
 				s- or x-latched, or an index page
 				for which we know that
 				block->buf_fix_count == 0 */
 /************************************************************************
 Drops a page hash index when a page is freed from a fseg to the file system.
 Drops possible hash index if the page happens to be in the buffer pool. */
 UNIV_INTERN
 void
 btr_search_drop_page_hash_when_freed(
 /*=================================*/
 	ulint	space,		/* in: space id */
 	ulint	zip_size,	/* in: compressed page size in bytes
 				or 0 for uncompressed pages */
 	ulint	page_no);	/* in: page number */
 /************************************************************************
 Updates the page hash index when a single record is inserted on a page. */
 UNIV_INTERN
 void
 btr_search_update_hash_node_on_insert(
 /*==================================*/
 	btr_cur_t*	cursor);/* in: cursor which was positioned to the
 				place to insert using btr_cur_search_...,
 				and the new record has been inserted next
 				to the cursor */
 /************************************************************************
 Updates the page hash index when a single record is inserted on a page. */
 UNIV_INTERN
 void
 btr_search_update_hash_on_insert(
 /*=============================*/
 	btr_cur_t*	cursor);/* in: cursor which was positioned to the
 				place to insert using btr_cur_search_...,
 				and the new record has been inserted next
 				to the cursor */
 /************************************************************************
 Updates the page hash index when a single record is deleted from a page. */
 UNIV_INTERN
 void
 btr_search_update_hash_on_delete(
 /*=============================*/
 	btr_cur_t*	cursor);/* in: cursor which was positioned on the
 				record to delete using btr_cur_search_...,
 				the record is not yet deleted */
 /************************************************************************
 Validates the search system. */
 UNIV_INTERN
 ibool
 btr_search_validate(void);
 /*======================*/
 				/* out: TRUE if ok */
 /* Flag: has the search system been disabled? */
 extern ibool btr_search_disabled;
 /* The search info struct in an index */
 struct btr_search_struct{
 	/* The following fields are not protected by any latch.
 	Unfortunately, this means that they must be aligned to
 	the machine word, i.e., they cannot be turned into bit-fields. */
 	buf_block_t* root_guess;/* the buffer block of the root page when
 				it was last fetched, or NULL */
 	ulint	hash_analysis;	/* when this exceeds BTR_SEARCH_HASH_ANALYSIS,
 				the hash analysis starts; this is reset if no
 				success noticed */
 	ibool	last_hash_succ;	/* TRUE if the last search would have
 				succeeded, or did succeed, using the hash
 				index; NOTE that the value here is not exact:
 				it is not calculated for every search, and the
 				calculation itself is not always accurate! */
 	ulint	n_hash_potential;
 				/* number of consecutive searches
 				which would have succeeded, or did succeed,
 				using the hash index;
 				the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
 	/*----------------------*/
 	ulint	n_fields;	/* recommended prefix length for hash search:
 				number of full fields */
 	ulint	n_bytes;	/* recommended prefix: number of bytes in
 				an incomplete field;
 				see also BTR_PAGE_MAX_REC_SIZE */
 	ibool	left_side;	/* TRUE or FALSE, depending on whether
 				the leftmost record of several records with
 				the same prefix should be indexed in the
 				hash index */
 	/*----------------------*/
 #ifdef UNIV_SEARCH_PERF_STAT
 	/* Statistics counters, present only when UNIV_SEARCH_PERF_STAT
 	is defined. */
 	ulint	n_hash_succ;	/* number of successful hash searches thus
 				far */
 	ulint	n_hash_fail;	/* number of failed hash searches */
 	ulint	n_patt_succ;	/* number of successful pattern searches thus
 				far */
 	ulint	n_searches;	/* number of searches */
 #endif /* UNIV_SEARCH_PERF_STAT */
 #ifdef UNIV_DEBUG
 	ulint	magic_n;	/* magic number, present only in
 				UNIV_DEBUG builds; presumably set to
 				BTR_SEARCH_MAGIC_N to validate the
 				struct -- TODO confirm */
 # define BTR_SEARCH_MAGIC_N	1112765
 #endif /* UNIV_DEBUG */
 };
 /* The hash index system */
 typedef struct btr_search_sys_struct	btr_search_sys_t;
 struct btr_search_sys_struct{
 	hash_table_t*	hash_index;	/* hash table of the hash index
 					system; presumably the adaptive hash
 					index itself -- TODO confirm */
 };
 extern btr_search_sys_t*	btr_search_sys;
 /* The latch protecting the adaptive search system: this latch protects the
 (1) hash index;
 (2) columns of a record to which we have a pointer in the hash index;
 but does NOT protect:
 (3) next record offset field in a record;
 (4) next or previous records on the same page.
 Bear in mind (3) and (4) when using the hash index.
 */
 extern rw_lock_t*	btr_search_latch_temp;
 #define btr_search_latch	(*btr_search_latch_temp)
 #ifdef UNIV_SEARCH_PERF_STAT
 extern ulint	btr_search_n_succ;
 extern ulint	btr_search_n_hash_fail;
 #endif /* UNIV_SEARCH_PERF_STAT */
 /* After change in n_fields or n_bytes in info, this many rounds are waited
 before starting the hash analysis again: this is to save CPU time when there
 is no hope in building a hash index. */
 #define BTR_SEARCH_HASH_ANALYSIS	17
 /* Limit of consecutive searches for trying a search shortcut on the search
 pattern */
 #define BTR_SEARCH_ON_PATTERN_LIMIT	3
 /* Limit of consecutive searches for trying a search shortcut using the hash
 index */
 #define BTR_SEARCH_ON_HASH_LIMIT	3
 /* We do this many searches before trying to keep the search latch over calls
 from MySQL. If we notice someone waiting for the latch, we again set this
 much timeout. This is to reduce contention. */
 #define BTR_SEA_TIMEOUT			10000
 #ifndef UNIV_NONINL
 #include "btr0sea.ic"
 #endif
 #endif
--- a/include/btr0sea.ic
+++ b/include/btr0sea.ic
@@ -0,0 +1,67 @@
 /************************************************************************
 The index tree adaptive search
 (c) 1996 Innobase Oy
 Created 2/17/1996 Heikki Tuuri
 *************************************************************************/
 #include "dict0mem.h"
 #include "btr0cur.h"
 #include "buf0buf.h"
 /*************************************************************************
 Updates the search info. */
 UNIV_INTERN
 void
 btr_search_info_update_slow(
 /*========================*/
 	btr_search_t*	info,	/* in/out: search info */
 	btr_cur_t*	cursor);/* in: cursor which was just positioned */
 /************************************************************************
 Looks up the search info object attached to an index.
 NOTE(review): the previous comment said "search mutex reserved", but
 nothing in this function acquires or checks a latch; confirm the intended
 locking contract with the callers. */
 UNIV_INLINE
 btr_search_t*
 btr_search_get_info(
 /*================*/
 				/* out: the search_info member of index */
 	dict_index_t*	index)	/* in: index, must not be NULL */
 {
 	btr_search_t*	info;
 	ut_ad(index);
 	info = index->search_info;
 	return(info);
 }
 /*************************************************************************
 Counts one more search on the index and, once the counter has reached
 BTR_SEARCH_HASH_ANALYSIS, hands the cursor over to
 btr_search_info_update_slow() for the actual update of the search info. */
 UNIV_INLINE
 void
 btr_search_info_update(
 /*===================*/
 	dict_index_t*	index,	/* in: index of the cursor */
 	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
 {
 	btr_search_t*	info;
 #ifdef UNIV_SYNC_DEBUG
 	/* The caller must not hold btr_search_latch in either mode. */
 	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
 	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
 	info = btr_search_get_info(index);
 	if (++info->hash_analysis >= BTR_SEARCH_HASH_ANALYSIS) {
 		/* Enough rounds have passed: take the slow path. */
 		ut_ad(cursor->flag != BTR_CUR_HASH);
 		btr_search_info_update_slow(info, cursor);
 	}
 }
--- a/include/btr0types.h
+++ b/include/btr0types.h
@@ -0,0 +1,31 @@
 /************************************************************************
 The index tree general types
 (c) 1996 Innobase Oy
 Created 2/17/1996 Heikki Tuuri
 *************************************************************************/
 #ifndef btr0types_h
 #define btr0types_h
 #include "univ.i"
 #include "rem0types.h"
 #include "page0types.h"
 typedef struct btr_pcur_struct		btr_pcur_t;
 typedef struct btr_cur_struct		btr_cur_t;
 typedef struct btr_search_struct	btr_search_t;
 /* The size of a reference to data stored on a different page.
 The reference is stored at the end of the prefix of the field
 in the index record. */
 #define BTR_EXTERN_FIELD_REF_SIZE	20
 /* A BLOB field reference full of zero, for use in assertions and tests.
 Initially, BLOB field references are set to zero, in
 dtuple_convert_big_rec(). */
 extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
 #endif
--- a/include/buf0buddy.h
+++ b/include/buf0buddy.h
@@ -0,0 +1,73 @@
 /******************************************************
 Binary buddy allocator for compressed pages
 (c) 2006 Innobase Oy
 Created December 2006 by Marko Makela
 *******************************************************/
 #ifndef buf0buddy_h
 #define buf0buddy_h
 #ifdef UNIV_MATERIALIZE
 # undef UNIV_INLINE
 # define UNIV_INLINE
 #endif
 #include "univ.i"
 #include "buf0types.h"
 /**************************************************************************
 Allocate a block.  The thread calling this function must hold
 buf_pool_mutex and must not hold buf_pool_zip_mutex or any
 block->mutex.  The buf_pool_mutex may only be released and reacquired
 if lru != NULL.  This function should only be used for allocating
 compressed page frames or control blocks (buf_page_t).  Allocated
 control blocks must be properly initialized immediately after
 buf_buddy_alloc() has returned the memory, before releasing
 buf_pool_mutex. */
 UNIV_INLINE
 void*
 buf_buddy_alloc(
 /*============*/
 			/* out: allocated block,
 			possibly NULL if lru == NULL */
 	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
 	ibool*	lru)	/* in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
 			and buf_pool_mutex was temporarily released,
 			or NULL if the LRU list should not be used */
 	__attribute__((malloc));
 /**************************************************************************
 Release a block. */
 UNIV_INLINE
 void
 buf_buddy_free(
 /*===========*/
 	void*	buf,	/* in: block to be freed, must not be
 			pointed to by the buffer pool */
 	ulint	size)	/* in: block size, up to UNIV_PAGE_SIZE */
 	__attribute__((nonnull));
 /** Statistics of buddy blocks of a given size.  One such record is kept
 per block size in the buf_buddy_stat[] array, which is protected by
 buf_pool_mutex. */
 struct buf_buddy_stat_struct {
 	/** Number of blocks allocated from the buddy system. */
 	ulint		used;
 	/** Number of blocks relocated by the buddy system. */
 	ib_uint64_t	relocated;
 	/** Total duration of block relocations, in microseconds. */
 	ib_uint64_t	relocated_usec;
 };
 typedef struct buf_buddy_stat_struct buf_buddy_stat_t;
 /** Statistics of the buddy system, indexed by block size.
 Protected by buf_pool_mutex. */
 extern buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
 #ifndef UNIV_NONINL
 # include "buf0buddy.ic"
 #endif
 #endif /* buf0buddy_h */
--- a/include/buf0buddy.ic
+++ b/include/buf0buddy.ic
@@ -0,0 +1,112 @@
 /******************************************************
 Binary buddy allocator for compressed pages
 (c) 2006 Innobase Oy
 Created December 2006 by Marko Makela
 *******************************************************/
 #ifdef UNIV_MATERIALIZE
 # undef UNIV_INLINE
 # define UNIV_INLINE
 #endif
 #include "buf0buf.h"
 #include "buf0buddy.h"
 #include "ut0ut.h"
 #include "sync0sync.h"
 /**************************************************************************
 Allocate a block.  The thread calling this function must hold
 buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
 The buf_pool_mutex may only be released and reacquired if lru != NULL. */
 UNIV_INTERN
 void*
 buf_buddy_alloc_low(
 /*================*/
 			/* out: allocated block,
 			possibly NULL if lru==NULL */
 	ulint	i,	/* in: index of buf_pool->zip_free[],
 			or BUF_BUDDY_SIZES */
 	ibool*	lru)	/* in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
 			and buf_pool_mutex was temporarily released,
 			or NULL if the LRU list should not be used */
 	__attribute__((malloc));
 /**************************************************************************
 Deallocate a block. */
 UNIV_INTERN
 void
 buf_buddy_free_low(
 /*===============*/
 	void*	buf,	/* in: block to be freed, must not be
 			pointed to by the buffer pool */
 	ulint	i)	/* in: index of buf_pool->zip_free[],
 			or BUF_BUDDY_SIZES */
 	__attribute__((nonnull));
 /**************************************************************************
 Maps a block size to its index in buf_pool->zip_free[]: the number of
 times BUF_BUDDY_LOW has to be doubled until it is at least the requested
 size. */
 UNIV_INLINE
 ulint
 buf_buddy_get_slot(
 /*===============*/
 			/* out: index of buf_pool->zip_free[],
 			or BUF_BUDDY_SIZES */
 	ulint	size)	/* in: block size */
 {
 	ulint	slot		= 0;
 	ulint	block_size	= BUF_BUDDY_LOW;
 	/* Double the candidate block size until the request fits. */
 	while (block_size < size) {
 		block_size <<= 1;
 		slot++;
 	}
 	ut_ad(slot <= BUF_BUDDY_SIZES);
 	return(slot);
 }
 /**************************************************************************
 Allocate a block from the buddy system.  The calling thread must hold
 buf_pool_mutex and must not hold buf_pool_zip_mutex or any
 block->mutex.  The buf_pool_mutex may only be released and reacquired
 if lru != NULL.  Use this only for compressed page frames or control
 blocks (buf_page_t).  An allocated control block must be properly
 initialized right after buf_buddy_alloc() has returned the memory,
 before buf_pool_mutex is released.  The size is converted to a
 zip_free[] slot and the request is passed on to buf_buddy_alloc_low(). */
 UNIV_INLINE
 void*
 buf_buddy_alloc(
 /*============*/
 			/* out: allocated block,
 			possibly NULL if lru == NULL */
 	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
 	ibool*	lru)	/* in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
 			and buf_pool_mutex was temporarily released,
 			or NULL if the LRU list should not be used */
 {
 	ulint	slot;
 	ut_ad(buf_pool_mutex_own());
 	slot = buf_buddy_get_slot(size);
 	return(buf_buddy_alloc_low(slot, lru));
 }
 /**************************************************************************
 Deallocate a block.  The calling thread must hold buf_pool_mutex.  The
 size is converted to a zip_free[] slot and the request is passed on to
 buf_buddy_free_low(). */
 UNIV_INLINE
 void
 buf_buddy_free(
 /*===========*/
 	void*	buf,	/* in: block to be freed, must not be
 			pointed to by the buffer pool */
 	ulint	size)	/* in: block size, up to UNIV_PAGE_SIZE */
 {
 	ulint	slot;
 	ut_ad(buf_pool_mutex_own());
 	slot = buf_buddy_get_slot(size);
 	buf_buddy_free_low(buf, slot);
 }
 #ifdef UNIV_MATERIALIZE
 # undef UNIV_INLINE
 # define UNIV_INLINE	UNIV_INLINE_ORIGINAL
 #endif
--- a/include/buf0buf.h
+++ b/include/buf0buf.h
--- a/include/buf0buf.ic
+++ b/include/buf0buf.ic
--- a/include/buf0flu.h
+++ b/include/buf0flu.h
@@ -0,0 +1,141 @@
 /******************************************************
 The database buffer pool flush algorithm
 (c) 1995 Innobase Oy
 Created 11/5/1995 Heikki Tuuri
 *******************************************************/
 #ifndef buf0flu_h
 #define buf0flu_h
 #include "univ.i"
 #include "buf0types.h"
 #include "ut0byte.h"
 #include "mtr0types.h"
 /************************************************************************
 Inserts a modified block into the flush list. */
 UNIV_INTERN
 void
 buf_flush_insert_into_flush_list(
 /*=============================*/
 	buf_page_t*	bpage);	/* in: block which is modified */
 /************************************************************************
 Remove a block from the flush list of modified blocks. */
 UNIV_INTERN
 void
 buf_flush_remove(
 /*=============*/
 	buf_page_t*	bpage);	/* in: pointer to the block in question */
 /************************************************************************
 Updates the flush system data structures when a write is completed. */
 UNIV_INTERN
 void
 buf_flush_write_complete(
 /*=====================*/
 	buf_page_t*	bpage);	/* in: pointer to the block in question */
 /*************************************************************************
 Flushes pages from the end of the LRU list if there is too small
 a margin of replaceable pages there. */
 UNIV_INTERN
 void
 buf_flush_free_margin(void);
 /*=======================*/
 /************************************************************************
 Initializes a page for writing to the tablespace. */
 UNIV_INTERN
 void
 buf_flush_init_for_writing(
 /*=======================*/
 	byte*		page,		/* in/out: page */
 	void*		page_zip_,	/* in/out: compressed page, or NULL */
 	ib_uint64_t	newest_lsn);	/* in: newest modification lsn
 					to the page */
 /***********************************************************************
 This utility flushes dirty blocks from the end of the LRU list or flush_list.
 NOTE 1: in the case of an LRU flush the calling thread may own latches to
 pages: to avoid deadlocks, this function must be written so that it cannot
 end up waiting for these latches! NOTE 2: in the case of a flush list flush,
 the calling thread is not allowed to own any latches on pages! */
 UNIV_INTERN
 ulint
 buf_flush_batch(
 /*============*/
 					/* out: number of blocks for which the
 					write request was queued;
 					ULINT_UNDEFINED if there was a flush
 					of the same type already running */
 	enum buf_flush	flush_type,	/* in: BUF_FLUSH_LRU or
 					BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
 					then the caller must not own any
 					latches on pages */
 	ulint		min_n,		/* in: wished minimum number of blocks
 					flushed (it is not guaranteed that the
 					actual number is that big, though) */
 	ib_uint64_t	lsn_limit);	/* in: in the case BUF_FLUSH_LIST all
 					blocks whose oldest_modification is
 					smaller than this should be flushed
 					(if their number does not exceed
 					min_n), otherwise ignored */
 /**********************************************************************
 Waits until a flush batch of the given type ends */
 UNIV_INTERN
 void
 buf_flush_wait_batch_end(
 /*=====================*/
 	enum buf_flush	type);	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 /************************************************************************
 This function should be called at a mini-transaction commit, if a page was
 modified in it. Puts the block to the list of modified blocks, if it is not
 already in it. */
 UNIV_INLINE
 void
 buf_flush_note_modification(
 /*========================*/
 	buf_block_t*	block,	/* in: block which is modified */
 	mtr_t*		mtr);	/* in: mtr */
 /************************************************************************
 This function should be called when recovery has modified a buffer page. */
 UNIV_INLINE
 void
 buf_flush_recv_note_modification(
 /*=============================*/
 	buf_block_t*	block,		/* in: block which is modified */
 	ib_uint64_t	start_lsn,	/* in: start lsn of the first mtr in a
 					set of mtr's */
 	ib_uint64_t	end_lsn);	/* in: end lsn of the last mtr in the
 					set of mtr's */
 /************************************************************************
 Returns TRUE if the file page block is immediately suitable for replacement,
 i.e., transition FILE_PAGE => NOT_USED allowed. */
 UNIV_INTERN
 ibool
 buf_flush_ready_for_replace(
 /*========================*/
 				/* out: TRUE if can replace immediately */
 	buf_page_t*	bpage);	/* in: buffer control block, must be
 				buf_page_in_file(bpage) and in the LRU list */
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /**********************************************************************
 Validates the flush list. */
 UNIV_INTERN
 ibool
 buf_flush_validate(void);
 /*====================*/
 		/* out: TRUE if ok */
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 /* When buf_flush_free_margin is called, it tries to make this many blocks
 available to replacement in the free list and at the end of the LRU list (to
 make sure that a read-ahead batch can be read efficiently in a single
 sweep). */
 #define BUF_FLUSH_FREE_BLOCK_MARGIN	(5 + BUF_READ_AHEAD_AREA)
 #define BUF_FLUSH_EXTRA_MARGIN		(BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
 #ifndef UNIV_NONINL
 #include "buf0flu.ic"
 #endif
 #endif
--- a/include/buf0flu.ic
+++ b/include/buf0flu.ic
@@ -0,0 +1,97 @@
 /******************************************************
 The database buffer pool flush algorithm
 (c) 1995 Innobase Oy
 Created 11/5/1995 Heikki Tuuri
 *******************************************************/
 #include "buf0buf.h"
 #include "mtr0mtr.h"
 /************************************************************************
 Inserts a modified block into the flush list in the right sorted position.
 This function is used by recovery, because there the modifications do not
 necessarily come in the order of lsn's. */
 UNIV_INTERN
 void
 buf_flush_insert_sorted_into_flush_list(
 /*====================================*/
 	buf_page_t*	bpage);	/* in: block which is modified */
 /************************************************************************
 To be called at a mini-transaction commit for a page that the
 mini-transaction modified.  Stamps the block with the end lsn of the mtr
 and, if the block has no oldest_modification yet, sets it to the start
 lsn of the mtr and inserts the block into the flush list.  The caller
 must own buf_pool_mutex. */
 UNIV_INLINE
 void
 buf_flush_note_modification(
 /*========================*/
 	buf_block_t*	block,	/* in: block which is modified */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	ut_ad(block);
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 	ut_ad(block->page.buf_fix_count > 0);
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
 	ut_ad(buf_pool_mutex_own());
 	ut_ad(mtr->start_lsn != 0);
 	ut_ad(mtr->modifications);
 	/* The newest modification lsn may only move forward. */
 	ut_ad(block->page.newest_modification <= mtr->end_lsn);
 	block->page.newest_modification = mtr->end_lsn;
 	if (block->page.oldest_modification != 0) {
 		/* Already marked as modified: only sanity-check the lsn. */
 		ut_ad(block->page.oldest_modification <= mtr->start_lsn);
 	} else {
 		/* First modification since the block was last clean. */
 		block->page.oldest_modification = mtr->start_lsn;
 		ut_ad(block->page.oldest_modification != 0);
 		buf_flush_insert_into_flush_list(&block->page);
 	}
 	srv_buf_pool_write_requests++;
 }
 /************************************************************************
 To be called when recovery has modified a buffer page.  Acquires
 buf_pool_mutex itself, stamps the block with end_lsn and, if the block has
 no oldest_modification yet, sets it to start_lsn and inserts the block
 into the flush list at its sorted position. */
 UNIV_INLINE
 void
 buf_flush_recv_note_modification(
 /*=============================*/
 	buf_block_t*	block,		/* in: block which is modified */
 	ib_uint64_t	start_lsn,	/* in: start lsn of the first mtr in a
 					set of mtr's */
 	ib_uint64_t	end_lsn)	/* in: end lsn of the last mtr in the
 					set of mtr's */
 {
 	ut_ad(block);
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 	ut_ad(block->page.buf_fix_count > 0);
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
 	buf_pool_mutex_enter();
 	/* The newest modification lsn may only move forward. */
 	ut_ad(block->page.newest_modification <= end_lsn);
 	block->page.newest_modification = end_lsn;
 	if (block->page.oldest_modification != 0) {
 		/* Already marked as modified: only sanity-check the lsn. */
 		ut_ad(block->page.oldest_modification <= start_lsn);
 	} else {
 		/* First modification since the block was last clean. */
 		block->page.oldest_modification = start_lsn;
 		ut_ad(block->page.oldest_modification != 0);
 		buf_flush_insert_sorted_into_flush_list(&block->page);
 	}
 	buf_pool_mutex_exit();
 }
--- a/include/buf0lru.h
+++ b/include/buf0lru.h
@@ -0,0 +1,243 @@
 /******************************************************
 The database buffer pool LRU replacement algorithm
 (c) 1995 Innobase Oy
 Created 11/5/1995 Heikki Tuuri
 *******************************************************/
 #ifndef buf0lru_h
 #define buf0lru_h
 #include "univ.i"
 #include "ut0byte.h"
 #include "buf0types.h"
 /** The return type of buf_LRU_free_block() */
 enum buf_lru_free_block_status {
 	/** freed */
 	BUF_LRU_FREED = 0,
 	/** not freed because the caller asked to remove the
 	uncompressed frame but the control block cannot be
 	relocated */
 	BUF_LRU_CANNOT_RELOCATE,
 	/** not freed because of some other reason */
 	BUF_LRU_NOT_FREED
 };
 /**********************************************************************
 Tries to remove LRU flushed blocks from the end of the LRU list and put them
 to the free list. This is beneficial for the efficiency of the insert buffer
 operation, as flushed pages from non-unique non-clustered indexes are here
 taken out of the buffer pool, and their inserts redirected to the insert
 buffer. Otherwise, the flushed blocks could get modified again before read
 operations need new buffer blocks, and the i/o work done in flushing would be
 wasted. */
 UNIV_INTERN
 void
 buf_LRU_try_free_flushed_blocks(void);
 /*==================================*/
 /**********************************************************************
 Returns TRUE if less than 25 % of the buffer pool is available. This can be
 used in heuristics to prevent huge transactions eating up the whole buffer
 pool for their locks. */
 UNIV_INTERN
 ibool
 buf_LRU_buf_pool_running_out(void);
 /*==============================*/
 				/* out: TRUE if less than 25 % of buffer pool
 				left */
 /*#######################################################################
 These are low-level functions
 #########################################################################*/
 /* Minimum LRU list length for which the LRU_old pointer is defined */
 #define BUF_LRU_OLD_MIN_LEN	80
 #define BUF_LRU_FREE_SEARCH_LEN		(5 + 2 * BUF_READ_AHEAD_AREA)
 /**********************************************************************
 Invalidates all pages belonging to a given tablespace when we are deleting
 the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
 what guarantees that it will not try to read in pages after this operation has
 completed? */
 UNIV_INTERN
 void
 buf_LRU_invalidate_tablespace(
 /*==========================*/
 	ulint	id);	/* in: space id */
 /**********************************************************************
 Gets the minimum LRU_position field for the blocks in an initial segment
 (determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
 guaranteed to be precise, because the ulint_clock may wrap around. */
 UNIV_INTERN
 ulint
 buf_LRU_get_recent_limit(void);
 /*==========================*/
 			/* out: the limit; zero if could not determine it */
 /************************************************************************
 Insert a compressed block into buf_pool->zip_clean in the LRU order. */
 UNIV_INTERN
 void
 buf_LRU_insert_zip_clean(
 /*=====================*/
 	buf_page_t*	bpage);	/* in: pointer to the block in question */
 /**********************************************************************
 Try to free a block.  If bpage is a descriptor of a compressed-only
 page, the descriptor object will be freed as well.  If this function
 returns BUF_LRU_FREED, it will not temporarily release
 buf_pool_mutex. */
 UNIV_INTERN
 enum buf_lru_free_block_status
 buf_LRU_free_block(
 /*===============*/
 				/* out: BUF_LRU_FREED if freed,
 				BUF_LRU_CANNOT_RELOCATE or
 				BUF_LRU_NOT_FREED otherwise. */
 	buf_page_t*	bpage,	/* in: block to be freed */
 	ibool		zip,	/* in: TRUE if should remove also the
 				compressed page of an uncompressed page */
 	ibool*		buf_pool_mutex_released);
 				/* in: pointer to a variable that will
 				be assigned TRUE if buf_pool_mutex
 				was temporarily released, or NULL */
 /**********************************************************************
 Try to free a replaceable block. */
 UNIV_INTERN
 ibool
 buf_LRU_search_and_free_block(
 /*==========================*/
 				/* out: TRUE if found and freed */
 	ulint	n_iterations);	/* in: how many times this has been called
 				repeatedly without result: a high value means
 				that we should search farther; if
 				n_iterations < 10, then we search
 				n_iterations / 10 * buf_pool->curr_size
 				pages from the end of the LRU list; if
 				n_iterations < 5, then we will also search
 				n_iterations / 5 of the unzip_LRU list. */
 /**********************************************************************
 Returns a free block from the buf_pool.  The block is taken off the
 free list.  If it is empty, returns NULL. */
 UNIV_INTERN
 buf_block_t*
 buf_LRU_get_free_only(void);
 /*=======================*/
 				/* out: a free control block, or NULL
 				if the buf_block->free list is empty */
 /**********************************************************************
 Returns a free block from the buf_pool. The block is taken off the
 free list. If it is empty, blocks are moved from the end of the
 LRU list to the free list. */
 UNIV_INTERN
 buf_block_t*
 buf_LRU_get_free_block(
 /*===================*/
 				/* out: the free control block,
 				in state BUF_BLOCK_READY_FOR_USE */
 	ulint	zip_size);	/* in: compressed page size in bytes,
 				or 0 if uncompressed tablespace */
 /**********************************************************************
 Puts a block back to the free list. */
 UNIV_INTERN
 void
 buf_LRU_block_free_non_file_page(
 /*=============================*/
 	buf_block_t*	block);	/* in: block, must not contain a file page */
 /**********************************************************************
 Adds a block to the LRU list. */
 UNIV_INTERN
 void
 buf_LRU_add_block(
 /*==============*/
 	buf_page_t*	bpage,	/* in: control block */
 	ibool		old);	/* in: TRUE if should be put to the old
 				blocks in the LRU list, else put to the
 				start; if the LRU list is very short, added to
 				the start regardless of this parameter */
 /**********************************************************************
 Adds a block to the LRU list of decompressed zip pages. */
 UNIV_INTERN
 void
 buf_unzip_LRU_add_block(
 /*====================*/
 	buf_block_t*	block,	/* in: control block */
 	ibool		old);	/* in: TRUE if should be put to the end
 				of the list, else put to the start */
 /**********************************************************************
 Moves a block to the start of the LRU list. */
 UNIV_INTERN
 void
 buf_LRU_make_block_young(
 /*=====================*/
 	buf_page_t*	bpage);	/* in: control block */
 /**********************************************************************
 Moves a block to the end of the LRU list. */
 UNIV_INTERN
 void
 buf_LRU_make_block_old(
 /*===================*/
 	buf_page_t*	bpage);	/* in: control block */
 /************************************************************************
 Update the historical stats that we are collecting for LRU eviction
 policy at the end of each interval. */
 UNIV_INTERN
 void
 buf_LRU_stat_update(void);
 /*=====================*/
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /**************************************************************************
 Validates the LRU list. */
 UNIV_INTERN
 ibool
 buf_LRU_validate(void);
 /*==================*/
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /**************************************************************************
 Prints the LRU list. */
 UNIV_INTERN
 void
 buf_LRU_print(void);
 /*===============*/
 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
 /**********************************************************************
 These statistics are not 'of' LRU but 'for' LRU.  We keep count of I/O
 and page_zip_decompress() operations.  Based on the statistics we decide
 if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
 /** Statistics for selecting the LRU list for eviction. */
 struct buf_LRU_stat_struct
 {
 	ulint	io;	/**< Counter of buffer pool I/O operations. */
 	ulint	unzip;	/**< Counter of page_zip_decompress operations. */
 };
 typedef struct buf_LRU_stat_struct buf_LRU_stat_t;
 /** Current operation counters.  Not protected by any mutex.
 Cleared by buf_LRU_stat_update(). */
 extern buf_LRU_stat_t	buf_LRU_stat_cur;
 /** Running sum of past values of buf_LRU_stat_cur.
 Updated by buf_LRU_stat_update().  Protected by buf_pool_mutex. */
 extern buf_LRU_stat_t	buf_LRU_stat_sum;
 /************************************************************************
 Increments the I/O counter in buf_LRU_stat_cur. */
 #define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++
 /************************************************************************
 Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
 #define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++
 #ifndef UNIV_NONINL
 #include "buf0lru.ic"
 #endif
 #endif
--- a/include/buf0lru.ic
+++ b/include/buf0lru.ic
@@ -0,0 +1,8 @@
 /******************************************************
 The database buffer replacement algorithm
 (c) 1995 Innobase Oy
 Created 11/5/1995 Heikki Tuuri
 *******************************************************/
--- a/include/buf0rea.h
+++ b/include/buf0rea.h
@@ -0,0 +1,120 @@
 /******************************************************
 The database buffer read
 (c) 1995 Innobase Oy
 Created 11/5/1995 Heikki Tuuri
 *******************************************************/
 #ifndef buf0rea_h
 #define buf0rea_h
 #include "univ.i"
 #include "buf0types.h"
 /************************************************************************
 High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread. Does a random read-ahead if it seems
 sensible. */
 UNIV_INTERN
 ulint
 buf_read_page(
 /*==========*/
 			/* out: number of page read requests issued: this can
 			be > 1 if read-ahead occurred */
 	ulint	space,	/* in: space id */
 	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
 	ulint	offset);/* in: page number */
 /************************************************************************
 Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
 Does not read any page if the read-ahead mechanism is not activated. Note
 that the algorithm looks at the 'natural' adjacent successor and
 predecessor of the page, which on the leaf level of a B-tree are the next
 and previous page in the chain of leaves. To know these, the page specified
 in (space, offset) must already be present in the buf_pool. Thus, the
 natural way to use this function is to call it when a page in the buf_pool
 is accessed the first time, calling this function just after it has been
 bufferfixed.
 NOTE 1: as this function looks at the natural predecessor and successor
 fields on the page, what happens, if these are not initialized to any
 sensible value? No problem, before applying read-ahead we check that the
 area to read is within the span of the space, if not, read-ahead is not
 applied. An uninitialized value may result in a useless read operation, but
 only very improbably.
 NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
 function must be written such that it cannot end up waiting for these
 latches!
 NOTE 3: the calling thread must want access to the page given: this rule is
 set to prevent unintended read-aheads performed by ibuf routines, a situation
 which could result in a deadlock if the OS does not support asynchronous io. */
 UNIV_INTERN
 ulint
 buf_read_ahead_linear(
 /*==================*/
 			/* out: number of page read requests issued */
 	ulint	space,	/* in: space id */
 	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
 	ulint	offset);/* in: page number of a page; NOTE: the current thread
 			must want access to this page (see NOTE 3 above) */
 /************************************************************************
 Issues read requests for pages which the ibuf module wants to read in, in
 order to contract the insert buffer tree. Technically, this function is like
 a read-ahead function. */
 UNIV_INTERN
 void
 buf_read_ibuf_merge_pages(
 /*======================*/
 	ibool		sync,		/* in: TRUE if the caller
 					wants this function to wait
 					for the highest address page
 					to get read in, before this
 					function returns */
 	const ulint*	space_ids,	/* in: array of space ids */
 	const ib_int64_t* space_versions,/* in: the spaces must have
 					this version number
 					(timestamp), otherwise we
 					discard the read; we use this
 					to cancel reads if DISCARD +
 					IMPORT may have changed the
 					tablespace size */
 	const ulint*	page_nos,	/* in: array of page numbers
 					to read, with the highest page
 					number the last in the
 					array */
 	ulint		n_stored);	/* in: number of elements
 					in the arrays */
 /************************************************************************
 Issues read requests for pages which recovery wants to read in. */
 UNIV_INTERN
 void
 buf_read_recv_pages(
 /*================*/
 	ibool		sync,		/* in: TRUE if the caller
 					wants this function to wait
 					for the highest address page
 					to get read in, before this
 					function returns */
 	ulint		space,		/* in: space id */
 	ulint		zip_size,	/* in: compressed page size in
 					bytes, or 0 */
 	const ulint*	page_nos,	/* in: array of page numbers
 					to read, with the highest page
 					number the last in the
 					array */
 	ulint		n_stored);	/* in: number of page numbers
 					in the array */
 /* The size in pages of the area which the read-ahead algorithms read if
 invoked */
 #define	BUF_READ_AHEAD_AREA					\
 	ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
 /* Modes used in read-ahead */
 #define BUF_READ_IBUF_PAGES_ONLY	131
 #define BUF_READ_ANY_PAGE		132
 #endif
--- a/include/buf0types.h
+++ b/include/buf0types.h
@@ -0,0 +1,54 @@
 /******************************************************
 The database buffer pool global types for the directory
 (c) 1995 Innobase Oy
 Created 11/17/1995 Heikki Tuuri
 *******************************************************/
 #ifndef buf0types_h
 #define buf0types_h
 typedef	struct buf_page_struct		buf_page_t;
 typedef	struct buf_block_struct		buf_block_t;
 typedef struct buf_chunk_struct		buf_chunk_t;
 typedef	struct buf_pool_struct		buf_pool_t;
 /* The 'type' used of a buffer frame */
 typedef	byte	buf_frame_t;
 /* Flags for flush types. The enumerators are numbered consecutively
 starting from 0, so BUF_FLUSH_N_TYPES equals the number of real flush
 types; it is a count, not a flush type of its own. */
 enum buf_flush {
 	BUF_FLUSH_LRU = 0,
 	BUF_FLUSH_SINGLE_PAGE,
 	BUF_FLUSH_LIST,
 	BUF_FLUSH_N_TYPES		/* index of last element + 1  */
 };
 /* Flags for io_fix types: which kind of I/O, if any, is pending.
 BUF_IO_NONE is 0, so a zero-initialized value means "no pending I/O". */
 enum buf_io_fix {
 	BUF_IO_NONE = 0,		/**< no pending I/O */
 	BUF_IO_READ,			/**< read pending */
 	BUF_IO_WRITE			/**< write pending */
 };
 /* Parameters of binary buddy system for compressed pages (buf0buddy.h) */
 #if UNIV_WORD_SIZE <= 4 /* 32-bit system */
 # define BUF_BUDDY_LOW_SHIFT	6
 #else /* 64-bit system */
 # define BUF_BUDDY_LOW_SHIFT	7
 #endif
 #define BUF_BUDDY_LOW		(1 << BUF_BUDDY_LOW_SHIFT)
 					/* minimum block size in the binary
 					buddy system (64 bytes with shift 6,
 					128 bytes with shift 7); must be at
 					least sizeof(buf_page_t) */
 #define BUF_BUDDY_SIZES		(UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
 					/* number of buddy sizes */
 /* twice the maximum block size of the buddy system;
 the underlying memory is aligned by this amount:
 this must be equal to UNIV_PAGE_SIZE. By construction the value is
 1 << (BUF_BUDDY_LOW_SHIFT + BUF_BUDDY_SIZES), i.e.
 1 << UNIV_PAGE_SIZE_SHIFT. */
 #define BUF_BUDDY_HIGH	(BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
 #endif
--- a/include/data0data.h
+++ b/include/data0data.h
@@ -0,0 +1,464 @@
 /************************************************************************
 SQL data field and tuple
 (c) 1994-1996 Innobase Oy
 Created 5/30/1994 Heikki Tuuri
 *************************************************************************/
 #ifndef data0data_h
 #define data0data_h
 #include "univ.i"
 #include "data0types.h"
 #include "data0type.h"
 #include "mem0mem.h"
 #include "dict0types.h"
 typedef struct big_rec_struct		big_rec_t;
 #ifdef UNIV_DEBUG
 /*************************************************************************
 Gets pointer to the type struct of SQL data field. */
 UNIV_INLINE
 dtype_t*
 dfield_get_type(
 /*============*/
 				/* out: pointer to the type struct */
 	const dfield_t*	field);	/* in: SQL data field */
 /*************************************************************************
 Gets pointer to the data in a field. */
 UNIV_INLINE
 void*
 dfield_get_data(
 /*============*/
 				/* out: pointer to data */
 	const dfield_t* field);	/* in: field */
 #else /* UNIV_DEBUG */
 # define dfield_get_type(field) (&(field)->type)
 # define dfield_get_data(field) ((field)->data)
 #endif /* UNIV_DEBUG */
 /*************************************************************************
 Sets the type struct of SQL data field. */
 UNIV_INLINE
 void
 dfield_set_type(
 /*============*/
 	dfield_t*	field,	/* in: SQL data field */
 	dtype_t*	type);	/* in: pointer to data type struct */
 /*************************************************************************
 Gets length of field data. */
 UNIV_INLINE
 ulint
 dfield_get_len(
 /*===========*/
 				/* out: length of data; UNIV_SQL_NULL if
 				SQL null data */
 	const dfield_t* field);	/* in: field */
 /*************************************************************************
 Sets length in a field. As a side effect the "external storage" flag of
 the field is cleared; call dfield_set_ext() after this function if the
 flag is needed. */
 UNIV_INLINE
 void
 dfield_set_len(
 /*===========*/
 	dfield_t*	field,	/* in: field */
 	ulint		len);	/* in: length or UNIV_SQL_NULL */
 /*************************************************************************
 Determines if a field is SQL NULL */
 UNIV_INLINE
 ulint
 dfield_is_null(
 /*===========*/
 				/* out: nonzero if SQL null data */
 	const dfield_t* field);	/* in: field */
 /*************************************************************************
 Determines if a field is externally stored */
 UNIV_INLINE
 ulint
 dfield_is_ext(
 /*==========*/
 				/* out: nonzero if externally stored */
 	const dfield_t* field);	/* in: field */
 /*************************************************************************
 Sets the "external storage" flag */
 UNIV_INLINE
 void
 dfield_set_ext(
 /*===========*/
 	dfield_t*	field);	/* in/out: field */
 /*************************************************************************
 Sets pointer to the data and length in a field. The data is not copied:
 the field refers to the caller's buffer. As a side effect the "external
 storage" flag of the field is cleared. */
 UNIV_INLINE
 void
 dfield_set_data(
 /*============*/
 	dfield_t*	field,	/* in: field */
 	const void*	data,	/* in: data */
 	ulint		len);	/* in: length or UNIV_SQL_NULL */
 /*************************************************************************
 Sets a data field to SQL NULL. */
 UNIV_INLINE
 void
 dfield_set_null(
 /*============*/
 	dfield_t*	field);	/* in/out: field */
 /**************************************************************************
 Writes an SQL null field full of zeros. */
 UNIV_INLINE
 void
 data_write_sql_null(
 /*================*/
 	byte*	data,	/* in: pointer to a buffer of size len */
 	ulint	len);	/* in: SQL null size in bytes */
 /*************************************************************************
 Copies the data and len fields. */
 UNIV_INLINE
 void
 dfield_copy_data(
 /*=============*/
 	dfield_t*	field1,	/* out: field to copy to */
 	const dfield_t*	field2);/* in: field to copy from */
 /*************************************************************************
 Copies a data field to another. */
 UNIV_INLINE
 void
 dfield_copy(
 /*========*/
 	dfield_t*	field1,	/* out: field to copy to */
 	const dfield_t*	field2);/* in: field to copy from */
 /*************************************************************************
 Copies the data pointed to by a data field. */
 UNIV_INLINE
 void
 dfield_dup(
 /*=======*/
 	dfield_t*	field,	/* in/out: data field */
 	mem_heap_t*	heap);	/* in: memory heap where allocated */
 /*************************************************************************
 Tests if data length and content is equal for two dfields. */
 UNIV_INLINE
 ibool
 dfield_datas_are_binary_equal(
 /*==========================*/
 				/* out: TRUE if equal */
 	const dfield_t*	field1,	/* in: field */
 	const dfield_t*	field2);/* in: field */
 /*************************************************************************
 Tests if dfield data length and content is equal to the given. */
 UNIV_INTERN
 ibool
 dfield_data_is_binary_equal(
 /*========================*/
 				/* out: TRUE if equal */
 	const dfield_t*	field,	/* in: field */
 	ulint		len,	/* in: data length or UNIV_SQL_NULL */
 	const byte*	data);	/* in: data */
 /*************************************************************************
 Gets number of fields in a data tuple. */
 UNIV_INLINE
 ulint
 dtuple_get_n_fields(
 /*================*/
 				/* out: number of fields */
 	const dtuple_t*	tuple);	/* in: tuple */
 #ifdef UNIV_DEBUG
 /*************************************************************************
 Gets nth field of a tuple. */
 UNIV_INLINE
 dfield_t*
 dtuple_get_nth_field(
 /*=================*/
 				/* out: nth field */
 	const dtuple_t*	tuple,	/* in: tuple */
 	ulint		n);	/* in: index of field */
 #else /* UNIV_DEBUG */
 # define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n))
 #endif /* UNIV_DEBUG */
 /*************************************************************************
 Gets info bits in a data tuple. */
 UNIV_INLINE
 ulint
 dtuple_get_info_bits(
 /*=================*/
 				/* out: info bits */
 	const dtuple_t*	tuple);	/* in: tuple */
 /*************************************************************************
 Sets info bits in a data tuple. */
 UNIV_INLINE
 void
 dtuple_set_info_bits(
 /*=================*/
 	dtuple_t*	tuple,		/* in: tuple */
 	ulint		info_bits);	/* in: info bits */
 /*************************************************************************
 Gets number of fields used in record comparisons. */
 UNIV_INLINE
 ulint
 dtuple_get_n_fields_cmp(
 /*====================*/
 				/* out: number of fields used in comparisons
 				in rem0cmp.* */
 	const dtuple_t*	tuple);	/* in: tuple */
 /*************************************************************************
 Sets number of fields used in record comparisons. */
 UNIV_INLINE
 void
 dtuple_set_n_fields_cmp(
 /*====================*/
 	dtuple_t*	tuple,		/* in: tuple */
 	ulint		n_fields_cmp);	/* in: number of fields used in
 					comparisons in rem0cmp.*; must not
 					exceed the number of fields in the
 					tuple */
 /**************************************************************
 Creates a data tuple to a memory heap. The default value for number
 of fields used in record comparisons for this tuple is n_fields. */
 UNIV_INLINE
 dtuple_t*
 dtuple_create(
 /*==========*/
 				/* out, own: created tuple */
 	mem_heap_t*	heap,	/* in: memory heap where the tuple
 				is created */
 	ulint		n_fields); /* in: number of fields */
 /**************************************************************
 Wrap data fields in a tuple. The default value for number
 of fields used in record comparisons for this tuple is n_fields. */
 UNIV_INLINE
 const dtuple_t*
 dtuple_from_fields(
 /*===============*/
 					/* out: data tuple */
 	dtuple_t*	tuple,		/* in: storage for data tuple */
 	const dfield_t*	fields,		/* in: fields */
 	ulint		n_fields);	/* in: number of fields */
 /*************************************************************************
 Sets number of fields used in a tuple. Normally this is set in
 dtuple_create, but if you want later to set it smaller, you can use this. */
 UNIV_INTERN
 void
 dtuple_set_n_fields(
 /*================*/
 	dtuple_t*	tuple,		/* in: tuple */
 	ulint		n_fields);	/* in: number of fields */
 /*************************************************************************
 Copies a data tuple to another.  This is a shallow copy; if a deep copy
 is desired, dfield_dup() will have to be invoked on each field. */
 UNIV_INLINE
 dtuple_t*
 dtuple_copy(
 /*========*/
 				/* out, own: copy of tuple */
 	const dtuple_t*	tuple,	/* in: tuple to copy from */
 	mem_heap_t*	heap);	/* in: memory heap
 				where the tuple is created */
 /**************************************************************
 The following function returns the sum of data lengths of a tuple. The space
 occupied by the field structs or the tuple struct is not counted. Neither
 is possible space in externally stored parts of the field. An SQL NULL
 field is counted with the SQL null size of its data type. */
 UNIV_INLINE
 ulint
 dtuple_get_data_size(
 /*=================*/
 				/* out: sum of data lengths */
 	const dtuple_t*	tuple);	/* in: typed data tuple */
 /*************************************************************************
 Computes the number of externally stored fields in a data tuple. */
 UNIV_INLINE
 ulint
 dtuple_get_n_ext(
 /*=============*/
 				/* out: number of externally stored fields */
 	const dtuple_t*	tuple);	/* in: tuple */
 /****************************************************************
 Compare two data tuples, respecting the collation of character fields. */
 UNIV_INTERN
 int
 dtuple_coll_cmp(
 /*============*/
 				/* out: 1, 0 , -1 if tuple1 is greater, equal,
 				less, respectively, than tuple2 */
 	const dtuple_t*	tuple1,	/* in: tuple 1 */
 	const dtuple_t*	tuple2);/* in: tuple 2 */
 /****************************************************************
 Folds a prefix given as the number of fields of a tuple. */
 UNIV_INLINE
 ulint
 dtuple_fold(
 /*========*/
 				/* out: the folded value */
 	const dtuple_t*	tuple,	/* in: the tuple */
 	ulint		n_fields,/* in: number of complete fields to fold */
 	ulint		n_bytes,/* in: number of bytes to fold in an
 				incomplete last field */
 	dulint		tree_id)/* in: index tree id */
 	__attribute__((pure));
 /***********************************************************************
 Sets types of fields binary in a tuple. */
 UNIV_INLINE
 void
 dtuple_set_types_binary(
 /*====================*/
 	dtuple_t*	tuple,	/* in: data tuple */
 	ulint		n);	/* in: number of fields to set */
 /**************************************************************************
 Checks if a dtuple contains an SQL null value. */
 UNIV_INLINE
 ibool
 dtuple_contains_null(
 /*=================*/
 				/* out: TRUE if some field is SQL null */
 	const dtuple_t*	tuple);	/* in: dtuple */
 /**************************************************************
 Checks that a data field is typed. Asserts an error if not. */
 UNIV_INTERN
 ibool
 dfield_check_typed(
 /*===============*/
 				/* out: TRUE if ok */
 	const dfield_t*	field);	/* in: data field */
 /**************************************************************
 Checks that a data tuple is typed. Asserts an error if not. */
 UNIV_INTERN
 ibool
 dtuple_check_typed(
 /*===============*/
 				/* out: TRUE if ok */
 	const dtuple_t*	tuple);	/* in: tuple */
 /**************************************************************
 Checks that a data tuple is typed. */
 UNIV_INTERN
 ibool
 dtuple_check_typed_no_assert(
 /*=========================*/
 				/* out: TRUE if ok */
 	const dtuple_t*	tuple);	/* in: tuple */
 #ifdef UNIV_DEBUG
 /**************************************************************
 Validates the consistency of a tuple which must be complete, i.e,
 all fields must have been set. */
 UNIV_INTERN
 ibool
 dtuple_validate(
 /*============*/
 				/* out: TRUE if ok */
 	const dtuple_t*	tuple);	/* in: tuple */
 #endif /* UNIV_DEBUG */
 /*****************************************************************
 Pretty prints a dfield value according to its data type. */
 UNIV_INTERN
 void
 dfield_print(
 /*=========*/
 	const dfield_t*	dfield);/* in: dfield */
 /*****************************************************************
 Pretty prints a dfield value according to its data type. Also the hex string
 is printed if a string contains non-printable characters. */
 UNIV_INTERN
 void
 dfield_print_also_hex(
 /*==================*/
 	const dfield_t*	dfield);	 /* in: dfield */
 /**************************************************************
 The following function prints the contents of a tuple. */
 UNIV_INTERN
 void
 dtuple_print(
 /*=========*/
 	FILE*		f,	/* in: output stream */
 	const dtuple_t*	tuple);	/* in: tuple */
 /******************************************************************
 Moves parts of long fields in entry to the big record vector so that
 the size of tuple drops below the maximum record size allowed in the
 database. Moves data only from those fields which are not necessary
 to determine uniquely the insertion place of the tuple in the index. */
 UNIV_INTERN
 big_rec_t*
 dtuple_convert_big_rec(
 /*===================*/
 				/* out, own: created big record vector,
 				NULL if we are not able to shorten
 				the entry enough, i.e., if there are
 				too many fixed-length or short fields
 				in entry or the index is clustered */
 	dict_index_t*	index,	/* in: index */
 	dtuple_t*	entry,	/* in/out: index entry */
 	ulint*		n_ext);	/* in/out: number of
 				externally stored columns */
 /******************************************************************
 Puts back to entry the data stored in vector. Note that to ensure the
 fields in entry can accommodate the data, vector must have been created
 from entry with dtuple_convert_big_rec. */
 UNIV_INTERN
 void
 dtuple_convert_back_big_rec(
 /*========================*/
 	dict_index_t*	index,	/* in: index */
 	dtuple_t*	entry,	/* in: entry whose data was put to vector */
 	big_rec_t*	vector);/* in, own: big rec vector; it is
 				freed in this function */
 /******************************************************************
 Frees the memory in a big rec vector. */
 UNIV_INLINE
 void
 dtuple_big_rec_free(
 /*================*/
 	big_rec_t*	vector);	/* in, own: big rec vector; it is
 				freed in this function */
 /*######################################################################*/
 /* Structure for an SQL data field. The struct only refers to the data
 through a pointer; dfield_set_data() stores the pointer without copying. */
 struct dfield_struct{
 	void*		data;	/* pointer to data */
 	unsigned	ext:1;	/* TRUE=externally stored, FALSE=local;
 				cleared by dfield_set_len() and
 				dfield_set_data() */
 	unsigned	len:32;	/* data length; UNIV_SQL_NULL if SQL null;
 				a 32-bit bit-field, so a length passed as
 				ulint is narrowed to 32 bits when stored */
 	dtype_t		type;	/* type of data */
 };
 /* Structure for a data tuple: a header followed by an array of fields */
 struct dtuple_struct {
 	ulint		info_bits;	/* info bits of an index record:
 					the default is 0; this field is used
 					if an index record is built from
 					a data tuple */
 	ulint		n_fields;	/* number of fields in dtuple */
 	ulint		n_fields_cmp;	/* number of fields which should
 					be used in comparison services
 					of rem0cmp.*; the index search
 					is performed by comparing only these
 					fields, others are ignored; the
 					default value in dtuple creation is
 					the same value as n_fields */
 	dfield_t*	fields;		/* fields; dtuple_create() points
 					this at the memory directly after
 					the struct, dtuple_from_fields() at
 					the array given by the caller */
 	UT_LIST_NODE_T(dtuple_t) tuple_list;
 					/* data tuples can be linked into a
 					list using this field */
 #ifdef UNIV_DEBUG
 	ulint		magic_n;	/* set to DATA_TUPLE_MAGIC_N when
 					the tuple is initialized; checked
 					by debug assertions */
 # define		DATA_TUPLE_MAGIC_N	65478679
 #endif /* UNIV_DEBUG */
 };
 /* A slot for a field in a big rec vector: describes one field whose data
 was moved out of the record */
 typedef struct big_rec_field_struct	big_rec_field_t;
 struct big_rec_field_struct {
 	ulint		field_no;	/* field number in record */
 	ulint		len;		/* stored data len */
 	const void*	data;		/* stored data */
 };
 /* Storage format for overflow data in a big record, that is, a record
 which needs external storage of data fields */
 struct big_rec_struct {
 	mem_heap_t*	heap;		/* memory heap from which allocated;
 					dtuple_big_rec_free() frees this
 					heap */
 	ulint		n_fields;	/* number of stored fields */
 	big_rec_field_t* fields;	/* stored fields; an array of
 					n_fields slots */
 };
 #ifndef UNIV_NONINL
 #include "data0data.ic"
 #endif
 #endif
--- a/include/data0data.ic
+++ b/include/data0data.ic
@@ -0,0 +1,592 @@
 /************************************************************************
 SQL data field and tuple
 (c) 1994-1996 Innobase Oy
 Created 5/30/1994 Heikki Tuuri
 *************************************************************************/
 #include "mem0mem.h"
 #include "ut0rnd.h"
 #ifdef UNIV_DEBUG
 extern byte data_error;
 /*************************************************************************
 Returns the address of the type struct embedded in an SQL data field.
 The const qualifier of the argument is cast away on the result. */
 UNIV_INLINE
 dtype_t*
 dfield_get_type(
 /*============*/
 				/* out: pointer to the type struct */
 	const dfield_t*	field)	/* in: SQL data field */
 {
 	const dtype_t*	type;
 	ut_ad(field);
 	type = &field->type;
 	return((dtype_t*) type);
 }
 #endif /* UNIV_DEBUG */
 /*************************************************************************
 Copies a data type struct into an SQL data field. The struct is copied
 by value; the field keeps no reference to the argument. */
 UNIV_INLINE
 void
 dfield_set_type(
 /*============*/
 	dfield_t*	field,	/* in: SQL data field */
 	dtype_t*	type)	/* in: pointer to data type struct */
 {
 	ut_ad(field && type);
 	*dfield_get_type(field) = *type;
 }
 #ifdef UNIV_DEBUG
 /*************************************************************************
 Returns the data pointer stored in a field. In this debug version it is
 asserted that a non-NULL field does not still carry the data_error
 marker. */
 UNIV_INLINE
 void*
 dfield_get_data(
 /*============*/
 				/* out: pointer to data */
 	const dfield_t* field)	/* in: field */
 {
 	void*	data;
 	ut_ad(field);
 	ut_ad((field->len == UNIV_SQL_NULL)
 	      || (field->data != &data_error));
 	data = (void*) field->data;
 	return(data);
 }
 #endif /* UNIV_DEBUG */
 /*************************************************************************
 Returns the data length stored in a field. */
 UNIV_INLINE
 ulint
 dfield_get_len(
 /*===========*/
 				/* out: length of data; UNIV_SQL_NULL if
 				SQL null data */
 	const dfield_t*	field)	/* in: field */
 {
 	ulint	len;
 	ut_ad(field);
 	ut_ad((field->len == UNIV_SQL_NULL)
 	      || (field->data != &data_error));
 	len = field->len;
 	return(len);
 }
 /*************************************************************************
 Stores a new data length in a field and clears its "external storage"
 flag. */
 UNIV_INLINE
 void
 dfield_set_len(
 /*===========*/
 	dfield_t*	field,	/* in: field */
 	ulint		len)	/* in: length or UNIV_SQL_NULL */
 {
 	ut_ad(field);
 #ifdef UNIV_VALGRIND_DEBUG
 	if (len != UNIV_SQL_NULL) {
 		UNIV_MEM_ASSERT_RW(field->data, len);
 	}
 #endif /* UNIV_VALGRIND_DEBUG */
 	field->len = len;
 	field->ext = 0;
 }
 /*************************************************************************
 Tells whether the length of a field is the SQL NULL marker. */
 UNIV_INLINE
 ulint
 dfield_is_null(
 /*===========*/
 				/* out: nonzero if SQL null data */
 	const dfield_t* field)	/* in: field */
 {
 	ulint	is_null;
 	ut_ad(field);
 	is_null = (field->len == UNIV_SQL_NULL);
 	return(is_null);
 }
 /*************************************************************************
 Tells whether the "external storage" flag of a field is set. The flag
 is hinted to the compiler as unlikely to be set. */
 UNIV_INLINE
 ulint
 dfield_is_ext(
 /*==========*/
 				/* out: nonzero if externally stored */
 	const dfield_t* field)	/* in: field */
 {
 	ut_ad(field);
 	/* ext is a 1-bit field, so the result is exactly 0 or 1 */
 	if (UNIV_UNLIKELY(field->ext)) {
 		return(1);
 	}
 	return(0);
 }
 /*************************************************************************
 Sets the "external storage" flag of a field. Note that dfield_set_len()
 and dfield_set_data() clear the flag, so it has to be set after the
 length or data of the field has been assigned. */
 UNIV_INLINE
 void
 dfield_set_ext(
 /*===========*/
 	dfield_t*	field)	/* in/out: field */
 {
 	ut_ad(field);
 	field->ext = 1;
 }
 /*************************************************************************
 Makes a field refer to the given data and length. The data is not
 copied, and the "external storage" flag of the field is cleared. */
 UNIV_INLINE
 void
 dfield_set_data(
 /*============*/
 	dfield_t*	field,	/* in: field */
 	const void*	data,	/* in: data */
 	ulint		len)	/* in: length or UNIV_SQL_NULL */
 {
 	ut_ad(field);
 #ifdef UNIV_VALGRIND_DEBUG
 	if (len != UNIV_SQL_NULL) {
 		UNIV_MEM_ASSERT_RW(data, len);
 	}
 #endif /* UNIV_VALGRIND_DEBUG */
 	field->len = len;
 	field->ext = 0;
 	field->data = (void*) data;
 }
 /*************************************************************************
 Turns a data field into SQL NULL: the data pointer becomes NULL, the
 length UNIV_SQL_NULL, and the "external storage" flag is cleared. */
 UNIV_INLINE
 void
 dfield_set_null(
 /*============*/
 	dfield_t*	field)	/* in/out: field */
 {
 	/* Same effect as dfield_set_data(field, NULL, UNIV_SQL_NULL) */
 	ut_ad(field);
 	field->data = NULL;
 	field->ext = 0;
 	field->len = UNIV_SQL_NULL;
 }
 /*************************************************************************
 Copies the data pointer, the length and the "external storage" flag from
 one field to another. The type is left untouched and the data itself is
 not duplicated. */
 UNIV_INLINE
 void
 dfield_copy_data(
 /*=============*/
 	dfield_t*	field1,	/* out: field to copy to */
 	const dfield_t*	field2)	/* in: field to copy from */
 {
 	ut_ad(field1 && field2);
 	field1->ext = field2->ext;
 	field1->len = field2->len;
 	field1->data = field2->data;
 }
 /*************************************************************************
 Copies a data field to another. The whole struct is assigned, so the data
 pointer, length, "external storage" flag and type are all copied. The
 data pointed to is not duplicated: both fields refer to the same buffer
 afterwards. */
 UNIV_INLINE
 void
 dfield_copy(
 /*========*/
 	dfield_t*	field1,	/* out: field to copy to */
 	const dfield_t*	field2)	/* in: field to copy from */
 {
 	*field1 = *field2;
 }
 /*************************************************************************
 Replaces the data of a field with a copy allocated from a memory heap.
 An SQL NULL field is left as it is. */
 UNIV_INLINE
 void
 dfield_dup(
 /*=======*/
 	dfield_t*	field,	/* in/out: data field */
 	mem_heap_t*	heap)	/* in: memory heap where allocated */
 {
 	if (dfield_is_null(field)) {
 		/* Nothing to duplicate */
 		return;
 	}
 	UNIV_MEM_ASSERT_RW(field->data, field->len);
 	field->data = mem_heap_dup(heap, field->data, field->len);
 }
 /*************************************************************************
 Compares the data of two fields byte by byte. Two SQL NULL fields are
 considered equal; the types of the fields are not compared. */
 UNIV_INLINE
 ibool
 dfield_datas_are_binary_equal(
 /*==========================*/
 				/* out: TRUE if equal */
 	const dfield_t*	field1,	/* in: field */
 	const dfield_t*	field2)	/* in: field */
 {
 	ulint	len1	= field1->len;
 	if (len1 != field2->len) {
 		return(FALSE);
 	}
 	if (len1 == UNIV_SQL_NULL) {
 		/* Both are SQL NULL: there is no data to compare */
 		return(TRUE);
 	}
 	return(!memcmp(field1->data, field2->data, len1));
 }
 /*************************************************************************
 Returns the info bits stored in a data tuple. */
 UNIV_INLINE
 ulint
 dtuple_get_info_bits(
 /*=================*/
 				/* out: info bits */
 	const dtuple_t*	tuple)	/* in: tuple */
 {
 	ulint	info_bits;
 	ut_ad(tuple);
 	info_bits = tuple->info_bits;
 	return(info_bits);
 }
 /*************************************************************************
 Sets info bits in a data tuple. The previous value is overwritten as a
 whole; no bits are merged. */
 UNIV_INLINE
 void
 dtuple_set_info_bits(
 /*=================*/
 	dtuple_t*	tuple,		/* in: tuple */
 	ulint		info_bits)	/* in: info bits */
 {
 	ut_ad(tuple);
 	tuple->info_bits = info_bits;
 }
 /*************************************************************************
 Returns the number of fields of a tuple that take part in record
 comparisons. */
 UNIV_INLINE
 ulint
 dtuple_get_n_fields_cmp(
 /*====================*/
 				/* out: number of fields used in comparisons
 				in rem0cmp.* */
 	const dtuple_t*	tuple)	/* in: tuple */
 {
 	ulint	n_cmp;
 	ut_ad(tuple);
 	n_cmp = tuple->n_fields_cmp;
 	return(n_cmp);
 }
 /*************************************************************************
 Sets number of fields used in record comparisons. In debug builds it is
 asserted that the value does not exceed the number of fields in the
 tuple. */
 UNIV_INLINE
 void
 dtuple_set_n_fields_cmp(
 /*====================*/
 	dtuple_t*	tuple,		/* in: tuple */
 	ulint		n_fields_cmp)	/* in: number of fields used in
 					comparisons in rem0cmp.* */
 {
 	ut_ad(tuple);
 	ut_ad(n_fields_cmp <= tuple->n_fields);
 	tuple->n_fields_cmp = n_fields_cmp;
 }
 /*************************************************************************
 Returns the number of fields stored in a data tuple. */
 UNIV_INLINE
 ulint
 dtuple_get_n_fields(
 /*================*/
 				/* out: number of fields */
 	const dtuple_t*	tuple)	/* in: tuple */
 {
 	ulint	n_fields;
 	ut_ad(tuple);
 	n_fields = tuple->n_fields;
 	return(n_fields);
 }
 #ifdef UNIV_DEBUG
 /*************************************************************************
 Returns a pointer to the nth field of a tuple. This debug version
 asserts that the index is within the tuple. */
 UNIV_INLINE
 dfield_t*
 dtuple_get_nth_field(
 /*=================*/
 				/* out: nth field */
 	const dtuple_t*	tuple,	/* in: tuple */
 	ulint		n)	/* in: index of field */
 {
 	ut_ad(tuple);
 	ut_ad(n < tuple->n_fields);
 	return((dfield_t*) &tuple->fields[n]);
 }
 #endif /* UNIV_DEBUG */
 /**************************************************************
 Allocates a data tuple and its field array from a memory heap in one
 piece. info_bits is set to 0 and the number of fields used in record
 comparisons defaults to n_fields. In debug builds every field is set to
 an error value before the field array is marked invalid. */
 UNIV_INLINE
 dtuple_t*
 dtuple_create(
 /*==========*/
 				/* out, own: created tuple */
 	mem_heap_t*	heap,	/* in: memory heap where the tuple
 				is created */
 	ulint		n_fields) /* in: number of fields */
 {
 	dtuple_t*	tuple;
 	ulint		tuple_size;
 	ut_ad(heap);
 	/* The tuple header is immediately followed by the field array */
 	tuple_size = sizeof(dtuple_t) + n_fields * sizeof(dfield_t);
 	tuple = (dtuple_t*) mem_heap_alloc(heap, tuple_size);
 	tuple->fields = (dfield_t*) (tuple + 1);
 	tuple->n_fields = n_fields;
 	tuple->n_fields_cmp = n_fields;
 	tuple->info_bits = 0;
 #ifdef UNIV_DEBUG
 	tuple->magic_n = DATA_TUPLE_MAGIC_N;
 	{	/* In the debug version, initialize fields to an error value */
 		ulint	field_no;
 		for (field_no = 0; field_no < n_fields; field_no++) {
 			dfield_t*	field
 				= dtuple_get_nth_field(tuple, field_no);
 			dfield_set_len(field, UNIV_SQL_NULL);
 			field->data = &data_error;
 			dfield_get_type(field)->mtype = DATA_ERROR;
 		}
 	}
 	UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
 #endif
 	return(tuple);
 }
 /**************************************************************
 Initializes caller-provided tuple storage so that it refers to an
 existing array of fields. The fields are not copied. info_bits is set
 to 0 and the number of fields used in record comparisons to n_fields. */
 UNIV_INLINE
 const dtuple_t*
 dtuple_from_fields(
 /*===============*/
 					/* out: data tuple */
 	dtuple_t*	tuple,		/* in: storage for data tuple */
 	const dfield_t*	fields,		/* in: fields */
 	ulint		n_fields)	/* in: number of fields */
 {
 	tuple->fields = (dfield_t*) fields;
 	tuple->n_fields = n_fields;
 	tuple->n_fields_cmp = n_fields;
 	tuple->info_bits = 0;
 	ut_d(tuple->magic_n = DATA_TUPLE_MAGIC_N);
 	return(tuple);
 }
 /*************************************************************************
 Creates a shallow copy of a data tuple: the field structs are copied but
 the data they point to is shared with the original. If a deep copy is
 desired, dfield_dup() will have to be invoked on each field. Only the
 fields are copied; info_bits and the number of fields used in comparisons
 of the new tuple get the defaults of dtuple_create(). */
 UNIV_INLINE
 dtuple_t*
 dtuple_copy(
 /*========*/
 				/* out, own: copy of tuple */
 	const dtuple_t*	tuple,	/* in: tuple to copy from */
 	mem_heap_t*	heap)	/* in: memory heap
 				where the tuple is created */
 {
 	ulint		n_fields;
 	ulint		field_no;
 	dtuple_t*	copy;
 	n_fields = dtuple_get_n_fields(tuple);
 	copy = dtuple_create(heap, n_fields);
 	for (field_no = 0; field_no < n_fields; field_no++) {
 		const dfield_t*	src;
 		dfield_t*	dst;
 		src = dtuple_get_nth_field(tuple, field_no);
 		dst = dtuple_get_nth_field(copy, field_no);
 		dfield_copy(dst, src);
 	}
 	return(copy);
 }
 /**************************************************************
 Returns the sum of the data lengths of the fields of a tuple. The space
 taken by the tuple struct and the field structs is not counted, nor is
 possible space in externally stored parts of a field. An SQL NULL field
 is counted with the SQL null size of its data type. */
 UNIV_INLINE
 ulint
 dtuple_get_data_size(
 /*=================*/
 				/* out: sum of data lengths */
 	const dtuple_t*	tuple)	/* in: typed data tuple */
 {
 	ulint	n_fields;
 	ulint	field_no;
 	ulint	total	= 0;
 	ut_ad(tuple);
 	ut_ad(dtuple_check_typed(tuple));
 	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
 	n_fields = tuple->n_fields;
 	for (field_no = 0; field_no < n_fields; field_no++) {
 		const dfield_t*	field;
 		ulint		len;
 		field = dtuple_get_nth_field(tuple, field_no);
 		len = dfield_get_len(field);
 		total += (len == UNIV_SQL_NULL)
 			? dtype_get_sql_null_size(dfield_get_type(field))
 			: len;
 	}
 	return(total);
 }
 /*************************************************************************
 Computes the number of externally stored fields in a data tuple, that is,
 the number of fields whose "external storage" flag is set. */
 UNIV_INLINE
 ulint
 dtuple_get_n_ext(
 /*=============*/
 				/* out: number of externally stored fields */
 	const dtuple_t*	tuple)	/* in: tuple */
 {
 	ulint	n_ext		= 0;
 	ulint	n_fields;
 	ulint	i;
 	ut_ad(tuple);
 	ut_ad(dtuple_check_typed(tuple));
 	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
 	/* Read the field count only after the tuple pointer has been
 	checked: dereferencing it in an initializer would run before
 	ut_ad(tuple) and make that assertion useless */
 	n_fields = tuple->n_fields;
 	for (i = 0; i < n_fields; i++) {
 		/* ext is a 1-bit flag, so each field adds 0 or 1 */
 		n_ext += dtuple_get_nth_field(tuple, i)->ext;
 	}
 	return(n_ext);
 }
 /***********************************************************************
 Sets the type of the first n fields of a tuple to DATA_BINARY, passing 0
 for the two remaining arguments of dtype_set(). */
 UNIV_INLINE
 void
 dtuple_set_types_binary(
 /*====================*/
 	dtuple_t*	tuple,	/* in: data tuple */
 	ulint		n)	/* in: number of fields to set */
 {
 	ulint	field_no;
 	for (field_no = 0; field_no < n; field_no++) {
 		dfield_t*	field;
 		field = dtuple_get_nth_field(tuple, field_no);
 		dtype_set(dfield_get_type(field), DATA_BINARY, 0, 0);
 	}
 }
 /****************************************************************
 Computes a fold value for a prefix of a tuple. The prefix consists of
 n_fields complete fields and, if n_bytes is nonzero, at most n_bytes
 bytes of the field that follows them. The fold starts from the index
 tree id; SQL NULL fields do not change it. */
 UNIV_INLINE
 ulint
 dtuple_fold(
 /*========*/
 				/* out: the folded value */
 	const dtuple_t*	tuple,	/* in: the tuple */
 	ulint		n_fields,/* in: number of complete fields to fold */
 	ulint		n_bytes,/* in: number of bytes to fold in an
 				incomplete last field */
 	dulint		tree_id)/* in: index tree id */
 {
 	const dfield_t*	field;
 	const byte*	data;
 	ulint		len;
 	ulint		field_no;
 	ulint		fold;
 	ut_ad(tuple);
 	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
 	ut_ad(dtuple_check_typed(tuple));
 	fold = ut_fold_dulint(tree_id);
 	/* Fold in the complete fields */
 	for (field_no = 0; field_no < n_fields; field_no++) {
 		field = dtuple_get_nth_field(tuple, field_no);
 		data = (const byte*) dfield_get_data(field);
 		len = dfield_get_len(field);
 		if (len == UNIV_SQL_NULL) {
 			continue;
 		}
 		fold = ut_fold_ulint_pair(fold, ut_fold_binary(data, len));
 	}
 	if (n_bytes == 0) {
 		return(fold);
 	}
 	/* Fold in the leading bytes of the field after the complete ones */
 	field = dtuple_get_nth_field(tuple, n_fields);
 	data = (const byte*) dfield_get_data(field);
 	len = dfield_get_len(field);
 	if (len != UNIV_SQL_NULL) {
 		if (len > n_bytes) {
 			len = n_bytes;
 		}
 		fold = ut_fold_ulint_pair(fold, ut_fold_binary(data, len));
 	}
 	return(fold);
 }
 /**************************************************************************
 Writes an SQL null field full of zeros: the first len bytes of the
 buffer are set to 0. */
 UNIV_INLINE
 void
 data_write_sql_null(
 /*================*/
 	byte*	data,	/* out: buffer of at least len bytes; it is
 			overwritten with zeros */
 	ulint	len)	/* in: SQL null size in bytes */
 {
 	memset(data, 0, len);
 }
 /**************************************************************************
 Scans the fields of a dtuple and tells whether any of them is SQL NULL.
 The scan stops at the first SQL NULL field found. */
 UNIV_INLINE
 ibool
 dtuple_contains_null(
 /*=================*/
 				/* out: TRUE if some field is SQL null */
 	const dtuple_t*	tuple)	/* in: dtuple */
 {
 	ulint	n_fields	= dtuple_get_n_fields(tuple);
 	ulint	field_no	= 0;
 	while (field_no < n_fields) {
 		if (dfield_is_null(dtuple_get_nth_field(tuple, field_no))) {
 			return(TRUE);
 		}
 		field_no++;
 	}
 	return(FALSE);
 }
 /******************************************************************
 Frees the memory in a big rec vector by freeing the memory heap the
 vector refers to. NOTE(review): presumably the vector struct itself is
 allocated from that heap, so the pointer must not be used after this
 call - confirm against dtuple_convert_big_rec(). */
 UNIV_INLINE
 void
 dtuple_big_rec_free(
 /*================*/
 	big_rec_t*	vector)	/* in, own: big rec vector; it is
 				freed in this function */
 {
 	mem_heap_free(vector->heap);
 }
--- a/include/data0type.h
+++ b/include/data0type.h
@@ -0,0 +1,455 @@
 /******************************************************
 Data types
 (c) 1996 Innobase Oy
 Created 1/16/1996 Heikki Tuuri
 *******************************************************/
 #ifndef data0type_h
 #define data0type_h
 #include "univ.i"
 /* Default MySQL charset-collation code. It is substituted when a type is
 read from the < 4.1.x storage format, and when the code stored for a string
 type in the >= 4.1.x format is 0; see dtype_read_for_order_and_null_size()
 and dtype_new_read_for_order_and_null_size(). */
 extern ulint	data_mysql_default_charset_coll;
 /* MySQL charset-collation codes that InnoDB refers to by name */
 #define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
 #define DATA_MYSQL_BINARY_CHARSET_COLL 63
 /* SQL data type struct */
 typedef struct dtype_struct		dtype_t;
 /*-------------------------------------------*/
 /* The 'MAIN TYPE' of a column; this is the value kept in the mtype member
 of dtype_t */
 #define	DATA_VARCHAR	1	/* character varying of the
 				latin1_swedish_ci charset-collation; note
 				that the MySQL format for this, DATA_BINARY,
 				DATA_VARMYSQL, is also affected by whether the
 				'precise type' contains
 				DATA_MYSQL_TRUE_VARCHAR */
 #define DATA_CHAR	2	/* fixed length character of the
 				latin1_swedish_ci charset-collation */
 #define DATA_FIXBINARY	3	/* binary string of fixed length */
 #define DATA_BINARY	4	/* binary string */
 #define DATA_BLOB	5	/* binary large object, or a TEXT type;
 				if prtype & DATA_BINARY_TYPE == 0, then this is
 				actually a TEXT column (or a BLOB created
 				with < 4.0.14; since column prefix indexes
 				came only in 4.0.14, the missing flag in BLOBs
 				created before that does not cause any harm) */
 #define	DATA_INT	6	/* integer: can be any size 1 - 8 bytes */
 #define	DATA_SYS_CHILD	7	/* address of the child page in node pointer */
 #define	DATA_SYS	8	/* system column */
 /* Data types >= DATA_FLOAT must be compared using the whole field, not as
 binary strings */
 #define DATA_FLOAT	9
 #define DATA_DOUBLE	10
 #define DATA_DECIMAL	11	/* decimal number stored as an ASCII string */
 #define	DATA_VARMYSQL	12	/* any charset varying length char */
 #define	DATA_MYSQL	13	/* any charset fixed length char */
 				/* NOTE that 4.1.1 used DATA_MYSQL and
 				DATA_VARMYSQL for all character sets, and the
 				charset-collation for tables created with it
 				can also be latin1_swedish_ci */
 #define DATA_MTYPE_MAX	63	/* dtype_store_for_order_and_null_size()
 				requires the values are <= 63; the
 				dtype_*read_for_order_and_null_size()
 				functions read the main type back with
 				buf[0] & 63 */
 /*-------------------------------------------*/
 /* The 'PRECISE TYPE' of a column */
 /*
 Tables created by a MySQL user have the following convention:
 - In the least significant byte in the precise type we store the MySQL type
 code (not applicable for system columns).
 - In the second least significant byte we OR flags DATA_NOT_NULL,
 DATA_UNSIGNED, DATA_BINARY_TYPE.
 - In the third least significant byte of the precise type of string types we
 store the MySQL charset-collation code. In DATA_BLOB columns created with
 < 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
 are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
 problem, though.
 Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
 precise type, since the charset was always the default charset of the MySQL
 installation. If the stored charset code is 0 in the system table SYS_COLUMNS
 of InnoDB, that means that the default charset of this MySQL installation
 should be used.
 When loading a table definition from the system tables to the InnoDB data
 dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
 if the stored charset-collation is 0, and if that is the case and the type is
 a non-binary string, replace that 0 by the default charset-collation code of
 this MySQL installation. In short, in old tables, the charset-collation code
 in the system tables on disk can be 0, but in in-memory data structures
 (dtype_t), the charset-collation code is always != 0 for non-binary string
 types.
 In new tables, in binary string types, the charset-collation code is the
 MySQL code for the 'binary charset', that is, != 0.
 For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
 DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
 InnoDB performs all comparisons internally, without resorting to the MySQL
 comparison functions. This is to save CPU time.
 InnoDB's own internal system tables have different precise types for their
 columns, and for them the precise type is usually not used at all.
 */
 #define DATA_ENGLISH	4	/* English language character string: this
 				is a relic from pre-MySQL time and only used
 				for InnoDB's own system tables */
 #define DATA_ERROR	111	/* another relic from pre-MySQL time */
 #define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
 				 type from the precise type, i.e. its
 				 least significant byte */
 #define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
 				   format true VARCHAR */
 /* Precise data types for system columns and the length of those columns;
 NOTE: the values must run from 0 up in the order given! All codes must
 be less than 256. In debug builds dtype_get_fixed_size_low() and
 dtype_get_min_size_low() assert that a DATA_SYS column of each of these
 precise types has the corresponding *_LEN length. */
 #define	DATA_ROW_ID	0	/* row id: a dulint */
 #define DATA_ROW_ID_LEN	6	/* stored length for row id */
 #define DATA_TRX_ID	1	/* transaction id: 6 bytes */
 #define DATA_TRX_ID_LEN	6	/* stored length for transaction id */
 #define	DATA_ROLL_PTR	2	/* rollback data pointer: 7 bytes */
 #define DATA_ROLL_PTR_LEN 7	/* stored length for rollback pointer */
 #define	DATA_N_SYS_COLS 3	/* number of system columns defined above */
 #define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
 /* Flags ORed to the precise data type; they occupy bits above the least
 significant byte, which holds the MySQL type code */
 #define DATA_NOT_NULL	256	/* this is ORed to the precise type when
 				the column is declared as NOT NULL */
 #define DATA_UNSIGNED	512	/* this is ORed to the precise type when
 				we have an unsigned integer type */
 #define	DATA_BINARY_TYPE 1024	/* if the data type is a binary character
 				string, this is ORed to the precise type:
 				this only holds for tables created with
 				>= MySQL-4.0.14 */
 /* #define	DATA_NONLATIN1	2048 This is a relic from < 4.1.2 and < 5.0.1.
 				In earlier versions this was set for some
 				BLOB columns.
 */
 #define	DATA_LONG_TRUE_VARCHAR 4096	/* this is ORed to the precise data
 				type when the column is true VARCHAR where
 				MySQL uses 2 bytes to store the data len;
 				for shorter VARCHARs MySQL uses only 1 byte */
 /*-------------------------------------------*/
 /* This many bytes we need to store the type information affecting the
 alphabetical order for a single field and decide the storage size of an
 SQL null (the < 4.1.x format read by
 dtype_read_for_order_and_null_size()) */
 #define DATA_ORDER_NULL_TYPE_BUF_SIZE		4
 /* In the >= 4.1.x storage format we add 2 bytes more so that we can also
 store the charset-collation number; one byte is left unused, though.
 The functions in data0type.ic that use these two sizes verify the values
 4 and 6 at compile time with #if/#error. */
 #define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE	6
 /*************************************************************************
 Gets the MySQL type code from a dtype. */
 UNIV_INLINE
 ulint
 dtype_get_mysql_type(
 /*=================*/
 				/* out: MySQL type code; this is NOT an InnoDB
 				type code! */
 	const dtype_t*	type);	/* in: type struct */
 /*************************************************************************
 Determine how many bytes the first n characters of the given string occupy.
 If the string is shorter than n characters, returns the number of bytes
 the characters in the string occupy. */
 UNIV_INTERN
 ulint
 dtype_get_at_most_n_mbchars(
 /*========================*/
 					/* out: length of the prefix,
 					in bytes */
 	ulint		prtype,		/* in: precise type */
 	ulint		mbminlen,	/* in: minimum length of a
 					multi-byte character */
 	ulint		mbmaxlen,	/* in: maximum length of a
 					multi-byte character */
 	ulint		prefix_len,	/* in: length of the requested
 					prefix, in characters, multiplied by
 					dtype_get_mbmaxlen(dtype) */
 	ulint		data_len,	/* in: length of str (in bytes) */
 	const char*	str);		/* in: the string whose prefix
 					length is being determined */
 /*************************************************************************
 Checks if a data main type is a string type. Also a BLOB is considered a
 string type. */
 UNIV_INTERN
 ibool
 dtype_is_string_type(
 /*=================*/
 			/* out: TRUE if string type */
 	ulint	mtype);	/* in: InnoDB main data type code: DATA_CHAR, ... */
 /*************************************************************************
 Checks if a type is a binary string type. Note that for tables created with
 < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
 those DATA_BLOB columns this function currently returns FALSE. */
 UNIV_INTERN
 ibool
 dtype_is_binary_string_type(
 /*========================*/
 			/* out: TRUE if binary string type */
 	ulint	mtype,	/* in: main data type */
 	ulint	prtype);/* in: precise type */
 /*************************************************************************
 Checks if a type is a non-binary string type. That is, dtype_is_string_type is
 TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
 with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
 For those DATA_BLOB columns this function currently returns TRUE. */
 UNIV_INTERN
 ibool
 dtype_is_non_binary_string_type(
 /*============================*/
 			/* out: TRUE if non-binary string type */
 	ulint	mtype,	/* in: main data type */
 	ulint	prtype);/* in: precise type */
 /*************************************************************************
 Sets a data type structure. */
 UNIV_INLINE
 void
 dtype_set(
 /*======*/
 	dtype_t*	type,	/* in: type struct to init */
 	ulint		mtype,	/* in: main data type */
 	ulint		prtype,	/* in: precise type */
 	ulint		len);	/* in: precision of type */
 /*************************************************************************
 Copies a data type structure. */
 UNIV_INLINE
 void
 dtype_copy(
 /*=======*/
 	dtype_t*	type1,	/* in: type struct to copy to */
 	const dtype_t*	type2);	/* in: type struct to copy from */
 /*************************************************************************
 Gets the SQL main data type. */
 UNIV_INLINE
 ulint
 dtype_get_mtype(
 /*============*/
 				/* out: main data type (the mtype
 				member of the struct) */
 	const dtype_t*	type);	/* in: type struct */
 /*************************************************************************
 Gets the precise data type. */
 UNIV_INLINE
 ulint
 dtype_get_prtype(
 /*=============*/
 				/* out: precise type (the prtype
 				member of the struct) */
 	const dtype_t*	type);	/* in: type struct */
 /*************************************************************************
 Compute the mbminlen and mbmaxlen members of a data type structure. */
 UNIV_INLINE
 void
 dtype_get_mblen(
 /*============*/
 	ulint	mtype,		/* in: main type */
 	ulint	prtype,		/* in: precise type (and collation) */
 	ulint*	mbminlen,	/* out: minimum length of a
 				multi-byte character */
 	ulint*	mbmaxlen);	/* out: maximum length of a
 				multi-byte character */
 /*************************************************************************
 Gets the MySQL charset-collation code for MySQL string types. */
 UNIV_INLINE
 ulint
 dtype_get_charset_coll(
 /*===================*/
 			/* out: charset-collation code, taken from
 			bits 16..23 of the precise type */
 	ulint	prtype);/* in: precise data type */
 /*************************************************************************
 Forms a precise type from the < 4.1.2 format precise type plus the
 charset-collation code. */
 UNIV_INTERN
 ulint
 dtype_form_prtype(
 /*==============*/
 	ulint	old_prtype,	/* in: the MySQL type code and the flags
 				DATA_BINARY_TYPE etc. */
 	ulint	charset_coll);	/* in: MySQL charset-collation code */
 /*************************************************************************
 Determines if a MySQL string type is a subset of UTF-8.  This function
 may return false negatives, in case further character-set collation
 codes are introduced in MySQL later. */
 UNIV_INLINE
 ibool
 dtype_is_utf8(
 /*==========*/
 			/* out: TRUE if a subset of UTF-8 */
 	ulint	prtype);/* in: precise data type */
 /*************************************************************************
 Gets the type length. */
 UNIV_INLINE
 ulint
 dtype_get_len(
 /*==========*/
 				/* out: length (the len member of
 				the struct) */
 	const dtype_t*	type);	/* in: type struct */
 /*************************************************************************
 Gets the minimum length of a character, in bytes. */
 UNIV_INLINE
 ulint
 dtype_get_mbminlen(
 /*===============*/
 				/* out: minimum length of a char, in bytes,
 				or 0 if this is not a character type */
 	const dtype_t*	type);	/* in: type */
 /*************************************************************************
 Gets the maximum length of a character, in bytes. */
 UNIV_INLINE
 ulint
 dtype_get_mbmaxlen(
 /*===============*/
 				/* out: maximum length of a char, in bytes,
 				or 0 if this is not a character type */
 	const dtype_t*	type);	/* in: type */
 /*************************************************************************
 Gets the padding character code for the type. */
 UNIV_INLINE
 ulint
 dtype_get_pad_char(
 /*===============*/
 				/* out: padding character code, or
 				ULINT_UNDEFINED if no padding specified */
 	ulint	mtype,		/* in: main type */
 	ulint	prtype);	/* in: precise type */
 /***************************************************************************
 Returns the size of a fixed size data type, 0 if not a fixed size type. */
 UNIV_INLINE
 ulint
 dtype_get_fixed_size_low(
 /*=====================*/
 				/* out: fixed size, or 0 */
 	ulint	mtype,		/* in: main type */
 	ulint	prtype,		/* in: precise type */
 	ulint	len,		/* in: length */
 	ulint	mbminlen,	/* in: minimum length of a multibyte char */
 	ulint	mbmaxlen);	/* in: maximum length of a multibyte char */
 /***************************************************************************
 Returns the minimum size of a data type. */
 UNIV_INLINE
 ulint
 dtype_get_min_size_low(
 /*===================*/
 				/* out: minimum size */
 	ulint	mtype,		/* in: main type */
 	ulint	prtype,		/* in: precise type */
 	ulint	len,		/* in: length */
 	ulint	mbminlen,	/* in: minimum length of a multibyte char */
 	ulint	mbmaxlen);	/* in: maximum length of a multibyte char */
 /***************************************************************************
 Returns the maximum size of a data type. Note: types in system tables may be
 incomplete and return incorrect information. */
 UNIV_INLINE
 ulint
 dtype_get_max_size_low(
 /*===================*/
 				/* out: maximum size */
 	ulint	mtype,		/* in: main type */
 	ulint	len);		/* in: length */
 /***************************************************************************
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
 For fixed length types it is the fixed length of the type, otherwise 0. */
 UNIV_INLINE
 ulint
 dtype_get_sql_null_size(
 /*====================*/
 				/* out: SQL null storage size
 				in ROW_FORMAT=REDUNDANT */
 	const dtype_t*	type);	/* in: type */
 /**************************************************************************
 Reads to a type the stored information which determines its alphabetical
 ordering and the storage size of an SQL NULL value. This is the < 4.1.x
 storage format. */
 UNIV_INLINE
 void
 dtype_read_for_order_and_null_size(
 /*===============================*/
 	dtype_t*	type,	/* out: type struct that is filled in */
 	const byte*	buf);	/* in: buffer for the stored order info
 				(DATA_ORDER_NULL_TYPE_BUF_SIZE bytes) */
 /**************************************************************************
 Stores for a type the information which determines its alphabetical ordering
 and the storage size of an SQL NULL value. This is the >= 4.1.x storage
 format. */
 UNIV_INLINE
 void
 dtype_new_store_for_order_and_null_size(
 /*====================================*/
 	byte*		buf,	/* in: buffer for
 				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
 				bytes where we store the info */
 	const dtype_t*	type,	/* in: type struct */
 	ulint		prefix_len);/* in: prefix length to
 				replace type->len, or 0 */
 /**************************************************************************
 Reads to a type the stored information which determines its alphabetical
 ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
 storage format. */
 UNIV_INLINE
 void
 dtype_new_read_for_order_and_null_size(
 /*===================================*/
 	dtype_t*	type,	/* out: type struct that is filled in */
 	const byte*	buf);	/* in: buffer for stored type order info
 				(DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE bytes) */
 /*************************************************************************
 Validates a data type structure. */
 UNIV_INTERN
 ibool
 dtype_validate(
 /*===========*/
 				/* out: TRUE if ok */
 	const dtype_t*	type);	/* in: type struct to validate */
 /*************************************************************************
 Prints a data type structure. */
 UNIV_INTERN
 void
 dtype_print(
 /*========*/
 	const dtype_t*	type);	/* in: type */
 /* Structure for an SQL data type.
 If you add fields to this structure, be sure to initialize them everywhere.
 This structure is initialized in the following functions:
 dtype_set()
 dtype_read_for_order_and_null_size()
 dtype_new_read_for_order_and_null_size()
 sym_tab_add_null_lit()
 All members are unsigned bit-fields, so a value that does not fit in the
 declared width is truncated when it is assigned. */
 struct dtype_struct{
 	unsigned	mtype:8;	/* main data type; dtype_set()
 					asserts that it is
 					<= DATA_MTYPE_MAX */
 	unsigned	prtype:24;	/* precise type; MySQL data
 					type, charset code, flags to
 					indicate nullability,
 					signedness, whether this is a
 					binary string, whether this is
 					a true VARCHAR where MySQL
 					uses 2 bytes to store the length;
 					the charset code is in bits
 					16..23, see
 					dtype_get_charset_coll() */
 	/* the remaining fields do not affect alphabetical ordering: */
 	unsigned	len:16;		/* length; for MySQL data this
 					is field->pack_length(),
 					except that for a >= 5.0.3
 					type true VARCHAR this is the
 					maximum byte length of the
 					string data (in addition to
 					the string, MySQL uses 1 or 2
 					bytes to store the string length) */
 	unsigned	mbminlen:2;	/* minimum length of a
 					character, in bytes; the
 					field can hold 0..3 */
 	unsigned	mbmaxlen:3;	/* maximum length of a
 					character, in bytes; the
 					field can hold 0..7 */
 };
 #ifndef UNIV_NONINL
 #include "data0type.ic"
 #endif
 #endif
--- a/include/data0type.ic
+++ b/include/data0type.ic
@@ -0,0 +1,571 @@
 /******************************************************
 Data types
 (c) 1996 Innobase Oy
 Created 1/16/1996 Heikki Tuuri
 *******************************************************/
 #include "mach0data.h"
 #include "ha_prototypes.h"
 /*************************************************************************
 Extracts the MySQL charset-collation code of a MySQL string type. The code
 is kept in bits 16..23 of the precise type. */
 UNIV_INLINE
 ulint
 dtype_get_charset_coll(
 /*===================*/
 			/* out: charset-collation code, 0..255 */
 	ulint	prtype)	/* in: precise data type */
 {
 	/* Drop the type code and flag bytes, then keep one byte. */
 	const ulint	upper_bits = prtype >> 16;
 	return(upper_bits & 0xFFUL);
 }
 /*************************************************************************
 Tells whether the charset-collation of a MySQL string type is known to be
 a subset of UTF-8. Only a fixed list of collation codes is recognized, so
 the answer can be a false negative for collations that are not listed. */
 UNIV_INLINE
 ibool
 dtype_is_utf8(
 /*==========*/
 			/* out: TRUE if a subset of UTF-8 */
 	ulint	prtype)	/* in: precise data type */
 {
 	ibool	is_utf8;
 	/* The numeric codes below have been copied from
 	strings/ctype-extra.c and strings/ctype-utf8.c. */
 	switch (dtype_get_charset_coll(prtype)) {
 	case 11: /* ascii_general_ci */
 	case 65: /* ascii_bin */
 	case 33: /* utf8_general_ci */
 	case 83: /* utf8_bin */
 	case 254: /* utf8_general_cs */
 		is_utf8 = TRUE;
 		break;
 	default:
 		is_utf8 = FALSE;
 		break;
 	}
 	return(is_utf8);
 }
 /*************************************************************************
 Returns the MySQL type code, which is the least significant byte of the
 precise type of a dtype. */
 UNIV_INLINE
 ulint
 dtype_get_mysql_type(
 /*=================*/
 				/* out: MySQL type code; this is NOT an InnoDB
 				type code! */
 	const dtype_t*	type)	/* in: type struct */
 {
 	const ulint	prtype = type->prtype;
 	return(prtype & 0xFFUL);
 }
 /*************************************************************************
 Determines the minimum and maximum byte length of one character for a
 column type. Both lengths are 0 when the main type is not a string type. */
 UNIV_INLINE
 void
 dtype_get_mblen(
 /*============*/
 	ulint	mtype,		/* in: main type */
 	ulint	prtype,		/* in: precise type (and collation) */
 	ulint*	mbminlen,	/* out: minimum length of a
 				multi-byte character */
 	ulint*	mbmaxlen)	/* out: maximum length of a
 				multi-byte character */
 {
 	if (!dtype_is_string_type(mtype)) {
 		/* Not a character type: no character widths apply. */
 		*mbmaxlen = 0;
 		*mbminlen = 0;
 		return;
 	}
 #ifndef UNIV_HOTBACKUP
 	/* Ask MySQL for the widths of this charset-collation. */
 	innobase_get_cset_width(dtype_get_charset_coll(prtype),
 				mbminlen, mbmaxlen);
 	ut_ad(*mbminlen <= *mbmaxlen);
 	/* The values must fit in the dtype_t bit-fields: mbminlen can
 	hold 0..3 (only 0..2 is accepted here), mbmaxlen can hold 0..7. */
 	ut_ad(*mbminlen <= 2);
 	ut_ad(*mbmaxlen < (1 << 3));
 #else /* !UNIV_HOTBACKUP */
 	ut_a(mtype <= DATA_BINARY);
 	*mbmaxlen = 1;
 	*mbminlen = 1;
 #endif /* !UNIV_HOTBACKUP */
 }
 /*************************************************************************
 Fills in the mbminlen and mbmaxlen members of a data type structure from
 its already initialized mtype and prtype members. */
 UNIV_INLINE
 void
 dtype_set_mblen(
 /*============*/
 	dtype_t*	type)	/* in/out: type */
 {
 	ulint	min_len;
 	ulint	max_len;
 	/* The struct members are bit-fields, so they cannot be passed
 	by address; go through local variables instead. */
 	dtype_get_mblen(type->mtype, type->prtype, &min_len, &max_len);
 	type->mbmaxlen = max_len;
 	type->mbminlen = min_len;
 	ut_ad(dtype_validate(type));
 }
 /*************************************************************************
 Initializes every member of a data type structure: the main type, precise
 type and length are taken from the arguments, and the character length
 members are derived from them by dtype_set_mblen(). */
 UNIV_INLINE
 void
 dtype_set(
 /*======*/
 	dtype_t*	type,	/* in: type struct to init */
 	ulint		mtype,	/* in: main data type */
 	ulint		prtype,	/* in: precise type */
 	ulint		len)	/* in: precision of type */
 {
 	ut_ad(type);
 	ut_ad(mtype <= DATA_MTYPE_MAX);
 	type->len = len;
 	type->prtype = prtype;
 	type->mtype = mtype;
 	/* Must come last: it reads the mtype and prtype set above. */
 	dtype_set_mblen(type);
 }
 /*************************************************************************
 Makes *type1 a member-for-member copy of *type2. In debug builds the
 result is checked with dtype_validate(). */
 UNIV_INLINE
 void
 dtype_copy(
 /*=======*/
 	dtype_t*	type1,	/* in: type struct to copy to */
 	const dtype_t*	type2)	/* in: type struct to copy from */
 {
 	/* Plain struct assignment copies all bit-field members. */
 	(*type1) = (*type2);
 	ut_ad(dtype_validate(type1));
 }
 /*************************************************************************
 Returns the SQL main data type stored in a type struct. */
 UNIV_INLINE
 ulint
 dtype_get_mtype(
 /*============*/
 				/* out: main data type */
 	const dtype_t*	type)	/* in: type struct */
 {
 	ulint	main_type;
 	ut_ad(type);
 	main_type = type->mtype;
 	return(main_type);
 }
 /*************************************************************************
 Returns the precise data type stored in a type struct. */
 UNIV_INLINE
 ulint
 dtype_get_prtype(
 /*=============*/
 				/* out: precise type */
 	const dtype_t*	type)	/* in: type struct */
 {
 	ulint	precise_type;
 	ut_ad(type);
 	precise_type = type->prtype;
 	return(precise_type);
 }
 /*************************************************************************
 Returns the length stored in a type struct. */
 UNIV_INLINE
 ulint
 dtype_get_len(
 /*==========*/
 				/* out: length (the len member) */
 	const dtype_t*	type)	/* in: type struct */
 {
 	ulint	type_len;
 	ut_ad(type);
 	type_len = type->len;
 	return(type_len);
 }
 /*************************************************************************
 Returns the minimum length, in bytes, of one character of the type. */
 UNIV_INLINE
 ulint
 dtype_get_mbminlen(
 /*===============*/
 				/* out: minimum length of a char, in bytes,
 				or 0 if this is not a character type */
 	const dtype_t*	type)	/* in: type */
 {
 	ulint	min_len;
 	ut_ad(type);
 	min_len = type->mbminlen;
 	return(min_len);
 }
 /*************************************************************************
 Returns the maximum length, in bytes, of one character of the type. */
 UNIV_INLINE
 ulint
 dtype_get_mbmaxlen(
 /*===============*/
 				/* out: maximum length of a char, in bytes,
 				or 0 if this is not a character type */
 	const dtype_t*	type)	/* in: type */
 {
 	ulint	max_len;
 	ut_ad(type);
 	max_len = type->mbmaxlen;
 	return(max_len);
 }
 /*************************************************************************
 Tells which character, if any, is used for padding values of a type.
 Space (0x20) is the only padding character ever returned. */
 UNIV_INLINE
 ulint
 dtype_get_pad_char(
 /*===============*/
 				/* out: padding character code, or
 				ULINT_UNDEFINED if no padding specified */
 	ulint	mtype,		/* in: main type */
 	ulint	prtype)		/* in: precise type */
 {
 	switch (mtype) {
 	case DATA_BLOB:
 		/* Only a BLOB without the binary flag is padded. */
 		if (prtype & DATA_BINARY_TYPE) {
 			return(ULINT_UNDEFINED);
 		}
 		return(0x20);
 	case DATA_FIXBINARY:
 	case DATA_BINARY:
 		if (UNIV_UNLIKELY(dtype_get_charset_coll(prtype)
 				  == DATA_MYSQL_BINARY_CHARSET_COLL)) {
 			/* Starting from 5.0.18, do not pad
 			VARBINARY or BINARY columns. */
 			return(ULINT_UNDEFINED);
 		}
 		/* Any other charset-collation is padded like the
 		character types below. */
 		return(0x20);
 	case DATA_CHAR:
 	case DATA_VARCHAR:
 	case DATA_MYSQL:
 	case DATA_VARMYSQL:
 		/* Space is the padding character for all char and binary
 		strings, and starting from 5.0.3, also for TEXT strings. */
 		return(0x20);
 	default:
 		/* No padding specified */
 		return(ULINT_UNDEFINED);
 	}
 }
 /**************************************************************************
 Serializes, in the >= 4.1.x storage format, the parts of a type that
 determine alphabetical ordering and the storage size of an SQL NULL value.
 Layout of the DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE bytes written:
   buf[0]	main type; bit 128 is set if DATA_BINARY_TYPE is set
   buf[1]	least significant byte of the precise type
   buf[2..3]	length (prefix_len if nonzero, else type->len), low 16 bits
   buf[4..5]	charset-collation code; bit 128 of buf[4] is set if
 		DATA_NOT_NULL is set */
 UNIV_INLINE
 void
 dtype_new_store_for_order_and_null_size(
 /*====================================*/
 	byte*		buf,	/* in: buffer for
 				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
 				bytes where we store the info */
 	const dtype_t*	type,	/* in: type struct */
 	ulint		prefix_len)/* in: prefix length to
 				replace type->len, or 0 */
 {
 #if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
 #error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
 #endif
 	const ulint	prtype = type->prtype;
 	const ulint	charset_coll = dtype_get_charset_coll(prtype);
 	ulint		type_byte = type->mtype & 0xFFUL;
 	ulint		stored_len = type->len;
 	if (prtype & DATA_BINARY_TYPE) {
 		type_byte |= 128;
 	}
 	/* Versions < 4.1.2 additionally set bit 64 of buf[0] when the
 	precise type had the DATA_NONLATIN1 flag; that is no longer done. */
 	if (prefix_len) {
 		/* A nonzero prefix length overrides the type length. */
 		stored_len = prefix_len;
 	}
 	buf[0] = (byte) type_byte;
 	buf[1] = (byte) (prtype & 0xFFUL);
 	mach_write_to_2(buf + 2, stored_len & 0xFFFFUL);
 	ut_ad(charset_coll < 256);
 	mach_write_to_2(buf + 4, charset_coll);
 	if (prtype & DATA_NOT_NULL) {
 		buf[4] |= 128;
 	}
 }
 /**************************************************************************
 Fills in a type from the < 4.1.x storage format of the information which
 determines alphabetical ordering and the storage size of an SQL NULL
 value. That format carries no charset-collation code, so the default one
 (data_mysql_default_charset_coll) is put into the precise type. */
 UNIV_INLINE
 void
 dtype_read_for_order_and_null_size(
 /*===============================*/
 	dtype_t*	type,	/* in: type struct */
 	const byte*	buf)	/* in: buffer for stored type order info */
 {
 #if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
 # error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
 #endif
 	ulint	old_prtype = buf[1];
 	if (buf[0] & 128) {
 		/* Bit 128 of the first byte carries the binary flag. */
 		old_prtype |= DATA_BINARY_TYPE;
 	}
 	type->mtype = buf[0] & 63;
 	type->len = mach_read_from_2(buf + 2);
 	type->prtype = dtype_form_prtype(old_prtype,
 					 data_mysql_default_charset_coll);
 	dtype_set_mblen(type);
 }
 /**************************************************************************
 Fills in a type from the >= 4.1.x storage format of the information which
 determines alphabetical ordering and the storage size of an SQL NULL
 value. For string types the stored charset-collation code is merged into
 the precise type; a stored code of 0 is replaced by the default one. */
 UNIV_INLINE
 void
 dtype_new_read_for_order_and_null_size(
 /*===================================*/
 	dtype_t*	type,	/* in: type struct */
 	const byte*	buf)	/* in: buffer for stored type order info */
 {
 #if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
 #error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
 #endif
 	const ulint	mtype = buf[0] & 63;
 	ulint		prtype = buf[1];
 	ulint		charset_coll;
 	/* Bit 128 of byte 0 is the binary flag and bit 128 of byte 4 is
 	the NOT NULL flag. */
 	if (buf[0] & 128) {
 		prtype |= DATA_BINARY_TYPE;
 	}
 	if (buf[4] & 128) {
 		prtype |= DATA_NOT_NULL;
 	}
 	type->mtype = mtype;
 	type->len = mach_read_from_2(buf + 2);
 	/* Mask off the NOT NULL flag bit from the 2-byte charset field. */
 	charset_coll = mach_read_from_2(buf + 4) & 0x7fff;
 	if (dtype_is_string_type(mtype)) {
 		ut_a(charset_coll < 256);
 		if (charset_coll == 0) {
 			/* This insert buffer record was inserted with MySQL
 			version < 4.1.2, and the charset-collation code was not
 			explicitly stored to dtype->prtype at that time. It
 			must be the default charset-collation of this MySQL
 			installation. */
 			charset_coll = data_mysql_default_charset_coll;
 		}
 		prtype = dtype_form_prtype(prtype, charset_coll);
 	}
 	type->prtype = prtype;
 	dtype_set_mblen(type);
 }
 /***************************************************************************
 Returns the size of a fixed size data type, 0 if not a fixed size type.
 DATA_MYSQL columns are fixed size when they are binary or when the minimum
 and maximum character lengths are equal. An unknown main type triggers
 ut_error. */
 UNIV_INLINE
 ulint
 dtype_get_fixed_size_low(
 /*=====================*/
 				/* out: fixed size, or 0 */
 	ulint	mtype,		/* in: main type */
 	ulint	prtype,		/* in: precise type */
 	ulint	len,		/* in: length */
 	ulint	mbminlen,	/* in: minimum length of a multibyte char */
 	ulint	mbmaxlen)	/* in: maximum length of a multibyte char */
 {
 	switch (mtype) {
 	case DATA_SYS:
 #ifdef UNIV_DEBUG
 		/* Debug builds only: check that the length matches the
 		system column; an unknown system column asserts and
 		yields 0. */
 		switch (prtype & DATA_MYSQL_TYPE_MASK) {
 		case DATA_ROW_ID:
 			ut_ad(len == DATA_ROW_ID_LEN);
 			break;
 		case DATA_TRX_ID:
 			ut_ad(len == DATA_TRX_ID_LEN);
 			break;
 		case DATA_ROLL_PTR:
 			ut_ad(len == DATA_ROLL_PTR_LEN);
 			break;
 		default:
 			ut_ad(0);
 			return(0);
 		}
 #endif /* UNIV_DEBUG */
 		/* Fall through: a system column is fixed size */
 	case DATA_CHAR:
 	case DATA_FIXBINARY:
 	case DATA_INT:
 	case DATA_FLOAT:
 	case DATA_DOUBLE:
 		return(len);
 	case DATA_MYSQL:
 		if (prtype & DATA_BINARY_TYPE) {
 			return(len);
 		} else {
 #ifdef UNIV_HOTBACKUP
 			if (mbminlen == mbmaxlen) {
 				return(len);
 			}
 #else /* UNIV_HOTBACKUP */
 			/* We play it safe here and ask MySQL for
 			mbminlen and mbmaxlen. The mbminlen and
 			mbmaxlen of a type are initialized exactly
 			when its prtype is initialized (by one of
 			the 3 initializing functions in this file),
 			but it could be that none of these functions
 			has been called. A mismatch is only logged
 			to stderr; the values passed in by the caller
 			are still the ones used for the decision
 			below. */
 			ulint	i_mbminlen, i_mbmaxlen;
 			innobase_get_cset_width(
 				dtype_get_charset_coll(prtype),
 				&i_mbminlen, &i_mbmaxlen);
 			if (UNIV_UNLIKELY(mbminlen != i_mbminlen)
 			    || UNIV_UNLIKELY(mbmaxlen != i_mbmaxlen)) {
 				ut_print_timestamp(stderr);
 				fprintf(stderr, "  InnoDB: "
 					"mbminlen=%lu, "
 					"mbmaxlen=%lu, "
 					"type->mbminlen=%lu, "
 					"type->mbmaxlen=%lu\n",
 					(ulong) i_mbminlen,
 					(ulong) i_mbmaxlen,
 					(ulong) mbminlen,
 					(ulong) mbmaxlen);
 			}
 			if (mbminlen == mbmaxlen) {
 				return(len);
 			}
 #endif /* !UNIV_HOTBACKUP */
 		}
 		/* fall through for variable-length charsets */
 	case DATA_VARCHAR:
 	case DATA_BINARY:
 	case DATA_DECIMAL:
 	case DATA_VARMYSQL:
 	case DATA_BLOB:
 		return(0);
 	default:
 		ut_error;
 	}
 	/* Only reached if ut_error returns */
 	return(0);
 }
 /***************************************************************************
 Computes the smallest number of bytes a value of a data type can occupy.
 Fixed size types yield their length, variable size types yield 0, and a
 non-binary DATA_MYSQL column in a variable-length character set yields
 the length scaled down by mbminlen / mbmaxlen. */
 UNIV_INLINE
 ulint
 dtype_get_min_size_low(
 /*===================*/
 				/* out: minimum size */
 	ulint	mtype,		/* in: main type */
 	ulint	prtype,		/* in: precise type */
 	ulint	len,		/* in: length */
 	ulint	mbminlen,	/* in: minimum length of a multibyte char */
 	ulint	mbmaxlen)	/* in: maximum length of a multibyte char */
 {
 	switch (mtype) {
 	case DATA_SYS:
 #ifdef UNIV_DEBUG
 		/* Debug builds only: the length must match the kind of
 		system column; an unknown kind asserts and yields 0. */
 		switch (prtype & DATA_MYSQL_TYPE_MASK) {
 		case DATA_ROW_ID:
 			ut_ad(len == DATA_ROW_ID_LEN);
 			break;
 		case DATA_TRX_ID:
 			ut_ad(len == DATA_TRX_ID_LEN);
 			break;
 		case DATA_ROLL_PTR:
 			ut_ad(len == DATA_ROLL_PTR_LEN);
 			break;
 		default:
 			ut_ad(0);
 			return(0);
 		}
 #endif /* UNIV_DEBUG */
 		/* Fall through */
 	case DATA_CHAR:
 	case DATA_FIXBINARY:
 	case DATA_INT:
 	case DATA_FLOAT:
 	case DATA_DOUBLE:
 		return(len);
 	case DATA_MYSQL:
 		if (!(prtype & DATA_BINARY_TYPE) && mbminlen != mbmaxlen) {
 			/* Variable-length character set: len counts
 			mbmaxlen bytes per character, the minimum is
 			mbminlen bytes per character. */
 			ut_a(mbminlen > 0);
 			ut_a(mbmaxlen > mbminlen);
 			ut_a(len % mbmaxlen == 0);
 			return(len * mbminlen / mbmaxlen);
 		}
 		/* Binary, or every character has the same width. */
 		return(len);
 	case DATA_VARCHAR:
 	case DATA_BINARY:
 	case DATA_DECIMAL:
 	case DATA_VARMYSQL:
 	case DATA_BLOB:
 		return(0);
 	default:
 		ut_error;
 	}
 	return(0);
 }
 /***************************************************************************
 Computes the largest number of bytes a value of a data type can occupy:
 the type length for every known main type except DATA_BLOB, for which
 ULINT_MAX is returned. Note: types in system tables may be incomplete and
 return incorrect information. */
 UNIV_INLINE
 ulint
 dtype_get_max_size_low(
 /*===================*/
 				/* out: maximum size */
 	ulint	mtype,		/* in: main type */
 	ulint	len)		/* in: length */
 {
 	switch (mtype) {
 	case DATA_BLOB:
 		/* No upper bound is derived from len for a BLOB. */
 		return(ULINT_MAX);
 	case DATA_VARCHAR:
 	case DATA_CHAR:
 	case DATA_FIXBINARY:
 	case DATA_BINARY:
 	case DATA_INT:
 	case DATA_SYS:
 	case DATA_FLOAT:
 	case DATA_DOUBLE:
 	case DATA_DECIMAL:
 	case DATA_VARMYSQL:
 	case DATA_MYSQL:
 		return(len);
 	default:
 		ut_error;
 	}
 	return(ULINT_MAX);
 }
 /***************************************************************************
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
 For fixed length types it is the fixed length of the type, otherwise 0. */
 UNIV_INLINE
 ulint
 dtype_get_sql_null_size(
 /*====================*/
 				/* out: SQL null storage size
 				in ROW_FORMAT=REDUNDANT, in bytes */
 	const dtype_t*	type)	/* in: type */
 {
 	/* Return the size itself. Comparing it with 0 would collapse the
 	result to 0 or 1, which is a truth value and not the byte count
 	that the contract above promises. */
 	return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
 					type->mbminlen, type->mbmaxlen));
 }
--- a/include/data0types.h
+++ b/include/data0types.h
@@ -0,0 +1,19 @@
 /************************************************************************
 Some type definitions
 (c) 1994-2000 Innobase Oy
 Created 9/21/2000 Heikki Tuuri
 *************************************************************************/
 #ifndef data0types_h
 #define data0types_h
 /* SQL data field struct; only the type name is introduced here,
 struct dfield_struct itself is not defined in this header */
 typedef struct dfield_struct	dfield_t;
 /* SQL data tuple struct; only the type name is introduced here,
 struct dtuple_struct itself is not defined in this header */
 typedef struct dtuple_struct	dtuple_t;
 #endif
--- a/include/db0err.h
+++ b/include/db0err.h
@@ -0,0 +1,88 @@
 /******************************************************
 Global error codes for the database
 (c) 1996 Innobase Oy
 Created 5/24/1996 Heikki Tuuri
 *******************************************************/
 #ifndef db0err_h
 #define db0err_h
 /* Global error and status codes. Enumerators without an explicit value
 continue numbering from the previous enumerator; the explicit values start
 new ranges at 10, 30, 1000 and 1500. */
 enum db_err {
 	DB_SUCCESS = 10,
 	/* The following are error codes */
 	DB_ERROR,
 	DB_OUT_OF_MEMORY,
 	DB_OUT_OF_FILE_SPACE,
 	DB_LOCK_WAIT,
 	DB_DEADLOCK,
 	DB_ROLLBACK,
 	DB_DUPLICATE_KEY,
 	DB_QUE_THR_SUSPENDED,
 	DB_MISSING_HISTORY,		/* required history data has been
 					deleted due to lack of space in
 					rollback segment */
 	DB_CLUSTER_NOT_FOUND = 30,
 	DB_TABLE_NOT_FOUND,
 	DB_MUST_GET_MORE_FILE_SPACE,	/* the database has to be stopped
 					and restarted with more file space */
 	DB_TABLE_IS_BEING_USED,
 	DB_TOO_BIG_RECORD,		/* a record in an index would not fit
 					on a compressed page, or it would
 					become bigger than 1/2 free space in
 					an uncompressed page frame */
 	DB_LOCK_WAIT_TIMEOUT,		/* lock wait lasted too long */
 	DB_NO_REFERENCED_ROW,		/* referenced key value not found
 					for a foreign key in an insert or
 					update of a row */
 	DB_ROW_IS_REFERENCED,		/* cannot delete or update a row
 					because it contains a key value
 					which is referenced */
 	DB_CANNOT_ADD_CONSTRAINT,	/* adding a foreign key constraint
 					to a table failed */
 	DB_CORRUPTION,			/* data structure corruption noticed */
 	DB_COL_APPEARS_TWICE_IN_INDEX,	/* InnoDB cannot handle an index
 					where same column appears twice */
 	DB_CANNOT_DROP_CONSTRAINT,	/* dropping a foreign key constraint
 					from a table failed */
 	DB_NO_SAVEPOINT,		/* no savepoint exists with the given
 					name */
 	DB_TABLESPACE_ALREADY_EXISTS,	/* we cannot create a new single-table
 					tablespace because a file of the same
 					name already exists */
 	DB_TABLESPACE_DELETED,		/* tablespace does not exist or is
 					being dropped right now */
 	DB_LOCK_TABLE_FULL,		/* lock structs have exhausted the
 					buffer pool (for big transactions,
 					InnoDB stores the lock structs in the
 					buffer pool) */
 	DB_FOREIGN_DUPLICATE_KEY,	/* foreign key constraints
 					activated by the operation would
 					lead to a duplicate key in some
 					table */
 	DB_TOO_MANY_CONCURRENT_TRXS,	/* when InnoDB runs out of the
 					preconfigured undo slots, this can
 					only happen when there are too many
 					concurrent transactions */
 	DB_UNSUPPORTED,			/* when InnoDB sees any artefact or
 					a feature that it can't recognize or
 					work with e.g., FT indexes created by
 					a later version of the engine. */
 	DB_PRIMARY_KEY_IS_NULL,		/* a column in the PRIMARY KEY
 					was found to be NULL */
 	/* The following are partial failure codes */
 	DB_FAIL = 1000,
 	DB_OVERFLOW,
 	DB_UNDERFLOW,
 	DB_STRONG_FAIL,
 	DB_ZIP_OVERFLOW,
 	DB_RECORD_NOT_FOUND = 1500,
 	DB_END_OF_INDEX
 };
 #endif
--- a/include/dict0boot.h
+++ b/include/dict0boot.h
@@ -0,0 +1,134 @@
 /******************************************************
 Data dictionary creation and booting
 (c) 1996 Innobase Oy
 Created 4/18/1996 Heikki Tuuri
 *******************************************************/
 #ifndef dict0boot_h
 #define dict0boot_h
 #include "univ.i"
 #include "mtr0mtr.h"
 #include "mtr0log.h"
 #include "ut0byte.h"
 #include "buf0buf.h"
 #include "fsp0fsp.h"
 #include "dict0dict.h"
 typedef	byte	dict_hdr_t;
 /**************************************************************************
 Gets a pointer to the dictionary header and x-latches its page. */
 UNIV_INTERN
 dict_hdr_t*
 dict_hdr_get(
 /*=========*/
 			/* out: pointer to the dictionary header,
 			page x-latched */
 	mtr_t*	mtr);	/* in: mtr */
 /**************************************************************************
 Returns a new row, table, index, or tree id. */
 UNIV_INTERN
 dulint
 dict_hdr_get_new_id(
 /*================*/
 			/* out: the new id */
 	ulint	type);	/* in: DICT_HDR_ROW_ID, ... */
 /**************************************************************************
 Returns a new row id. */
 UNIV_INLINE
 dulint
 dict_sys_get_new_row_id(void);
 /*=========================*/
 			/* out: the new id */
 /**************************************************************************
 Reads a row id from a record or other 6-byte stored form. */
 UNIV_INLINE
 dulint
 dict_sys_read_row_id(
 /*=================*/
 			/* out: row id */
 	byte*	field);	/* in: record field */
 /**************************************************************************
 Writes a row id to a record or other 6-byte stored form. */
 UNIV_INLINE
 void
 dict_sys_write_row_id(
 /*==================*/
 	byte*	field,	/* in: record field */
 	dulint	row_id);/* in: row id */
 /*********************************************************************
 Initializes the data dictionary memory structures when the database is
 started. This function is also called when the data dictionary is created. */
 UNIV_INTERN
 void
 dict_boot(void);
 /*===========*/
 /*********************************************************************
 Creates and initializes the data dictionary at the database creation. */
 UNIV_INTERN
 void
 dict_create(void);
 /*=============*/
 /* Space id and page no where the dictionary header resides */
 #define	DICT_HDR_SPACE		0	/* the SYSTEM tablespace */
 #define	DICT_HDR_PAGE_NO	FSP_DICT_HDR_PAGE_NO
 /* The ids for the basic system tables and their indexes */
 #define DICT_TABLES_ID		ut_dulint_create(0, 1)
 #define DICT_COLUMNS_ID		ut_dulint_create(0, 2)
 #define DICT_INDEXES_ID		ut_dulint_create(0, 3)
 #define DICT_FIELDS_ID		ut_dulint_create(0, 4)
 /* The following is a secondary index on SYS_TABLES */
 #define DICT_TABLE_IDS_ID	ut_dulint_create(0, 5)
 #define	DICT_HDR_FIRST_ID	10	/* the ids for tables etc. start
 					from this number, except for basic
 					system tables and their above defined
 					indexes; ibuf tables and indexes are
 					assigned as the id the number
 					DICT_IBUF_ID_MIN plus the space id */
 #define DICT_IBUF_ID_MIN	ut_dulint_create(0xFFFFFFFFUL, 0)
 /* The offset of the dictionary header on the page */
 #define	DICT_HDR		FSEG_PAGE_DATA
 /*-------------------------------------------------------------*/
 /* Dictionary header offsets; each value is a byte offset of a field
 within the dictionary header */
 #define DICT_HDR_ROW_ID		0	/* The latest assigned row id */
 #define	DICT_HDR_TABLE_ID	8	/* The latest assigned table id */
 #define	DICT_HDR_INDEX_ID	16	/* The latest assigned index id */
 #define	DICT_HDR_MIX_ID		24	/* Obsolete, always 0. */
 #define	DICT_HDR_TABLES		32	/* Root of the table index tree */
 #define	DICT_HDR_TABLE_IDS	36	/* Root of the table ids index tree
 					(NOTE(review): presumably the
 					secondary index on SYS_TABLES
 					identified by DICT_TABLE_IDS_ID;
 					confirm against dict_boot) */
 #define	DICT_HDR_COLUMNS	40	/* Root of the column index tree */
 #define	DICT_HDR_INDEXES	44	/* Root of the index index tree */
 #define	DICT_HDR_FIELDS		48	/* Root of the index field
 					index tree */
 #define DICT_HDR_FSEG_HEADER	56	/* Segment header for the tablespace
 					segment into which the dictionary
 					header is created */
 /*-------------------------------------------------------------*/
 /* The field number of the page number field in the sys_indexes table
 clustered index */
 #define DICT_SYS_INDEXES_PAGE_NO_FIELD	 8
 #define DICT_SYS_INDEXES_SPACE_NO_FIELD	 7
 #define DICT_SYS_INDEXES_TYPE_FIELD	 6
 /* When a row id which is zero modulo this number (which must be a power of
 two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
 updated */
 #define DICT_HDR_ROW_ID_WRITE_MARGIN	256
 #ifndef UNIV_NONINL
 #include "dict0boot.ic"
 #endif
 #endif
--- a/include/dict0boot.ic
+++ b/include/dict0boot.ic
@@ -0,0 +1,76 @@
 /******************************************************
 Data dictionary creation and booting
 (c) 1996 Innobase Oy
 Created 4/18/1996 Heikki Tuuri
 *******************************************************/
 /**************************************************************************
 Writes the current value of the row id counter to the dictionary header file
 page. */
 UNIV_INTERN
 void
 dict_hdr_flush_row_id(void);
 /*=======================*/
 /**************************************************************************
 Hands out a new row id. The dict_sys->row_id counter is read and incremented
 while holding dict_sys->mutex. */
 UNIV_INLINE
 dulint
 dict_sys_get_new_row_id(void)
 /*=========================*/
 			/* out: the new id, i.e. the value of the
 			counter before it was incremented */
 {
 	dulint	new_id;
 	mutex_enter(&(dict_sys->mutex));
 	new_id = dict_sys->row_id;
 	if ((ut_dulint_get_low(new_id) % DICT_HDR_ROW_ID_WRITE_MARGIN) == 0) {
 		/* the low part of the id is a multiple of the write
 		margin: write the counter to the dictionary header */
 		dict_hdr_flush_row_id();
 	}
 	UT_DULINT_INC(dict_sys->row_id);
 	mutex_exit(&(dict_sys->mutex));
 	return(new_id);
 }
 /**************************************************************************
 Reads a row id from a record or other 6-byte stored form. */
 UNIV_INLINE
 dulint
 dict_sys_read_row_id(
 /*=================*/
 			/* out: row id read with mach_read_from_6() */
 	byte*	field)	/* in: record field */
 {
 	dulint	row_id;
 	/* the 6-byte read below relies on this length */
 #if DATA_ROW_ID_LEN != 6
 # error "DATA_ROW_ID_LEN != 6"
 #endif
 	row_id = mach_read_from_6(field);
 	return(row_id);
 }
 /**************************************************************************
 Writes a row id to a record or other 6-byte stored form. */
 UNIV_INLINE
 void
 dict_sys_write_row_id(
 /*==================*/
 	byte*	field,	/* in: record field to write to */
 	dulint	row_id)	/* in: row id */
 {
 	/* the 6-byte write is only valid for a 6-byte row id; any other
 	length stops the compilation */
 #if DATA_ROW_ID_LEN == 6
 	mach_write_to_6(field, row_id);
 #else
 # error "DATA_ROW_ID_LEN != 6"
 #endif
 }
--- a/include/dict0crea.h
+++ b/include/dict0crea.h
@@ -0,0 +1,183 @@
 /******************************************************
 Database object creation
 (c) 1996 Innobase Oy
 Created 1/8/1996 Heikki Tuuri
 *******************************************************/
 #ifndef dict0crea_h
 #define dict0crea_h
 #include "univ.i"
 #include "dict0types.h"
 #include "dict0dict.h"
 #include "que0types.h"
 #include "row0types.h"
 #include "mtr0mtr.h"
 /*************************************************************************
 Creates a table create graph. */
 UNIV_INTERN
 tab_node_t*
 tab_create_graph_create(
 /*====================*/
 				/* out, own: table create node */
 	dict_table_t*	table,	/* in: table to create, built as a memory data
 				structure */
 	mem_heap_t*	heap);	/* in: heap where created */
 /*************************************************************************
 Creates an index create graph. */
 UNIV_INTERN
 ind_node_t*
 ind_create_graph_create(
 /*====================*/
 				/* out, own: index create node */
 	dict_index_t*	index,	/* in: index to create, built as a memory data
 				structure */
 	mem_heap_t*	heap);	/* in: heap where created */
 /***************************************************************
 Creates a table. This is a high-level function used in SQL execution graphs. */
 UNIV_INTERN
 que_thr_t*
 dict_create_table_step(
 /*===================*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr);	/* in: query thread */
 /***************************************************************
 Creates an index. This is a high-level function used in SQL execution
 graphs. */
 UNIV_INTERN
 que_thr_t*
 dict_create_index_step(
 /*===================*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr);	/* in: query thread */
 /***********************************************************************
 Truncates the index tree associated with a row in SYS_INDEXES table. */
 UNIV_INTERN
 ulint
 dict_truncate_index_tree(
 /*=====================*/
 				/* out: new root page number, or
 				FIL_NULL on failure */
 	dict_table_t*	table,	/* in: the table the index belongs to */
 	ulint		space,	/* in: 0=truncate,
 				nonzero=create the index tree in the
 				given tablespace */
 	btr_pcur_t*	pcur,	/* in/out: persistent cursor pointing to
 				record in the clustered index of
 				SYS_INDEXES table. The cursor may be
 				repositioned in this call. */
 	mtr_t*		mtr);	/* in: mtr having the latch
 				on the record page. The mtr may be
 				committed and restarted in this call. */
 /***********************************************************************
 Drops the index tree associated with a row in SYS_INDEXES table. */
 UNIV_INTERN
 void
 dict_drop_index_tree(
 /*=================*/
 	rec_t*	rec,	/* in/out: record in the clustered index
 			of SYS_INDEXES table */
 	mtr_t*	mtr);	/* in: mtr having the latch on the record page */
 #ifndef UNIV_HOTBACKUP
 /********************************************************************
 Creates the foreign key constraints system tables inside InnoDB
 at database creation or database start if they are not found or are
 not of the right form. */
 UNIV_INTERN
 ulint
 dict_create_or_check_foreign_constraint_tables(void);
 /*================================================*/
 				/* out: DB_SUCCESS or error code */
 /************************************************************************
 Adds foreign key definitions to data dictionary tables in the database. We
 look at table->foreign_list, and also generate names to constraints that were
 not named by the user. A generated constraint has a name of the format
 databasename/tablename_ibfk_<number>, where the numbers start from 1, and are
 given locally for this table, that is, the number is not global, as in the
 old format constraints < 4.0.18 it used to be. */
 UNIV_INTERN
 ulint
 dict_create_add_foreigns_to_dictionary(
 /*===================================*/
 				/* out: error code or DB_SUCCESS */
 	ulint		start_id,/* in: if we are actually doing ALTER TABLE
 				ADD CONSTRAINT, we want to generate constraint
 				numbers which are bigger than in the table so
 				far; we number the constraints from
 				start_id + 1 up; start_id should be set to 0 if
 				we are creating a new table, or if the table
 				so far has no constraints for which the name
 				was generated here */
 	dict_table_t*	table,	/* in: table */
 	trx_t*		trx);	/* in: transaction */
 #endif /* !UNIV_HOTBACKUP */
 /* Table create node structure: holds the table being created, the child
 nodes that do the inserts and the commit, and the local execution state */
 struct tab_node_struct{
 	que_common_t	common;	/* node type: QUE_NODE_TABLE_CREATE */
 	dict_table_t*	table;	/* table to create, built as a memory data
 				structure with dict_mem_... functions */
 	ins_node_t*	tab_def; /* child node which does the insert of
 				the table definition; the row to be inserted
 				is built by the parent node  */
 	ins_node_t*	col_def; /* child node which does the inserts of
 				the column definitions; the row to be inserted
 				is built by the parent node  */
 	commit_node_t*	commit_node;
 				/* child node which performs a commit after
 				a successful table creation */
 	/*----------------------*/
 	/* Local storage for this graph node */
 	ulint		state;	/* node execution state; see the
 				TABLE_... table create node states */
 	ulint		col_no;	/* next column definition to insert */
 	mem_heap_t*	heap;	/* memory heap used as auxiliary storage */
 };
 /* Table create node states */
 #define	TABLE_BUILD_TABLE_DEF	1
 #define	TABLE_BUILD_COL_DEF	2
 #define	TABLE_COMMIT_WORK	3
 #define	TABLE_ADD_TO_CACHE	4
 #define	TABLE_COMPLETED		5
 /* Index create node struct: holds the index being created, the child
 nodes that do the inserts and the commit, and the local execution state */
 struct ind_node_struct{
 	que_common_t	common;	/* node type: QUE_NODE_INDEX_CREATE */
 	dict_index_t*	index;	/* index to create, built as a memory data
 				structure with dict_mem_... functions */
 	ins_node_t*	ind_def; /* child node which does the insert of
 				the index definition; the row to be inserted
 				is built by the parent node  */
 	ins_node_t*	field_def; /* child node which does the inserts of
 				the field definitions; the row to be inserted
 				is built by the parent node  */
 	commit_node_t*	commit_node;
 				/* child node which performs a commit after
 				a successful index creation */
 	/*----------------------*/
 	/* Local storage for this graph node */
 	ulint		state;	/* node execution state; see the
 				INDEX_... index create node states */
 	ulint		page_no;/* root page number of the index */
 	dict_table_t*	table;	/* table which owns the index */
 	dtuple_t*	ind_row;/* index definition row built */
 	ulint		field_no;/* next field definition to insert */
 	mem_heap_t*	heap;	/* memory heap used as auxiliary storage */
 };
 /* Index create node states */
 #define	INDEX_BUILD_INDEX_DEF	1
 #define	INDEX_BUILD_FIELD_DEF	2
 #define	INDEX_CREATE_INDEX_TREE	3
 #define	INDEX_COMMIT_WORK	4
 #define	INDEX_ADD_TO_CACHE	5
 #ifndef UNIV_NONINL
 #include "dict0crea.ic"
 #endif
 #endif
--- a/include/dict0crea.ic
+++ b/include/dict0crea.ic
@@ -0,0 +1,8 @@
 /******************************************************
 Database object creation
 (c) 1996 Innobase Oy
 Created 1/8/1996 Heikki Tuuri
 *******************************************************/
--- a/include/dict0dict.h
+++ b/include/dict0dict.h
--- a/include/dict0dict.ic
+++ b/include/dict0dict.ic
@@ -0,0 +1,778 @@
 /**********************************************************************
 Data dictionary system
 (c) 1996 Innobase Oy
 Created 1/8/1996 Heikki Tuuri
 ***********************************************************************/
 #include "dict0load.h"
 #include "rem0types.h"
 #include "data0type.h"
 /*************************************************************************
 Copies the type information of a column (mtype, prtype, len, mbminlen and
 mbmaxlen) into a data type object. */
 UNIV_INLINE
 void
 dict_col_copy_type(
 /*===============*/
 	const dict_col_t*	col,	/* in: column to copy from */
 	dtype_t*		type)	/* out: data type filled in */
 {
 	ut_ad(col && type);
 	/* multi-byte character length bounds */
 	type->mbmaxlen = col->mbmaxlen;
 	type->mbminlen = col->mbminlen;
 	/* length, precise type and main type */
 	type->len = col->len;
 	type->prtype = col->prtype;
 	type->mtype = col->mtype;
 }
 #ifdef UNIV_DEBUG
 /*************************************************************************
 Assert that a column and a data type match: mtype, prtype, len, mbminlen
 and mbmaxlen are compared one by one with ut_ad(). */
 UNIV_INLINE
 ibool
 dict_col_type_assert_equal(
 /*=======================*/
 					/* out: TRUE; the checks themselves
 					are done by the debug assertions */
 	const dict_col_t*	col,	/* in: column */
 	const dtype_t*		type)	/* in: data type */
 {
 	/* both pointers must be non-NULL */
 	ut_ad(col);
 	ut_ad(type);
 	/* field-by-field comparison */
 	ut_ad(col->mtype == type->mtype);
 	ut_ad(col->prtype == type->prtype);
 	ut_ad(col->len == type->len);
 	ut_ad(col->mbminlen == type->mbminlen);
 	ut_ad(col->mbmaxlen == type->mbmaxlen);
 	return(TRUE);
 }
 #endif /* UNIV_DEBUG */
 /***************************************************************************
 Returns the minimum size of the column, as computed by
 dtype_get_min_size_low() from the type fields of the column. */
 UNIV_INLINE
 ulint
 dict_col_get_min_size(
 /*==================*/
 					/* out: minimum size */
 	const dict_col_t*	col)	/* in: column */
 {
 	ulint	min_size;
 	min_size = dtype_get_min_size_low(col->mtype, col->prtype, col->len,
 					  col->mbminlen, col->mbmaxlen);
 	return(min_size);
 }
 /***************************************************************************
 Returns the maximum size of the column, as computed by
 dtype_get_max_size_low() from the main type and length of the column. */
 UNIV_INLINE
 ulint
 dict_col_get_max_size(
 /*==================*/
 					/* out: maximum size */
 	const dict_col_t*	col)	/* in: column */
 {
 	ulint	max_size;
 	max_size = dtype_get_max_size_low(col->mtype, col->len);
 	return(max_size);
 }
 /***************************************************************************
 Returns the size of a fixed size column, 0 if not a fixed size column.
 The value comes from dtype_get_fixed_size_low(). */
 UNIV_INLINE
 ulint
 dict_col_get_fixed_size(
 /*====================*/
 					/* out: fixed size, or 0 */
 	const dict_col_t*	col)	/* in: column */
 {
 	ulint	fixed_size;
 	fixed_size = dtype_get_fixed_size_low(col->mtype, col->prtype,
 					      col->len,
 					      col->mbminlen, col->mbmaxlen);
 	return(fixed_size);
 }
 /***************************************************************************
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
 For fixed length types it is the fixed length of the type, otherwise 0. */
 UNIV_INLINE
 ulint
 dict_col_get_sql_null_size(
 /*=======================*/
 					/* out: SQL null storage size
 					in ROW_FORMAT=REDUNDANT */
 	const dict_col_t*	col)	/* in: column */
 {
 	/* the SQL NULL size is the fixed size of the column */
 	ulint	null_size = dict_col_get_fixed_size(col);
 	return(null_size);
 }
 /*************************************************************************
 Gets the column number, i.e. the ind field of the column. */
 UNIV_INLINE
 ulint
 dict_col_get_no(
 /*============*/
 					/* out: col->ind */
 	const dict_col_t*	col)	/* in: column */
 {
 	ulint	col_no;
 	ut_ad(col);
 	col_no = col->ind;
 	return(col_no);
 }
 /*************************************************************************
 Gets the column position in the clustered index: the first field that
 refers to the column and has no prefix length. */
 UNIV_INLINE
 ulint
 dict_col_get_clust_pos(
 /*===================*/
 						/* out: field position, or
 						ULINT_UNDEFINED if no such
 						field exists */
 	const dict_col_t*	col,		/* in: table column */
 	const dict_index_t*	clust_index)	/* in: clustered index */
 {
 	ulint	pos = 0;
 	ut_ad(col);
 	ut_ad(clust_index);
 	ut_ad(dict_index_is_clust(clust_index));
 	while (pos < clust_index->n_def) {
 		const dict_field_t*	field = clust_index->fields + pos;
 		/* column prefix fields do not count as the column itself */
 		if (field->prefix_len == 0 && field->col == col) {
 			return(pos);
 		}
 		pos++;
 	}
 	return(ULINT_UNDEFINED);
 }
 #ifdef UNIV_DEBUG
 /************************************************************************
 Gets the first index on the table (the clustered index), i.e. the first
 element of the table->indexes list. */
 UNIV_INLINE
 dict_index_t*
 dict_table_get_first_index(
 /*=======================*/
 					/* out: index, NULL if none exists */
 	const dict_table_t*	table)	/* in: table */
 {
 	dict_index_t*	first;
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 	/* the list macro is applied to a non-const view of the table */
 	first = UT_LIST_GET_FIRST(((dict_table_t*) table)->indexes);
 	return(first);
 }
 /************************************************************************
 Gets the next index on the table, i.e. the successor of the given index in
 the indexes list. */
 UNIV_INLINE
 dict_index_t*
 dict_table_get_next_index(
 /*======================*/
 					/* out: index, NULL if none left */
 	const dict_index_t*	index)	/* in: index */
 {
 	dict_index_t*	next;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	/* the list macro is applied to a non-const view of the index */
 	next = UT_LIST_GET_NEXT(indexes, (dict_index_t*) index);
 	return(next);
 }
 #endif /* UNIV_DEBUG */
 /************************************************************************
 Check whether the index is the clustered index, i.e. whether the
 DICT_CLUSTERED bit is set in index->type. */
 UNIV_INLINE
 ulint
 dict_index_is_clust(
 /*================*/
 					/* out: nonzero for clustered index,
 					zero for other indexes */
 	const dict_index_t*	index)	/* in: index */
 {
 	ulint	is_clust;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	is_clust = UNIV_UNLIKELY(index->type & DICT_CLUSTERED);
 	return(is_clust);
 }
 /************************************************************************
 Check whether the index is unique, i.e. whether the DICT_UNIQUE bit is set
 in index->type. */
 UNIV_INLINE
 ulint
 dict_index_is_unique(
 /*=================*/
 					/* out: nonzero for unique index,
 					zero for other indexes */
 	const dict_index_t*	index)	/* in: index */
 {
 	ulint	is_unique;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	is_unique = UNIV_UNLIKELY(index->type & DICT_UNIQUE);
 	return(is_unique);
 }
 /************************************************************************
 Check whether the index is the insert buffer tree, i.e. whether the
 DICT_IBUF bit is set in index->type. */
 UNIV_INLINE
 ulint
 dict_index_is_ibuf(
 /*===============*/
 					/* out: nonzero for insert buffer,
 					zero for other indexes */
 	const dict_index_t*	index)	/* in: index */
 {
 	ulint	is_ibuf;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	is_ibuf = UNIV_UNLIKELY(index->type & DICT_IBUF);
 	return(is_ibuf);
 }
 /************************************************************************
 Gets the number of user-defined columns in a table in the dictionary
 cache: all columns minus the DATA_N_SYS_COLS system columns. */
 UNIV_INLINE
 ulint
 dict_table_get_n_user_cols(
 /*=======================*/
 					/* out: number of user-defined
 					(e.g., not ROW_ID)
 					columns of a table */
 	const dict_table_t*	table)	/* in: table */
 {
 	ulint	n_user_cols;
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 	n_user_cols = table->n_cols - DATA_N_SYS_COLS;
 	return(n_user_cols);
 }
 /************************************************************************
 Gets the number of system columns in a table in the dictionary cache.
 The result is the constant DATA_N_SYS_COLS; the table argument is only
 used by the debug assertions. */
 UNIV_INLINE
 ulint
 dict_table_get_n_sys_cols(
 /*======================*/
 					/* out: number of system (e.g.,
 					ROW_ID) columns of a table */
 	const dict_table_t*	table __attribute__((unused)))	/* in: table */
 {
 	ulint	n_sys_cols = DATA_N_SYS_COLS;
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 	ut_ad(table->cached);
 	return(n_sys_cols);
 }
 /************************************************************************
 Gets the number of all columns (also system) in a table in the dictionary
 cache, i.e. table->n_cols. */
 UNIV_INLINE
 ulint
 dict_table_get_n_cols(
 /*==================*/
 					/* out: number of columns of a table */
 	const dict_table_t*	table)	/* in: table */
 {
 	ulint	n_cols;
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 	n_cols = table->n_cols;
 	return(n_cols);
 }
 #ifdef UNIV_DEBUG
 /************************************************************************
 Gets the nth column of a table: the element at index pos of the
 table->cols array. */
 UNIV_INLINE
 dict_col_t*
 dict_table_get_nth_col(
 /*===================*/
 					/* out: pointer to column object */
 	const dict_table_t*	table,	/* in: table */
 	ulint			pos)	/* in: position of column; must be
 					less than table->n_def */
 {
 	dict_col_t*	cols;
 	ut_ad(table);
 	ut_ad(pos < table->n_def);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 	cols = (dict_col_t*) (table->cols);
 	return(cols + pos);
 }
 /************************************************************************
 Gets the given system column of a table. The system columns are looked up
 in the last DATA_N_SYS_COLS positions of the table columns. */
 UNIV_INLINE
 dict_col_t*
 dict_table_get_sys_col(
 /*===================*/
 					/* out: pointer to column object */
 	const dict_table_t*	table,	/* in: table */
 	ulint			sys)	/* in: DATA_ROW_ID, ...; must be
 					less than DATA_N_SYS_COLS */
 {
 	dict_col_t*	sys_col;
 	ulint		pos;
 	ut_ad(table);
 	ut_ad(sys < DATA_N_SYS_COLS);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 	pos = table->n_cols - DATA_N_SYS_COLS + sys;
 	sys_col = dict_table_get_nth_col(table, pos);
 	/* sanity checks on the column that was found */
 	ut_ad(sys_col->mtype == DATA_SYS);
 	ut_ad(sys_col->prtype == (sys | DATA_NOT_NULL));
 	return(sys_col);
 }
 #endif /* UNIV_DEBUG */
 /************************************************************************
 Gets the given system column number of a table, computed as
 table->n_cols - DATA_N_SYS_COLS + sys. */
 UNIV_INLINE
 ulint
 dict_table_get_sys_col_no(
 /*======================*/
 					/* out: column number */
 	const dict_table_t*	table,	/* in: table */
 	ulint			sys)	/* in: DATA_ROW_ID, ...; must be
 					less than DATA_N_SYS_COLS */
 {
 	ulint	col_no;
 	ut_ad(table);
 	ut_ad(sys < DATA_N_SYS_COLS);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 	col_no = table->n_cols - DATA_N_SYS_COLS + sys;
 	return(col_no);
 }
 /************************************************************************
 Check whether the table uses the compact page format, i.e. whether the
 DICT_TF_COMPACT bit is set in table->flags. */
 UNIV_INLINE
 ibool
 dict_table_is_comp(
 /*===============*/
 					/* out: TRUE if table uses the
 					compact page format */
 	const dict_table_t*	table)	/* in: table */
 {
 	ibool	is_comp;
 	ut_ad(table);
 	/* the masked flag is returned as an ibool, which requires the
 	flag value to equal TRUE */
 #if DICT_TF_COMPACT != TRUE
 #error
 #endif
 	is_comp = UNIV_LIKELY(table->flags & DICT_TF_COMPACT);
 	return(is_comp);
 }
 /************************************************************************
 Determine the file format of a table: the DICT_TF_FORMAT_MASK bits of
 table->flags, shifted down by DICT_TF_FORMAT_SHIFT. */
 UNIV_INLINE
 ulint
 dict_table_get_format(
 /*==================*/
 					/* out: file format version */
 	const dict_table_t*	table)	/* in: table */
 {
 	ulint	format_bits;
 	ut_ad(table);
 	format_bits = table->flags & DICT_TF_FORMAT_MASK;
 	return(format_bits >> DICT_TF_FORMAT_SHIFT);
 }
 /************************************************************************
 Set the file format of a table: the DICT_TF_FORMAT_MASK bits of
 table->flags are cleared and replaced by the shifted format value. */
 UNIV_INLINE
 void
 dict_table_set_format(
 /*==================*/
 	dict_table_t*	table,	/* in/out: table */
 	ulint		format)	/* in: file format version */
 {
 	ulint	other_flags;
 	ut_ad(table);
 	/* keep every flag bit outside the format field */
 	other_flags = table->flags & ~DICT_TF_FORMAT_MASK;
 	table->flags = other_flags | (format << DICT_TF_FORMAT_SHIFT);
 }
 /************************************************************************
 Extract the compressed page size from table flags. A nonzero
 DICT_TF_ZSSIZE_MASK field is decoded as
 (PAGE_ZIP_MIN_SIZE >> 1) << (field >> DICT_TF_ZSSIZE_SHIFT). */
 UNIV_INLINE
 ulint
 dict_table_flags_to_zip_size(
 /*=========================*/
 			/* out: compressed page size,
 			or 0 if not compressed */
 	ulint	flags)	/* in: flags */
 {
 	ulint	ssize_bits = flags & DICT_TF_ZSSIZE_MASK;
 	ulint	zip_size;
 	if (UNIV_LIKELY(!ssize_bits)) {
 		/* no compressed page size is encoded in the flags */
 		return(0);
 	}
 	zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
 		    << (ssize_bits >> DICT_TF_ZSSIZE_SHIFT));
 	ut_ad(zip_size <= UNIV_PAGE_SIZE);
 	return(zip_size);
 }
 /************************************************************************
 Check whether the table uses the compressed compact page format, by
 decoding table->flags with dict_table_flags_to_zip_size(). */
 UNIV_INLINE
 ulint
 dict_table_zip_size(
 /*================*/
 					/* out: compressed page size,
 					or 0 if not compressed */
 	const dict_table_t*	table)	/* in: table */
 {
 	ulint	table_flags;
 	ut_ad(table);
 	table_flags = table->flags;
 	return(dict_table_flags_to_zip_size(table_flags));
 }
 /************************************************************************
 Gets the number of fields in the internal representation of an index,
 including fields added by the dictionary system (index->n_fields). */
 UNIV_INLINE
 ulint
 dict_index_get_n_fields(
 /*====================*/
 					/* out: number of fields */
 	const dict_index_t*	index)	/* in: an internal
 					representation of index (in
 					the dictionary cache) */
 {
 	ulint	n_fields;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	n_fields = index->n_fields;
 	return(n_fields);
 }
 /************************************************************************
 Gets the number of fields in the internal representation of an index
 that uniquely determine the position of an index entry in the index, if
 we do not take multiversioning into account (index->n_uniq): in the B-tree
 use the value returned by dict_index_get_n_unique_in_tree. */
 UNIV_INLINE
 ulint
 dict_index_get_n_unique(
 /*====================*/
 					/* out: number of fields */
 	const dict_index_t*	index)	/* in: an internal representation
 					of index (in the dictionary cache) */
 {
 	ulint	n_uniq;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	ut_ad(index->cached);
 	n_uniq = index->n_uniq;
 	return(n_uniq);
 }
 /************************************************************************
 Gets the number of fields in the internal representation of an index
 which uniquely determine the position of an index entry in the index, if
 we also take multiversioning into account. For a clustered index this is
 dict_index_get_n_unique(), for other indexes dict_index_get_n_fields(). */
 UNIV_INLINE
 ulint
 dict_index_get_n_unique_in_tree(
 /*============================*/
 					/* out: number of fields */
 	const dict_index_t*	index)	/* in: an internal representation
 					of index (in the dictionary cache) */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	ut_ad(index->cached);
 	return(dict_index_is_clust(index)
 	       ? dict_index_get_n_unique(index)
 	       : dict_index_get_n_fields(index));
 }
 /************************************************************************
 Gets the number of user-defined ordering fields in the index. In the internal
 representation of clustered indexes we add the row id to the ordering fields
 to make a clustered index unique, but this function returns the number of
 fields the user defined in the index as ordering fields
 (index->n_user_defined_cols). */
 UNIV_INLINE
 ulint
 dict_index_get_n_ordering_defined_by_user(
 /*======================================*/
 					/* out: number of fields */
 	const dict_index_t*	index)	/* in: an internal representation
 					of index (in the dictionary cache) */
 {
 	ulint	n_user_defined = index->n_user_defined_cols;
 	return(n_user_defined);
 }
 #ifdef UNIV_DEBUG
 /************************************************************************
 Gets the nth field of an index: the element at index pos of the
 index->fields array. */
 UNIV_INLINE
 dict_field_t*
 dict_index_get_nth_field(
 /*=====================*/
 					/* out: pointer to field object */
 	const dict_index_t*	index,	/* in: index */
 	ulint			pos)	/* in: position of field; must be
 					less than index->n_def */
 {
 	dict_field_t*	fields;
 	ut_ad(index);
 	ut_ad(pos < index->n_def);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	fields = (dict_field_t*) (index->fields);
 	return(fields + pos);
 }
 #endif /* UNIV_DEBUG */
 /************************************************************************
 Returns the position of a system column in an index. A clustered index is
 searched with dict_col_get_clust_pos(), any other index with
 dict_index_get_nth_col_pos(). */
 UNIV_INLINE
 ulint
 dict_index_get_sys_col_pos(
 /*=======================*/
 					/* out: position,
 					ULINT_UNDEFINED if not contained */
 	const dict_index_t*	index,	/* in: index */
 	ulint			type)	/* in: DATA_ROW_ID, ... */
 {
 	ulint	pos;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	ut_ad(!(index->type & DICT_UNIVERSAL));
 	if (!dict_index_is_clust(index)) {
 		/* look the column up by its column number */
 		pos = dict_index_get_nth_col_pos(
 			index,
 			dict_table_get_sys_col_no(index->table, type));
 	} else {
 		/* look the column up by its column object */
 		pos = dict_col_get_clust_pos(
 			dict_table_get_sys_col(index->table, type),
 			index);
 	}
 	return(pos);
 }
 /*************************************************************************
 Gets the field column, i.e. field->col. */
 UNIV_INLINE
 const dict_col_t*
 dict_field_get_col(
 /*===============*/
 					/* out: column of the field */
 	const dict_field_t*	field)	/* in: index field */
 {
 	const dict_col_t*	col;
 	ut_ad(field);
 	col = field->col;
 	return(col);
 }
 /************************************************************************
 Returns the column of the field at a given position of an index. */
 UNIV_INLINE
 const dict_col_t*
 dict_index_get_nth_col(
 /*===================*/
 					/* out: column of the pos'th field */
 	const dict_index_t*	index,	/* in: index */
 	ulint			pos)	/* in: position of the field */
 {
 	const dict_field_t*	field;
 	field = dict_index_get_nth_field(index, pos);
 	return(dict_field_get_col(field));
 }
 /************************************************************************
 Returns the column number of the field at a given position of an index. */
 UNIV_INLINE
 ulint
 dict_index_get_nth_col_no(
 /*======================*/
 					/* out: column number of the
 					pos'th field */
 	const dict_index_t*	index,	/* in: index */
 	ulint			pos)	/* in: position of the field */
 {
 	const dict_col_t*	col;
 	col = dict_index_get_nth_col(index, pos);
 	return(dict_col_get_no(col));
 }
 /************************************************************************
 Computes the minimum data size of an index record by summing the
 minimum sizes of the columns of all fields of the index. */
 UNIV_INLINE
 ulint
 dict_index_get_min_size(
 /*====================*/
 					/* out: minimum data size in bytes */
 	const dict_index_t*	index)	/* in: index */
 {
 	ulint	total	= 0;
 	ulint	i;
 	/* Walk the fields from the last one down to the first. */
 	for (i = dict_index_get_n_fields(index); i > 0; i--) {
 		const dict_col_t*	col;
 		col = dict_index_get_nth_col(index, i - 1);
 		total += dict_col_get_min_size(col);
 	}
 	return(total);
 }
 /*************************************************************************
 Reads the id of the space where the root of the index tree resides. */
 UNIV_INLINE
 ulint
 dict_index_get_space(
 /*=================*/
 					/* out: space id */
 	const dict_index_t*	index)	/* in: index */
 {
 	ulint	space_id;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	space_id = index->space;
 	return(space_id);
 }
 /*************************************************************************
 Stores the id of the space where the root of the index tree resides. */
 UNIV_INLINE
 void
 dict_index_set_space(
 /*=================*/
 	dict_index_t*	index,	/* in/out: index whose space id is set */
 	ulint		space)	/* in: new space id */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	/* index->space is a 32-bit unsigned bit-field. */
 	index->space = (unsigned) space;
 }
 /*************************************************************************
 Reads the page number of the root of the index tree. */
 UNIV_INLINE
 ulint
 dict_index_get_page(
 /*================*/
 					/* out: root page number */
 	const dict_index_t*	index)	/* in: index */
 {
 	ulint	page_no;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	page_no = index->page;
 	return(page_no);
 }
 /*************************************************************************
 Stores the page number of the root of the index tree. */
 UNIV_INLINE
 void
 dict_index_set_page(
 /*================*/
 	dict_index_t*	index,	/* in/out: index whose root page is set */
 	ulint		page)	/* in: new root page number */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	/* index->page is a 32-bit unsigned bit-field. */
 	index->page = (unsigned) page;
 }
 /*************************************************************************
 Reads the type flags of the index tree. */
 UNIV_INLINE
 ulint
 dict_index_get_type(
 /*================*/
 					/* out: index type flags */
 	const dict_index_t*	index)	/* in: index */
 {
 	ulint	type_flags;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	type_flags = index->type;
 	return(type_flags);
 }
 /*************************************************************************
 Returns the address of the read-write lock embedded in the index object. */
 UNIV_INLINE
 rw_lock_t*
 dict_index_get_lock(
 /*================*/
 				/* out: read-write lock of the index tree */
 	dict_index_t*	index)	/* in: index */
 {
 	rw_lock_t*	lock;
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	lock = &index->lock;
 	return(lock);
 }
 /************************************************************************
 Returns the amount of free space on a page that is reserved for future
 updates of records. The reservation matters only when there are many
 consecutive inserts, because updates that make the records bigger might
 otherwise fragment the index. */
 UNIV_INLINE
 ulint
 dict_index_get_space_reserve(void)
 /*==============================*/
 				/* out: number of free bytes on page,
 				reserved for updates */
 {
 	/* One sixteenth of the page is kept free. */
 	ulint	reserve	= UNIV_PAGE_SIZE / 16;
 	return(reserve);
 }
 /**************************************************************************
 Searches the dictionary cache for a table with the given name. The caller
 must own the dictionary mutex. */
 UNIV_INLINE
 dict_table_t*
 dict_table_check_if_in_cache_low(
 /*=============================*/
 					/* out: table, NULL if not found
 					in the cache */
 	const char*	table_name)	/* in: table name */
 {
 	dict_table_t*	found;
 	ulint		name_fold;
 	ut_ad(table_name);
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 	/* Fold the name and walk the matching chain of the name hash
 	until an entry with exactly this name is met. */
 	name_fold = ut_fold_string(table_name);
 	HASH_SEARCH(name_hash, dict_sys->table_hash, name_fold,
 		    dict_table_t*, found,
 		    strcmp(found->name, table_name) == 0);
 	return(found);
 }
 /**************************************************************************
 Returns a table object by name. The dictionary cache is consulted first;
 if the table is not there, it is loaded with dict_load_table(). This is
 a low-level function: the caller must own the dictionary mutex. */
 UNIV_INLINE
 dict_table_t*
 dict_table_get_low(
 /*===============*/
 					/* out: table, NULL if not found */
 	const char*	table_name)	/* in: table name */
 {
 	dict_table_t*	cached;
 	ut_ad(table_name);
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 	cached = dict_table_check_if_in_cache_low(table_name);
 	if (cached != NULL) {
 		return(cached);
 	}
 	/* Not in the cache: try to load the definition. */
 	return(dict_load_table(table_name));
 }
 /**************************************************************************
 Returns a table object by table id. The dictionary cache is consulted
 first; if the table is not there, it is loaded with
 dict_load_table_on_id(). The caller must own the dictionary mutex. */
 UNIV_INLINE
 dict_table_t*
 dict_table_get_on_id_low(
 /*=====================*/
 				/* out: table, NULL if does not exist */
 	dulint	table_id)	/* in: table id */
 {
 	dict_table_t*	found;
 	ulint		id_fold;
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 	/* Look for the table id in the id hash table */
 	id_fold = ut_fold_dulint(table_id);
 	HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold,
 		    dict_table_t*, found,
 		    ut_dulint_cmp(found->id, table_id) == 0);
 	if (found != NULL) {
 		return(found);
 	}
 	/* TODO: should get the type information from MySQL */
 	return(dict_load_table_on_id(table_id));
 }
--- a/include/dict0load.h
+++ b/include/dict0load.h
@@ -0,0 +1,103 @@
 /******************************************************
 Loads to the memory cache database object definitions
 from dictionary tables
 (c) 1996 Innobase Oy
 Created 4/24/1996 Heikki Tuuri
 *******************************************************/
 #ifndef dict0load_h
 #define dict0load_h
 #include "univ.i"
 #include "dict0types.h"
 #include "ut0byte.h"
 #include "mem0mem.h"
 /************************************************************************
 Reconciles the tablespace objects with the InnoDB data dictionary.
 In a crash recovery all the tablespace objects have already been created;
 this function then compares the space id information in the InnoDB data
 dictionary to what was already read with
 fil_load_single_table_tablespaces().
 In a normal startup, the tablespace objects are created for every table
 in InnoDB's data dictionary, if the corresponding .ibd file exists.
 We also scan the biggest space id, and store it to fil_system. */
 UNIV_INTERN
 void
 dict_check_tablespaces_and_store_max_id(
 /*====================================*/
 	ibool	in_crash_recovery);	/* in: TRUE if we are doing a crash
 					recovery */
 /************************************************************************
 Finds the first table name in the given database. */
 UNIV_INTERN
 char*
 dict_get_first_table_name_in_db(
 /*============================*/
 				/* out, own: table name, NULL if no table
 				exists in the database; the caller owns
 				the returned string and must free its
 				memory! */
 	const char*	name);	/* in: database name, which must end
 				in '/' */
 /************************************************************************
 Loads a table definition and also all its index definitions, and also
 the cluster definition if the table is a member in a cluster. Also loads
 all foreign key constraints where the foreign key is in the table or where
 a foreign key references columns in this table. */
 UNIV_INTERN
 dict_table_t*
 dict_load_table(
 /*============*/
 				/* out: table, NULL if does not exist;
 				if the table is stored in an .ibd file,
 				but the file does not exist, then the
 				ibd_file_missing flag is set to TRUE
 				in the returned table object */
 	const char*	name);	/* in: table name in the
 				databasename/tablename format */
 /***************************************************************************
 Loads a table object based on the table id. */
 UNIV_INTERN
 dict_table_t*
 dict_load_table_on_id(
 /*==================*/
 				/* out: table; NULL if a table with
 				this id does not exist */
 	dulint	table_id);	/* in: table id */
 /************************************************************************
 This function is called when the database is booted.
 Loads the index definitions of a system table, except for the clustered
 index, which is added to the dictionary cache at booting before this
 function is called. */
 UNIV_INTERN
 void
 dict_load_sys_table(
 /*================*/
 	dict_table_t*	table);	/* in: system table */
 #ifndef UNIV_HOTBACKUP
 /***************************************************************************
 Loads foreign key constraints where the table is either the foreign key
 holder or where the table is referenced by a foreign key. Adds these
 constraints to the data dictionary. Note that we know that the dictionary
 cache already contains all constraints where the other relevant table is
 already in the dictionary cache.
 This declaration is compiled only when UNIV_HOTBACKUP is not defined. */
 UNIV_INTERN
 ulint
 dict_load_foreigns(
 /*===============*/
 					/* out: DB_SUCCESS or error code */
 	const char*	table_name,	/* in: table name */
 	ibool		check_charsets);/* in: TRUE=check charset
 					compatibility */
 #endif /* !UNIV_HOTBACKUP */
 /************************************************************************
 Prints to the standard output information on all tables found in the data
 dictionary system table. Takes no arguments and returns nothing. */
 UNIV_INTERN
 void
 dict_print(void);
 /*============*/
 #ifndef UNIV_NONINL
 #include "dict0load.ic"
 #endif
 #endif
--- a/include/dict0load.ic
+++ b/include/dict0load.ic
@@ -0,0 +1,9 @@
 /******************************************************
 Loads to the memory cache database object definitions
 from dictionary tables
 (c) 1996 Innobase Oy
 Created 4/24/1996 Heikki Tuuri
 *******************************************************/
--- a/include/dict0mem.h
+++ b/include/dict0mem.h
@@ -0,0 +1,475 @@
 /******************************************************
 Data dictionary memory object creation
 (c) 1996 Innobase Oy
 Created 1/8/1996 Heikki Tuuri
 *******************************************************/
 #ifndef dict0mem_h
 #define dict0mem_h
 #include "univ.i"
 #include "dict0types.h"
 #include "data0type.h"
 #include "data0data.h"
 #include "mem0mem.h"
 #include "rem0types.h"
 #include "btr0types.h"
 #include "ut0mem.h"
 #include "ut0lst.h"
 #include "ut0rnd.h"
 #include "ut0byte.h"
 #include "sync0rw.h"
 #include "lock0types.h"
 #include "hash0hash.h"
 #include "que0types.h"
 /* Type flags of an index: each flag is a distinct bit, and OR'ing of the
 flags is allowed to define a combination of types. The four flags fit in
 4 bits. */
 #define DICT_CLUSTERED	1	/* clustered index */
 #define DICT_UNIQUE	2	/* unique index */
 #define	DICT_UNIVERSAL	4	/* index which can contain records from any
 				other index */
 #define	DICT_IBUF 	8	/* insert buffer tree */
 /* Types for a table object. Only DICT_TABLE_ORDINARY is available;
 the cluster-related types below are compiled out. */
 #define DICT_TABLE_ORDINARY		1
 #if 0 /* not implemented */
 #define	DICT_TABLE_CLUSTER_MEMBER	2
 #define	DICT_TABLE_CLUSTER		3 /* this means that the table is
 					  really a cluster definition */
 #endif
 /* Table flags.  All unused bits must be 0.
 Bit layout, as given by the shifts below: bit 0 is DICT_TF_COMPACT,
 the compressed page size code starts at bit DICT_TF_ZSSIZE_SHIFT, and
 the file format number starts at bit DICT_TF_FORMAT_SHIFT. */
 #define DICT_TF_COMPACT			1	/* Compact page format.
 						This must be set for
 						new file formats
 						(later than
 						DICT_TF_FORMAT_51). */
 /* compressed page size (0=uncompressed, up to 15 compressed sizes) */
 #define DICT_TF_ZSSIZE_SHIFT		1
 #define DICT_TF_ZSSIZE_MASK		(15 << DICT_TF_ZSSIZE_SHIFT)
 #define DICT_TF_ZSSIZE_MAX (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 1)
 #define DICT_TF_FORMAT_SHIFT		5	/* file format */
 /* NOTE(review): this mask spans 7 bits starting at bit 5, which is wider
 than the DICT_TF_BITS flag bits defined below; confirm that the extra
 width is intended before relying on it. */
 #define DICT_TF_FORMAT_MASK		(127 << DICT_TF_FORMAT_SHIFT)
 #define DICT_TF_FORMAT_51		0	/* InnoDB/MySQL up to 5.1 */
 #define DICT_TF_FORMAT_ZIP		1	/* InnoDB plugin for 5.1:
 						compressed tables,
 						new BLOB treatment */
 #define DICT_TF_FORMAT_MAX		DICT_TF_FORMAT_ZIP
 #define DICT_TF_BITS			6	/* number of flag bits */
 /* Compile-time check: the bits left for the file format within
 DICT_TF_BITS must be able to represent DICT_TF_FORMAT_MAX. */
 #if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
 # error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX"
 #endif
 /**************************************************************************
 Creates a table memory object. */
 UNIV_INTERN
 dict_table_t*
 dict_mem_table_create(
 /*==================*/
 					/* out, own: table object; the
 					caller owns it */
 	const char*	name,		/* in: table name */
 	ulint		space,		/* in: space where the clustered index
 					of the table is placed; this parameter
 					is ignored if the table is made
 					a member of a cluster */
 	ulint		n_cols,		/* in: number of columns */
 	ulint		flags);		/* in: table flags (DICT_TF_...) */
 /********************************************************************
 Frees a table memory object. */
 UNIV_INTERN
 void
 dict_mem_table_free(
 /*================*/
 	dict_table_t*	table);		/* in: table object to free */
 /**************************************************************************
 Adds a column definition to a table memory object. */
 UNIV_INTERN
 void
 dict_mem_table_add_col(
 /*===================*/
 	dict_table_t*	table,	/* in: table to which the column is added */
 	mem_heap_t*	heap,	/* in: temporary memory heap, or NULL */
 	const char*	name,	/* in: column name, or NULL */
 	ulint		mtype,	/* in: main datatype */
 	ulint		prtype,	/* in: precise type */
 	ulint		len);	/* in: precision */
 /**************************************************************************
 Creates an index memory object. */
 UNIV_INTERN
 dict_index_t*
 dict_mem_index_create(
 /*==================*/
 					/* out, own: index object; the
 					caller owns it */
 	const char*	table_name,	/* in: table name */
 	const char*	index_name,	/* in: index name */
 	ulint		space,		/* in: space where the index tree is
 					placed, ignored if the index is of
 					the clustered type */
 	ulint		type,		/* in: DICT_UNIQUE,
 					DICT_CLUSTERED, ... ORed */
 	ulint		n_fields);	/* in: number of fields */
 /**************************************************************************
 Adds a field definition to an index. NOTE: this function does not take
 a copy of the column name if the field is a column. The memory occupied
 by the column name may be released only after publishing the index. */
 UNIV_INTERN
 void
 dict_mem_index_add_field(
 /*=====================*/
 	dict_index_t*	index,		/* in: index */
 	const char*	name,		/* in: column name; not copied,
 					see the NOTE above */
 	ulint		prefix_len);	/* in: 0 or the column prefix length
 					in a MySQL index like
 					INDEX (textcol(25)) */
 /**************************************************************************
 Frees an index memory object. */
 UNIV_INTERN
 void
 dict_mem_index_free(
 /*================*/
 	dict_index_t*	index);	/* in: index object to free */
 /**************************************************************************
 Creates and initializes a foreign constraint memory object. Takes no
 arguments. */
 UNIV_INTERN
 dict_foreign_t*
 dict_mem_foreign_create(void);
 /*=========================*/
 				/* out, own: foreign constraint struct;
 				the caller owns it */
 /* Data structure for a column in a table. All members are bit-fields,
 so that the whole description can be packed tightly. */
 struct dict_col_struct{
 	/*----------------------*/
 	/* The following are copied from dtype_t,
 	so that all bit-fields can be packed tightly. */
 	unsigned	mtype:8;	/* main data type */
 	unsigned	prtype:24;	/* precise type; MySQL data
 					type, charset code, flags to
 					indicate nullability,
 					signedness, whether this is a
 					binary string, whether this is
 					a true VARCHAR where MySQL
 					uses 2 bytes to store the length */
 	/* the remaining fields do not affect alphabetical ordering: */
 	unsigned	len:16;		/* length; for MySQL data this
 					is field->pack_length(),
 					except that for a >= 5.0.3
 					type true VARCHAR this is the
 					maximum byte length of the
 					string data (in addition to
 					the string, MySQL uses 1 or 2
 					bytes to store the string length) */
 	unsigned	mbminlen:2;	/* minimum length of a
 					character, in bytes */
 	unsigned	mbmaxlen:3;	/* maximum length of a
 					character, in bytes */
 	/*----------------------*/
 	/* End of definitions copied from dtype_t */
 	unsigned	ind:10;		/* table column position
 					(starting from 0); the 10-bit
 					width limits this to 0..1023 */
 	unsigned	ord_part:1;	/* nonzero if this column
 					appears in the ordering fields
 					of an index */
 };
 /* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
 indexed column length (or indexed prefix length). It is set to 3*256,
 so that one can create a column prefix index on 256 characters of a
 TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
 a character may take at most 3 bytes.
 The value itself comes from REC_MAX_INDEX_COL_LEN.
 This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
 files would be at risk! */
 #define DICT_MAX_INDEX_COL_LEN		REC_MAX_INDEX_COL_LEN
 /* Data structure for a field in an index: a reference to a table column
 plus the prefix and fixed-length information of the field. */
 struct dict_field_struct{
 	dict_col_t*	col;		/* pointer to the table column */
 	const char*	name;		/* name of the column */
 	unsigned	prefix_len:10;	/* 0 or the length of the column
 					prefix in bytes in a MySQL index of
 					type, e.g., INDEX (textcol(25));
 					must be smaller than
 					DICT_MAX_INDEX_COL_LEN; NOTE that
 					in the UTF-8 charset, MySQL sets this
 					to 3 * the prefix len in UTF-8 chars */
 	unsigned	fixed_len:10;	/* 0 or the fixed length of the
 					column if smaller than
 					DICT_MAX_INDEX_COL_LEN */
 };
 /* Data structure for an index.  Most fields will be
 initialized to 0, NULL or FALSE in dict_mem_index_create(). */
 struct dict_index_struct{
 	dulint		id;	/* id of the index */
 	mem_heap_t*	heap;	/* memory heap */
 	const char*	name;	/* index name */
 	const char*	table_name; /* table name */
 	dict_table_t*	table;	/* back pointer to table */
 	unsigned	space:32;
 				/* space where the index tree is placed */
 	unsigned	page:32;/* index tree root page number */
 	unsigned	type:4;	/* index type flags, ORed (DICT_CLUSTERED,
 				DICT_UNIQUE, DICT_UNIVERSAL, DICT_IBUF) */
 	unsigned	trx_id_offset:10;/* position of the trx id column
 				in a clustered index record, if the fields
 				before it are known to be of a fixed size,
 				0 otherwise */
 	unsigned	n_user_defined_cols:10;
 				/* number of columns the user defined to
 				be in the index: in the internal
 				representation we add more columns */
 	unsigned	n_uniq:10;/* number of fields from the beginning
 				which are enough to determine an index
 				entry uniquely */
 	unsigned	n_def:10;/* number of fields defined so far */
 	unsigned	n_fields:10;/* number of fields in the index */
 	unsigned	n_nullable:10;/* number of nullable fields */
 	unsigned	cached:1;/* TRUE if the index object is in the
 				dictionary cache */
 	unsigned	to_be_dropped:1;
 				/* TRUE if this index is marked to be
 				dropped in ha_innobase::prepare_drop_index(),
 				otherwise FALSE */
 	dict_field_t*	fields;	/* array of field descriptions */
 	UT_LIST_NODE_T(dict_index_t)
 			indexes;/* list node in the list of indexes of
 				the table */
 	btr_search_t*	search_info; /* info used in optimistic searches */
 	/*----------------------*/
 	/* Statistics */
 	ib_int64_t*	stat_n_diff_key_vals;
 				/* approximate number of different key values
 				for this index, for each n-column prefix
 				where n <= dict_get_n_unique(index); we
 				periodically calculate new estimates */
 	ulint		stat_index_size;
 				/* approximate index size in database pages */
 	ulint		stat_n_leaf_pages;
 				/* approximate number of leaf pages in the
 				index tree */
 	rw_lock_t	lock;	/* read-write lock protecting the upper levels
 				of the index tree */
 #ifdef ROW_MERGE_IS_INDEX_USABLE
 	dulint		trx_id; /* id of the transaction that created this
 				index, or ut_dulint_zero if the index existed
 				when InnoDB was started up */
 #endif /* ROW_MERGE_IS_INDEX_USABLE */
 #ifdef UNIV_DEBUG
 	ulint		magic_n;/* magic number, expected to equal
 				DICT_INDEX_MAGIC_N; present only in
 				UNIV_DEBUG builds */
 # define DICT_INDEX_MAGIC_N	76789786
 #endif
 };
 /* Data structure for a foreign key constraint; an example:
 FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D).  Most fields will be
 initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
 struct dict_foreign_struct{
 	mem_heap_t*	heap;		/* this object is allocated from
 					this memory heap */
 	char*		id;		/* id of the constraint as a
 					null-terminated string */
 	unsigned	n_fields:10;	/* number of indexes' first fields
 					for which the foreign key
 					constraint is defined: we allow the
 					indexes to contain more fields than
 					mentioned in the constraint, as long
 					as the first fields are as mentioned */
 	unsigned	type:6;		/* 0 or an ORed combination of the
 					DICT_FOREIGN_ON_... flags, e.g.,
 					DICT_FOREIGN_ON_DELETE_CASCADE
 					or DICT_FOREIGN_ON_DELETE_SET_NULL */
 	char*		foreign_table_name;/* foreign table name */
 	dict_table_t*	foreign_table;	/* table where the foreign key is */
 	const char**	foreign_col_names;/* names of the columns in the
 					foreign key */
 	char*		referenced_table_name;/* referenced table name */
 	dict_table_t*	referenced_table;/* table where the referenced key
 					is */
 	const char**	referenced_col_names;/* names of the referenced
 					columns in the referenced table */
 	dict_index_t*	foreign_index;	/* foreign index; we require that
 					both tables contain explicitly defined
 					indexes for the constraint: InnoDB
 					does not generate new indexes
 					implicitly */
 	dict_index_t*	referenced_index;/* referenced index */
 	UT_LIST_NODE_T(dict_foreign_t)
 			foreign_list;	/* list node for foreign keys of the
 					table */
 	UT_LIST_NODE_T(dict_foreign_t)
 			referenced_list;/* list node for referenced keys of the
 					table */
 };
 /* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
 a foreign key constraint is enforced, therefore RESTRICT just means no flag.
 Each flag is a distinct bit; the six flags together occupy 6 bits. */
 #define DICT_FOREIGN_ON_DELETE_CASCADE	1
 #define DICT_FOREIGN_ON_DELETE_SET_NULL	2
 #define DICT_FOREIGN_ON_UPDATE_CASCADE	4
 #define DICT_FOREIGN_ON_UPDATE_SET_NULL	8
 #define DICT_FOREIGN_ON_DELETE_NO_ACTION 16
 #define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32
 /* Data structure for a database table.  Most fields will be
 initialized to 0, NULL or FALSE in dict_mem_table_create(). */
 struct dict_table_struct{
 	dulint		id;	/* id of the table */
 	mem_heap_t*	heap;	/* memory heap */
 	const char*	name;	/* table name */
 	const char*	dir_path_of_temp_table;/* NULL or the directory path
 				where a TEMPORARY table that was explicitly
 				created by a user should be placed if
 				innodb_file_per_table is defined in my.cnf;
 				in Unix this is usually /tmp/..., in Windows
 				\temp\... */
 	unsigned	space:32;
 				/* space where the clustered index of the
 				table is placed */
 	unsigned	flags:DICT_TF_BITS;/* DICT_TF_COMPACT, ... */
 	unsigned	ibd_file_missing:1;
 				/* TRUE if this is in a single-table
 				tablespace and the .ibd file is missing; then
 				we must return in ha_innodb.cc an error if the
 				user tries to query such an orphaned table */
 	unsigned	tablespace_discarded:1;
 				/* this flag is set TRUE when the user
 				calls DISCARD TABLESPACE on this
 				table, and reset to FALSE in IMPORT
 				TABLESPACE */
 	unsigned	cached:1;/* TRUE if the table object has been added
 				to the dictionary cache */
 	unsigned	n_def:10;/* number of columns defined so far */
 	unsigned	n_cols:10;/* number of columns */
 	dict_col_t*	cols;	/* array of column descriptions */
 	const char*	col_names;
 				/* Column names packed in a character string
 				"name1\0name2\0...nameN\0".  Until
 				the string contains n_cols, it will be
 				allocated from a temporary heap.  The final
 				string will be allocated from table->heap. */
 	hash_node_t	name_hash; /* hash chain node of the table name
 				hash */
 	hash_node_t	id_hash; /* hash chain node of the table id hash */
 	UT_LIST_BASE_NODE_T(dict_index_t)
 			indexes; /* list of indexes of the table */
 	UT_LIST_BASE_NODE_T(dict_foreign_t)
 			foreign_list;/* list of foreign key constraints
 				in the table; these refer to columns
 				in other tables */
 	UT_LIST_BASE_NODE_T(dict_foreign_t)
 			referenced_list;/* list of foreign key constraints
 				which refer to this table */
 	UT_LIST_NODE_T(dict_table_t)
 			table_LRU; /* node of the LRU list of tables */
 	ulint		n_mysql_handles_opened;
 				/* count of how many handles MySQL has opened
 				to this table; dropping of the table is
 				NOT allowed until this count gets to zero;
 				MySQL does NOT itself check the number of
 				open handles at drop */
 	ulint		n_foreign_key_checks_running;
 				/* count of how many foreign key check
 				operations are currently being performed
 				on the table: we cannot drop the table while
 				there are foreign key checks running on
 				it! */
 	lock_t*		auto_inc_lock;/* a buffer for an auto-inc lock
 				for this table: we allocate the memory here
 				so that individual transactions can get it
 				and release it without a need to allocate
 				space from the lock heap of the trx:
 				otherwise the lock heap would grow rapidly
 				if we do a large insert from a select */
 	dulint		query_cache_inv_trx_id;
 				/* transactions whose trx id is smaller than
 				this number are not allowed to store to the
 				MySQL query cache or retrieve from it; when
 				a trx with undo logs commits, it sets this to
 				the value of the trx id counter for the
 				tables it had an IX lock on */
 	UT_LIST_BASE_NODE_T(lock_t)
 			locks; /* list of locks on the table */
 #ifdef UNIV_DEBUG
 	/*----------------------*/
 	ibool		does_not_fit_in_memory;
 				/* this field is used to specify in simulations
 				tables which are so big that disk should be
 				accessed: disk access is simulated by
 				putting the thread to sleep for a while;
 				NOTE that this flag is not stored to the data
 				dictionary on disk, and the database will
 				forget about value TRUE if it has to reload
 				the table definition from disk */
 #endif /* UNIV_DEBUG */
 	/*----------------------*/
 	unsigned	big_rows:1;
 				/* flag: TRUE if the maximum length of
 				a single row exceeds BIG_ROW_SIZE;
 				initialized in dict_table_add_to_cache() */
 	unsigned	stat_initialized:1; /* TRUE if statistics have
 				been calculated the first time
 				after database startup or table creation */
 	ib_int64_t	stat_n_rows;
 				/* approximate number of rows in the table;
 				we periodically calculate new estimates */
 	ulint		stat_clustered_index_size;
 				/* approximate clustered index size in
 				database pages */
 	ulint		stat_sum_of_other_index_sizes;
 				/* approximate total size of the other
 				indexes in database pages */
 	ulint		stat_modified_counter;
 				/* when a row is inserted, updated, or deleted,
 				we add 1 to this number; we calculate new
 				estimates for the stat_... values for the
 				table and the indexes at an interval of 2 GB
 				or when about 1 / 16 of table has been
 				modified; also when the estimate operation is
 				called for MySQL SHOW TABLE STATUS; the
 				counter is reset to zero at statistics
 				calculation; this counter is not protected by
 				any latch, because this is only used for
 				heuristics */
 	/*----------------------*/
 	mutex_t		autoinc_mutex;
 				/* mutex protecting the autoincrement
 				counter */
 	ibool		autoinc_inited;
 				/* TRUE if the autoinc counter has been
 				inited; MySQL gets the init value by executing
 				SELECT MAX(auto inc column) */
 	ib_uint64_t	autoinc;/* autoinc counter value to give to the
 				next inserted row */
 	ib_int64_t	autoinc_increment;
 				/* The increment step of the auto increment
 				column. Value must be greater than or equal
 				to 1 */
 	/*----------------------*/
 	ulong		n_waiting_or_granted_auto_inc_locks;
 				/* This counter is used to track the number
 				of granted and pending autoinc locks on this
 				table. This value is set after acquiring the
 				kernel mutex but we peek the contents to
 				determine whether other transactions have
 				acquired the AUTOINC lock or not. Of course
 				only one transaction can be granted the
 				lock but there can be multiple waiters. */
 #ifdef UNIV_DEBUG
 	ulint		magic_n;/* magic number, expected to equal
 				DICT_TABLE_MAGIC_N; present only in
 				UNIV_DEBUG builds */
 # define DICT_TABLE_MAGIC_N	76333786
 #endif /* UNIV_DEBUG */
 };
 #ifndef UNIV_NONINL
 #include "dict0mem.ic"
 #endif
 #endif
--- a/include/dict0mem.ic
+++ b/include/dict0mem.ic
@@ -0,0 +1,9 @@
 /**********************************************************************
 Data dictionary memory object creation
 (c) 1996 Innobase Oy
 Created 1/8/1996 Heikki Tuuri
 ***********************************************************************/
--- a/include/dict0types.h
+++ b/include/dict0types.h
@@ -0,0 +1,29 @@
 /******************************************************
 Data dictionary global types
 (c) 1996 Innobase Oy
 Created 1/8/1996 Heikki Tuuri
 *******************************************************/
 #ifndef dict0types_h
 #define dict0types_h
 #include "ut0list.h"
 /* Forward type names for the data dictionary structs, so that other
 headers can refer to them through pointers without the definitions. */
 typedef struct dict_sys_struct		dict_sys_t;
 typedef struct dict_col_struct		dict_col_t;
 typedef struct dict_field_struct	dict_field_t;
 typedef struct dict_index_struct	dict_index_t;
 typedef struct dict_table_struct	dict_table_t;
 typedef struct dict_foreign_struct	dict_foreign_t;
 /* A cluster object is a table object with the type field set to
 DICT_CLUSTERED */
 /* NOTE(review): DICT_CLUSTERED is defined in dict0mem.h as an index type
 flag, and the cluster table types there are compiled out with #if 0;
 confirm whether the statement above is still accurate. */
 typedef dict_table_t			dict_cluster_t;
 typedef struct ind_node_struct		ind_node_t;
 typedef struct tab_node_struct		tab_node_t;
 #endif
--- a/include/dyn0dyn.h
+++ b/include/dyn0dyn.h
@@ -0,0 +1,166 @@
 /******************************************************
 The dynamically allocated array
 (c) 1996 Innobase Oy
 Created 2/5/1996 Heikki Tuuri
 *******************************************************/
 #ifndef dyn0dyn_h
 #define dyn0dyn_h
 #include "univ.i"
 #include "ut0lst.h"
 #include "mem0mem.h"
 typedef struct dyn_block_struct		dyn_block_t;
 typedef dyn_block_t			dyn_array_t;
 /* This is the initial 'payload' size of a dynamic array;
 this must be > MLOG_BUF_MARGIN + 30! */
 #define	DYN_ARRAY_DATA_SIZE	512
 /*************************************************************************
 Initializes a dynamic array. */
 UNIV_INLINE
 dyn_array_t*
 dyn_array_create(
 /*=============*/
 				/* out: initialized dyn array */
 	dyn_array_t*	arr);	/* in: pointer to a memory buffer of
 				size sizeof(dyn_array_t) */
 /****************************************************************
 Frees a dynamic array. */
 UNIV_INLINE
 void
 dyn_array_free(
 /*===========*/
 	dyn_array_t*	arr);	/* in: dyn array */
 /*************************************************************************
 Makes room on top of a dyn array and returns a pointer to a buffer in it.
 After copying the elements, the caller must close the buffer using
 dyn_array_close. */
 UNIV_INLINE
 byte*
 dyn_array_open(
 /*===========*/
 				/* out: pointer to the buffer */
 	dyn_array_t*	arr,	/* in: dynamic array */
 	ulint		size);	/* in: size in bytes of the buffer; MUST be
 				smaller than DYN_ARRAY_DATA_SIZE! */
 /*************************************************************************
 Closes the buffer returned by dyn_array_open. */
 UNIV_INLINE
 void
 dyn_array_close(
 /*============*/
 	dyn_array_t*	arr,	/* in: dynamic array */
 	byte*		ptr);	/* in: buffer space from ptr up was not used */
 /*************************************************************************
 Makes room on top of a dyn array and returns a pointer to
 the added element. The caller must copy the element to
 the pointer returned. */
 UNIV_INLINE
 void*
 dyn_array_push(
 /*===========*/
 				/* out: pointer to the element */
 	dyn_array_t*	arr,	/* in: dynamic array */
 	ulint		size);	/* in: size in bytes of the element */
 /****************************************************************
 Returns pointer to an element in dyn array. */
 UNIV_INLINE
 void*
 dyn_array_get_element(
 /*==================*/
 				/* out: pointer to element */
 	dyn_array_t*	arr,	/* in: dyn array */
 	ulint		pos);	/* in: position of element as bytes
 				from array start */
 /****************************************************************
 Returns the size of stored data in a dyn array. */
 UNIV_INLINE
 ulint
 dyn_array_get_data_size(
 /*====================*/
 				/* out: data size in bytes */
 	dyn_array_t*	arr);	/* in: dyn array */
 /****************************************************************
 Gets the first block in a dyn array. */
 UNIV_INLINE
 dyn_block_t*
 dyn_array_get_first_block(
 /*======================*/
 	dyn_array_t*	arr);	/* in: dyn array */
 /****************************************************************
 Gets the last block in a dyn array. */
 UNIV_INLINE
 dyn_block_t*
 dyn_array_get_last_block(
 /*=====================*/
 	dyn_array_t*	arr);	/* in: dyn array */
 /************************************************************************
 Gets the next block in a dyn array. */
 UNIV_INLINE
 dyn_block_t*
 dyn_array_get_next_block(
 /*=====================*/
 				/* out: pointer to next, NULL if end of list */
 	dyn_array_t*	arr,	/* in: dyn array */
 	dyn_block_t*	block);	/* in: dyn array block */
 /************************************************************************
 Gets the number of used bytes in a dyn array block. */
 UNIV_INLINE
 ulint
 dyn_block_get_used(
 /*===============*/
 				/* out: number of bytes used */
 	dyn_block_t*	block);	/* in: dyn array block */
 /************************************************************************
 Gets pointer to the start of data in a dyn array block. */
 UNIV_INLINE
 byte*
 dyn_block_get_data(
 /*===============*/
 				/* out: pointer to data */
 	dyn_block_t*	block);	/* in: dyn array block */
 /************************************************************
 Pushes n bytes to a dyn array. */
 UNIV_INLINE
 void
 dyn_push_string(
 /*============*/
 	dyn_array_t*	arr,	/* in: dyn array */
 	const byte*	str,	/* in: string to write */
 	ulint		len);	/* in: string length */
 /*#################################################################*/
 /* NOTE! Do not use the fields of the struct directly: the definition
 appears here only for the compiler to know its size!
 One block of a dynamic array. dyn_array_t is a typedef of dyn_block_t, so
 the array handle is itself the first block; use the dyn_array_... and
 dyn_block_... accessors declared above instead of touching the fields. */
 struct dyn_block_struct{
 	mem_heap_t*	heap;	/* in the first block this is != NULL
 				if dynamic allocation has been needed;
 				dyn_array_free() releases it with
 				mem_heap_free() */
 	ulint		used;	/* number of data bytes used in this block;
 				read it through dyn_block_get_used(), which
 				masks DYN_BLOCK_FULL_FLAG out of the value */
 	byte		data[DYN_ARRAY_DATA_SIZE];
 				/* storage for array elements */
 	UT_LIST_BASE_NODE_T(dyn_block_t) base;
 				/* linear list of dyn blocks: this node is
 				used only in the first block */
 	UT_LIST_NODE_T(dyn_block_t) list;
 				/* linear list node: used in all blocks */
 #ifdef UNIV_DEBUG
 	ulint		buf_end;/* only in the debug version: if dyn array is
 				opened, this is the buffer end offset, else
 				this is 0; set by dyn_array_open() and reset
 				by dyn_array_close() on the first block */
 	ulint		magic_n;/* only in the debug version: set to
 				DYN_BLOCK_MAGIC_N by dyn_array_create() and
 				cleared by dyn_array_free() */
 #endif
 };
 #ifndef UNIV_NONINL
 #include "dyn0dyn.ic"
 #endif
 #endif
--- a/include/dyn0dyn.ic
+++ b/include/dyn0dyn.ic
@@ -0,0 +1,346 @@
 /******************************************************
 The dynamically allocated array
 (c) 1996 Innobase Oy
 Created 2/5/1996 Heikki Tuuri
 *******************************************************/
 /* Debug signature: dyn_array_create() stores it in magic_n and the array
 operations assert on it in UNIV_DEBUG builds */
 #define DYN_BLOCK_MAGIC_N	375767
 /* Bit that dyn_block_get_used() masks out of the 'used' field to obtain the
 byte count; dyn_array_create() checks at compile time that
 DYN_ARRAY_DATA_SIZE stays below it. NOTE(review): presumably marks a block
 that accepts no more data -- confirm where the flag is set (not in this
 file) */
 #define DYN_BLOCK_FULL_FLAG	0x1000000UL
 /****************************************************************
 Adds a new block to a dyn array. */
 UNIV_INTERN
 dyn_block_t*
 dyn_array_add_block(
 /*================*/
 				/* out: created block */
 	dyn_array_t*	arr);	/* in: dyn array */
 /****************************************************************
 Returns the head block of a dyn array. Because dyn_array_t is a typedef
 of dyn_block_t, the array handle itself is the first block. */
 UNIV_INLINE
 dyn_block_t*
 dyn_array_get_first_block(
 /*======================*/
 				/* out: the first block, i.e. arr itself */
 	dyn_array_t*	arr)	/* in: dyn array */
 {
 	dyn_block_t*	first_block = arr;
 	return(first_block);
 }
 /****************************************************************
 Returns the tail block of a dyn array. As long as no heap has been
 attached to the array (heap == NULL) the array handle is returned as the
 only block; otherwise the tail is read from the block list anchored in
 the first block. */
 UNIV_INLINE
 dyn_block_t*
 dyn_array_get_last_block(
 /*=====================*/
 				/* out: the last block */
 	dyn_array_t*	arr)	/* in: dyn array */
 {
 	dyn_block_t*	last_block;
 	if (arr->heap != NULL) {
 		last_block = UT_LIST_GET_LAST(arr->base);
 	} else {
 		last_block = arr;
 	}
 	return(last_block);
 }
 /************************************************************************
 Steps from one block of a dyn array to its successor. */
 UNIV_INLINE
 dyn_block_t*
 dyn_array_get_next_block(
 /*=====================*/
 				/* out: pointer to next, NULL if end of list */
 	dyn_array_t*	arr,	/* in: dyn array */
 	dyn_block_t*	block)	/* in: dyn array block */
 {
 	ut_ad(arr && block);
 	if (arr->heap != NULL) {
 		/* A heap exists: follow the block list */
 		return(UT_LIST_GET_NEXT(list, block));
 	}
 	/* No heap: the only valid block is the array itself, and it
 	has no successor */
 	ut_ad(arr == block);
 	return(NULL);
 }
 /************************************************************************
 Reads the byte count of a dyn array block: the 'used' field with the
 DYN_BLOCK_FULL_FLAG bit cleared. */
 UNIV_INLINE
 ulint
 dyn_block_get_used(
 /*===============*/
 				/* out: number of bytes used */
 	dyn_block_t*	block)	/* in: dyn array block */
 {
 	ulint	n_used;
 	ut_ad(block);
 	n_used = block->used & ~DYN_BLOCK_FULL_FLAG;
 	return(n_used);
 }
 /************************************************************************
 Returns the address of the first payload byte of a dyn array block. */
 UNIV_INLINE
 byte*
 dyn_block_get_data(
 /*===============*/
 				/* out: pointer to data */
 	dyn_block_t*	block)	/* in: dyn array block */
 {
 	ut_ad(block);
 	return(&block->data[0]);
 }
 /*************************************************************************
 Sets up a dyn array in caller-provided memory: no heap attached and zero
 bytes used. In debug builds the open-buffer marker is cleared and the
 magic number is stamped. */
 UNIV_INLINE
 dyn_array_t*
 dyn_array_create(
 /*=============*/
 				/* out: initialized dyn array (same as arr) */
 	dyn_array_t*	arr)	/* in: pointer to a memory buffer of
 				size sizeof(dyn_array_t) */
 {
 	/* Compile-time guard: the payload size must stay below the flag
 	bit that dyn_block_get_used() masks out of the 'used' field */
 #if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG
 # error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG"
 #endif
 	ut_ad(arr);
 	arr->used = 0;
 	arr->heap = NULL;
 #ifdef UNIV_DEBUG
 	arr->magic_n = DYN_BLOCK_MAGIC_N;
 	arr->buf_end = 0;
 #endif
 	return(arr);
 }
 /****************************************************************
 Releases a dyn array: frees the attached memory heap, if there is one.
 The array struct itself is not freed here. In debug builds the magic
 number is cleared afterwards. */
 UNIV_INLINE
 void
 dyn_array_free(
 /*===========*/
 	dyn_array_t*	arr)	/* in: dyn array */
 {
 	mem_heap_t*	heap = arr->heap;
 	if (heap != NULL) {
 		mem_heap_free(heap);
 	}
 #ifdef UNIV_DEBUG
 	arr->magic_n = 0;
 #endif
 }
 /*************************************************************************
 Reserves size bytes at the end of a dyn array and returns where they start.
 The space is marked as used immediately; the caller must copy the element
 to the returned address. */
 UNIV_INLINE
 void*
 dyn_array_push(
 /*===========*/
 				/* out: pointer to the element */
 	dyn_array_t*	arr,	/* in: dynamic array */
 	ulint		size)	/* in: size in bytes of the element; must be
 				nonzero and at most DYN_ARRAY_DATA_SIZE
 				(asserted in debug builds) */
 {
 	dyn_block_t*	target;
 	ulint		offset;
 	ut_ad(arr);
 	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
 	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
 	ut_ad(size);
 	/* First candidate: the first block */
 	target = arr;
 	offset = target->used;
 	if (offset + size > DYN_ARRAY_DATA_SIZE) {
 		/* Second candidate: the last array block */
 		target = dyn_array_get_last_block(arr);
 		offset = target->used;
 	}
 	if (offset + size > DYN_ARRAY_DATA_SIZE) {
 		/* Neither has room: extend the array by one block */
 		target = dyn_array_add_block(arr);
 		offset = target->used;
 	}
 	target->used = offset + size;
 	ut_ad(target->used <= DYN_ARRAY_DATA_SIZE);
 	return(target->data + offset);
 }
 /*************************************************************************
 Finds a block with room for size more bytes, adding a block if needed, and
 returns a pointer to the free space in it. The used count of the block is
 NOT advanced here: after copying the elements, the caller must close the
 buffer using dyn_array_close, which records how much was written. */
 UNIV_INLINE
 byte*
 dyn_array_open(
 /*===========*/
 				/* out: pointer to the buffer */
 	dyn_array_t*	arr,	/* in: dynamic array */
 	ulint		size)	/* in: size in bytes of the buffer; MUST be
 				smaller than DYN_ARRAY_DATA_SIZE! */
 {
 	dyn_block_t*	target;
 	ulint		offset;
 	ut_ad(arr);
 	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
 	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
 	ut_ad(size);
 	/* First candidate: the first block */
 	target = arr;
 	offset = target->used;
 	if (offset + size > DYN_ARRAY_DATA_SIZE) {
 		/* Second candidate: the last array block */
 		target = dyn_array_get_last_block(arr);
 		offset = target->used;
 	}
 	if (offset + size > DYN_ARRAY_DATA_SIZE) {
 		/* Neither has room: extend the array by one block. The
 		size limit is re-checked here in all builds, not only in
 		debug ones */
 		target = dyn_array_add_block(arr);
 		offset = target->used;
 		ut_a(size <= DYN_ARRAY_DATA_SIZE);
 	}
 	ut_ad(target->used <= DYN_ARRAY_DATA_SIZE);
 #ifdef UNIV_DEBUG
 	/* The array must not be open already; remember the end offset of
 	the buffer handed out so that dyn_array_close can check it */
 	ut_ad(arr->buf_end == 0);
 	arr->buf_end = offset + size;
 #endif
 	return(target->data + offset);
 }
 /*************************************************************************
 Ends a dyn_array_open session: the used count of the last block is set to
 the offset of ptr within that block's data. In debug builds the open-buffer
 marker is reset to 0. */
 UNIV_INLINE
 void
 dyn_array_close(
 /*============*/
 	dyn_array_t*	arr,	/* in: dynamic array */
 	byte*		ptr)	/* in: buffer space from ptr up was not used */
 {
 	dyn_block_t*	last;
 	ut_ad(arr);
 	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
 	last = dyn_array_get_last_block(arr);
 	/* ptr may not lie beyond the end of the buffer that was opened */
 	ut_ad(arr->buf_end + last->data >= ptr);
 	last->used = ptr - last->data;
 	ut_ad(last->used <= DYN_ARRAY_DATA_SIZE);
 #ifdef UNIV_DEBUG
 	arr->buf_end = 0;
 #endif
 }
 /****************************************************************
 Translates a byte position counted from the start of a dyn array into a
 pointer. When the array has a heap, blocks are walked from the first one,
 subtracting each block's used byte count until the position falls inside
 a block; without a heap the position is taken directly in the first
 block. */
 UNIV_INLINE
 void*
 dyn_array_get_element(
 /*==================*/
 				/* out: pointer to element */
 	dyn_array_t*	arr,	/* in: dyn array */
 	ulint		pos)	/* in: position of element as bytes
 				from array start */
 {
 	dyn_block_t*	cur;
 	ulint		n_used;
 	ulint		rest	= pos;
 	ut_ad(arr);
 	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
 	cur = dyn_array_get_first_block(arr);
 	if (arr->heap != NULL) {
 		for (n_used = dyn_block_get_used(cur);
 		     rest >= n_used;
 		     n_used = dyn_block_get_used(cur)) {
 			/* Position lies past this block: skip it */
 			rest -= n_used;
 			cur = UT_LIST_GET_NEXT(list, cur);
 			ut_ad(cur);
 		}
 	}
 	ut_ad(cur);
 	ut_ad(dyn_block_get_used(cur) >= rest);
 	return(cur->data + rest);
 }
 /****************************************************************
 Computes how many bytes are stored in a dyn array. Without a heap this is
 the 'used' field of the array itself; otherwise the used byte counts of
 all blocks are added up. */
 UNIV_INLINE
 ulint
 dyn_array_get_data_size(
 /*====================*/
 				/* out: data size in bytes */
 	dyn_array_t*	arr)	/* in: dyn array */
 {
 	dyn_block_t*	cur;
 	ulint		total	= 0;
 	ut_ad(arr);
 	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
 	if (arr->heap == NULL) {
 		return(arr->used);
 	}
 	/* Walk every block, starting from the first one */
 	for (cur = dyn_array_get_first_block(arr);
 	     cur != NULL;
 	     cur = dyn_array_get_next_block(arr, cur)) {
 		total += dyn_block_get_used(cur);
 	}
 	return(total);
 }
 /************************************************************
 Appends len bytes to a dyn array. The data is pushed in pieces of at most
 DYN_ARRAY_DATA_SIZE bytes, each copied to the space returned by
 dyn_array_push. */
 UNIV_INLINE
 void
 dyn_push_string(
 /*============*/
 	dyn_array_t*	arr,	/* in: dyn array */
 	const byte*	str,	/* in: string to write */
 	ulint		len)	/* in: string length */
 {
 	ulint	chunk;
 	void*	dest;
 	for (; len > 0; str += chunk, len -= chunk) {
 		/* Never push more than one block's worth at a time */
 		chunk = (len > DYN_ARRAY_DATA_SIZE)
 			? DYN_ARRAY_DATA_SIZE : len;
 		dest = dyn_array_push(arr, chunk);
 		memcpy(dest, str, chunk);
 	}
 }
--- a/include/eval0eval.h
+++ b/include/eval0eval.h
@@ -0,0 +1,97 @@
 /******************************************************
 SQL evaluator: evaluates simple data structures, like expressions, in
 a query graph
 (c) 1997 Innobase Oy
 Created 12/29/1997 Heikki Tuuri
 *******************************************************/
 #ifndef eval0eval_h
 #define eval0eval_h
 #include "univ.i"
 #include "que0types.h"
 #include "pars0sym.h"
 #include "pars0pars.h"
 /*********************************************************************
 Free the buffer from global dynamic memory for a value of a que_node,
 if it has been allocated by eval_node_alloc_val_buf(). The freeing for
 pushed column values is done in sel_col_prefetch_buf_free. */
 UNIV_INTERN
 void
 eval_node_free_val_buf(
 /*===================*/
 	que_node_t*	node);	/* in: query graph node */
 /*********************************************************************
 Evaluates a symbol table symbol. */
 UNIV_INLINE
 void
 eval_sym(
 /*=====*/
 	sym_node_t*	sym_node);	/* in: symbol table node */
 /*********************************************************************
 Evaluates an expression. */
 UNIV_INLINE
 void
 eval_exp(
 /*=====*/
 	que_node_t*	exp_node);	/* in: expression */
 /*********************************************************************
 Sets an integer value as the value of an expression node. */
 UNIV_INLINE
 void
 eval_node_set_int_val(
 /*==================*/
 	que_node_t*	node,	/* in: expression node */
 	lint		val);	/* in: value to set */
 /*********************************************************************
 Gets an integer value from an expression node. */
 UNIV_INLINE
 lint
 eval_node_get_int_val(
 /*==================*/
 				/* out: integer value */
 	que_node_t*	node);	/* in: expression node */
 /*********************************************************************
 Copies a binary string value as the value of a query graph node. Allocates a
 new buffer if necessary. */
 UNIV_INLINE
 void
 eval_node_copy_and_alloc_val(
 /*=========================*/
 	que_node_t*	node,	/* in: query graph node */
 	const byte*	str,	/* in: binary string */
 	ulint		len);	/* in: string length or UNIV_SQL_NULL */
 /*********************************************************************
 Copies a query node value to another node. */
 UNIV_INLINE
 void
 eval_node_copy_val(
 /*===============*/
 	que_node_t*	node1,	/* in: node to copy to */
 	que_node_t*	node2);	/* in: node to copy from */
 /*********************************************************************
 Gets an ibool value from a query node. */
 UNIV_INLINE
 ibool
 eval_node_get_ibool_val(
 /*====================*/
 				/* out: iboolean value */
 	que_node_t*	node);	/* in: query graph node */
 /*********************************************************************
 Evaluates a comparison node. */
 UNIV_INTERN
 ibool
 eval_cmp(
 /*=====*/
 					/* out: the result of the comparison */
 	func_node_t*	cmp_node);	/* in: comparison node */
 #ifndef UNIV_NONINL
 #include "eval0eval.ic"
 #endif
 #endif
--- a/include/eval0eval.ic
+++ b/include/eval0eval.ic
@@ -0,0 +1,234 @@
 /******************************************************
 SQL evaluator: evaluates simple data structures, like expressions, in
 a query graph
 (c) 1997 Innobase Oy
 Created 12/29/1997 Heikki Tuuri
 *******************************************************/
 #include "que0que.h"
 #include "rem0cmp.h"
 #include "pars0grm.h"
 /*********************************************************************
 Evaluates a function node. */
 UNIV_INTERN
 void
 eval_func(
 /*======*/
 	func_node_t*	func_node);	/* in: function node */
 /*********************************************************************
 Allocate a buffer from global dynamic memory for a value of a que_node.
 NOTE that this memory must be explicitly freed when the query graph is
 freed. If the node already has allocated buffer, that buffer is freed
 here. NOTE that this is the only function where dynamic memory should be
 allocated for a query node val field. */
 UNIV_INTERN
 byte*
 eval_node_alloc_val_buf(
 /*====================*/
 				/* out: pointer to allocated buffer */
 	que_node_t*	node,	/* in: query graph node; sets the val field
 				data field to point to the new buffer, and
 				len field equal to size */
 	ulint		size);	/* in: buffer size */
 /*********************************************************************
 Makes sure the value of a query graph node has a buffer of at least size
 bytes. The length of the value field is set to size first; the current
 buffer is kept when it exists and is large enough, otherwise a buffer is
 obtained from eval_node_alloc_val_buf. */
 UNIV_INLINE
 byte*
 eval_node_ensure_val_buf(
 /*=====================*/
 				/* out: pointer to buffer */
 	que_node_t*	node,	/* in: query graph node; sets the val field
 				data field to point to the new buffer, and
 				len field equal to size */
 	ulint		size)	/* in: buffer size */
 {
 	dfield_t*	val;
 	byte*		buf;
 	val = que_node_get_val(node);
 	dfield_set_len(val, size);
 	buf = dfield_get_data(val);
 	if (buf && que_node_get_val_buf_size(node) >= size) {
 		/* The existing buffer can be reused */
 		return(buf);
 	}
 	return(eval_node_alloc_val_buf(node, size));
 }
 /*********************************************************************
 Evaluates a symbol table symbol. Only symbols with an indirection need any
 work: the data of the node they point to is copied into the value of the
 symbol node. */
 UNIV_INLINE
 void
 eval_sym(
 /*=====*/
 	sym_node_t*	sym_node)	/* in: symbol table node */
 {
 	ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
 	if (!sym_node->indirection) {
 		return;
 	}
 	/* The symbol table node is an alias for a variable or a
 	column */
 	dfield_copy_data(que_node_get_val(sym_node),
 			 que_node_get_val(sym_node->indirection));
 }
 /*********************************************************************
 Evaluates an expression: nodes of type QUE_NODE_SYMBOL are passed to
 eval_sym, every other node to eval_func. */
 UNIV_INLINE
 void
 eval_exp(
 /*=====*/
 	que_node_t*	exp_node)	/* in: expression */
 {
 	if (que_node_get_type(exp_node) != QUE_NODE_SYMBOL) {
 		eval_func(exp_node);
 	} else {
 		eval_sym((sym_node_t*)exp_node);
 	}
 }
 /*********************************************************************
 Stores an integer as the 4-byte value of an expression node. A 4-byte
 buffer is obtained from eval_node_alloc_val_buf if the node has no data
 buffer yet. */
 UNIV_INLINE
 void
 eval_node_set_int_val(
 /*==================*/
 	que_node_t*	node,	/* in: expression node */
 	lint		val)	/* in: value to set */
 {
 	dfield_t*	val_field;
 	byte*		buf;
 	val_field = que_node_get_val(node);
 	buf = dfield_get_data(val_field);
 	if (!buf) {
 		buf = eval_node_alloc_val_buf(node, 4);
 	}
 	/* Integer values always occupy exactly 4 bytes */
 	ut_ad(dfield_get_len(val_field) == 4);
 	mach_write_to_4(buf, (ulint)val);
 }
 /*********************************************************************
 Reads the 4-byte integer value of an expression node; the value must not
 be SQL NULL (its length is asserted to be 4 in debug builds). */
 UNIV_INLINE
 lint
 eval_node_get_int_val(
 /*==================*/
 				/* out: integer value */
 	que_node_t*	node)	/* in: expression node */
 {
 	dfield_t*	val_field;
 	ulint		raw;
 	val_field = que_node_get_val(node);
 	ut_ad(dfield_get_len(val_field) == 4);
 	raw = mach_read_from_4(dfield_get_data(val_field));
 	/* The stored value is cast to int before it is returned
 	as lint */
 	return((int)raw);
 }
 /*********************************************************************
 Reads an ibool from the first byte of the value buffer of a query node.
 The node must already have a data buffer (asserted in debug builds). */
 UNIV_INLINE
 ibool
 eval_node_get_ibool_val(
 /*====================*/
 				/* out: ibool value */
 	que_node_t*	node)	/* in: query graph node */
 {
 	byte*	buf;
 	buf = dfield_get_data(que_node_get_val(node));
 	ut_ad(buf != NULL);
 	return(mach_read_from_1(buf));
 }
 /*********************************************************************
 Stores an ibool as the 1-byte value of a function node. A 1-byte buffer is
 obtained from eval_node_alloc_val_buf if the node has no data buffer
 yet. */
 UNIV_INLINE
 void
 eval_node_set_ibool_val(
 /*====================*/
 	func_node_t*	func_node,	/* in: function node */
 	ibool		val)		/* in: value to set */
 {
 	dfield_t*	val_field;
 	byte*		buf;
 	val_field = que_node_get_val(func_node);
 	buf = dfield_get_data(val_field);
 	if (!buf) {
 		/* No buffer yet: get one byte to hold the value */
 		buf = eval_node_alloc_val_buf(func_node, 1);
 	}
 	ut_ad(dfield_get_len(val_field) == 1);
 	mach_write_to_1(buf, val);
 }
 /*********************************************************************
 Sets the value of a query graph node from a binary string. For SQL NULL
 only the length of the value field is set and nothing is copied; otherwise
 a buffer of len bytes is ensured with eval_node_ensure_val_buf and the
 string is copied into it. */
 UNIV_INLINE
 void
 eval_node_copy_and_alloc_val(
 /*=========================*/
 	que_node_t*	node,	/* in: query graph node */
 	const byte*	str,	/* in: binary string */
 	ulint		len)	/* in: string length or UNIV_SQL_NULL */
 {
 	byte*		buf;
 	if (len != UNIV_SQL_NULL) {
 		buf = eval_node_ensure_val_buf(node, len);
 		ut_memcpy(buf, str, len);
 	} else {
 		/* SQL NULL: only the length marker is stored */
 		dfield_set_len(que_node_get_val(node), len);
 	}
 }
 /*********************************************************************
 Copies the value of node2 to node1: the data pointer and length of the
 source value are passed on to eval_node_copy_and_alloc_val. */
 UNIV_INLINE
 void
 eval_node_copy_val(
 /*===============*/
 	que_node_t*	node1,	/* in: node to copy to */
 	que_node_t*	node2)	/* in: node to copy from */
 {
 	dfield_t*	src;
 	const byte*	src_data;
 	ulint		src_len;
 	src = que_node_get_val(node2);
 	src_data = dfield_get_data(src);
 	src_len = dfield_get_len(src);
 	eval_node_copy_and_alloc_val(node1, src_data, src_len);
 }
--- a/include/eval0proc.h
+++ b/include/eval0proc.h
@@ -0,0 +1,87 @@
 /******************************************************
 Executes SQL stored procedures and their control structures
 (c) 1998 Innobase Oy
 Created 1/20/1998 Heikki Tuuri
 *******************************************************/
 #ifndef eval0proc_h
 #define eval0proc_h
 #include "univ.i"
 #include "que0types.h"
 #include "pars0sym.h"
 #include "pars0pars.h"
 /**************************************************************************
 Performs an execution step of a procedure node. */
 UNIV_INLINE
 que_thr_t*
 proc_step(
 /*======*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr);	/* in: query thread */
 /**************************************************************************
 Performs an execution step of an if-statement node. */
 UNIV_INTERN
 que_thr_t*
 if_step(
 /*====*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr);	/* in: query thread */
 /**************************************************************************
 Performs an execution step of a while-statement node. */
 UNIV_INTERN
 que_thr_t*
 while_step(
 /*=======*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr);	/* in: query thread */
 /**************************************************************************
 Performs an execution step of a for-loop node. */
 UNIV_INTERN
 que_thr_t*
 for_step(
 /*=====*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr);	/* in: query thread */
 /**************************************************************************
 Performs an execution step of an assignment statement node. */
 UNIV_INTERN
 que_thr_t*
 assign_step(
 /*========*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr);	/* in: query thread */
 /**************************************************************************
 Performs an execution step of a procedure call node. */
 UNIV_INLINE
 que_thr_t*
 proc_eval_step(
 /*===========*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr);	/* in: query thread */
 /**************************************************************************
 Performs an execution step of an exit statement node. */
 UNIV_INTERN
 que_thr_t*
 exit_step(
 /*======*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr);	/* in: query thread */
 /**************************************************************************
 Performs an execution step of a return-statement node. */
 UNIV_INTERN
 que_thr_t*
 return_step(
 /*========*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr);	/* in: query thread */
 #ifndef UNIV_NONINL
 #include "eval0proc.ic"
 #endif
 #endif
--- a/include/eval0proc.ic
+++ b/include/eval0proc.ic
@@ -0,0 +1,71 @@
 /******************************************************
 Executes SQL stored procedures and their control structures
 (c) 1998 Innobase Oy
 Created 1/20/1998 Heikki Tuuri
 *******************************************************/
 #include "pars0pars.h"
 #include "que0que.h"
 #include "eval0eval.h"
 /**************************************************************************
 Performs an execution step of a procedure node. When control arrives from
 the parent node, execution continues at the first statement of the
 statement list; otherwise, or when that list is empty, control goes back
 to the parent node. */
 UNIV_INLINE
 que_thr_t*
 proc_step(
 /*======*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr)	/* in: query thread */
 {
 	proc_node_t*	node;
 	que_node_t*	next_node;
 	ut_ad(thr);
 	node = thr->run_node;
 	ut_ad(que_node_get_type(node) == QUE_NODE_PROC);
 	if (thr->prev_node != que_node_get_parent(node)) {
 		/* Coming back from a statement: it must have been the
 		last one in the list, so there is nothing more to run */
 		ut_ad(que_node_get_next(thr->prev_node) == NULL);
 		next_node = NULL;
 	} else {
 		/* Entered from the parent: begin with the first statement
 		in the statement list */
 		next_node = node->stat_list;
 	}
 	if (next_node == NULL) {
 		next_node = que_node_get_parent(node);
 	}
 	thr->run_node = next_node;
 	return(thr);
 }
 /**************************************************************************
 Performs an execution step of a procedure call node: the function node
 currently being run is evaluated with eval_exp, after which control goes
 to its parent node. */
 UNIV_INLINE
 que_thr_t*
 proc_eval_step(
 /*===========*/
 				/* out: query thread to run next or NULL */
 	que_thr_t*	thr)	/* in: query thread */
 {
 	func_node_t*	call_node;
 	ut_ad(thr);
 	call_node = thr->run_node;
 	ut_ad(que_node_get_type(call_node) == QUE_NODE_FUNC);
 	/* Run the call itself */
 	eval_exp(call_node);
 	/* Hand control to the parent node */
 	thr->run_node = que_node_get_parent(call_node);
 	return(thr);
 }
--- a/include/fil0fil.h
+++ b/include/fil0fil.h
@@ -0,0 +1,702 @@
 /******************************************************
 The low-level file system
 (c) 1995 Innobase Oy
 Created 10/25/1995 Heikki Tuuri
 *******************************************************/
 #ifndef fil0fil_h
 #define fil0fil_h
 #include "univ.i"
 #include "sync0rw.h"
 #include "dict0types.h"
 #include "ibuf0types.h"
 #include "ut0byte.h"
 #include "os0file.h"
 /* When mysqld is run, the default directory "." is the mysqld datadir, but in
 ibbackup we must set it explicitly; the path must NOT contain the trailing
 '/' or '\' */
 extern const char*	fil_path_to_mysql_datadir;
 /* Initial size of a single-table tablespace in pages */
 #define FIL_IBD_FILE_INITIAL_SIZE	4
 /* 'null' (undefined) page offset in the context of file spaces */
 #define	FIL_NULL	ULINT32_UNDEFINED
 /* Space address data type; this is intended to be used when
 addresses accurate to a byte are stored in file pages. If the page part
 of the address is FIL_NULL, the address is considered undefined. */
 typedef	byte	fil_faddr_t;	/* 'type' definition in C: an address
 				stored in a file page is a string of bytes */
 #define FIL_ADDR_PAGE	0	/* first in address is the page offset */
 #define	FIL_ADDR_BYTE	4	/* then comes 2-byte byte offset within page*/
 #define	FIL_ADDR_SIZE	6	/* address size is 6 bytes */
 /* A struct for storing a space address FIL_ADDR, when it is used
 in C program data structures. The same two components appear in the
 byte-string form fil_faddr_t that is stored in file pages, where the
 page offset is at FIL_ADDR_PAGE and the byte offset at FIL_ADDR_BYTE.
 NOTE(review): presumably an address whose page is FIL_NULL is treated
 as undefined here as well (see fil_addr_null, fil_addr_is_null());
 confirm against fil0fil.c. */
 typedef struct fil_addr_struct	fil_addr_t;
 struct fil_addr_struct{
 	ulint	page;		/* page number within a space */
 	ulint	boffset;	/* byte offset within the page */
 };
 /* Null file address */
 extern fil_addr_t	fil_addr_null;
 /* The byte offsets on a file page for various variables */
 #define FIL_PAGE_SPACE_OR_CHKSUM 0	/* in < MySQL-4.0.14 space id the
 					page belongs to (== 0) but in later
 					versions the 'new' checksum of the
 					page */
 #define FIL_PAGE_OFFSET		4	/* page offset inside space */
 #define FIL_PAGE_PREV		8	/* if there is a 'natural' predecessor
 					of the page, its offset.
 					Otherwise FIL_NULL.
 					This field is not set on BLOB pages,
 					which are stored as a singly-linked
 					list.  See also FIL_PAGE_NEXT. */
 #define FIL_PAGE_NEXT		12	/* if there is a 'natural' successor
 					of the page, its offset.
 					Otherwise FIL_NULL.
 					B-tree index pages
 					(FIL_PAGE_TYPE contains FIL_PAGE_INDEX)
 					on the same PAGE_LEVEL are maintained
 					as a doubly linked list via
 					FIL_PAGE_PREV and FIL_PAGE_NEXT
 					in the collation order of the
 					smallest user record on each page. */
 #define FIL_PAGE_LSN		16	/* lsn of the end of the newest
 					modification log record to the page */
 #define	FIL_PAGE_TYPE		24	/* file page type: FIL_PAGE_INDEX,...,
 					2 bytes.
 					The contents of this field can only
 					be trusted in the following case:
 					if the page is an uncompressed
 					B-tree index page, then it is
 					guaranteed that the value is
 					FIL_PAGE_INDEX.
 					The opposite does not hold.
 					In tablespaces created by
 					MySQL/InnoDB 5.1.7 or later, the
 					contents of this field is valid
 					for all uncompressed pages. */
 #define FIL_PAGE_FILE_FLUSH_LSN	26	/* this is only defined for the
 					first page in a data file: the file
 					has been flushed to disk at least up
 					to this lsn */
 #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34 /* starting from 4.1.x this
 					contains the space id of the page */
 #define FIL_PAGE_DATA		38	/* start of the data on the page */
 /* File page trailer */
 #define FIL_PAGE_END_LSN_OLD_CHKSUM 8	/* the low 4 bytes of this are used
 					to store the page checksum, the
 					last 4 bytes should be identical
 					to the last 4 bytes of FIL_PAGE_LSN */
 #define FIL_PAGE_DATA_END	8
 /* File page types (values of FIL_PAGE_TYPE) */
 #define FIL_PAGE_INDEX		17855	/* B-tree node */
 #define FIL_PAGE_UNDO_LOG	2	/* Undo log page */
 #define FIL_PAGE_INODE		3	/* Index node */
 #define FIL_PAGE_IBUF_FREE_LIST	4	/* Insert buffer free list */
 /* File page types introduced in MySQL/InnoDB 5.1.7 */
 #define FIL_PAGE_TYPE_ALLOCATED	0	/* Freshly allocated page */
 #define FIL_PAGE_IBUF_BITMAP	5	/* Insert buffer bitmap */
 #define FIL_PAGE_TYPE_SYS	6	/* System page */
 #define FIL_PAGE_TYPE_TRX_SYS	7	/* Transaction system data */
 #define FIL_PAGE_TYPE_FSP_HDR	8	/* File space header */
 #define FIL_PAGE_TYPE_XDES	9	/* Extent descriptor page */
 #define FIL_PAGE_TYPE_BLOB	10	/* Uncompressed BLOB page */
 #define FIL_PAGE_TYPE_ZBLOB	11	/* First compressed BLOB page */
 #define FIL_PAGE_TYPE_ZBLOB2	12	/* Subsequent compressed BLOB page */
 /* Space types */
 #define FIL_TABLESPACE		501
 #define FIL_LOG			502
 extern ulint	fil_n_log_flushes;
 extern ulint	fil_n_pending_log_flushes;
 extern ulint	fil_n_pending_tablespace_flushes;
 /***********************************************************************
 Returns the version number of a tablespace, -1 if not found. */
 UNIV_INTERN
 ib_int64_t
 fil_space_get_version(
 /*==================*/
 			/* out: version number, -1 if the tablespace does not
 			exist in the memory cache */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Returns the latch of a file space. */
 UNIV_INTERN
 rw_lock_t*
 fil_space_get_latch(
 /*================*/
 			/* out: latch protecting storage allocation */
 	ulint	id,	/* in: space id */
 	ulint*	zip_size);/* out: compressed page size, or
 			0 for uncompressed tablespaces */
 /***********************************************************************
 Returns the type of a file space. */
 UNIV_INTERN
 ulint
 fil_space_get_type(
 /*===============*/
 			/* out: FIL_TABLESPACE or FIL_LOG */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Returns the ibuf data of a file space. */
 UNIV_INTERN
 ibuf_data_t*
 fil_space_get_ibuf_data(
 /*====================*/
 			/* out: ibuf data for this space */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Appends a new file to the chain of files of a space. File must be closed. */
 UNIV_INTERN
 void
 fil_node_create(
 /*============*/
 	const char*	name,	/* in: file name (file must be closed) */
 	ulint		size,	/* in: file size in database blocks, rounded
 				downwards to an integer */
 	ulint		id,	/* in: space id where to append */
 	ibool		is_raw);/* in: TRUE if a raw device or
 				a raw disk partition */
 #ifdef UNIV_LOG_ARCHIVE
 /********************************************************************
 Drops files from the start of a file space, so that its size is cut by
 the amount given. */
 UNIV_INTERN
 void
 fil_space_truncate_start(
 /*=====================*/
 	ulint	id,		/* in: space id */
 	ulint	trunc_len);	/* in: truncate by this much; it is an error
 				if this does not equal the combined size of
 				some initial files in the space */
 #endif /* UNIV_LOG_ARCHIVE */
 /***********************************************************************
 Creates a space memory object and puts it to the 'fil system' hash table. If
 there is an error, prints an error message to the .err log. */
 UNIV_INTERN
 ibool
 fil_space_create(
 /*=============*/
 				/* out: TRUE if success */
 	const char*	name,	/* in: space name */
 	ulint		id,	/* in: space id */
 	ulint		zip_size,/* in: compressed page size, or
 				0 for uncompressed tablespaces */
 	ulint		purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
 /***********************************************************************
 Frees a space object from the tablespace memory cache. Closes the files in
 the chain but does not delete them. */
 UNIV_INTERN
 ibool
 fil_space_free(
 /*===========*/
 			/* out: TRUE if success */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Returns the size of the space in pages. The tablespace must be cached in the
 memory cache. */
 UNIV_INTERN
 ulint
 fil_space_get_size(
 /*===============*/
 			/* out: space size, 0 if space not found */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Returns the flags of the space. The tablespace must be cached
 in the memory cache. */
 UNIV_INTERN
 ulint
 fil_space_get_flags(
 /*================*/
 			/* out: flags, ULINT_UNDEFINED if space not found */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Returns the compressed page size of the space, or 0 if the space
 is not compressed. The tablespace must be cached in the memory cache. */
 UNIV_INTERN
 ulint
 fil_space_get_zip_size(
 /*===================*/
 			/* out: compressed page size, ULINT_UNDEFINED
 			if space not found */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Checks if the pair space, page_no refers to an existing page in a tablespace
 file space. The tablespace must be cached in the memory cache. */
 UNIV_INTERN
 ibool
 fil_check_adress_in_tablespace(
 /*===========================*/
 			/* out: TRUE if the address is meaningful */
 	ulint	id,	/* in: space id */
 	ulint	page_no);/* in: page number */
 /********************************************************************
 Initializes the tablespace memory cache. */
 UNIV_INTERN
 void
 fil_init(
 /*=====*/
 	ulint	max_n_open);	/* in: max number of open files */
 /***********************************************************************
 Opens all log files and system tablespace data files. They stay open until the
 database server shutdown. This should be called at a server startup after the
 space objects for the log and the system tablespace have been created. The
 purpose of this operation is to make sure we never run out of file descriptors
 if we need to read from the insert buffer or to write to the log. */
 UNIV_INTERN
 void
 fil_open_log_and_system_tablespace_files(void);
 /*==========================================*/
 /***********************************************************************
 Closes all open files. There must not be any pending i/o's or not flushed
 modifications in the files. */
 UNIV_INTERN
 void
 fil_close_all_files(void);
 /*=====================*/
 /***********************************************************************
 Sets the max tablespace id counter if the given number is bigger than the
 previous value. */
 UNIV_INTERN
 void
 fil_set_max_space_id_if_bigger(
 /*===========================*/
 	ulint	max_id);/* in: maximum known id */
 /********************************************************************
 Initializes the ibuf data structure for space 0 == the system tablespace.
 This can be called after the file space headers have been created and the
 dictionary system has been initialized. */
 UNIV_INTERN
 void
 fil_ibuf_init_at_db_start(void);
 /*===========================*/
 /********************************************************************
 Writes the flushed lsn and the latest archived log number to the page
 header of the first page of each data file in the system tablespace. */
 UNIV_INTERN
 ulint
 fil_write_flushed_lsn_to_data_files(
 /*================================*/
 					/* out: DB_SUCCESS or error number */
 	ib_uint64_t	lsn,		/* in: lsn to write */
 	ulint		arch_log_no);	/* in: latest archived log
 					file number */
 /***********************************************************************
 Reads the flushed lsn and arch no fields from a data file at database
 startup. */
 UNIV_INTERN
 void
 fil_read_flushed_lsn_and_arch_log_no(
 /*=================================*/
 	os_file_t	data_file,		/* in: open data file */
 	ibool		one_read_already,	/* in: TRUE if min and max
 						parameters below already
 						contain sensible data */
 #ifdef UNIV_LOG_ARCHIVE
 	ulint*		min_arch_log_no,	/* in/out: */
 	ulint*		max_arch_log_no,	/* in/out: */
 #endif /* UNIV_LOG_ARCHIVE */
 	ib_uint64_t*	min_flushed_lsn,	/* in/out: */
 	ib_uint64_t*	max_flushed_lsn);	/* in/out: */
 /***********************************************************************
 Increments the count of pending insert buffer page merges, if space is not
 being deleted. */
 UNIV_INTERN
 ibool
 fil_inc_pending_ibuf_merges(
 /*========================*/
 			/* out: TRUE if being deleted, and ibuf merges should
 			be skipped */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Decrements the count of pending insert buffer page merges. */
 UNIV_INTERN
 void
 fil_decr_pending_ibuf_merges(
 /*=========================*/
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Parses the body of a log record written about an .ibd file operation. That is,
 the log record part after the standard (type, space id, page no) header of the
 log record.
 If desired, also replays the delete or rename operation if the .ibd file
 exists and the space id in it matches. Replays the create operation if a file
 at that path does not exist yet. If the database directory for the file to be
 created does not exist, then we create the directory, too.
 Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
 datadir that we should use in replaying the file operations. */
 UNIV_INTERN
 byte*
 fil_op_log_parse_or_replay(
 /*=======================*/
 				/* out: end of log record, or NULL if the
 				record was not completely contained between
 				ptr and end_ptr */
 	byte*	ptr,		/* in: buffer containing the log record body,
 				or an initial segment of it, if the record does
 				not fit completely between ptr and end_ptr */
 	byte*	end_ptr,	/* in: buffer end */
 	ulint	type,		/* in: the type of this log record */
 	ulint	space_id);	/* in: the space id of the tablespace in
 				question, or 0 if the log record should
 				only be parsed but not replayed */
 /***********************************************************************
 Deletes a single-table tablespace. The tablespace must be cached in the
 memory cache. */
 UNIV_INTERN
 ibool
 fil_delete_tablespace(
 /*==================*/
 			/* out: TRUE if success */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Discards a single-table tablespace. The tablespace must be cached in the
 memory cache. Discarding is like deleting a tablespace, but
 1) we do not drop the table from the data dictionary;
 2) we remove all insert buffer entries for the tablespace immediately; in DROP
 TABLE they are only removed gradually in the background;
 3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
 as it originally had. */
 UNIV_INTERN
 ibool
 fil_discard_tablespace(
 /*===================*/
 			/* out: TRUE if success */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Renames a single-table tablespace. The tablespace must be cached in the
 tablespace memory cache. */
 UNIV_INTERN
 ibool
 fil_rename_tablespace(
 /*==================*/
 					/* out: TRUE if success */
 	const char*	old_name,	/* in: old table name in the standard
 					databasename/tablename format of
 					InnoDB, or NULL if we do the rename
 					based on the space id only */
 	ulint		id,		/* in: space id */
 	const char*	new_name);	/* in: new table name in the standard
 					databasename/tablename format
 					of InnoDB */
 /***********************************************************************
 Creates a new single-table tablespace to a database directory of MySQL.
 Database directories are under the 'datadir' of MySQL. The datadir is the
 directory of a running mysqld program. We can refer to it by simply the
 path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
 dir of the mysqld server. */
 UNIV_INTERN
 ulint
 fil_create_new_single_table_tablespace(
 /*===================================*/
 					/* out: DB_SUCCESS or error code */
 	ulint*		space_id,	/* in/out: space id; if this is != 0,
 					then this is an input parameter,
 					otherwise output */
 	const char*	tablename,	/* in: the table name in the usual
 					databasename/tablename format
 					of InnoDB, or a dir path to a temp
 					table */
 	ibool		is_temp,	/* in: TRUE if a table created with
 					CREATE TEMPORARY TABLE */
 	ulint		flags,		/* in: tablespace flags */
 	ulint		size);		/* in: the initial size of the
 					tablespace file in pages,
 					must be >= FIL_IBD_FILE_INITIAL_SIZE */
 /************************************************************************
 Tries to open a single-table tablespace and optionally checks the space id is
 right in it. If does not succeed, prints an error message to the .err log. This
 function is used to open a tablespace when we start up mysqld, and also in
 IMPORT TABLESPACE.
 NOTE that we assume this operation is used either at the database startup
 or under the protection of the dictionary mutex, so that two users cannot
 race here. This operation does not leave the file associated with the
 tablespace open, but closes it after we have looked at the space id in it. */
 UNIV_INTERN
 ibool
 fil_open_single_table_tablespace(
 /*=============================*/
 					/* out: TRUE if success */
 	ibool		check_space_id,	/* in: should we check that the space
 					id in the file is right; we assume
 					that this function runs much faster
 					if no check is made, since accessing
 					the file inode probably is much
 					faster (the OS caches them) than
 					accessing the first page of the file */
 	ulint		id,		/* in: space id */
 	ulint		flags,		/* in: tablespace flags */
 	const char*	name);		/* in: table name in the
 					databasename/tablename format */
 /************************************************************************
 It is possible, though very improbable, that the lsn's in the tablespace to be
 imported have risen above the current system lsn, if a lengthy purge, ibuf
 merge, or rollback was performed on a backup taken with ibbackup. If that is
 the case, reset page lsn's in the file. We assume that mysqld was shut down
 after it performed these cleanup operations on the .ibd file, so that it at
 the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
 first page of the .ibd file, and we can determine whether we need to reset the
 lsn's just by looking at that flush lsn. */
 UNIV_INTERN
 ibool
 fil_reset_too_high_lsns(
 /*====================*/
 					/* out: TRUE if success */
 	const char*	name,		/* in: table name in the
 					databasename/tablename format */
 	ib_uint64_t	current_lsn);	/* in: reset lsn's if the lsn stamped
 					to FIL_PAGE_FILE_FLUSH_LSN in the
 					first page is too high */
 /************************************************************************
 At the server startup, if we need crash recovery, scans the database
 directories under the MySQL datadir, looking for .ibd files. Those files are
 single-table tablespaces. We need to know the space id in each of them so that
 we know into which file we should look to check the contents of a page stored
 in the doublewrite buffer, also to know where to apply log records where the
 space id is != 0. */
 UNIV_INTERN
 ulint
 fil_load_single_table_tablespaces(void);
 /*===================================*/
 			/* out: DB_SUCCESS or error number */
 /************************************************************************
 If we need crash recovery, and we have called
 fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
 we can call this function to print an error message of orphaned .ibd files
 for which there is not a data dictionary entry with a matching table name
 and space id. */
 UNIV_INTERN
 void
 fil_print_orphaned_tablespaces(void);
 /*================================*/
 /***********************************************************************
 Returns TRUE if a single-table tablespace does not exist in the memory cache,
 or is being deleted there. */
 UNIV_INTERN
 ibool
 fil_tablespace_deleted_or_being_deleted_in_mem(
 /*===========================================*/
 				/* out: TRUE if does not exist or is being
 				deleted */
 	ulint		id,	/* in: space id */
 	ib_int64_t	version);/* in: tablespace_version should be this; if
 				you pass -1 as the value of this, then this
 				parameter is ignored */
 /***********************************************************************
 Returns TRUE if a single-table tablespace exists in the memory cache. */
 UNIV_INTERN
 ibool
 fil_tablespace_exists_in_mem(
 /*=========================*/
 			/* out: TRUE if exists */
 	ulint	id);	/* in: space id */
 /***********************************************************************
 Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
 cache. Note that if we have not done a crash recovery at the database startup,
 there may be many tablespaces which are not yet in the memory cache. */
 UNIV_INTERN
 ibool
 fil_space_for_table_exists_in_mem(
 /*==============================*/
 					/* out: TRUE if a matching tablespace
 					exists in the memory cache */
 	ulint		id,		/* in: space id */
 	const char*	name,		/* in: table name in the standard
 					'databasename/tablename' format or
 					the dir path to a temp table */
 	ibool		is_temp,	/* in: TRUE if created with CREATE
 					TEMPORARY TABLE */
 	ibool		mark_space,	/* in: in crash recovery, at database
 					startup we mark all spaces which have
 					an associated table in the InnoDB
 					data dictionary, so that
 					we can print a warning about orphaned
 					tablespaces */
 	ibool		print_error_if_does_not_exist);
 					/* in: print detailed error
 					information to the .err log if a
 					matching tablespace is not found from
 					memory */
 /**************************************************************************
 Tries to extend a data file so that it would accommodate the number of pages
 given. The tablespace must be cached in the memory cache. If the space is big
 enough already, does nothing. */
 UNIV_INTERN
 ibool
 fil_extend_space_to_desired_size(
 /*=============================*/
 				/* out: TRUE if success */
 	ulint*	actual_size,	/* out: size of the space after extension;
 				if we ran out of disk space this may be lower
 				than the desired size */
 	ulint	space_id,	/* in: space id */
 	ulint	size_after_extend);/* in: desired size in pages after the
 				extension; if the current space size is bigger
 				than this already, the function does nothing */
 #ifdef UNIV_HOTBACKUP
 /************************************************************************
 Extends all tablespaces to the size stored in the space header. During the
 ibbackup --apply-log phase we extended the spaces on-demand so that log records
 could be applied, but that may have left spaces still too small compared to
 the size stored in the space header. */
 UNIV_INTERN
 void
 fil_extend_tablespaces_to_stored_len(void);
 /*======================================*/
 #endif
 /***********************************************************************
 Tries to reserve free extents in a file space. */
 UNIV_INTERN
 ibool
 fil_space_reserve_free_extents(
 /*===========================*/
 				/* out: TRUE if succeed */
 	ulint	id,		/* in: space id */
 	ulint	n_free_now,	/* in: number of free extents now */
 	ulint	n_to_reserve);	/* in: how many one wants to reserve */
 /***********************************************************************
 Releases free extents in a file space. */
 UNIV_INTERN
 void
 fil_space_release_free_extents(
 /*===========================*/
 	ulint	id,		/* in: space id */
 	ulint	n_reserved);	/* in: how many one reserved */
 /***********************************************************************
 Gets the number of reserved extents. If the database is silent, this number
 should be zero. */
 UNIV_INTERN
 ulint
 fil_space_get_n_reserved_extents(
 /*=============================*/
 	ulint	id);		/* in: space id */
 /************************************************************************
 Reads or writes data. This operation is asynchronous (aio) unless the caller
 asks for synchronous i/o with the sync parameter. */
 UNIV_INTERN
 ulint
 fil_io(
 /*===*/
 				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
 				if we are trying to do i/o on a tablespace
 				which does not exist */
 	ulint	type,		/* in: OS_FILE_READ or OS_FILE_WRITE,
 				ORed to OS_FILE_LOG, if a log i/o
 				and ORed to OS_AIO_SIMULATED_WAKE_LATER
 				if simulated aio and we want to post a
 				batch of i/os; NOTE that a simulated batch
 				may introduce hidden chances of deadlocks,
 				because i/os are not actually handled until
 				all have been posted: use with great
 				caution! */
 	ibool	sync,		/* in: TRUE if synchronous aio is desired */
 	ulint	space_id,	/* in: space id */
 	ulint	zip_size,	/* in: compressed page size in bytes;
 				0 for uncompressed pages */
 	ulint	block_offset,	/* in: offset in number of blocks */
 	ulint	byte_offset,	/* in: remainder of offset in bytes; in
 				aio this must be divisible by the OS block
 				size */
 	ulint	len,		/* in: how many bytes to read or write; this
 				must not cross a file boundary; in aio this
 				must be a block size multiple */
 	void*	buf,		/* in/out: buffer where to store read data
 				or from where to write; in aio this must be
 				appropriately aligned */
 	void*	message);	/* in: message for aio handler if non-sync
 				aio used, else ignored */
 /**************************************************************************
 Waits for an aio operation to complete. This function is used to write the
 handler for completed requests. The aio array of pending requests is divided
 into segments (see os0file.c for more info). The thread specifies which
 segment it wants to wait for. */
 UNIV_INTERN
 void
 fil_aio_wait(
 /*=========*/
 	ulint	segment);	/* in: the number of the segment in the aio
 				array to wait for */
 /**************************************************************************
 Flushes to disk possible writes cached by the OS. If the space does not exist
 or is being dropped, does not do anything. */
 UNIV_INTERN
 void
 fil_flush(
 /*======*/
 	ulint	space_id);	/* in: file space id (this can be a group of
 				log files or a tablespace of the database) */
 /**************************************************************************
 Flushes to disk writes in file spaces of the given type possibly cached by
 the OS. */
 UNIV_INTERN
 void
 fil_flush_file_spaces(
 /*==================*/
 	ulint	purpose);	/* in: FIL_TABLESPACE, FIL_LOG */
 /**********************************************************************
 Checks the consistency of the tablespace cache. */
 UNIV_INTERN
 ibool
 fil_validate(void);
 /*==============*/
 			/* out: TRUE if ok */
 /************************************************************************
 Returns TRUE if file address is undefined. */
 UNIV_INTERN
 ibool
 fil_addr_is_null(
 /*=============*/
 				/* out: TRUE if undefined */
 	fil_addr_t	addr);	/* in: address */
 /************************************************************************
 Accessor functions for a file page */
 UNIV_INTERN
 ulint
 fil_page_get_prev(const byte*	page);
 ulint
 fil_page_get_next(const byte*	page);
 /*************************************************************************
 Sets the file page type. */
 UNIV_INTERN
 void
 fil_page_set_type(
 /*==============*/
 	byte*	page,	/* in: file page */
 	ulint	type);	/* in: type */
 /*************************************************************************
 Gets the file page type. */
 UNIV_INTERN
 ulint
 fil_page_get_type(
 /*==============*/
 				/* out: type; NOTE that if the type
 				has not been written to page, the
 				return value not defined */
 	const byte*	page);	/* in: file page */
 typedef	struct fil_space_struct	fil_space_t;
 #endif
--- a/include/fsp0fsp.h
+++ b/include/fsp0fsp.h
@@ -0,0 +1,417 @@
 /******************************************************
 File space management
 (c) 1995 Innobase Oy
 Created 12/18/1995 Heikki Tuuri
 *******************************************************/
 #ifndef fsp0fsp_h
 #define fsp0fsp_h
 #include "univ.i"
 #include "mtr0mtr.h"
 #include "fut0lst.h"
 #include "ut0byte.h"
 #include "page0types.h"
 /* If records are inserted in order, there are the following
 flags to tell this (their type is made byte for the compiler
 to warn if direction and hint parameters are switched in
 fseg_alloc_free_page): */
 #define	FSP_UP		((byte)111)	/* alphabetically upwards */
 #define	FSP_DOWN	((byte)112)	/* alphabetically downwards */
 #define	FSP_NO_DIR	((byte)113)	/* no order */
 /* File space extent size (one megabyte) in pages */
 #define	FSP_EXTENT_SIZE		(1 << (20 - UNIV_PAGE_SIZE_SHIFT))
 /* On a page of any file segment, data may be put starting from this offset: */
 #define FSEG_PAGE_DATA		FIL_PAGE_DATA
 /* File segment header which points to the inode describing the file segment */
 typedef	byte	fseg_header_t;
 #define FSEG_HDR_SPACE		0	/* space id of the inode */
 #define FSEG_HDR_PAGE_NO	4	/* page number of the inode */
 #define FSEG_HDR_OFFSET		8	/* byte offset of the inode */
 #define FSEG_HEADER_SIZE	10
 /**************************************************************************
 Initializes the file space system. */
 UNIV_INTERN
 void
 fsp_init(void);
 /*==========*/
 /**************************************************************************
 Gets the current free limit of the system tablespace.  The free limit
 means the place of the first page which has never been put to the
 free list for allocation.  The space above that address is initialized
 to zero.  Sets also the global variable log_fsp_current_free_limit. */
 UNIV_INTERN
 ulint
 fsp_header_get_free_limit(void);
 /*===========================*/
 			/* out: free limit in megabytes */
 /**************************************************************************
 Gets the size of the system tablespace from the tablespace header.  If
 we do not have an auto-extending data file, this should be equal to
 the size of the data files.  If there is an auto-extending data file,
 this can be smaller. */
 UNIV_INTERN
 ulint
 fsp_header_get_tablespace_size(void);
 /*================================*/
 			/* out: size in pages */
 /**************************************************************************
 Reads the file space size stored in the header page. */
 UNIV_INTERN
 ulint
 fsp_get_size_low(
 /*=============*/
 			/* out: tablespace size stored in the space header */
 	page_t*	page);	/* in: header page (page 0 in the tablespace) */
 /**************************************************************************
 Reads the space id from the first page of a tablespace. */
 UNIV_INTERN
 ulint
 fsp_header_get_space_id(
 /*====================*/
 				/* out: space id, ULINT_UNDEFINED if error */
 	const page_t*	page);	/* in: first page of a tablespace */
 /**************************************************************************
 Reads the space flags from the first page of a tablespace. */
 UNIV_INTERN
 ulint
 fsp_header_get_flags(
 /*=================*/
 				/* out: flags */
 	const page_t*	page);	/* in: first page of a tablespace */
 /**************************************************************************
 Reads the compressed page size from the first page of a tablespace. */
 UNIV_INTERN
 ulint
 fsp_header_get_zip_size(
 /*====================*/
 				/* out: compressed page size in bytes,
 				or 0 if uncompressed */
 	const page_t*	page);	/* in: first page of a tablespace */
 /**************************************************************************
 Writes the space id and compressed page size to a tablespace header.
 This function is used past the buffer pool when we in fil0fil.c create
 a new single-table tablespace. */
 UNIV_INTERN
 void
 fsp_header_init_fields(
 /*===================*/
 	page_t*	page,		/* in/out: first page in the space */
 	ulint	space_id,	/* in: space id */
 	ulint	flags);		/* in: tablespace flags (FSP_SPACE_FLAGS):
 				0, or table->flags if newer than COMPACT */
 /**************************************************************************
 Initializes the space header of a newly created space and also creates the
 insert buffer tree root if space == 0. */
 UNIV_INTERN
 void
 fsp_header_init(
 /*============*/
 	ulint	space,		/* in: space id */
 	ulint	size,		/* in: current size in blocks */
 	mtr_t*	mtr);		/* in: mini-transaction handle */
 /**************************************************************************
 Increases the space size field of a space. */
 UNIV_INTERN
 void
 fsp_header_inc_size(
 /*================*/
 	ulint	space,	/* in: space id */
 	ulint	size_inc,/* in: size increment in pages */
 	mtr_t*	mtr);	/* in: mini-transaction handle */
 /**************************************************************************
 Creates a new segment. */
 UNIV_INTERN
 buf_block_t*
 fseg_create(
 /*========*/
 			/* out: the block where the segment header is placed,
 			x-latched, NULL if could not create segment
 			because of lack of space */
 	ulint	space,	/* in: space id */
 	ulint	page,	/* in: page where the segment header is placed: if
 			this is != 0, the page must belong to another segment,
 			if this is 0, a new page will be allocated and it
 			will belong to the created segment */
 	ulint	byte_offset, /* in: byte offset of the created segment header
 			on the page */
 	mtr_t*	mtr);	/* in: mtr */
 /**************************************************************************
 Creates a new segment. */
 UNIV_INTERN
 buf_block_t*
 fseg_create_general(
 /*================*/
 			/* out: the block where the segment header is placed,
 			x-latched, NULL if could not create segment
 			because of lack of space */
 	ulint	space,	/* in: space id */
 	ulint	page,	/* in: page where the segment header is placed: if
 			this is != 0, the page must belong to another segment,
 			if this is 0, a new page will be allocated and it
 			will belong to the created segment */
 	ulint	byte_offset, /* in: byte offset of the created segment header
 			on the page */
 	ibool	has_done_reservation, /* in: TRUE if the caller has already
 			done the reservation for the pages with
 			fsp_reserve_free_extents (at least 2 extents: one for
 			the inode and the other for the segment) then there is
 			no need to do the check for this individual
 			operation */
 	mtr_t*	mtr);	/* in: mtr */
 /**************************************************************************
 Calculates the number of pages reserved by a segment, and how many pages are
 currently used. */
 UNIV_INTERN
 ulint
 fseg_n_reserved_pages(
 /*==================*/
 				/* out: number of reserved pages */
 	fseg_header_t*	header,	/* in: segment header */
 	ulint*		used,	/* out: number of pages used (<= reserved) */
 	mtr_t*		mtr);	/* in: mtr handle */
 /**************************************************************************
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize
 file space fragmentation. */
 UNIV_INTERN
 ulint
 fseg_alloc_free_page(
 /*=================*/
 				/* out: the allocated page offset
 				FIL_NULL if no page could be allocated */
 	fseg_header_t*	seg_header, /* in: segment header */
 	ulint		hint,	/* in: hint of which page would be desirable */
 	byte		direction, /* in: if the new page is needed because
 				of an index page split, and records are
 				inserted there in order, into which
 				direction they go alphabetically: FSP_DOWN,
 				FSP_UP, FSP_NO_DIR */
 	mtr_t*		mtr);	/* in: mtr handle */
 /**************************************************************************
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize file space
 fragmentation. */
 UNIV_INTERN
 ulint
 fseg_alloc_free_page_general(
 /*=========================*/
 				/* out: allocated page offset, FIL_NULL if no
 				page could be allocated */
 	fseg_header_t*	seg_header,/* in: segment header */
 	ulint		hint,	/* in: hint of which page would be desirable */
 	byte		direction,/* in: if the new page is needed because
 				of an index page split, and records are
 				inserted there in order, into which
 				direction they go alphabetically: FSP_DOWN,
 				FSP_UP, FSP_NO_DIR */
 	ibool		has_done_reservation, /* in: TRUE if the caller has
 				already done the reservation for the page
 				with fsp_reserve_free_extents, then there
 				is no need to do the check for this individual
 				page */
 	mtr_t*		mtr);	/* in: mtr handle */
 /**************************************************************************
 Reserves free pages from a tablespace. All mini-transactions which may
 use several pages from the tablespace should call this function beforehand
 and reserve enough free extents so that they certainly will be able
 to do their operation, like a B-tree page split, fully. Reservations
 must be released with function fil_space_release_free_extents!
 The alloc_type below has the following meaning: FSP_NORMAL means an
 operation which will probably result in more space usage, like an
 insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
 deleting rows, then this allocation will in the long run result in
 less space usage (after a purge); FSP_CLEANING means allocation done
 in a physical record delete (like in a purge) or other cleaning operation
 which will result in less space usage in the long run. We prefer the latter
 two types of allocation: when space is scarce, FSP_NORMAL allocations
 will not succeed, but the latter two allocations will succeed, if possible.
 The purpose is to avoid dead end where the database is full but the
 user cannot free any space because these freeing operations temporarily
 reserve some space.
 Single-table tablespaces whose size is < 32 pages are a special case. In this
 function we would liberally reserve several 64 page extents for every page
 split or merge in a B-tree. But we do not want to waste disk space if the table
 only occupies < 32 pages. That is why we apply different rules in that special
 case, just ensuring that there are 3 free pages available. */
 UNIV_INTERN
 ibool
 fsp_reserve_free_extents(
 /*=====================*/
 			/* out: TRUE if we were able to make the reservation */
 	ulint*	n_reserved,/* out: number of extents actually reserved; if we
 			return TRUE and the tablespace size is < 64 pages,
 			then this can be 0, otherwise it is n_ext */
 	ulint	space,	/* in: space id */
 	ulint	n_ext,	/* in: number of extents to reserve */
 	ulint	alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
 	mtr_t*	mtr);	/* in: mtr */
 /**************************************************************************
 This function should be used to get information on how much we still
 will be able to insert new data to the database without running out of
 tablespace. Only free extents are taken into account and we also subtract
 the safety margin required by the above function fsp_reserve_free_extents. */
 UNIV_INTERN
 ullint
 fsp_get_available_space_in_free_extents(
 /*====================================*/
 			/* out: available space in kB */
 	ulint	space);	/* in: space id */
 /**************************************************************************
 Frees a single page of a segment. */
 UNIV_INTERN
 void
 fseg_free_page(
 /*===========*/
 	fseg_header_t*	seg_header, /* in: segment header */
 	ulint		space,	/* in: space id */
 	ulint		page,	/* in: page offset */
 	mtr_t*		mtr);	/* in: mtr handle */
 /***********************************************************************
 Frees a segment. The freeing is performed in several mini-transactions,
 so that there is no danger of bufferfixing too many buffer pages. */
 UNIV_INTERN
 void
 fseg_free(
 /*======*/
 	ulint	space,	/* in: space id */
 	ulint	zip_size,/* in: compressed page size in bytes
 			or 0 for uncompressed pages */
 	ulint	page_no,/* in: page number where the segment header is
 			placed */
 	ulint	offset);/* in: byte offset of the segment header on that
 			page */
 /**************************************************************************
 Frees part of a segment. This function can be used to free a segment
 by repeatedly calling this function in different mini-transactions.
 Doing the freeing in a single mini-transaction might result in
 too big a mini-transaction. */
 UNIV_INTERN
 ibool
 fseg_free_step(
 /*===========*/
 				/* out: TRUE if freeing completed */
 	fseg_header_t*	header,	/* in, own: segment header; NOTE: if the header
 				resides on the first page of the frag list
 				of the segment, this pointer becomes obsolete
 				after the last freeing step */
 	mtr_t*		mtr);	/* in: mtr */
 /**************************************************************************
 Frees part of a segment. Differs from fseg_free_step because this function
 leaves the header page unfreed. */
 UNIV_INTERN
 ibool
 fseg_free_step_not_header(
 /*======================*/
 				/* out: TRUE if freeing completed, except the
 				header page */
 	fseg_header_t*	header,	/* in: segment header which must reside on
 				the first fragment page of the segment */
 	mtr_t*		mtr);	/* in: mtr */
 /***************************************************************************
 Checks if a page address is an extent descriptor page address. */
 UNIV_INLINE
 ibool
 fsp_descr_page(
 /*===========*/
 			/* out: TRUE if a descriptor page */
 	ulint	zip_size,/* in: compressed page size in bytes;
 			0 for uncompressed pages */
 	ulint	page_no);/* in: page number */
 /***************************************************************
 Parses a redo log record of a file page init. */
 UNIV_INTERN
 byte*
 fsp_parse_init_file_page(
 /*=====================*/
 				/* out: end of log record or NULL */
 	byte*		ptr,	/* in: buffer */
 	byte*		end_ptr, /* in: buffer end */
 	buf_block_t*	block);	/* in: block or NULL */
 /***********************************************************************
 Validates the file space system and its segments. */
 UNIV_INTERN
 ibool
 fsp_validate(
 /*=========*/
 			/* out: TRUE if ok */
 	ulint	space);	/* in: space id */
 /***********************************************************************
 Prints info of a file space. */
 UNIV_INTERN
 void
 fsp_print(
 /*======*/
 	ulint	space);	/* in: space id */
 /***********************************************************************
 Validates a segment. */
 UNIV_INTERN
 ibool
 fseg_validate(
 /*==========*/
 				/* out: TRUE if ok */
 	fseg_header_t*	header, /* in: segment header */
 	mtr_t*		mtr2);	/* in: mtr */
 #ifdef UNIV_BTR_PRINT
 /***********************************************************************
 Writes info of a segment. */
 UNIV_INTERN
 void
 fseg_print(
 /*=======*/
 	fseg_header_t*	header, /* in: segment header */
 	mtr_t*		mtr);	/* in: mtr */
 #endif /* UNIV_BTR_PRINT */
 /* Flags for fsp_reserve_free_extents */
 #define FSP_NORMAL	1000000
 #define	FSP_UNDO	2000000
 #define FSP_CLEANING	3000000
 /* Number of pages described in a single descriptor page: currently each page
 description takes less than 1 byte; a descriptor page is repeated every
 this many file pages */
 /* #define XDES_DESCRIBED_PER_PAGE		UNIV_PAGE_SIZE */
 /* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
 /* The space low address page map */
 /*--------------------------------------*/
 				/* The following two pages are repeated
 				every XDES_DESCRIBED_PER_PAGE pages in
 				every tablespace. */
 #define FSP_XDES_OFFSET			0	/* extent descriptor */
 #define FSP_IBUF_BITMAP_OFFSET		1	/* insert buffer bitmap */
 				/* The ibuf bitmap pages are the ones whose
 				page number is the number above plus a
 				multiple of XDES_DESCRIBED_PER_PAGE */
 #define FSP_FIRST_INODE_PAGE_NO		2	/* in every tablespace */
 				/* The following pages exist
 				in the system tablespace (space 0). */
 #define FSP_IBUF_HEADER_PAGE_NO		3	/* in tablespace 0 */
 #define FSP_IBUF_TREE_ROOT_PAGE_NO	4	/* in tablespace 0 */
 				/* The ibuf tree root page number in
 				tablespace 0; its fseg inode is on the page
 				number FSP_FIRST_INODE_PAGE_NO */
 #define FSP_TRX_SYS_PAGE_NO		5	/* in tablespace 0 */
 #define	FSP_FIRST_RSEG_PAGE_NO		6	/* in tablespace 0 */
 #define FSP_DICT_HDR_PAGE_NO		7	/* in tablespace 0 */
 /*--------------------------------------*/
 #ifndef UNIV_NONINL
 #include "fsp0fsp.ic"
 #endif
 #endif
--- a/include/fsp0fsp.ic
+++ b/include/fsp0fsp.ic
@@ -0,0 +1,28 @@
 /******************************************************
 File space management
 (c) 1995 Innobase Oy
 Created 12/18/1995 Heikki Tuuri
 *******************************************************/
 /***************************************************************************
 Tells whether a page number denotes an extent descriptor page. The page
 number is masked with (size - 1), where size is zip_size for compressed
 pages and UNIV_PAGE_SIZE otherwise, and the result is compared with
 FSP_XDES_OFFSET. */
 UNIV_INLINE
 ibool
 fsp_descr_page(
 /*===========*/
 			/* out: TRUE if page_no is a descriptor page */
 	ulint	zip_size,/* in: compressed page size in bytes,
 			or 0 if the page is not compressed */
 	ulint	page_no)/* in: page number to test */
 {
 	ulint	period;	/* page size that page_no is masked against */
 	ut_ad(ut_is_2pow(zip_size));
 	/* zip_size == 0 means uncompressed: use the full page size */
 	period = zip_size ? zip_size : UNIV_PAGE_SIZE;
 	return(UNIV_UNLIKELY((page_no & (period - 1)) == FSP_XDES_OFFSET));
 }
--- a/include/fut0fut.h
+++ b/include/fut0fut.h
@@ -0,0 +1,38 @@
 /**********************************************************************
 File-based utilities
 (c) 1995 Innobase Oy
 Created 12/13/1995 Heikki Tuuri
 ***********************************************************************/
 #ifndef fut0fut_h
 #define fut0fut_h
 #include "univ.i"
 #include "fil0fil.h"
 #include "mtr0mtr.h"
 /************************************************************************
 Gets a pointer to a file address and latches the page. */
 UNIV_INLINE
 byte*
 fut_get_ptr(
 /*========*/
 				/* out: pointer to a byte in a frame; the file
 				page in the frame is bufferfixed and latched */
 	ulint		space,	/* in: space id */
 	ulint		zip_size,/* in: compressed page size in bytes
 				or 0 for uncompressed pages */
 	fil_addr_t	addr,	/* in: file address */
 	ulint		rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
 	mtr_t*		mtr);	/* in: mtr handle */
 #ifndef UNIV_NONINL
 #include "fut0fut.ic"
 #endif
 #endif
--- a/include/fut0fut.ic
+++ b/include/fut0fut.ic
@@ -0,0 +1,41 @@
 /**********************************************************************
 File-based utilities
 (c) 1995 Innobase Oy
 Created 12/13/1995 Heikki Tuuri
 ***********************************************************************/
 #include "sync0rw.h"
 #include "buf0buf.h"
 /************************************************************************
 Fetches the page named by a file address with buf_page_get(), using the
 requested latch mode, and returns a pointer to the addressed byte inside
 the page frame. */
 UNIV_INLINE
 byte*
 fut_get_ptr(
 /*========*/
 				/* out: pointer to the byte at addr.boffset
 				in the frame of page addr.page */
 	ulint		space,	/* in: space id */
 	ulint		zip_size,/* in: compressed page size in bytes,
 				0 if the page is not compressed */
 	fil_addr_t	addr,	/* in: file address: page number and
 				byte offset within the page */
 	ulint		rw_latch, /* in: RW_S_LATCH or RW_X_LATCH */
 	mtr_t*		mtr)	/* in: mini-transaction handle */
 {
 	buf_block_t*	blk;
 	byte*		frame;
 	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
 	blk = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
 	frame = buf_block_get_frame(blk);
 #ifdef UNIV_SYNC_DEBUG
 	/* sync debug builds only: register the block's latching level */
 	buf_block_dbg_add_level(blk, SYNC_NO_ORDER_CHECK);
 #endif /* UNIV_SYNC_DEBUG */
 	return(frame + addr.boffset);
 }
--- a/include/fut0lst.h
+++ b/include/fut0lst.h
@@ -0,0 +1,198 @@
 /**********************************************************************
 File-based list utilities
 (c) 1995 Innobase Oy
 Created 11/28/1995 Heikki Tuuri
 ***********************************************************************/
 #ifndef fut0lst_h
 #define fut0lst_h
 #include "univ.i"
 #include "fil0fil.h"
 #include "mtr0mtr.h"
 /* The C 'types' of base node and list node: these should be used to
 write self-documenting code. Of course, the sizeof operator cannot be
 applied to these types! */
 typedef	byte	flst_base_node_t;
 typedef	byte	flst_node_t;
 /* The physical size of a list base node in bytes */
 #define	FLST_BASE_NODE_SIZE	(4 + 2 * FIL_ADDR_SIZE)
 /* The physical size of a list node in bytes */
 #define	FLST_NODE_SIZE		(2 * FIL_ADDR_SIZE)
 /************************************************************************
 Initializes a list base node. */
 UNIV_INLINE
 void
 flst_init(
 /*======*/
 	flst_base_node_t*	base,	/* in: pointer to base node */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Adds a node as the last node in a list. */
 UNIV_INTERN
 void
 flst_add_last(
 /*==========*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node,	/* in: node to add */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Adds a node as the first node in a list. */
 UNIV_INTERN
 void
 flst_add_first(
 /*===========*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node,	/* in: node to add */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Inserts a node after another in a list. */
 UNIV_INTERN
 void
 flst_insert_after(
 /*==============*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node1,	/* in: node to insert after */
 	flst_node_t*		node2,	/* in: node to add */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Inserts a node before another in a list. */
 UNIV_INTERN
 void
 flst_insert_before(
 /*===============*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node2,	/* in: node to insert */
 	flst_node_t*		node3,	/* in: node to insert before */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Removes a node. */
 UNIV_INTERN
 void
 flst_remove(
 /*========*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node2,	/* in: node to remove */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Cuts off the tail of the list, including the node given. The number of
 nodes which will be removed must be provided by the caller, as this function
 does not measure the length of the tail. */
 UNIV_INTERN
 void
 flst_cut_end(
 /*=========*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node2,	/* in: first node to remove */
 	ulint			n_nodes,/* in: number of nodes to remove,
 					must be >= 1 */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Cuts off the tail of the list, not including the given node. The number of
 nodes which will be removed must be provided by the caller, as this function
 does not measure the length of the tail. */
 UNIV_INTERN
 void
 flst_truncate_end(
 /*==============*/
 	flst_base_node_t*	base,	/* in: pointer to base node of list */
 	flst_node_t*		node2,	/* in: first node not to remove */
 	ulint			n_nodes,/* in: number of nodes to remove */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Gets list length. */
 UNIV_INLINE
 ulint
 flst_get_len(
 /*=========*/
 					/* out: length */
 	const flst_base_node_t*	base,	/* in: pointer to base node */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Gets list first node address. */
 UNIV_INLINE
 fil_addr_t
 flst_get_first(
 /*===========*/
 					/* out: file address */
 	const flst_base_node_t*	base,	/* in: pointer to base node */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Gets list last node address. */
 UNIV_INLINE
 fil_addr_t
 flst_get_last(
 /*==========*/
 					/* out: file address */
 	const flst_base_node_t*	base,	/* in: pointer to base node */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Gets list next node address. */
 UNIV_INLINE
 fil_addr_t
 flst_get_next_addr(
 /*===============*/
 					/* out: file address */
 	const flst_node_t*	node,	/* in: pointer to node */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Gets list prev node address. */
 UNIV_INLINE
 fil_addr_t
 flst_get_prev_addr(
 /*===============*/
 					/* out: file address */
 	const flst_node_t*	node,	/* in: pointer to node */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Writes a file address. */
 UNIV_INLINE
 void
 flst_write_addr(
 /*============*/
 	fil_faddr_t*	faddr,	/* in: pointer to file faddress */
 	fil_addr_t	addr,	/* in: file address */
 	mtr_t*		mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Reads a file address. */
 UNIV_INLINE
 fil_addr_t
 flst_read_addr(
 /*===========*/
 					/* out: file address */
 	const fil_faddr_t*	faddr,	/* in: pointer to file faddress */
 	mtr_t*			mtr);	/* in: mini-transaction handle */
 /************************************************************************
 Validates a file-based list. */
 UNIV_INTERN
 ibool
 flst_validate(
 /*==========*/
 					/* out: TRUE if ok */
 	const flst_base_node_t*	base,	/* in: pointer to base node of list */
 	mtr_t*			mtr1);	/* in: mtr */
 /************************************************************************
 Prints info of a file-based list. */
 UNIV_INTERN
 void
 flst_print(
 /*=======*/
 	const flst_base_node_t*	base,	/* in: pointer to base node of list */
 	mtr_t*			mtr);	/* in: mtr */
 #ifndef UNIV_NONINL
 #include "fut0lst.ic"
 #endif
 #endif
--- a/include/fut0lst.ic
+++ b/include/fut0lst.ic
@@ -0,0 +1,146 @@
 /**********************************************************************
 File-based list utilities
 (c) 1995 Innobase Oy
 Created 11/28/1995 Heikki Tuuri
 ***********************************************************************/
 #include "fut0fut.h"
 #include "mtr0log.h"
 #include "buf0buf.h"
 /* We define the field offsets of a node for the list */
 #define FLST_PREV	0	/* 6-byte address of the previous list element;
 				the page part of address is FIL_NULL, if no
 				previous element */
 #define FLST_NEXT	FIL_ADDR_SIZE	/* 6-byte address of the next
 				list element; the page part of address
 				is FIL_NULL, if no next element */
 /* We define the field offsets of a base node for the list */
 #define FLST_LEN	0	/* 32-bit list length field */
 #define	FLST_FIRST	4	/* 6-byte address of the first element
 				of the list; undefined if empty list */
 #define	FLST_LAST	(4 + FIL_ADDR_SIZE) /* 6-byte address of the
 				last element of the list; undefined
 				if empty list */
 /************************************************************************
 Stores a file address at the given location: the page number is written
 as a 4-byte field at FIL_ADDR_PAGE and the byte offset as a 2-byte field
 at FIL_ADDR_BYTE, both through mlog_write_ulint(). */
 UNIV_INLINE
 void
 flst_write_addr(
 /*============*/
 	fil_faddr_t*	faddr,	/* in: where to store the file address */
 	fil_addr_t	addr,	/* in: file address to store */
 	mtr_t*		mtr)	/* in: mini-transaction handle */
 {
 	fil_faddr_t*	page_no_field;
 	fil_faddr_t*	boffset_field;
 	ut_ad(faddr && mtr);
 	ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX));
 	page_no_field = faddr + FIL_ADDR_PAGE;
 	boffset_field = faddr + FIL_ADDR_BYTE;
 	mlog_write_ulint(page_no_field, addr.page, MLOG_4BYTES, mtr);
 	mlog_write_ulint(boffset_field, addr.boffset, MLOG_2BYTES, mtr);
 }
 /************************************************************************
 Loads a file address from the given location: the page number is read
 as a 4-byte field at FIL_ADDR_PAGE and the byte offset as a 2-byte field
 at FIL_ADDR_BYTE, both through mtr_read_ulint(). */
 UNIV_INLINE
 fil_addr_t
 flst_read_addr(
 /*===========*/
 					/* out: the file address read */
 	const fil_faddr_t*	faddr,	/* in: where the file address is
 					stored */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	fil_addr_t		result;
 	const fil_faddr_t*	page_no_field;
 	const fil_faddr_t*	boffset_field;
 	ut_ad(faddr && mtr);
 	page_no_field = faddr + FIL_ADDR_PAGE;
 	boffset_field = faddr + FIL_ADDR_BYTE;
 	result.page = mtr_read_ulint(page_no_field, MLOG_4BYTES, mtr);
 	result.boffset = mtr_read_ulint(boffset_field, MLOG_2BYTES, mtr);
 	return(result);
 }
 /************************************************************************
 Sets up a list base node as an empty list: the length field is written
 as 0, and the first and last node addresses as fil_addr_null. */
 UNIV_INLINE
 void
 flst_init(
 /*======*/
 	flst_base_node_t*	base,	/* in: base node to initialize */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	flst_base_node_t*	len_field;
 	flst_base_node_t*	first_field;
 	flst_base_node_t*	last_field;
 	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
 	len_field = base + FLST_LEN;
 	first_field = base + FLST_FIRST;
 	last_field = base + FLST_LAST;
 	/* the writes are issued in field order: length, first, last */
 	mlog_write_ulint(len_field, 0, MLOG_4BYTES, mtr);
 	flst_write_addr(first_field, fil_addr_null, mtr);
 	flst_write_addr(last_field, fil_addr_null, mtr);
 }
 /************************************************************************
 Returns the length of a list, read as the 4-byte FLST_LEN field of the
 base node. */
 UNIV_INLINE
 ulint
 flst_get_len(
 /*=========*/
 					/* out: value of the length field */
 	const flst_base_node_t*	base,	/* in: base node of the list */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	ulint	n_nodes;
 	n_nodes = mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr);
 	return(n_nodes);
 }
 /************************************************************************
 Returns the address of the first node of a list, read from the
 FLST_FIRST field of the base node. */
 UNIV_INLINE
 fil_addr_t
 flst_get_first(
 /*===========*/
 					/* out: file address stored in the
 					FLST_FIRST field */
 	const flst_base_node_t*	base,	/* in: base node of the list */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	fil_addr_t	first;
 	first = flst_read_addr(base + FLST_FIRST, mtr);
 	return(first);
 }
 /************************************************************************
 Returns the address of the last node of a list, read from the FLST_LAST
 field of the base node. */
 UNIV_INLINE
 fil_addr_t
 flst_get_last(
 /*==========*/
 					/* out: file address stored in the
 					FLST_LAST field */
 	const flst_base_node_t*	base,	/* in: base node of the list */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	fil_addr_t	last;
 	last = flst_read_addr(base + FLST_LAST, mtr);
 	return(last);
 }
 /************************************************************************
 Returns the address of the node following the given list node, read
 from the FLST_NEXT field of the node. */
 UNIV_INLINE
 fil_addr_t
 flst_get_next_addr(
 /*===============*/
 					/* out: file address stored in the
 					FLST_NEXT field */
 	const flst_node_t*	node,	/* in: list node */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	fil_addr_t	next;
 	next = flst_read_addr(node + FLST_NEXT, mtr);
 	return(next);
 }
 /************************************************************************
 Returns the address of the node preceding the given list node, read
 from the FLST_PREV field of the node. */
 UNIV_INLINE
 fil_addr_t
 flst_get_prev_addr(
 /*===============*/
 					/* out: file address stored in the
 					FLST_PREV field */
 	const flst_node_t*	node,	/* in: list node */
 	mtr_t*			mtr)	/* in: mini-transaction handle */
 {
 	fil_addr_t	prev;
 	prev = flst_read_addr(node + FLST_PREV, mtr);
 	return(prev);
 }
--- a/include/ha0ha.h
+++ b/include/ha0ha.h
@@ -0,0 +1,172 @@
 /******************************************************
 The hash table with external chains
 (c) 1994-1997 Innobase Oy
 Created 8/18/1994 Heikki Tuuri
 *******************************************************/
 #ifndef ha0ha_h
 #define ha0ha_h
 #include "univ.i"
 #include "hash0hash.h"
 #include "page0types.h"
 #include "buf0types.h"
 /*****************************************************************
 Looks for an element in a hash table and returns the data pointer
 stored in the first chain node whose fold value matches. */
 UNIV_INLINE
 void*
 ha_search_and_get_data(
 /*===================*/
 				/* out: pointer to the data of the first hash
 				table node in chain having the fold number,
 				NULL if not found */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold);	/* in: folded value of the searched data */
 /*************************************************************
 Looks for an element when we know the pointer to the data and updates
 the pointer to data if found. Call this through the
 ha_search_and_update_if_found() macro below, which hides the
 debug-only new_block parameter. */
 UNIV_INTERN
 void
 ha_search_and_update_if_found_func(
 /*===============================*/
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: folded value of the searched data */
 	void*		data,	/* in: pointer to the data */
 #ifdef UNIV_DEBUG
 	buf_block_t*	new_block,/* in: block containing new_data */
 #endif
 	void*		new_data);/* in: new pointer to the data */
 /* Wrapper with a build-independent argument list: new_block is forwarded
 only when UNIV_DEBUG is defined and is dropped (never evaluated)
 otherwise. */
 #ifdef UNIV_DEBUG
 # define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
 	ha_search_and_update_if_found_func(table,fold,data,new_block,new_data)
 #else
 # define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
 	ha_search_and_update_if_found_func(table,fold,data,new_data)
 #endif
 /*****************************************************************
 Creates a hash table with >= n array cells. The actual number of cells is
 chosen to be a prime number slightly bigger than n. Call this through the
 ha_create() macro below, which hides the debug-only mutex_level
 parameter. */
 UNIV_INTERN
 hash_table_t*
 ha_create_func(
 /*===========*/
 				/* out, own: created table */
 	ulint	n,		/* in: number of array cells */
 #ifdef UNIV_SYNC_DEBUG
 	ulint	mutex_level,	/* in: level of the mutexes in the latching
 				order: this is used in the debug version */
 #endif /* UNIV_SYNC_DEBUG */
 	ulint	n_mutexes);	/* in: number of mutexes to protect the
 				hash table: must be a power of 2 */
 /* Wrapper with a build-independent argument list. Note the argument
 order: ha_create(n_cells, n_mutexes, level), whereas the function takes
 (n, mutex_level, n_mutexes). The level argument is dropped (never
 evaluated) unless UNIV_SYNC_DEBUG is defined. */
 #ifdef UNIV_SYNC_DEBUG
 # define ha_create(n_c,n_m,level) ha_create_func(n_c,level,n_m)
 #else /* UNIV_SYNC_DEBUG */
 # define ha_create(n_c,n_m,level) ha_create_func(n_c,n_m)
 #endif /* UNIV_SYNC_DEBUG */
 /*****************************************************************
 Empties a hash table and frees the memory heaps.
 NOTE(review): the parameter is marked "own"; whether the table object
 itself remains usable afterwards is not visible in this header - confirm
 against the definition. */
 UNIV_INTERN
 void
 ha_clear(
 /*=====*/
 	hash_table_t*	table);	/* in, own: hash table */
 /*****************************************************************
 Inserts an entry into a hash table. If an entry with the same fold number
 is found, its node is updated to point to the new data, and no new node
 is inserted. Call this through the ha_insert_for_fold() macro below,
 which hides the debug-only block parameter. */
 UNIV_INTERN
 ibool
 ha_insert_for_fold_func(
 /*====================*/
 				/* out: TRUE if succeed, FALSE if no more
 				memory could be allocated */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: folded value of data; if a node with
 				the same fold value already exists, it is
 				updated to point to the data given here,
 				and no new node is created! */
 #ifdef UNIV_DEBUG
 	buf_block_t*	block,	/* in: buffer block containing the data */
 #endif /* UNIV_DEBUG */
 	void*		data);	/* in: data, must not be NULL */
 /* Wrapper with a build-independent argument list (table, fold, block,
 data): block is forwarded only when UNIV_DEBUG is defined and is dropped
 (never evaluated) otherwise. */
 #ifdef UNIV_DEBUG
 # define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,b,d)
 #else
 # define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,d)
 #endif
 /*****************************************************************
 Deletes an entry from a hash table. The entry is required to exist; use
 ha_search_and_delete_if_found() when it may be absent. */
 UNIV_INTERN
 void
 ha_delete(
 /*======*/
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: folded value of data */
 	void*		data);	/* in: data, must not be NULL and must exist
 				in the hash table */
 /*************************************************************
 Looks for an element when we know the pointer to the data and deletes
 it from the hash table if found. The chain selected by fold is searched
 for a node whose data pointer equals the given one. */
 UNIV_INLINE
 ibool
 ha_search_and_delete_if_found(
 /*==========================*/
 				/* out: TRUE if found (and deleted),
 				FALSE otherwise */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: folded value of the searched data */
 	void*		data);	/* in: pointer to the data */
 /*********************************************************************
 Removes from the chain determined by fold all nodes whose data pointer
 points to the given page. */
 UNIV_INTERN
 void
 ha_remove_all_nodes_to_page(
 /*========================*/
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: fold value determining the chain */
 	const page_t*	page);	/* in: buffer page */
 /*****************************************************************
 Validates a given range of the cells in a hash table.
 NOTE(review): whether end_index is inclusive is not visible in this
 header - confirm against the definition. */
 UNIV_INTERN
 ibool
 ha_validate(
 /*========*/
 					/* out: TRUE if ok */
 	hash_table_t*	table,		/* in: hash table */
 	ulint		start_index,	/* in: start index */
 	ulint		end_index);	/* in: end index */
 /*****************************************************************
 Prints information about a hash table to the given file. */
 UNIV_INTERN
 void
 ha_print_info(
 /*==========*/
 	FILE*		file,	/* in: file where to print */
 	hash_table_t*	table);	/* in: hash table */
 /* The hash table external chain node. Note that the block field exists
 only in UNIV_DEBUG builds, so the size and layout of this struct differ
 between debug and non-debug builds. */
 typedef struct ha_node_struct ha_node_t;
 struct ha_node_struct {
 	ha_node_t*	next;	/* next chain node or NULL if none */
 #ifdef UNIV_DEBUG
 	buf_block_t*	block;	/* buffer block containing the data, or NULL */
 #endif /* UNIV_DEBUG */
 	void*		data;	/* pointer to the data */
 	ulint		fold;	/* fold value for the data */
 };
 #ifndef UNIV_NONINL
 #include "ha0ha.ic"
 #endif
 #endif
--- a/include/ha0ha.ic
+++ b/include/ha0ha.ic
@@ -0,0 +1,198 @@
 /************************************************************************
 The hash table with external chains
 (c) 1994-1997 Innobase Oy
 Created 8/18/1994 Heikki Tuuri
 *************************************************************************/
 #include "ut0rnd.h"
 #include "mem0mem.h"
 /***************************************************************
 Deletes a hash node. Declared here because the inline function
 ha_search_and_delete_if_found() in this file calls it. */
 UNIV_INTERN
 void
 ha_delete_hash_node(
 /*================*/
 	hash_table_t*	table,		/* in: hash table */
 	ha_node_t*	del_node);	/* in: node to be deleted */
 /**********************************************************************
 Returns the data pointer stored in a hash chain node. */
 UNIV_INLINE
 void*
 ha_node_get_data(
 /*=============*/
 				/* out: pointer to the data */
 	ha_node_t*	node)	/* in: hash chain node */
 {
 	void*	data = node->data;
 	return(data);
 }
 /**********************************************************************
 Stores a data pointer in a hash chain node. In UNIV_DEBUG builds the
 buffer block containing the data is recorded in the node as well. */
 UNIV_INLINE
 void
 ha_node_set_data_func(
 /*==================*/
 	ha_node_t*	node,	/* in: hash chain node */
 #ifdef UNIV_DEBUG
 	buf_block_t*	block,	/* in: buffer block containing the data */
 #endif /* UNIV_DEBUG */
 	void*		data)	/* in: pointer to the data */
 {
 	node->data = data;
 #ifdef UNIV_DEBUG
 	node->block = block;
 #endif /* UNIV_DEBUG */
 }
 /* Wrapper with a build-independent argument list (node, block, data):
 the block argument is forwarded only when UNIV_DEBUG is defined. */
 #ifdef UNIV_DEBUG
 # define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d)
 #else /* UNIV_DEBUG */
 # define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d)
 #endif /* UNIV_DEBUG */
 /**********************************************************************
 Returns the successor of a node in its hash chain. */
 UNIV_INLINE
 ha_node_t*
 ha_chain_get_next(
 /*==============*/
 				/* out: next node, NULL if none */
 	ha_node_t*	node)	/* in: hash chain node */
 {
 	ha_node_t*	successor = node->next;
 	return(successor);
 }
 /**********************************************************************
 Returns the head of the hash chain that a fold value maps to. */
 UNIV_INLINE
 ha_node_t*
 ha_chain_get_first(
 /*===============*/
 				/* out: first node, NULL if none */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold)	/* in: fold value determining the chain */
 {
 	/* index of the cell the fold value hashes to */
 	ulint	cell_no = hash_calc_hash(fold, table);
 	return((ha_node_t*) hash_get_nth_cell(table, cell_no)->node);
 }
 /*****************************************************************
 Walks the hash chain selected by fold and returns the first node whose
 stored fold value equals it. */
 UNIV_INLINE
 ha_node_t*
 ha_search(
 /*======*/
 				/* out: pointer to the first hash table node
 				in chain having the fold number, NULL if not
 				found */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold)	/* in: folded value of the searched data */
 {
 	ha_node_t*	cur;
 	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
 	for (cur = ha_chain_get_first(table, fold);
 	     cur != NULL;
 	     cur = ha_chain_get_next(cur)) {
 		if (cur->fold == fold) {
 			break;
 		}
 	}
 	/* cur is the matching node, or NULL when the chain was exhausted */
 	return(cur);
 }
 /*****************************************************************
 Walks the hash chain selected by fold and returns the data pointer of
 the first node whose stored fold value equals it. */
 UNIV_INLINE
 void*
 ha_search_and_get_data(
 /*===================*/
 				/* out: pointer to the data of the first hash
 				table node in chain having the fold number,
 				NULL if not found */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold)	/* in: folded value of the searched data */
 {
 	ha_node_t*	cur;
 	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
 	cur = ha_chain_get_first(table, fold);
 	/* skip the nodes that hashed to this chain with another fold */
 	while (cur != NULL && cur->fold != fold) {
 		cur = ha_chain_get_next(cur);
 	}
 	if (cur == NULL) {
 		return(NULL);
 	}
 	return(cur->data);
 }
 /*************************************************************
 Looks for the node holding a known data pointer. Only the chain selected
 by fold is walked, and nodes are matched by data pointer, not by their
 stored fold value. */
 UNIV_INLINE
 ha_node_t*
 ha_search_with_data(
 /*================*/
 				/* out: pointer to the hash table node, NULL
 				if not found in the table */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: folded value of the searched data */
 	void*		data)	/* in: pointer to the data */
 {
 	ha_node_t*	cur;
 	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
 	for (cur = ha_chain_get_first(table, fold);
 	     cur != NULL;
 	     cur = ha_chain_get_next(cur)) {
 		if (cur->data == data) {
 			return(cur);
 		}
 	}
 	return(NULL);
 }
 /*************************************************************
 Looks for the node holding a known data pointer and, if there is one,
 deletes that node from the hash table. */
 UNIV_INLINE
 ibool
 ha_search_and_delete_if_found(
 /*==========================*/
 				/* out: TRUE if found */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold,	/* in: folded value of the searched data */
 	void*		data)	/* in: pointer to the data */
 {
 	ha_node_t*	found;
 	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
 	found = ha_search_with_data(table, fold, data);
 	if (found == NULL) {
 		/* nothing to delete */
 		return(FALSE);
 	}
 	ha_delete_hash_node(table, found);
 	return(TRUE);
 }
--- a/include/ha0storage.h
+++ b/include/ha0storage.h
@@ -0,0 +1,113 @@
 /******************************************************
 Hash storage.
 Provides a data structure that stores chunks of data in
 its own storage, avoiding duplicates.
 (c) 2007 Innobase Oy
 Created September 22, 2007 Vasil Dimov
 *******************************************************/
 #ifndef ha0storage_h
 #define ha0storage_h
 #include "univ.i"
 /* Initial heap size, in bytes, that ha_storage_create() uses when it is
 passed 0 for initial_heap_bytes. More memory is allocated later when/if
 it is needed. */
 #define HA_STORAGE_DEFAULT_HEAP_BYTES	1024
 /* Number of hash cells that ha_storage_create() requests when it is
 passed 0 for initial_hash_cells. It is a constant per ha_storage's
 lifetime. */
 #define HA_STORAGE_DEFAULT_HASH_CELLS	4096
 /* Opaque handle; the struct is defined in ha0storage.ic */
 typedef struct ha_storage_struct	ha_storage_t;
 /***********************************************************************
 Creates a hash storage. If any of the parameters is 0, then a default
 value is used (HA_STORAGE_DEFAULT_HEAP_BYTES and
 HA_STORAGE_DEFAULT_HASH_CELLS respectively). */
 UNIV_INLINE
 ha_storage_t*
 ha_storage_create(
 /*==============*/
 					/* out, own: hash storage */
 	ulint	initial_heap_bytes,	/* in: initial heap's size */
 	ulint	initial_hash_cells);	/* in: initial number of cells
 					in the hash table */
 /***********************************************************************
 Copies data into the storage and returns a pointer to the copy. If the
 same data chunk is already present, then pointer to it is returned.
 Data chunks are considered to be equal if len1 == len2 and
 memcmp(data1, data2, len1) == 0. If "data" is not present (and thus
 data_len bytes need to be allocated) and the size of storage is going to
 become more than "memlim" then "data" is not added and NULL is returned.
 To disable this behavior "memlim" can be set to 0, which stands for
 "no limit".
 NOTE(review): unlike the other declarations in this header, this one
 carries no UNIV_INTERN/UNIV_INLINE linkage macro - confirm that this is
 intended. */
 const void*
 ha_storage_put_memlim(
 /*==================*/
 					/* out: pointer to the copy, or NULL
 					if the memory limit would be
 					exceeded */
 	ha_storage_t*	storage,	/* in/out: hash storage */
 	const void*	data,		/* in: data to store */
 	ulint		data_len,	/* in: data length */
 	ulint		memlim);	/* in: memory limit to obey,
 					0 means no limit */
 /***********************************************************************
 Same as ha_storage_put_memlim() but without memory limit (memlim is
 passed as 0). */
 #define ha_storage_put(storage, data, data_len)	\
 	ha_storage_put_memlim((storage), (data), (data_len), 0)
 /***********************************************************************
 Copies string into the storage and returns a pointer to the copy. If the
 same string is already present, then pointer to it is returned.
 Strings are considered to be equal if strcmp(str1, str2) == 0.
 The terminating '\0' is stored too (strlen(str) + 1 bytes are put).
 Note that "str" is evaluated twice, so it must not have side effects. */
 #define ha_storage_put_str(storage, str)	\
 	((const char*) ha_storage_put((storage), (str), strlen(str) + 1))
 /***********************************************************************
 Copies string into the storage and returns a pointer to the copy obeying
 a memory limit. The terminating '\0' is stored too (strlen(str) + 1
 bytes are put). Note that "str" is evaluated twice, so it must not have
 side effects. */
 #define ha_storage_put_str_memlim(storage, str, memlim)	\
 	((const char*) ha_storage_put_memlim((storage), (str),	\
 					     strlen(str) + 1, (memlim)))
 /***********************************************************************
 Empties a hash storage, freeing memory occupied by data chunks.
 This invalidates any pointers previously returned by ha_storage_put().
 The hash storage is not invalidated itself and can be used again.
 The storage is passed by address because the storage object is
 re-allocated and *storage is overwritten with the new pointer. */
 UNIV_INLINE
 void
 ha_storage_empty(
 /*=============*/
 	ha_storage_t**	storage);	/* in/out: hash storage */
 /***********************************************************************
 Frees a hash storage and everything it contains, it cannot be used after
 this call.
 This invalidates any pointers previously returned by ha_storage_put().
 The storage object itself is freed too, so the passed pointer must not
 be dereferenced afterwards.
 */
 UNIV_INLINE
 void
 ha_storage_free(
 /*============*/
 	ha_storage_t*	storage);	/* in/out: hash storage */
 /***********************************************************************
 Gets the size of the memory used by a storage: the heap plus the hash
 table object and its cell array. */
 UNIV_INLINE
 ulint
 ha_storage_get_size(
 /*================*/
 						/* out: bytes used */
 	const ha_storage_t*	storage);	/* in: hash storage */
 #ifndef UNIV_NONINL
 #include "ha0storage.ic"
 #endif
 #endif /* ha0storage_h */
--- a/include/ha0storage.ic
+++ b/include/ha0storage.ic
@@ -0,0 +1,130 @@
 /******************************************************
 Hash storage.
 Provides a data structure that stores chunks of data in
 its own storage, avoiding duplicates.
 (c) 2007 Innobase Oy
 Created September 24, 2007 Vasil Dimov
 *******************************************************/
 #include "univ.i"
 #include "ha0storage.h"
 #include "hash0hash.h"
 #include "mem0mem.h"
 /* The hash storage object. ha_storage_create() allocates this struct
 from its own "heap" member. */
 struct ha_storage_struct {
 	mem_heap_t*	heap;	/* storage from which memory is
 				allocated */
 	hash_table_t*	hash;	/* hash table used to avoid
 				duplicates */
 };
 /* Objects of this type are put in the hash; each one describes one
 stored data chunk. */
 typedef struct ha_storage_node_struct ha_storage_node_t;
 struct ha_storage_node_struct {
 	ulint			data_len;/* length of the data */
 	const void*		data;	/* pointer to data */
 	ha_storage_node_t*	next;	/* next node in hash chain */
 };
 /***********************************************************************
 Creates a hash storage. A parameter passed as 0 is replaced by its
 default value (HA_STORAGE_DEFAULT_HEAP_BYTES or
 HA_STORAGE_DEFAULT_HASH_CELLS). */
 UNIV_INLINE
 ha_storage_t*
 ha_storage_create(
 /*==============*/
 					/* out, own: hash storage */
 	ulint	initial_heap_bytes,	/* in: initial heap's size */
 	ulint	initial_hash_cells)	/* in: initial number of cells
 					in the hash table */
 {
 	ulint		heap_bytes = initial_heap_bytes
 		? initial_heap_bytes : HA_STORAGE_DEFAULT_HEAP_BYTES;
 	ulint		hash_cells = initial_hash_cells
 		? initial_hash_cells : HA_STORAGE_DEFAULT_HASH_CELLS;
 	mem_heap_t*	heap;
 	ha_storage_t*	storage;
 	/* the storage object itself lives inside its own heap, hence the
 	extra sizeof(ha_storage_t) bytes */
 	heap = mem_heap_create(sizeof(ha_storage_t)
 			       + heap_bytes);
 	storage = (ha_storage_t*) mem_heap_alloc(heap,
 						 sizeof(ha_storage_t));
 	storage->hash = hash_create(hash_cells);
 	storage->heap = heap;
 	return(storage);
 }
 /***********************************************************************
 Empties a hash storage, freeing memory occupied by data chunks.
 This invalidates any pointers previously returned by ha_storage_put().
 The hash storage stays usable; *storage is replaced by a pointer to a
 freshly allocated storage object. */
 UNIV_INLINE
 void
 ha_storage_empty(
 /*=============*/
 	ha_storage_t**	storage)	/* in/out: hash storage */
 {
 	/* keep a copy of the descriptor: the original is stored in the
 	heap that is emptied below */
 	ha_storage_t	saved = **storage;
 	hash_table_clear(saved.hash);
 	mem_heap_empty(saved.heap);
 	/* re-create the descriptor inside the now empty heap */
 	*storage = (ha_storage_t*) mem_heap_alloc(saved.heap,
 						  sizeof(ha_storage_t));
 	**storage = saved;
 }
 /***********************************************************************
 Frees a hash storage and everything it contains; it cannot be used
 after this call.
 This invalidates any pointers previously returned by ha_storage_put().
 */
 UNIV_INLINE
 void
 ha_storage_free(
 /*============*/
 	ha_storage_t*	storage)	/* in/out: hash storage */
 {
 	/* copy the descriptor out first: it is stored within the heap
 	that is freed last */
 	ha_storage_t	copy = *storage;
 	hash_table_free(copy.hash);
 	mem_heap_free(copy.heap);
 }
 /***********************************************************************
 Gets the size of the memory used by a storage: the heap size plus the
 hash table object and its array of cells. */
 UNIV_INLINE
 ulint
 ha_storage_get_size(
 /*================*/
 						/* out: bytes used */
 	const ha_storage_t*	storage)	/* in: hash storage */
 {
 	/* this assumes hash->heap and hash->heaps are NULL */
 	return(mem_heap_get_size(storage->heap)
 	       + sizeof(hash_table_t)
 	       + sizeof(hash_cell_t) * hash_get_n_cells(storage->hash));
 }
--- a/include/ha_prototypes.h
+++ b/include/ha_prototypes.h
@@ -0,0 +1,162 @@
 #ifndef HA_INNODB_PROTOTYPES_H
 #define HA_INNODB_PROTOTYPES_H
 #ifndef UNIV_HOTBACKUP
 #include "univ.i" /* ulint, uint */
 #include "m_ctype.h" /* CHARSET_INFO */
 /* Prototypes for global functions in ha_innodb.cc that are called by
 InnoDB's C-code. */
 /*************************************************************************
 Wrapper around MySQL's copy_and_convert function, see it for
 documentation.
 NOTE(review): the parameters and the return value are not documented
 here; judging by the names they are a destination buffer, its length and
 charset, a source buffer, its length and charset, and an out-parameter
 for an error count - confirm against copy_and_convert(). */
 UNIV_INTERN
 ulint
 innobase_convert_string(
 /*====================*/
 	void*		to,
 	ulint		to_length,
 	CHARSET_INFO*	to_cs,
 	const void*	from,
 	ulint		from_length,
 	CHARSET_INFO*	from_cs,
 	uint*		errors);
 /***********************************************************************
 Formats the raw data in "data" (in InnoDB on-disk format) that is of
 type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
 the result to "buf". The result is converted to "system_charset_info".
 Not more than "buf_size" bytes are written to "buf".
 The result is always '\0'-terminated (provided buf_size > 0) and the
 number of bytes that were written to "buf" is returned (including the
 terminating '\0'). */
 UNIV_INTERN
 ulint
 innobase_raw_format(
 /*================*/
 					/* out: number of bytes
 					that were written, including
 					the terminating '\0' */
 	const char*	data,		/* in: raw data */
 	ulint		data_len,	/* in: raw data length
 					in bytes */
 	ulint		charset_coll,	/* in: charset collation */
 	char*		buf,		/* out: output buffer */
 	ulint		buf_size);	/* in: output buffer size
 					in bytes */
 /*********************************************************************
 Convert a table or index name to the MySQL system_charset_info (UTF-8)
 and quote it if needed.
 NOTE(review): whether the output is '\0'-terminated is not stated here;
 the function reports the end of the written data through its return
 value - confirm against the definition. */
 UNIV_INTERN
 char*
 innobase_convert_name(
 /*==================*/
 				/* out: pointer to the end of buf */
 	char*		buf,	/* out: buffer for converted identifier */
 	ulint		buflen,	/* in: length of buf, in bytes */
 	const char*	id,	/* in: identifier to convert */
 	ulint		idlen,	/* in: length of id, in bytes */
 	void*		thd,	/* in: MySQL connection thread, or NULL */
 	ibool		table_id);/* in: TRUE=id is a table or database name;
 				FALSE=id is an index name */
 /**********************************************************************
 Returns true if the thread is the replication thread on the slave
 server. Used in srv_conc_enter_innodb() to determine if the thread
 should be allowed to enter InnoDB - the replication thread is treated
 differently than other threads. Also used in
 srv_conc_force_exit_innodb(). The thread handle is passed as void*
 because this header is read by InnoDB's C code. */
 UNIV_INTERN
 ibool
 thd_is_replication_slave_thread(
 /*============================*/
 			/* out: true if thd is the replication thread */
 	void*	thd);	/* in: thread handle (THD*) */
 /**********************************************************************
 Returns true if the transaction this thread is processing has edited
 non-transactional tables. Used by the deadlock detector when deciding
 which transaction to roll back in case of a deadlock - we try to avoid
 rolling back transactions that have edited non-transactional tables. */
 UNIV_INTERN
 ibool
 thd_has_edited_nontrans_tables(
 /*===========================*/
 			/* out: true if non-transactional tables have
 			been edited */
 	void*	thd);	/* in: thread handle (THD*) */
 /*****************************************************************
 Prints info of a THD object (== user session thread) to the given file.
 See innobase_mysql_prepare_print_arbitrary_thd() for the precaution
 needed when thd does not belong to the current thread. */
 UNIV_INTERN
 void
 innobase_mysql_print_thd(
 /*=====================*/
 	FILE*	f,		/* in: output stream */
 	void*	thd,		/* in: pointer to a MySQL THD object */
 	uint	max_query_len);	/* in: max query length to print, or 0 to
 				   use the default max length */
 /******************************************************************
 Converts a MySQL type to an InnoDB type. Note that this function returns
 the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
 VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
 Both pointer arguments are declared nonnull. */
 UNIV_INTERN
 ulint
 get_innobase_type_from_mysql_type(
 /*==============================*/
 					/* out: DATA_BINARY,
 					DATA_VARCHAR, ... */
 	ulint*		unsigned_flag,	/* out: DATA_UNSIGNED if an
 					'unsigned type';
 					at least ENUM and SET,
 					and unsigned integer
 					types are 'unsigned types' */
 	const void*	field)		/* in: MySQL Field */
 	__attribute__((nonnull));
 /*****************************************************************
 If you want to print a thd that is not associated with the current thread,
 you must call this function before reserving the InnoDB kernel_mutex, to
 protect MySQL from setting thd->query NULL. If you print a thd of the current
 thread, we know that MySQL cannot modify thd->query, and it is not necessary
 to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
 the kernel_mutex. Every call must be paired with a call to
 innobase_mysql_end_print_arbitrary_thd(), which releases the mutex
 reserved here. */
 UNIV_INTERN
 void
 innobase_mysql_prepare_print_arbitrary_thd(void);
 /*============================================*/
 /*****************************************************************
 Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
 In the InnoDB latching order, the mutex sits right above the
 kernel_mutex.  In debug builds, we assert that the kernel_mutex is
 released before this function is invoked, so release the kernel_mutex
 first. */
 UNIV_INTERN
 void
 innobase_mysql_end_print_arbitrary_thd(void);
 /*========================================*/
 /**********************************************************************
 Gets the minimum and maximum length, in bytes, of a character in the
 given character set. */
 UNIV_INTERN
 void
 innobase_get_cset_width(
 /*====================*/
 	ulint	cset,		/* in: MySQL charset-collation code */
 	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
 	ulint*	mbmaxlen);	/* out: maximum length of a char (in bytes) */
 /**********************************************************************
 Compares NUL-terminated UTF-8 strings case insensitively. */
 UNIV_INTERN
 int
 innobase_strcasecmp(
 /*================*/
 				/* out: 0 if a=b, <0 if a<b, >0 if a>b */
 	const char*	a,	/* in: first string to compare */
 	const char*	b);	/* in: second string to compare */
 #endif
 #endif
--- a/include/handler0alter.h
+++ b/include/handler0alter.h
@@ -0,0 +1,25 @@
 /******************************************************
 Smart ALTER TABLE
 (c) 2005-2007 Innobase Oy
 *******************************************************/
 /*****************************************************************
 Copies an InnoDB record to table->record[0]. The offsets array must have
 been computed for this same record and index. */
 UNIV_INTERN
 void
 innobase_rec_to_mysql(
 /*==================*/
 	TABLE*			table,		/* in/out: MySQL table */
 	const rec_t*		rec,		/* in: record */
 	const dict_index_t*	index,		/* in: index */
 	const ulint*		offsets);	/* in: rec_get_offsets(
 						rec, index, ...) */
 /*****************************************************************
 Resets table->record[0].
 NOTE(review): what the record is reset to (e.g. column defaults) is not
 visible in this header - confirm against the definition. */
 UNIV_INTERN
 void
 innobase_rec_reset(
 /*===============*/
 	TABLE*			table);		/* in/out: MySQL table */
--- a/include/hash0hash.h
+++ b/include/hash0hash.h
@@ -0,0 +1,390 @@
 /******************************************************
 The simple hash table utility
 (c) 1997 Innobase Oy
 Created 5/20/1997 Heikki Tuuri
 *******************************************************/
 #ifndef hash0hash_h
 #define hash0hash_h
 #include "univ.i"
 #include "mem0mem.h"
 #include "sync0sync.h"
 /* The hash table and one cell of its cell array */
 typedef struct hash_table_struct hash_table_t;
 typedef struct hash_cell_struct hash_cell_t;
 /* Untyped pointer to a chain node; the node type is chosen by the user
 of the hash table */
 typedef void*	hash_node_t;
 /* Fix Bug #13859: symbol collision between imap/mysql. Every use of
 hash_create in code that includes this header is renamed to
 hash0_create by this macro. */
 #define hash_create hash0_create
 /*****************************************************************
 Creates a hash table with >= n array cells. The actual number
 of cells is chosen to be a prime number slightly bigger than n.
 Free the table with hash_table_free(). */
 UNIV_INTERN
 hash_table_t*
 hash_create(
 /*========*/
 			/* out, own: created table */
 	ulint	n);	/* in: number of array cells */
 /*****************************************************************
 Creates a mutex array to protect a hash table. Call this through the
 hash_create_mutexes() macro below, which hides the debug-only sync_level
 parameter. */
 UNIV_INTERN
 void
 hash_create_mutexes_func(
 /*=====================*/
 	hash_table_t*	table,		/* in: hash table */
 #ifdef UNIV_SYNC_DEBUG
 	ulint		sync_level,	/* in: latching order level of the
 					mutexes: used in the debug version */
 #endif /* UNIV_SYNC_DEBUG */
 	ulint		n_mutexes);	/* in: number of mutexes */
 /* Wrapper with a build-independent argument list. Note the argument
 order: hash_create_mutexes(table, n_mutexes, level), whereas the function
 takes (table, sync_level, n_mutexes). The level argument is dropped
 (never evaluated) unless UNIV_SYNC_DEBUG is defined. */
 #ifdef UNIV_SYNC_DEBUG
 # define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,level,n)
 #else /* UNIV_SYNC_DEBUG */
 # define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,n)
 #endif /* UNIV_SYNC_DEBUG */
 /*****************************************************************
 Frees a hash table. The table must not be used after this call. */
 UNIV_INTERN
 void
 hash_table_free(
 /*============*/
 	hash_table_t*	table);	/* in, own: hash table */
 /******************************************************************
 Calculates the hash value from a folded value. The result is used as
 a cell index for hash_get_nth_cell() by the macros in this file. */
 UNIV_INLINE
 ulint
 hash_calc_hash(
 /*===========*/
 				/* out: hashed value */
 	ulint		fold,	/* in: folded value */
 	hash_table_t*	table);	/* in: hash table */
 /************************************************************************
 Assert that the mutex for the table in a hash operation is owned.
 The check is made only if the table has mutexes and only in
 UNIV_SYNC_DEBUG builds; otherwise the macro expands to nothing.
 NOTE: the expansion already ends in a semicolon, and the macros in this
 file invoke HASH_ASSERT_OWNED() without a trailing semicolon. Keep that
 convention when using it. */
 #ifdef UNIV_SYNC_DEBUG
 # define HASH_ASSERT_OWNED(TABLE, FOLD) \
 ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));
 #else
 # define HASH_ASSERT_OWNED(TABLE, FOLD)
 #endif
 /***********************************************************************
 Inserts a struct to a hash table. The struct is appended to the end of
 the chain of the cell that FOLD hashes to, and its chain pointer is set
 to NULL. No check for duplicates is made.
 TYPE	type of the struct
 NAME	name of the chain pointer field in TYPE
 TABLE	hash table
 FOLD	folded value determining the chain
 DATA	pointer to the struct to insert
 TABLE, FOLD and DATA are evaluated more than once, so they must not have
 side effects. */
 #define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
 do {\
 	hash_cell_t*	cell3333;\
 	TYPE*		struct3333;\
 \
 	HASH_ASSERT_OWNED(TABLE, FOLD)\
 \
 	(DATA)->NAME = NULL;\
 \
 	cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
 \
 	if (cell3333->node == NULL) {\
 		cell3333->node = (DATA);\
 	} else {\
 		struct3333 = (TYPE*) cell3333->node;\
 \
 		while (struct3333->NAME != NULL) {\
 \
 			struct3333 = (TYPE*) struct3333->NAME;\
 		}\
 \
 		struct3333->NAME = (DATA);\
 	}\
 } while (0)
 /* Debug aids, active only in UNIV_HASH_DEBUG builds: HASH_INVALIDATE
 poisons the chain pointer NAME of the struct DATA with the value
 (void*) -1, and HASH_ASSERT_VALID asserts that a pointer does not have
 that poison value. In other builds both expand to empty statements. */
 #ifdef UNIV_HASH_DEBUG
 # define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1)
 # define HASH_INVALIDATE(DATA, NAME) ((DATA)->NAME = (void*) -1)
 #else
 # define HASH_ASSERT_VALID(DATA) do {} while (0)
 # define HASH_INVALIDATE(DATA, NAME) do {} while (0)
 #endif
 /***********************************************************************
 Deletes a struct from a hash table. The struct must be in the chain of
 the cell that FOLD hashes to: the chain is walked until the predecessor
 of DATA is found, and ut_a() fails if the end of the chain is reached
 first. Only the links are updated; the struct itself is not freed.
 TYPE	type of the struct
 NAME	name of the chain pointer field in TYPE
 TABLE	hash table
 FOLD	folded value determining the chain
 DATA	pointer to the struct to delete
 TABLE, FOLD and DATA are evaluated more than once, so they must not have
 side effects. */
 #define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
 do {\
 	hash_cell_t*	cell3333;\
 	TYPE*		struct3333;\
 \
 	HASH_ASSERT_OWNED(TABLE, FOLD)\
 \
 	cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
 \
 	if (cell3333->node == (DATA)) {\
 		HASH_ASSERT_VALID((DATA)->NAME);\
 		cell3333->node = (DATA)->NAME;\
 	} else {\
 		struct3333 = (TYPE*) cell3333->node;\
 \
 		while (struct3333->NAME != (DATA)) {\
 \
 			struct3333 = (TYPE*) struct3333->NAME;\
 			ut_a(struct3333);\
 		}\
 \
 		struct3333->NAME = (DATA)->NAME;\
 	}\
 	HASH_INVALIDATE((DATA), NAME);\
 } while (0)
 /***********************************************************************
 Gets the first struct in a hash chain, NULL if none. Note that HASH_VAL
 is the cell index (e.g. a value from hash_calc_hash()), not a fold
 value. */
 #define HASH_GET_FIRST(TABLE, HASH_VAL)\
 	(hash_get_nth_cell(TABLE, HASH_VAL)->node)
 /***********************************************************************
 Gets the next struct in a hash chain, NULL if none. NAME is the name of
 the chain pointer field in the struct that DATA points to. */
 #define HASH_GET_NEXT(NAME, DATA)	((DATA)->NAME)
 /************************************************************************
 Looks for a struct in a hash table. The chain that FOLD hashes to is
 walked, and for each struct the expression TEST is evaluated.
 NAME	name of the chain pointer field in the struct
 TABLE	hash table
 FOLD	folded value determining the chain
 TYPE	pointer type that DATA is cast to (e.g. foo_t*)
 DATA	in/out: lvalue used as the cursor; on exit it points to the
 	first struct for which TEST was true, or is NULL if none
 TEST	expression evaluated for each struct; it is expected to refer
 	to the current struct through DATA
 NOTE: the expansion is a plain { } block, not a do { } while (0)
 statement, so "HASH_SEARCH(...);" expands to a block followed by an empty
 statement. Take care when using it as the body of an if that has an
 else branch. */
 #define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, TEST)\
 {\
 \
 	HASH_ASSERT_OWNED(TABLE, FOLD)\
 \
 	(DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
 	HASH_ASSERT_VALID(DATA);\
 \
 	while ((DATA) != NULL) {\
 		if (TEST) {\
 			break;\
 		} else {\
 			HASH_ASSERT_VALID(HASH_GET_NEXT(NAME, DATA));\
 			(DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);\
 		}\
 	}\
 }
 /****************************************************************
 Gets the nth cell in a hash table.
 NOTE(review): presumably n must be less than hash_get_n_cells(table) -
 confirm against the definition. */
 UNIV_INLINE
 hash_cell_t*
 hash_get_nth_cell(
 /*==============*/
 				/* out: pointer to cell */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		n);	/* in: cell index */
 /*****************************************************************
 Clears a hash table so that all the cells become empty.
 NOTE(review): the nodes that were in the chains are presumably not
 freed by this call - confirm against the definition. */
 UNIV_INLINE
 void
 hash_table_clear(
 /*=============*/
 	hash_table_t*	table);	/* in/out: hash table */
 /*****************************************************************
 Returns the number of cells in a hash table, i.e. the size of its cell
 array. */
 UNIV_INLINE
 ulint
 hash_get_n_cells(
 /*=============*/
 				/* out: number of cells */
 	hash_table_t*	table);	/* in: table */
 /***********************************************************************
 Deletes a struct which is stored in the heap of the hash table, and compacts
 the heap. The fold value must be stored in the struct NODE in a field named
 'fold'.
 After unlinking NODE, the struct on top of the heap is copied into the
 place of NODE (unless NODE itself is the top struct), the chain that
 contained the top struct is re-linked to the new location, and the top
 of the heap is freed. Consequently the struct that was on top of the
 heap lives at the address NODE after this macro.
 TYPE	type of the struct
 NAME	name of the chain pointer field in TYPE
 TABLE	hash table
 NODE	pointer to the struct to delete
 TABLE and NODE are evaluated more than once, so they must not have side
 effects. */
 #define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
 do {\
 	TYPE*		node111;\
 	TYPE*		top_node111;\
 	hash_cell_t*	cell111;\
 	ulint		fold111;\
 \
 	fold111 = (NODE)->fold;\
 \
 	HASH_DELETE(TYPE, NAME, TABLE, fold111, (NODE));\
 \
 	top_node111 = (TYPE*)mem_heap_get_top(\
 				hash_get_heap(TABLE, fold111),\
 							sizeof(TYPE));\
 \
 	/* If the node to remove is not the top node in the heap, compact the\
 	heap of nodes by moving the top node in the place of NODE. */\
 \
 	if ((NODE) != top_node111) {\
 \
 		/* Copy the top node in place of NODE */\
 \
 		*(NODE) = *top_node111;\
 \
 		cell111 = hash_get_nth_cell(TABLE,\
 				hash_calc_hash(top_node111->fold, TABLE));\
 \
 		/* Look for the pointer to the top node, to update it */\
 \
 		if (cell111->node == top_node111) {\
 			/* The top node is the first in the chain */\
 \
 			cell111->node = (NODE);\
 		} else {\
 			/* We have to look for the predecessor of the top\
 			node */\
 			node111 = (TYPE*) cell111->node;\
 \
 			while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
 \
 				node111 = (TYPE*) HASH_GET_NEXT(NAME, node111);\
 			}\
 \
 			/* Now we have the predecessor node */\
 \
 			node111->NAME = (NODE);\
 		}\
 	}\
 \
 	/* Free the space occupied by the top node */\
 \
 	mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
 } while (0)
 /********************************************************************
 Moves every node of every chain of OLD_TABLE into NEW_TABLE.
 NODE_TYPE	struct type of the chain nodes
 PTR_NAME	name of the next-pointer field in NODE_TYPE
 FOLD_FUNC	function or macro which yields the fold value of a node
 The cells of OLD_TABLE are not modified by this macro itself. */
 #define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
 do {\
 	const ulint	n_cells2222 = hash_get_n_cells(OLD_TABLE);\
 	ulint		cell_no2222 = 0;\
 \
 	while (cell_no2222 < n_cells2222) {\
 		NODE_TYPE*	cur2222;\
 		NODE_TYPE*	succ2222;\
 \
 		for (cur2222 = HASH_GET_FIRST((OLD_TABLE), cell_no2222);\
 		     cur2222;\
 		     cur2222 = succ2222) {\
 \
 			ulint	fold2222;\
 \
 			/* Remember the successor first: the insert below\
 			is handed the node and may rewrite its link */\
 			succ2222 = cur2222->PTR_NAME;\
 			fold2222 = FOLD_FUNC(cur2222);\
 \
 			HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
 				fold2222, cur2222);\
 		}\
 \
 		cell_no2222++;\
 	}\
 } while (0)
 /****************************************************************
 Gets the mutex index for a fold value in a hash table. */
 UNIV_INLINE
 ulint
 hash_get_mutex_no(
 /*==============*/
 				/* out: mutex number */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold);	/* in: fold */
 /****************************************************************
 Gets the nth heap in a hash table. */
 UNIV_INLINE
 mem_heap_t*
 hash_get_nth_heap(
 /*==============*/
 				/* out: mem heap */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		i);	/* in: index of the heap */
 /****************************************************************
 Gets the heap for a fold value in a hash table. */
 UNIV_INLINE
 mem_heap_t*
 hash_get_heap(
 /*==========*/
 				/* out: mem heap */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold);	/* in: fold */
 /****************************************************************
 Gets the nth mutex in a hash table. */
 UNIV_INLINE
 mutex_t*
 hash_get_nth_mutex(
 /*===============*/
 				/* out: mutex */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		i);	/* in: index of the mutex */
 /****************************************************************
 Gets the mutex for a fold value in a hash table. */
 UNIV_INLINE
 mutex_t*
 hash_get_mutex(
 /*===========*/
 				/* out: mutex */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold);	/* in: fold */
 /****************************************************************
 Reserves the mutex for a fold value in a hash table. */
 UNIV_INTERN
 void
 hash_mutex_enter(
 /*=============*/
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold);	/* in: fold */
 /****************************************************************
 Releases the mutex for a fold value in a hash table. */
 UNIV_INTERN
 void
 hash_mutex_exit(
 /*============*/
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold);	/* in: fold */
 /****************************************************************
 Reserves all the mutexes of a hash table, in an ascending order. */
 UNIV_INTERN
 void
 hash_mutex_enter_all(
 /*=================*/
 	hash_table_t*	table);	/* in: hash table */
 /****************************************************************
 Releases all the mutexes of a hash table. */
 UNIV_INTERN
 void
 hash_mutex_exit_all(
 /*================*/
 	hash_table_t*	table);	/* in: hash table */
 /* One cell of the cell array of a hash table: it holds the pointer to the
 first node of the hash chain of that cell */
 struct hash_cell_struct{
 	void*	node;	/* hash chain node, NULL if none */
 };
 /* The hash table structure */
 struct hash_table_struct {
 #ifdef UNIV_DEBUG
 	ibool		adaptive;/* TRUE if this is the hash table of the
 				adaptive hash index */
 #endif /* UNIV_DEBUG */
 	ulint		n_cells;/* number of cells in the hash table */
 	hash_cell_t*	array;	/* pointer to cell array */
 	ulint		n_mutexes;/* if mutexes != NULL, then the number of
 				mutexes, must be a power of 2 */
 	mutex_t*	mutexes;/* NULL, or an array of mutexes used to
 				protect segments of the hash table */
 	mem_heap_t**	heaps;	/* if this is non-NULL, hash chain nodes for
 				external chaining can be allocated from these
 				memory heaps; there are then n_mutexes many of
 				these heaps */
 	mem_heap_t*	heap;	/* if this is non-NULL, hash_get_heap()
 				returns this heap for every fold value and
 				does not consult the heaps array */
 	ulint		magic_n;/* NOTE(review): presumably holds
 				HASH_TABLE_MAGIC_N while the table is valid;
 				no check is visible in this header, confirm
 				against the code which creates the table */
 };
 /* Magic number for hash_table_struct::magic_n */
 #define HASH_TABLE_MAGIC_N	76561114
 #ifndef UNIV_NONINL
 #include "hash0hash.ic"
 #endif
 #endif
--- a/include/hash0hash.ic
+++ b/include/hash0hash.ic
@@ -0,0 +1,143 @@
 /******************************************************
 The simple hash table utility
 (c) 1997 Innobase Oy
 Created 5/20/1997 Heikki Tuuri
 *******************************************************/
 #include "ut0rnd.h"
 /****************************************************************
 Returns the address of the cell at the given position of the cell array
 of a hash table. */
 UNIV_INLINE
 hash_cell_t*
 hash_get_nth_cell(
 /*==============*/
 				/* out: pointer to cell */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		n)	/* in: cell index; asserted in debug builds
 				to be less than table->n_cells */
 {
 	hash_cell_t*	cell;
 	ut_ad(n < table->n_cells);
 	cell = &table->array[n];
 	return(cell);
 }
 /*****************************************************************
 Empties a hash table by zero-filling its whole cell array. Only the cell
 array is written; nothing else is touched here. */
 UNIV_INLINE
 void
 hash_table_clear(
 /*=============*/
 	hash_table_t*	table)	/* in/out: hash table */
 {
 	hash_cell_t*	cells = table->array;
 	ulint		n_bytes = table->n_cells * sizeof(*cells);
 	memset(cells, 0x0, n_bytes);
 }
 /*****************************************************************
 Reads the size of the cell array of a hash table. */
 UNIV_INLINE
 ulint
 hash_get_n_cells(
 /*=============*/
 				/* out: number of cells */
 	hash_table_t*	table)	/* in: table */
 {
 	const ulint	n_cells = table->n_cells;
 	return(n_cells);
 }
 /******************************************************************
 Hashes a folded value with ut_hash_ulint(), using the number of cells of
 the table as the table size. */
 UNIV_INLINE
 ulint
 hash_calc_hash(
 /*===========*/
 				/* out: hashed value */
 	ulint		fold,	/* in: folded value */
 	hash_table_t*	table)	/* in: hash table */
 {
 	const ulint	table_size = table->n_cells;
 	return(ut_hash_ulint(fold, table_size));
 }
 /****************************************************************
 Maps a fold value to a mutex index: the remainder of the fold value when
 divided by the number of mutexes, which must be a power of 2. */
 UNIV_INLINE
 ulint
 hash_get_mutex_no(
 /*==============*/
 				/* out: mutex number */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold)	/* in: fold */
 {
 	const ulint	n_mutexes = table->n_mutexes;
 	ut_ad(ut_is_2pow(n_mutexes));
 	return(ut_2pow_remainder(fold, n_mutexes));
 }
 /****************************************************************
 Returns the memory heap stored at the given position of the heaps array
 of a hash table. */
 UNIV_INLINE
 mem_heap_t*
 hash_get_nth_heap(
 /*==============*/
 				/* out: mem heap */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		i)	/* in: index of the heap; asserted in debug
 				builds to be less than table->n_mutexes */
 {
 	mem_heap_t**	slot;
 	ut_ad(i < table->n_mutexes);
 	slot = table->heaps + i;
 	return(*slot);
 }
 /****************************************************************
 Returns the memory heap to use for a fold value. If the table has a single
 heap (table->heap is non-NULL), that heap is returned for every fold value;
 otherwise the heap is picked from the heaps array with the mutex number
 of the fold value. */
 UNIV_INLINE
 mem_heap_t*
 hash_get_heap(
 /*==========*/
 				/* out: mem heap */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold)	/* in: fold */
 {
 	if (table->heap != NULL) {
 		return(table->heap);
 	}
 	return(hash_get_nth_heap(table, hash_get_mutex_no(table, fold)));
 }
 /****************************************************************
 Returns the address of the mutex at the given position of the mutex array
 of a hash table. */
 UNIV_INLINE
 mutex_t*
 hash_get_nth_mutex(
 /*===============*/
 				/* out: mutex */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		i)	/* in: index of the mutex; asserted in debug
 				builds to be less than table->n_mutexes */
 {
 	mutex_t*	mutex;
 	ut_ad(i < table->n_mutexes);
 	mutex = &table->mutexes[i];
 	return(mutex);
 }
 /****************************************************************
 Returns the mutex which corresponds to a fold value: the fold value is
 first mapped to a mutex number and the mutex is then fetched by number. */
 UNIV_INLINE
 mutex_t*
 hash_get_mutex(
 /*===========*/
 				/* out: mutex */
 	hash_table_t*	table,	/* in: hash table */
 	ulint		fold)	/* in: fold */
 {
 	return(hash_get_nth_mutex(table, hash_get_mutex_no(table, fold)));
 }
--- a/include/ibuf0ibuf.h
+++ b/include/ibuf0ibuf.h
@@ -0,0 +1,316 @@
 /******************************************************
 Insert buffer
 (c) 1997 Innobase Oy
 Created 7/19/1997 Heikki Tuuri
 *******************************************************/
 #ifndef ibuf0ibuf_h
 #define ibuf0ibuf_h
 #include "univ.i"
 #include "dict0mem.h"
 #include "dict0dict.h"
 #include "mtr0mtr.h"
 #include "que0types.h"
 #include "ibuf0types.h"
 #include "fsp0fsp.h"
 extern ibuf_t*	ibuf;
 /**********************************************************************
 Creates the insert buffer data struct for a single tablespace. Reads the
 root page of the insert buffer tree in the tablespace. This function can
 be called only after the dictionary system has been initialized, as this
 creates also the insert buffer table and index for this tablespace. */
 UNIV_INTERN
 ibuf_data_t*
 ibuf_data_init_for_space(
 /*=====================*/
 			/* out, own: ibuf data struct, linked to the list
 			in ibuf control structure. */
 	ulint	space);	/* in: space id */
 /**********************************************************************
 Creates the insert buffer data structure at a database startup and
 initializes the data structures for the insert buffer of each tablespace. */
 UNIV_INTERN
 void
 ibuf_init_at_db_start(void);
 /*=======================*/
 /*************************************************************************
 Reads the biggest tablespace id from the high end of the insert buffer
 tree and updates the counter in fil_system. */
 UNIV_INTERN
 void
 ibuf_update_max_tablespace_id(void);
 /*===============================*/
 /*************************************************************************
 Initializes an ibuf bitmap page. */
 UNIV_INTERN
 void
 ibuf_bitmap_page_init(
 /*==================*/
 	buf_block_t*	block,	/* in: bitmap page */
 	mtr_t*		mtr);	/* in: mtr */
 /****************************************************************************
 Resets the free bits of the page in the ibuf bitmap. This is done in a
 separate mini-transaction, hence this operation does not restrict further
 work to only ibuf bitmap operations, which would result if the latch to the
 bitmap page were kept. */
 UNIV_INTERN
 void
 ibuf_reset_free_bits(
 /*=================*/
 	buf_block_t*	block);	/* in: index page; free bits are set to 0
 				if the index is a non-clustered
 				non-unique, and page level is 0 */
 /****************************************************************************
 Updates the free bits of an uncompressed page in the ibuf bitmap if
 there is not enough free space on the page any more. This is done in a
 separate mini-transaction, hence this operation does not restrict
 further work to only ibuf bitmap operations, which would result if the
 latch to the bitmap page were kept. */
 UNIV_INLINE
 void
 ibuf_update_free_bits_if_full(
 /*==========================*/
 	buf_block_t*	block,	/* in: index page to which we have added new
 				records; the free bits are updated if the
 				index is non-clustered and non-unique and
 				the page level is 0, and the page becomes
 				fuller */
 	ulint		max_ins_size,/* in: value of maximum insert size with
 				reorganize before the latest operation
 				performed to the page */
 	ulint		increase);/* in: upper limit for the additional space
 				used in the latest operation, if known, or
 				ULINT_UNDEFINED */
 /**************************************************************************
 Updates the free bits for an uncompressed page to reflect the present state.
 Does this in the mtr given, which means that the latching order rules virtually
 prevent any further operations for this OS thread until mtr is committed. */
 UNIV_INTERN
 void
 ibuf_update_free_bits_low(
 /*======================*/
 	const buf_block_t*	block,		/* in: index page */
 	ulint			max_ins_size,	/* in: value of
 						maximum insert size
 						with reorganize before
 						the latest operation
 						performed to the page */
 	mtr_t*			mtr);		/* in/out: mtr */
 /**************************************************************************
 Updates the free bits for a compressed page to reflect the present state.
 Does this in the mtr given, which means that the latching order rules virtually
 prevent any further operations for this OS thread until mtr is committed. */
 UNIV_INTERN
 void
 ibuf_update_free_bits_zip(
 /*======================*/
 	buf_block_t*	block,	/* in/out: index page */
 	mtr_t*		mtr);	/* in/out: mtr */
 /**************************************************************************
 Updates the free bits for the two pages to reflect the present state. Does
 this in the mtr given, which means that the latching order rules virtually
 prevent any further operations until mtr is committed. */
 UNIV_INTERN
 void
 ibuf_update_free_bits_for_two_pages_low(
 /*====================================*/
 	ulint		zip_size,/* in: compressed page size in bytes;
 				0 for uncompressed pages */
 	buf_block_t*	block1,	/* in: index page */
 	buf_block_t*	block2,	/* in: index page */
 	mtr_t*		mtr);	/* in: mtr */
 /**************************************************************************
 A basic partial test if an insert to the insert buffer could be possible and
 recommended. */
 UNIV_INLINE
 ibool
 ibuf_should_try(
 /*============*/
 	dict_index_t*	index,			/* in: index where to insert */
 	ulint		ignore_sec_unique);	/* in: if != 0, we should
 						ignore UNIQUE constraint on
 						a secondary index when we
 						decide */
 /**********************************************************************
 Returns TRUE if the current OS thread is performing an insert buffer
 routine. */
 UNIV_INTERN
 ibool
 ibuf_inside(void);
 /*=============*/
 		/* out: TRUE if inside an insert buffer routine: for instance,
 		a read-ahead of non-ibuf pages is then forbidden */
 /***************************************************************************
 Checks if a page address is an ibuf bitmap page (level 3 page) address. */
 UNIV_INLINE
 ibool
 ibuf_bitmap_page(
 /*=============*/
 			/* out: TRUE if a bitmap page */
 	ulint	zip_size,/* in: compressed page size in bytes;
 			0 for uncompressed pages */
 	ulint	page_no);/* in: page number */
 /***************************************************************************
 Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
 UNIV_INTERN
 ibool
 ibuf_page(
 /*======*/
 			/* out: TRUE if level 2 or level 3 page */
 	ulint	space,	/* in: space id */
 	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
 	ulint	page_no);/* in: page number */
 /***************************************************************************
 Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
 UNIV_INTERN
 ibool
 ibuf_page_low(
 /*==========*/
 			/* out: TRUE if level 2 or level 3 page */
 	ulint	space,	/* in: space id */
 	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
 	ulint	page_no,/* in: page number */
 	mtr_t*	mtr);	/* in: mtr which will contain an x-latch to the
 			bitmap page if the page is not one of the fixed
 			address ibuf pages */
 /***************************************************************************
 Frees excess pages from the ibuf free list. This function is called when an OS
 thread calls fsp services to allocate a new file segment, or a new page to a
 file segment, and the thread did not own the fsp latch before this call. */
 UNIV_INTERN
 void
 ibuf_free_excess_pages(
 /*===================*/
 	ulint	space);		/* in: space id */
 /*************************************************************************
 Makes an index insert to the insert buffer, instead of directly to the disk
 page, if this is possible. Does not do insert if the index is clustered
 or unique. */
 UNIV_INTERN
 ibool
 ibuf_insert(
 /*========*/
 				/* out: TRUE if success */
 	const dtuple_t*	entry,	/* in: index entry to insert */
 	dict_index_t*	index,	/* in: index where to insert */
 	ulint		space,	/* in: space id where to insert */
 	ulint		zip_size,/* in: compressed page size in bytes, or 0 */
 	ulint		page_no,/* in: page number where to insert */
 	que_thr_t*	thr);	/* in: query thread */
 /*************************************************************************
 When an index page is read from a disk to the buffer pool, this function
 inserts to the page the possible index entries buffered in the insert buffer.
 The entries are deleted from the insert buffer. If the page is not read, but
 created in the buffer pool, this function deletes its buffered entries from
 the insert buffer; there can exist entries for such a page if the page
 belonged to an index which subsequently was dropped. */
 UNIV_INTERN
 void
 ibuf_merge_or_delete_for_page(
 /*==========================*/
 	buf_block_t*	block,	/* in: if page has been read from
 				disk, pointer to the page x-latched,
 				else NULL */
 	ulint		space,	/* in: space id of the index page */
 	ulint		page_no,/* in: page number of the index page */
 	ulint		zip_size,/* in: compressed page size in bytes,
 				or 0 */
 	ibool		update_ibuf_bitmap);/* in: normally this is set
 				to TRUE, but if we have deleted or are
 				deleting the tablespace, then we
 				naturally do not want to update a
 				non-existent bitmap page */
 /*************************************************************************
 Deletes all entries in the insert buffer for a given space id. This is used
 in DISCARD TABLESPACE and IMPORT TABLESPACE.
 NOTE: this does not update the page free bitmaps in the space. The space will
 become CORRUPT when you call this function! */
 UNIV_INTERN
 void
 ibuf_delete_for_discarded_space(
 /*============================*/
 	ulint	space);	/* in: space id */
 /*************************************************************************
 Contracts insert buffer trees by reading pages to the buffer pool. */
 UNIV_INTERN
 ulint
 ibuf_contract(
 /*==========*/
 			/* out: a lower limit for the combined size in bytes
 			of entries which will be merged from ibuf trees to the
 			pages read, 0 if ibuf is empty */
 	ibool	sync);	/* in: TRUE if the caller wants to wait for the
 			issued read with the highest tablespace address
 			to complete */
 /*************************************************************************
 Contracts insert buffer trees by reading pages to the buffer pool. */
 UNIV_INTERN
 ulint
 ibuf_contract_for_n_pages(
 /*======================*/
 			/* out: a lower limit for the combined size in bytes
 			of entries which will be merged from ibuf trees to the
 			pages read, 0 if ibuf is empty */
 	ibool	sync,	/* in: TRUE if the caller wants to wait for the
 			issued read with the highest tablespace address
 			to complete */
 	ulint	n_pages);/* in: try to read at least this many pages to
 			the buffer pool and merge the ibuf contents to
 			them */
 /*************************************************************************
 Parses a redo log record of an ibuf bitmap page init. */
 UNIV_INTERN
 byte*
 ibuf_parse_bitmap_init(
 /*===================*/
 				/* out: end of log record or NULL */
 	byte*		ptr,	/* in: buffer */
 	byte*		end_ptr,/* in: buffer end */
 	buf_block_t*	block,	/* in: block or NULL */
 	mtr_t*		mtr);	/* in: mtr or NULL */
 #ifdef UNIV_IBUF_COUNT_DEBUG
 /**********************************************************************
 Gets the ibuf count for a given page. */
 UNIV_INTERN
 ulint
 ibuf_count_get(
 /*===========*/
 			/* out: number of entries in the insert buffer
 			currently buffered for this page */
 	ulint	space,	/* in: space id */
 	ulint	page_no);/* in: page number */
 #endif
 /**********************************************************************
 Looks if the insert buffer is empty. */
 UNIV_INTERN
 ibool
 ibuf_is_empty(void);
 /*===============*/
 			/* out: TRUE if empty */
 /**********************************************************************
 Prints info of ibuf. */
 UNIV_INTERN
 void
 ibuf_print(
 /*=======*/
 	FILE*	file);	/* in: file where to print */
 #define IBUF_HEADER_PAGE_NO	FSP_IBUF_HEADER_PAGE_NO
 #define IBUF_TREE_ROOT_PAGE_NO	FSP_IBUF_TREE_ROOT_PAGE_NO
 /* The ibuf header page currently contains only the file segment header
 for the file segment from which the pages for the ibuf tree are allocated */
 #define IBUF_HEADER		PAGE_DATA
 #define	IBUF_TREE_SEG_HEADER	0	/* fseg header for ibuf tree */
 #ifndef UNIV_NONINL
 #include "ibuf0ibuf.ic"
 #endif
 #endif
--- a/include/ibuf0ibuf.ic
+++ b/include/ibuf0ibuf.ic
@@ -0,0 +1,308 @@
 /******************************************************
 Insert buffer
 (c) 1997 Innobase Oy
 Created 7/19/1997 Heikki Tuuri
 *******************************************************/
 #include "buf0lru.h"
 #include "page0page.h"
 #include "page0zip.h"
 extern ulint	ibuf_flush_count;
 /* If this number is n, an index page must contain at least the page size
 per n bytes of free space for ibuf to try to buffer inserts to this page.
 If there is this much of free space, the corresponding bits are set in the
 ibuf bitmap. */
 #define IBUF_PAGE_SIZE_PER_FREE_SPACE	32
 /* Insert buffer data struct for a single tablespace. The structs of all
 tablespaces are chained together through the data_list node. */
 struct ibuf_data_struct{
 	ulint		space;	/* space id */
 	ulint		seg_size;/* allocated pages of the file segment
 				containing ibuf header and tree */
 	ulint		size;	/* size of the insert buffer tree in pages */
 	ibool		empty;	/* after an insert to the ibuf tree is
 				performed, this is set to FALSE, and if a
 				contract operation finds the tree empty, this
 				is set to TRUE */
 	ulint		free_list_len;
 				/* length of the free list */
 	ulint		height;	/* tree height */
 	dict_index_t*	index;	/* insert buffer index */
 	UT_LIST_NODE_T(ibuf_data_t) data_list;
 				/* list of ibuf data structs */
 	ulint		n_inserts;/* number of inserts made to the insert
 				buffer */
 	ulint		n_merges;/* number of pages merged */
 	ulint		n_merged_recs;/* number of records merged */
 };
 /* Insert buffer control structure: total size figures and the base node
 of the list of the per-tablespace ibuf data structs */
 struct ibuf_struct{
 	ulint		size;		/* current size of the ibuf index
 					trees in pages */
 	ulint		max_size;	/* recommended maximum size in pages
 					for the ibuf index tree */
 	UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
 					/* list of ibuf data structs for
 					each tablespace */
 };
 /****************************************************************************
 Sets the free bits of the page in the ibuf bitmap. This is done in a separate
 mini-transaction, hence this operation does not restrict further work to only
 ibuf bitmap operations, which would result if the latch to the bitmap page
 were kept. The ibuf_set_free_bits() macro defined below wraps this function
 and hides the debug-only max_val parameter. */
 UNIV_INTERN
 void
 ibuf_set_free_bits_func(
 /*====================*/
 	buf_block_t*	block,	/* in: index page of a non-clustered index;
 				free bit is reset if page level is 0 */
 #ifdef UNIV_IBUF_DEBUG
 	ulint		max_val,/* in: ULINT_UNDEFINED or a maximum
 				value which the bits must have before
 				setting; this is for debugging */
 #endif /* UNIV_IBUF_DEBUG */
 	ulint		val);	/* in: value to set: < 4 */
 /* ibuf_set_free_bits(b,v,max): b is the block, v the value to set and max
 the debug-only maximum. The macro takes its arguments in the order
 (b,v,max), but with UNIV_IBUF_DEBUG defined it passes them on as (b,max,v)
 to match the parameter order of ibuf_set_free_bits_func(). Without
 UNIV_IBUF_DEBUG the max argument is dropped and never evaluated. */
 #ifdef UNIV_IBUF_DEBUG
 # define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v)
 #else /* UNIV_IBUF_DEBUG */
 # define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v)
 #endif /* UNIV_IBUF_DEBUG */
 /**************************************************************************
 A quick preliminary test of whether buffering an insert in the insert
 buffer could be possible and recommended. Clustered indexes are always
 rejected; unique indexes are rejected unless ignore_sec_unique is set.
 For every accepted call the global ibuf_flush_count is incremented, and on
 every 8th count buf_LRU_try_free_flushed_blocks() is invoked. */
 UNIV_INLINE
 ibool
 ibuf_should_try(
 /*============*/
 						/* out: TRUE if the insert
 						buffer should be tried */
 	dict_index_t*	index,			/* in: index where to insert */
 	ulint		ignore_sec_unique)	/* in: if != 0, we should
 						ignore UNIQUE constraint on
 						a secondary index when we
 						decide */
 {
 	if (dict_index_is_clust(index)) {
 		return(FALSE);
 	}
 	if (!ignore_sec_unique && dict_index_is_unique(index)) {
 		return(FALSE);
 	}
 	ibuf_flush_count++;
 	if (ibuf_flush_count % 8 == 0) {
 		buf_LRU_try_free_flushed_blocks();
 	}
 	return(TRUE);
 }
 /***************************************************************************
 Tests whether a page number is the address of an ibuf bitmap page. The
 page number is reduced modulo the page size in bytes (the compressed page
 size if given, else UNIV_PAGE_SIZE) and the result is compared to
 FSP_IBUF_BITMAP_OFFSET. */
 UNIV_INLINE
 ibool
 ibuf_bitmap_page(
 /*=============*/
 			/* out: TRUE if a bitmap page */
 	ulint	zip_size,/* in: compressed page size in bytes;
 			0 for uncompressed pages */
 	ulint	page_no)/* in: page number */
 {
 	ulint	size;
 	ut_ad(ut_is_2pow(zip_size));
 	size = zip_size ? zip_size : UNIV_PAGE_SIZE;
 	return(UNIV_UNLIKELY((page_no & (size - 1))
 			     == FSP_IBUF_BITMAP_OFFSET));
 }
 /*************************************************************************
 Converts the free space on a page to the value stored in the ibuf bitmap.
 The free space is counted in units of the page size divided by
 IBUF_PAGE_SIZE_PER_FREE_SPACE: 0, 1 and 2 units map to themselves, exactly
 3 units map to 2, and anything above 3 units maps to 3. */
 UNIV_INLINE
 ulint
 ibuf_index_page_calc_free_bits(
 /*===========================*/
 				/* out: value for ibuf bitmap bits */
 	ulint	zip_size,	/* in: compressed page size in bytes;
 				0 for uncompressed pages */
 	ulint	max_ins_size)	/* in: maximum insert size after reorganize
 				for the page */
 {
 	ulint	page_size;
 	ulint	n_units;
 	ut_ad(ut_is_2pow(zip_size));
 	ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
 	ut_ad(zip_size <= UNIV_PAGE_SIZE);
 	page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
 	n_units = max_ins_size
 		/ (page_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
 	switch (n_units) {
 	case 0:
 	case 1:
 	case 2:
 		return(n_units);
 	case 3:
 		return(2);
 	default:
 		return(3);
 	}
 }
 /*************************************************************************
 Converts an ibuf bitmap value back to an amount of free space in bytes.
 The value 3 stands for 4 units of free space, every other value for as
 many units as the value itself; one unit is the page size divided by
 IBUF_PAGE_SIZE_PER_FREE_SPACE. */
 UNIV_INLINE
 ulint
 ibuf_index_page_calc_free_from_bits(
 /*================================*/
 			/* out: maximum insert size after reorganize for the
 			page */
 	ulint	zip_size,/* in: compressed page size in bytes;
 			0 for uncompressed pages */
 	ulint	bits)	/* in: value for ibuf bitmap bits */
 {
 	ulint	n_units;
 	ut_ad(bits < 4);
 	ut_ad(ut_is_2pow(zip_size));
 	ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
 	ut_ad(zip_size <= UNIV_PAGE_SIZE);
 	n_units = (bits == 3) ? 4 : bits;
 	if (!zip_size) {
 		return(n_units
 		       * (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE));
 	}
 	return(n_units * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
 }
 /*************************************************************************
 Converts the free space on a compressed page to the value stored in the
 ibuf bitmap. The free space used is the smaller of the maximum insert size
 of the uncompressed frame and the maximum insert size reported for the
 compressed page; if the latter is negative, the result is 0. */
 UNIV_INLINE
 ulint
 ibuf_index_page_calc_free_zip(
 /*==========================*/
 					/* out: value for ibuf bitmap bits */
 	ulint			zip_size,
 					/* in: compressed page size in bytes */
 	const buf_block_t*	block)	/* in: buffer block */
 {
 	ulint	frame_max_ins;
 	lint	zip_max_ins;
 	ut_ad(zip_size == buf_block_get_zip_size(block));
 	ut_ad(zip_size);
 	frame_max_ins = page_get_max_insert_size_after_reorganize(
 		buf_block_get_frame(block), 1);
 	zip_max_ins = page_zip_max_ins_size(buf_block_get_page_zip(block),
 					    FALSE/* not clustered */);
 	if (UNIV_UNLIKELY(zip_max_ins < 0)) {
 		return(0);
 	}
 	if (UNIV_LIKELY(frame_max_ins > (ulint) zip_max_ins)) {
 		/* The compressed page is the tighter limit */
 		frame_max_ins = (ulint) zip_max_ins;
 	}
 	return(ibuf_index_page_calc_free_bits(zip_size, frame_max_ins));
 }
 /*************************************************************************
 Converts the free space on a page to the value stored in the ibuf bitmap.
 Compressed pages are delegated to ibuf_index_page_calc_free_zip(); for
 uncompressed pages the maximum insert size after reorganize of the frame
 is converted directly. */
 UNIV_INLINE
 ulint
 ibuf_index_page_calc_free(
 /*======================*/
 					/* out: value for ibuf bitmap bits */
 	ulint			zip_size,/* in: compressed page size in bytes;
 					0 for uncompressed pages */
 	const buf_block_t*	block)	/* in: buffer block */
 {
 	ut_ad(zip_size == buf_block_get_zip_size(block));
 	if (zip_size) {
 		return(ibuf_index_page_calc_free_zip(zip_size, block));
 	}
 	return(ibuf_index_page_calc_free_bits(
 		       0,
 		       page_get_max_insert_size_after_reorganize(
 			       buf_block_get_frame(block), 1)));
 }
 /****************************************************************************
 Lowers the free bits of an uncompressed page in the ibuf bitmap if the
 page no longer has as much free space as the bits claimed before the latest
 operation. The bits are only written when the new value is smaller than the
 old one. This is done in a separate mini-transaction, hence this operation
 does not restrict further work to only ibuf bitmap operations, which would
 result if the latch to the bitmap page were kept. */
 UNIV_INLINE
 void
 ibuf_update_free_bits_if_full(
 /*==========================*/
 	buf_block_t*	block,	/* in: index page to which we have added new
 				records; the free bits are updated if the
 				index is non-clustered and non-unique and
 				the page level is 0, and the page becomes
 				fuller */
 	ulint		max_ins_size,/* in: value of maximum insert size with
 				reorganize before the latest operation
 				performed to the page */
 	ulint		increase)/* in: upper limit for the additional space
 				used in the latest operation, if known, or
 				ULINT_UNDEFINED */
 {
 	ulint	bits_before;
 	ulint	bits_after;
 	ut_ad(!buf_block_get_page_zip(block));
 	bits_before = ibuf_index_page_calc_free_bits(0, max_ins_size);
 	/* NOTE(review): this compile-time check presumably guarantees that
 	an undefined 'increase' can never pass for a real size below */
 #if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE
 # error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE"
 #endif
 	if (increase > max_ins_size) {
 		/* The increase does not fit in the old maximum insert size:
 		compute the bits from the present state of the page */
 		bits_after = ibuf_index_page_calc_free(0, block);
 	} else {
 		bits_after = ibuf_index_page_calc_free_bits(
 			0, max_ins_size - increase);
 #ifdef UNIV_IBUF_DEBUG
 		ut_a(bits_after <= ibuf_index_page_calc_free(0, block));
 #endif
 	}
 	if (bits_after == 0) {
 		/* We move the page to the front of the buffer pool LRU list:
 		the purpose of this is to prevent those pages to which we
 		cannot make inserts using the insert buffer from slipping
 		out of the buffer pool */
 		buf_page_make_young(&block->page);
 	}
 	if (bits_after < bits_before) {
 		ibuf_set_free_bits(block, bits_after, bits_before);
 	}
 }
--- a/Show More
+++ b/Show More