1
0
mirror of https://github.com/MariaDB/server.git synced 2025-11-27 05:41:41 +03:00

Initial import of InnoDB-plugin 1.0.1 source tree

This commit is contained in:
Vadim Tkachenko
2008-11-30 22:10:29 -08:00
commit d6c7789c34
336 changed files with 203154 additions and 0 deletions

64
CMakeLists.txt Normal file
View File

@@ -0,0 +1,64 @@
# Copyright (C) 2006 MySQL AB
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
# Build script for the InnoDB storage engine library ("innobase").

# Debug configurations additionally compile with SAFEMALLOC and SAFE_MUTEX
# defined, for both C++ and C translation units.
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
# Preprocessor symbols applied to every source file in this directory.
# NOTE(review): _WIN32 is defined unconditionally here; presumably this CMake
# build is only ever used on Windows -- confirm before reusing elsewhere.
ADD_DEFINITIONS(-DMYSQL_SERVER -D_WIN32 -D_LIB)
# Header search path: server-wide headers, zlib, InnoDB's own include and
# handler directories, the SQL layer, regex and the bundled yaSSL headers.
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
${CMAKE_SOURCE_DIR}/storage/innobase/include
${CMAKE_SOURCE_DIR}/storage/innobase/handler
${CMAKE_SOURCE_DIR}/sql
${CMAKE_SOURCE_DIR}/regex
${CMAKE_SOURCE_DIR}/extra/yassl/include)
# Every translation unit that makes up the engine, grouped by subdirectory.
# Keep this list in sync with libinnobase_a_SOURCES in Makefile.am.
SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
buf/buf0buddy.c buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c
data/data0data.c data/data0type.c
dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c
dyn/dyn0dyn.c
eval/eval0eval.c eval/eval0proc.c
fil/fil0fil.c
fsp/fsp0fsp.c
fut/fut0fut.c fut/fut0lst.c
ha/ha0ha.c ha/hash0hash.c ha/ha0storage.c
ibuf/ibuf0ibuf.c
pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c
lock/lock0lock.c lock/lock0iter.c
log/log0log.c log/log0recv.c
mach/mach0data.c
mem/mem0mem.c mem/mem0pool.c
mtr/mtr0log.c mtr/mtr0mtr.c
os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c
page/page0cur.c page/page0page.c page/page0zip.c
que/que0que.c
handler/ha_innodb.cc handler/handler0alter.cc handler/i_s.cc handler/mysql_addons.cc
read/read0read.c
rem/rem0cmp.c rem/rem0rec.c
row/row0ext.c row/row0ins.c row/row0merge.c row/row0mysql.c
row/row0purge.c row/row0row.c row/row0sel.c row/row0uins.c
row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c
srv/srv0que.c srv/srv0srv.c srv/srv0start.c
sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c
thr/thr0loc.c
trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c)
# The library target is only declared when SOURCE_SUBLIBS is not set.
# It depends on the GenError target, which is defined outside this file
# (presumably it generates error-message headers -- confirm).
IF(NOT SOURCE_SUBLIBS)
ADD_LIBRARY(innobase ${INNOBASE_SOURCES})
ADD_DEPENDENCIES(innobase GenError)
ENDIF(NOT SOURCE_SUBLIBS)

352
COPYING Normal file
View File

@@ -0,0 +1,352 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
========
The licenses for most software are designed to take away your freedom
to share and change it. By contrast, the GNU General Public License is
intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not price.
Our General Public Licenses are designed to make sure that you have
the freedom to distribute copies of free software (and charge for this
service if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid anyone
to deny you these rights or to ask you to surrender the rights. These
restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether gratis
or for a fee, you must give the recipients all the rights that you
have. You must make sure that they, too, receive or can get the source
code. And you must show them these terms so they know their rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software patents.
We wish to avoid the danger that redistributors of a free program will
individually obtain patent licenses, in effect making the program
proprietary. To prevent this, we have made it clear that any patent
must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains a
notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program",
below, refers to any such program or work, and a "work based on
the Program" means either the Program or any derivative work under
copyright law: that is to say, a work containing the Program or a
portion of it, either verbatim or with modifications and/or
translated into another language. (Hereinafter, translation is
included without limitation in the term "modification".) Each
licensee is addressed as "you".
Activities other than copying, distribution and modification are
not covered by this License; they are outside its scope. The act
of running the Program is not restricted, and the output from the
Program is covered only if its contents constitute a work based on
the Program (independent of having been made by running the
Program). Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any
warranty; and give any other recipients of the Program a copy of
this License along with the Program.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange
for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a. You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b. You must cause any work that you distribute or publish, that
in whole or in part contains or is derived from the Program
or any part thereof, to be licensed as a whole at no charge
to all third parties under the terms of this License.
c. If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display
an announcement including an appropriate copyright notice and
a notice that there is no warranty (or else, saying that you
provide a warranty) and that users may redistribute the
program under these conditions, and telling the user how to
view a copy of this License. (Exception: if the Program
itself is interactive but does not normally print such an
announcement, your work based on the Program is not required
to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the
Program, and can be reasonably considered independent and separate
works in themselves, then this License, and its terms, do not
apply to those sections when you distribute them as separate
works. But when you distribute the same sections as part of a
whole which is a work based on the Program, the distribution of
the whole must be on the terms of this License, whose permissions
for other licensees extend to the entire whole, and thus to each
and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or
contest your rights to work written entirely by you; rather, the
intent is to exercise the right to control the distribution of
derivative or collective works based on the Program.
In addition, mere aggregation of another work not based on the
Program with the Program (or with a work based on the Program) on
a volume of a storage or distribution medium does not bring the
other work under the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms
of Sections 1 and 2 above provided that you also do one of the
following:
a. Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of
Sections 1 and 2 above on a medium customarily used for
software interchange; or,
b. Accompany it with a written offer, valid for at least three
years, to give any third-party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange; or,
c. Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with
such an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete
source code means all the source code for all modules it contains,
plus any associated interface definition files, plus the scripts
used to control compilation and installation of the executable.
However, as a special exception, the source code distributed need
not include anything that is normally distributed (in either
source or binary form) with the major components (compiler,
kernel, and so on) of the operating system on which the executable
runs, unless that component itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this
License. However, parties who have received copies, or rights,
from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify
or distribute the Program or its derivative works. These actions
are prohibited by law if you do not accept this License.
Therefore, by modifying or distributing the Program (or any work
based on the Program), you indicate your acceptance of this
License to do so, and all its terms and conditions for copying,
distributing or modifying the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program
subject to these terms and conditions. You may not impose any
further restrictions on the recipients' exercise of the rights
granted herein. You are not responsible for enforcing compliance
by third parties to this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent
issues), conditions are imposed on you (whether by court order,
agreement or otherwise) that contradict the conditions of this
License, they do not excuse you from the conditions of this
License. If you cannot distribute so as to satisfy simultaneously
your obligations under this License and any other pertinent
obligations, then as a consequence you may not distribute the
Program at all. For example, if a patent license would not permit
royalty-free redistribution of the Program by all those who
receive copies directly or indirectly through you, then the only
way you could satisfy both it and this License would be to refrain
entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable
under any particular circumstance, the balance of the section is
intended to apply and the section as a whole is intended to apply
in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of
any such claims; this section has the sole purpose of protecting
the integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is
willing to distribute software through any other system and a
licensee cannot impose that choice.
This section is intended to make thoroughly clear what is believed
to be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces,
the original copyright holder who places the Program under this
License may add an explicit geographical distribution limitation
excluding those countries, so that distribution is permitted only
in or among countries not thus excluded. In such case, this
License incorporates the limitation as if written in the body of
this License.
9. The Free Software Foundation may publish revised and/or new
versions of the General Public License from time to time. Such
new versions will be similar in spirit to the present version, but
may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies a version number of this License which applies
to it and "any later version", you have the option of following
the terms and conditions either of that version or of any later
version published by the Free Software Foundation. If the Program
does not specify a version number of this License, you may choose
any version ever published by the Free Software Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the
author to ask for permission. For software which is copyrighted
by the Free Software Foundation, write to the Free Software
Foundation; we sometimes make exceptions for this. Our decision
will be guided by the two goals of preserving the free status of
all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT
NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY
SERVICING, REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY
MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU
OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY
OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
=============================================
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.
Copyright (C) YYYY NAME OF AUTHOR
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the
appropriate parts of the General Public License. Of course, the
commands you use may be called something other than `show w' and `show
c'; they could even be mouse-clicks or menu items--whatever suits your
program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
SIGNATURE OF TY COON, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library,
you may consider it more useful to permit linking proprietary
applications with the library. If this is what you want to do, use the
GNU Library General Public License instead of this License.

70
ChangeLog Normal file
View File

@@ -0,0 +1,70 @@
2008-05-06 The InnoDB Team
* handler/ha_innodb.cc, include/srv0srv.h, include/sync0sync.h,
include/trx0sys.h, mysql-test/innodb-zip.result,
mysql-test/innodb-zip.test, srv/srv0srv.c, srv/srv0start.c,
sync/sync0sync.c, trx/trx0sys.c:
Implement the system tablespace tagging
* handler/ha_innodb.cc, handler/i_s.cc, include/univ.i,
srv/srv0start.c:
Add InnoDB version in INFORMATION_SCHEMA.PLUGINS.PLUGIN_VERSION,
in the startup message and in a server variable innodb_version.
* sync/sync0sync.c:
Fix a bug in the sync debug code where a lock with level
SYNC_LEVEL_VARYING would cause an assertion failure when a thread
tried to release it.
2008-04-30 The InnoDB Team
* Makefile.am:
Fix Bug#36434 ha_innodb.so is installed in the wrong directory
* handler/ha_innodb.cc:
Merge change from MySQL (Fix Bug#35406 5.1-opt crashes on select from
I_S.REFERENTIAL_CONSTRAINTS):
ChangeSet@1.2563, 2008-03-18 19:42:04+04:00, gluh@mysql.com +1 -0
* scripts/install_innodb_plugins.sql:
Added
* mysql-test/innodb.result:
Merge change from MySQL (this fixes the failing innodb test):
ChangeSet@1.1810.3601.4, 2008-02-07 02:33:21+04:00
* row/row0sel.c:
Fix Bug#35226 RBR event crashes slave
* handler/ha_innodb.cc:
Change the fix for Bug#32440 to show bytes instead of kilobytes in
INFORMATION_SCHEMA.TABLES.DATA_FREE
* handler/ha_innodb.cc, mysql-test/innodb.result,
mysql-test/innodb.test:
Fix Bug#29507 TRUNCATE shows to many rows effected
* handler/ha_innodb.cc, mysql-test/innodb.result,
mysql-test/innodb.test:
Fix Bug#35537 Innodb doesn't increment handler_update and
handler_delete
2008-04-29 The InnoDB Team
* handler/i_s.cc, include/srv0start.h, srv/srv0start.c:
Fix Bug#36310 InnoDB plugin crash
2008-04-23 The InnoDB Team
* mysql-test/innodb_bug36169.result, mysql-test/innodb_bug36169.test,
row/row0mysql.c:
Fix Bug#36169 create innodb compressed table with too large row size
crashed
* (outside the source tree):
Fix Bug#36222 New InnoDB plugin 1.0 has wrong MKDIR_P defined in
Makefile.in
2008-04-15 The InnoDB Team
InnoDB Plugin 1.0.0 released

195
Makefile.am Normal file
View File

@@ -0,0 +1,195 @@
# Copyright (C) 2001, 2004, 2006 MySQL AB & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Process this file with automake to create Makefile.in
# Directory variables, each derived from a standard configure/automake
# directory variable.
MYSQLDATAdir= $(localstatedir)
MYSQLSHAREdir= $(pkgdatadir)
MYSQLBASEdir= $(prefix)
MYSQLLIBdir= $(pkglibdir)
# Install location of the dynamically loadable plugin; also used as the
# -rpath for ha_innodb.la below.
pkgplugindir= $(pkglibdir)/plugin
# Header search path: server headers from both the source and build trees,
# regex, InnoDB's own headers, the SQL layer, and this directory itself.
INCLUDES= -I$(top_srcdir)/include -I$(top_builddir)/include \
-I$(top_srcdir)/regex \
-I$(top_srcdir)/storage/innobase/include \
-I$(top_srcdir)/sql \
-I$(srcdir)
# Preprocessor definitions are taken verbatim from configure's @DEFS@.
DEFS= @DEFS@
# Headers (.h) and inline-function files (.ic) that are part of the source
# distribution but are not installed.
# NOTE(review): mem/mem0dbg.c is listed here and not in the sources list
# below; presumably it is #included by another file rather than compiled on
# its own -- confirm before moving it.
noinst_HEADERS= include/btr0btr.h include/btr0btr.ic \
include/btr0cur.h include/btr0cur.ic \
include/btr0pcur.h include/btr0pcur.ic \
include/btr0sea.h include/btr0sea.ic \
include/btr0types.h include/buf0buddy.h \
include/buf0buddy.ic include/buf0buf.h \
include/buf0buf.ic include/buf0flu.h \
include/buf0flu.ic include/buf0lru.h \
include/buf0lru.ic include/buf0rea.h \
include/buf0types.h include/data0data.h \
include/data0data.ic include/data0type.h \
include/data0type.ic include/data0types.h \
include/db0err.h include/dict0boot.h \
include/dict0boot.ic include/dict0crea.h \
include/dict0crea.ic include/dict0dict.h \
include/dict0dict.ic include/dict0load.h \
include/dict0load.ic include/dict0mem.h \
include/dict0mem.ic include/dict0types.h \
include/dyn0dyn.h include/dyn0dyn.ic \
include/eval0eval.h include/eval0eval.ic \
include/eval0proc.h include/eval0proc.ic \
include/fil0fil.h include/fsp0fsp.h \
include/fsp0fsp.ic include/fut0fut.h \
include/fut0fut.ic include/fut0lst.h \
include/fut0lst.ic include/ha0ha.h \
include/ha0ha.ic \
include/ha0storage.h \
include/ha0storage.ic \
include/hash0hash.h \
include/hash0hash.ic include/ibuf0ibuf.h \
include/ibuf0ibuf.ic include/ibuf0types.h \
include/lock0iter.h \
include/lock0lock.h include/lock0lock.ic \
include/lock0priv.h include/lock0priv.ic \
include/lock0types.h include/log0log.h \
include/log0log.ic include/log0recv.h \
include/log0recv.ic include/mach0data.h \
include/mach0data.ic include/mem0dbg.h \
include/mem0dbg.ic mem/mem0dbg.c \
include/mem0mem.h include/mem0mem.ic \
include/mem0pool.h include/mem0pool.ic \
include/mtr0log.h include/mtr0log.ic \
include/mtr0mtr.h include/mtr0mtr.ic \
include/mtr0types.h \
include/mysql_addons.h \
include/os0file.h \
include/os0proc.h include/os0proc.ic \
include/os0sync.h include/os0sync.ic \
include/os0thread.h include/os0thread.ic \
include/page0cur.h include/page0cur.ic \
include/page0page.h include/page0page.ic \
include/page0zip.h include/page0zip.ic \
include/page0types.h include/pars0grm.h \
include/pars0opt.h include/pars0opt.ic \
include/pars0pars.h include/pars0pars.ic \
include/pars0sym.h include/pars0sym.ic \
include/pars0types.h include/que0que.h \
include/que0que.ic include/que0types.h \
include/read0read.h include/read0read.ic \
include/read0types.h include/rem0cmp.h \
include/rem0cmp.ic include/rem0rec.h \
include/rem0rec.ic include/rem0types.h \
include/row0ext.h include/row0ext.ic \
include/row0ins.h include/row0ins.ic \
include/row0merge.h \
include/row0mysql.h include/row0mysql.ic \
include/row0purge.h include/row0purge.ic \
include/row0row.h include/row0row.ic \
include/row0sel.h include/row0sel.ic \
include/row0types.h include/row0uins.h \
include/row0uins.ic include/row0umod.h \
include/row0umod.ic include/row0undo.h \
include/row0undo.ic include/row0upd.h \
include/row0upd.ic include/row0vers.h \
include/row0vers.ic include/srv0que.h \
include/srv0srv.h include/srv0srv.ic \
include/srv0start.h include/sync0arr.h \
include/sync0arr.ic include/sync0rw.h \
include/sync0rw.ic include/sync0sync.h \
include/sync0sync.ic include/sync0types.h \
include/thr0loc.h include/thr0loc.ic \
include/trx0i_s.h \
include/trx0purge.h include/trx0purge.ic \
include/trx0rec.h include/trx0rec.ic \
include/trx0roll.h include/trx0roll.ic \
include/trx0rseg.h include/trx0rseg.ic \
include/trx0sys.h include/trx0sys.ic \
include/trx0trx.h include/trx0trx.ic \
include/trx0types.h include/trx0undo.h \
include/trx0undo.ic include/trx0xa.h \
include/univ.i include/usr0sess.h \
include/usr0sess.ic include/usr0types.h \
include/ut0byte.h include/ut0byte.ic \
include/ut0dbg.h include/ut0lst.h \
include/ut0mem.h include/ut0mem.ic \
include/ut0rnd.h include/ut0rnd.ic \
include/ut0sort.h include/ut0ut.h \
include/ut0ut.ic include/ut0vec.h \
include/ut0vec.ic include/ut0list.h \
include/ut0list.ic include/ut0wqueue.h \
include/ha_prototypes.h handler/ha_innodb.h \
include/handler0alter.h \
handler/i_s.h
# Static flavour of the engine. libinnobase.a is declared as an optional
# library; whether it is actually built is decided by configure through the
# @plugin_innobase_static_target@ substitution (presumably either
# libinnobase.a or empty -- confirm against plug.in).
EXTRA_LIBRARIES= libinnobase.a
noinst_LIBRARIES= @plugin_innobase_static_target@
# All translation units of the engine: the C core plus the C++ handler
# glue under handler/. The shared plugin below reuses this same list.
libinnobase_a_SOURCES= btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c \
btr/btr0sea.c buf/buf0buddy.c \
buf/buf0buf.c buf/buf0flu.c \
buf/buf0lru.c buf/buf0rea.c data/data0data.c \
data/data0type.c dict/dict0boot.c \
dict/dict0crea.c dict/dict0dict.c \
dict/dict0load.c dict/dict0mem.c dyn/dyn0dyn.c \
eval/eval0eval.c eval/eval0proc.c \
fil/fil0fil.c fsp/fsp0fsp.c fut/fut0fut.c \
fut/fut0lst.c ha/ha0ha.c \
ha/ha0storage.c \
ha/hash0hash.c \
ibuf/ibuf0ibuf.c lock/lock0iter.c \
lock/lock0lock.c \
log/log0log.c log/log0recv.c mach/mach0data.c \
mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c \
mtr/mtr0mtr.c os/os0file.c os/os0proc.c \
os/os0sync.c os/os0thread.c page/page0cur.c \
page/page0page.c page/page0zip.c \
pars/lexyy.c pars/pars0grm.c \
pars/pars0opt.c pars/pars0pars.c \
pars/pars0sym.c que/que0que.c read/read0read.c \
rem/rem0cmp.c rem/rem0rec.c row/row0ext.c \
row/row0ins.c row/row0merge.c \
row/row0mysql.c row/row0purge.c row/row0row.c \
row/row0sel.c row/row0uins.c row/row0umod.c \
row/row0undo.c row/row0upd.c row/row0vers.c \
srv/srv0que.c srv/srv0srv.c srv/srv0start.c \
sync/sync0arr.c sync/sync0rw.c \
sync/sync0sync.c thr/thr0loc.c \
trx/trx0i_s.c \
trx/trx0purge.c \
trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c \
trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c \
usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c \
ut/ut0list.c ut/ut0mem.c ut/ut0rnd.c \
ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c \
handler/ha_innodb.cc handler/handler0alter.cc \
handler/i_s.cc \
handler/mysql_addons.cc
# Both the C and the C++ sources are compiled with AM_CFLAGS.
libinnobase_a_CXXFLAGS= $(AM_CFLAGS)
libinnobase_a_CFLAGS= $(AM_CFLAGS)
# Dynamically loadable flavour of the engine. ha_innodb.la is declared as an
# optional libtool library; configure selects it through
# @plugin_innobase_shared_target@ and it installs into $(pkgplugindir).
EXTRA_LTLIBRARIES= ha_innodb.la
pkgplugin_LTLIBRARIES= @plugin_innobase_shared_target@
# -module builds a dlopen()able module; -rpath names its install directory.
ha_innodb_la_LDFLAGS= -module -rpath $(pkgplugindir)
# Same flags as the static library, plus MYSQL_DYNAMIC_PLUGIN.
ha_innodb_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
ha_innodb_la_CFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
# Built from exactly the same source list as libinnobase.a.
ha_innodb_la_SOURCES= $(libinnobase_a_SOURCES)
# Additional files shipped in the distribution tarball: the CMake build
# script, the plug.in configure fragment, and the parser/lexer generator
# scripts together with their grammar and lexer inputs.
EXTRA_DIST= CMakeLists.txt plug.in \
pars/make_bison.sh pars/make_flex.sh \
pars/pars0grm.y pars/pars0lex.l
# Don't update the files from bitkeeper
# (a match-anything pattern rule with no recipe cancels make's built-in
# rule that would otherwise try to fetch files from SCCS/s.<file>).
%::SCCS/s.%

3138
Makefile.in Normal file

File diff suppressed because it is too large Load Diff

26
README Normal file
View File

@@ -0,0 +1,26 @@
This is the source of the InnoDB Plugin 1.0.1 for MySQL 5.1
===========================================================
Instructions for compiling the plugin:
--------------------------------------
1. Get the latest MySQL 5.1 sources from
http://dev.mysql.com/downloads/mysql/5.1.html#source
2. Replace the contents of the mysql-5.1.N/storage/innobase/ directory
with the contents of this directory.
3. Optional (only necessary if you are going to run tests from the
mysql-test suite): cd into the innobase directory and run ./setup.sh
4. Compile MySQL as usual.
5. Enjoy!
See the online documentation for more detailed instructions:
http://www.innodb.com/doc/innodb_plugin-1.0/innodb-plugin-installation.html
For more information about InnoDB visit
http://www.innodb.com
Thank you for using the InnoDB plugin!

3509
btr/btr0btr.c Normal file

File diff suppressed because it is too large Load Diff

4716
btr/btr0cur.c Normal file

File diff suppressed because it is too large Load Diff

567
btr/btr0pcur.c Normal file
View File

@@ -0,0 +1,567 @@
/******************************************************
The index tree persistent cursor
(c) 1996 Innobase Oy
Created 2/23/1996 Heikki Tuuri
*******************************************************/
#include "btr0pcur.h"
#ifdef UNIV_NONINL
#include "btr0pcur.ic"
#endif
#include "ut0byte.h"
#include "rem0cmp.h"
#include "trx0trx.h"
/******************************************************************
Creates a persistent cursor object for the MySQL interface: the object is
obtained with mem_alloc(), the index pointer of the embedded tree cursor is
cleared and btr_pcur_init() is run on the new cursor. */
UNIV_INTERN
btr_pcur_t*
btr_pcur_create_for_mysql(void)
/*============================*/
				/* out, own: persistent cursor */
{
	btr_pcur_t*	cursor = mem_alloc(sizeof(*cursor));

	/* No index is attached to the embedded tree cursor yet. */
	cursor->btr_cur.index = NULL;

	btr_pcur_init(cursor);

	return(cursor);
}
/******************************************************************
Destroys a persistent cursor object: the buffer holding the stored record
prefix is released if there is one, the position bookkeeping fields are
reset, and finally the cursor object itself is freed. */
UNIV_INTERN
void
btr_pcur_free_for_mysql(
/*====================*/
	btr_pcur_t*	cursor)	/* in, own: persistent cursor */
{
	if (cursor->old_rec_buf) {
		mem_free(cursor->old_rec_buf);
	}

	/* Wipe the stored-position fields before the object is freed. */
	cursor->old_rec_buf = NULL;
	cursor->old_rec = NULL;
	cursor->old_n_fields = 0;
	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	/* Mark the cursor as unlatched and not positioned. */
	cursor->btr_cur.page_cur.rec = NULL;
	cursor->latch_mode = BTR_NO_LATCHES;
	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;

	mem_free(cursor);
}
/******************************************************************
Stores the position of a persistent cursor so that it can be restored
later. An initial segment of a record is copied to the cursor data
structure: the record the cursor is on, or, when the cursor is on the page
supremum or infimum, the record preceding or following it, respectively.
If the page contains no records, only a flag is set that tells whether the
cursor is before the first or after the last record in an EMPTY tree. NOTE
that the page where the cursor is positioned must not be empty if the index
tree is not totally empty! */
UNIV_INTERN
void
btr_pcur_store_position(
/*====================*/
	btr_pcur_t*	cursor, /* in: persistent cursor */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*	block;
	dict_index_t*	index;
	rec_t*		rec;
	page_t*		page;
	ulint		offs;

	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	block = btr_pcur_get_block(cursor);
	index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
	rec = page_cur_get_rec(btr_pcur_get_page_cur(cursor));
	page = page_align(rec);
	offs = page_offset(rec);

	/* The page must be S- or X-fixed in this mini-transaction. */
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
	      || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	ut_a(cursor->latch_mode != BTR_NO_LATCHES);

	if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
		/* It must be an empty index tree; NOTE that in this case
		we do not store the modify_clock, but always do a search
		if we restore the cursor position */

		ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
		ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);

		cursor->old_stored = BTR_PCUR_OLD_STORED;
		cursor->rel_pos = page_rec_is_supremum_low(offs)
			? BTR_PCUR_AFTER_LAST_IN_TREE
			: BTR_PCUR_BEFORE_FIRST_IN_TREE;

		return;
	}

	/* Choose the record whose prefix is stored and remember where the
	cursor stands relative to it. */
	if (page_rec_is_supremum_low(offs)) {
		cursor->rel_pos = BTR_PCUR_AFTER;
		rec = page_rec_get_prev(rec);
	} else if (page_rec_is_infimum_low(offs)) {
		cursor->rel_pos = BTR_PCUR_BEFORE;
		rec = page_rec_get_next(rec);
	} else {
		cursor->rel_pos = BTR_PCUR_ON;
	}

	cursor->old_stored = BTR_PCUR_OLD_STORED;
	cursor->old_rec = dict_index_copy_rec_order_prefix(
		index, rec, &cursor->old_n_fields,
		&cursor->old_rec_buf, &cursor->buf_size);

	/* Remember the block and its modify clock at the time of storing. */
	cursor->block_when_stored = block;
	cursor->modify_clock = buf_block_get_modify_clock(block);
}
/******************************************************************
Copies the stored position of one persistent cursor to another. The whole
cursor structure is copied bytewise; if the donor owns a stored-record
buffer, the receiver then gets its own freshly allocated copy of it, and
its old_rec pointer is set to the same offset inside the new buffer. */
UNIV_INTERN
void
btr_pcur_copy_stored_position(
/*==========================*/
	btr_pcur_t*	pcur_receive,	/* in: pcur which will receive the
					position info */
	btr_pcur_t*	pcur_donate)	/* in: pcur from which the info is
					copied */
{
	/* Release the buffer the receiver owned before the copy. */
	if (pcur_receive->old_rec_buf != NULL) {
		mem_free(pcur_receive->old_rec_buf);
	}

	/* After this the receiver's buffer pointers equal the donor's. */
	ut_memcpy(pcur_receive, pcur_donate, sizeof(*pcur_donate));

	if (pcur_donate->old_rec_buf != NULL) {
		/* Replace the shared pointers with a private duplicate. */
		pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size);

		ut_memcpy(pcur_receive->old_rec_buf,
			  pcur_donate->old_rec_buf,
			  pcur_donate->buf_size);

		pcur_receive->old_rec = pcur_receive->old_rec_buf
			+ (pcur_donate->old_rec - pcur_donate->old_rec_buf);
	}

	pcur_receive->old_n_fields = pcur_donate->old_n_fields;
}
/******************************************************************
Restores the stored position of a persistent cursor bufferfixing the page and
obtaining the specified latches. If the cursor position was saved when the
(1) cursor was positioned on a user record: this function restores the position
to the last record LESS OR EQUAL to the stored record;
(2) cursor was positioned on a page infimum record: restores the position to
the last record LESS than the user record which was the successor of the page
infimum;
(3) cursor was positioned on the page supremum: restores to the first record
GREATER than the user record which was the predecessor of the supremum.
(4) cursor was positioned before the first or after the last in an empty tree:
restores to before first or after the last in the tree.
The function first tries an optimistic restoration on the block remembered
when the position was stored (only for BTR_SEARCH_LEAF and BTR_MODIFY_LEAF);
if that is not possible, it builds a search tuple from the stored record
prefix and opens the cursor anew. */
UNIV_INTERN
ibool
btr_pcur_restore_position(
/*======================*/
/* out: TRUE if the cursor position
was stored when it was on a user record
and it can be restored on a user record
whose ordering fields are identical to
the ones of the original user record */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in: detached persistent cursor */
mtr_t* mtr) /* in: mtr */
{
dict_index_t* index;
dtuple_t* tuple;
ulint mode;
ulint old_mode;
mem_heap_t* heap;
index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
/* A position must have been stored and the cursor must be (or have
been) positioned; otherwise dump the cursor contents and, if known,
the transaction, and stop with ut_error. */
if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
|| UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
&& cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
ut_print_buf(stderr, cursor, sizeof(btr_pcur_t));
if (cursor->trx_if_known) {
trx_print(stderr, cursor->trx_if_known, 0);
}
ut_error;
}
if (UNIV_UNLIKELY
(cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
|| cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
/* In these cases we do not try an optimistic restoration,
but always do a search */
btr_cur_open_at_index_side(
cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);
cursor->block_when_stored = btr_pcur_get_block(cursor);
return(FALSE);
}
/* From here on a record prefix must have been stored. */
ut_a(cursor->old_rec);
ut_a(cursor->old_n_fields);
if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
|| UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
/* Try optimistic restoration */
if (UNIV_LIKELY(buf_page_optimistic_get(
latch_mode,
cursor->block_when_stored,
cursor->modify_clock, mtr))) {
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(btr_pcur_get_block(cursor),
SYNC_TREE_NODE);
#endif /* UNIV_SYNC_DEBUG */
if (cursor->rel_pos == BTR_PCUR_ON) {
#ifdef UNIV_DEBUG
const rec_t* rec;
const ulint* offsets1;
const ulint* offsets2;
#endif /* UNIV_DEBUG */
cursor->latch_mode = latch_mode;
#ifdef UNIV_DEBUG
/* Debug check: the record under the cursor must compare
equal to the stored prefix on the stored fields. */
rec = btr_pcur_get_rec(cursor);
heap = mem_heap_create(256);
offsets1 = rec_get_offsets(
cursor->old_rec, index, NULL,
cursor->old_n_fields, &heap);
offsets2 = rec_get_offsets(
rec, index, NULL,
cursor->old_n_fields, &heap);
ut_ad(!cmp_rec_rec(cursor->old_rec,
rec, offsets1, offsets2,
index));
mem_heap_free(heap);
#endif /* UNIV_DEBUG */
return(TRUE);
}
/* NOTE(review): on this path (stored position was BEFORE or
AFTER a record) cursor->latch_mode is not assigned, unlike in
the BTR_PCUR_ON branch above; confirm that callers do not
depend on it being updated here. */
return(FALSE);
}
}
/* If optimistic restoration did not succeed, open the cursor anew */
heap = mem_heap_create(256);
tuple = dict_index_build_data_tuple(index, cursor->old_rec,
cursor->old_n_fields, heap);
/* Save the old search mode of the cursor */
old_mode = cursor->search_mode;
/* Pick the search mode that matches cases (1)-(3) of the function
comment. */
if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) {
mode = PAGE_CUR_LE;
} else if (cursor->rel_pos == BTR_PCUR_AFTER) {
mode = PAGE_CUR_G;
} else {
ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
mode = PAGE_CUR_L;
}
btr_pcur_open_with_no_init(index, tuple, mode, latch_mode,
cursor, 0, mtr);
/* Restore the old search mode */
cursor->search_mode = old_mode;
/* The restoration is exact only if the position was stored ON a
record and the cursor is now on a user record that compares equal to
the search tuple. */
if (cursor->rel_pos == BTR_PCUR_ON
&& btr_pcur_is_on_user_rec(cursor)
&& 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
rec_get_offsets(
btr_pcur_get_rec(cursor), index,
NULL, ULINT_UNDEFINED, &heap))) {
/* We have to store the NEW value for the modify clock, since
the cursor can now be on a different page! But we can retain
the value of old_rec */
cursor->block_when_stored = btr_pcur_get_block(cursor);
cursor->modify_clock = buf_block_get_modify_clock(
cursor->block_when_stored);
cursor->old_stored = BTR_PCUR_OLD_STORED;
mem_heap_free(heap);
return(TRUE);
}
mem_heap_free(heap);
/* We have to store new position information, modify_clock etc.,
to the cursor because it can now be on a different page, the record
under it may have been removed, etc. */
btr_pcur_store_position(cursor, mtr);
return(FALSE);
}
/******************************************************************
Releases the leaf page latch and bufferfix held by a positioned persistent
cursor, and marks the cursor as holding no latches. The latch mode of the
cursor is expected to be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF.
NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
made by the current mini-transaction to the data protected by the
cursor latch, as then the latch must not be released until mtr_commit. */
UNIV_INTERN
void
btr_pcur_release_leaf(
/*==================*/
	btr_pcur_t*	cursor, /* in: persistent cursor */
	mtr_t*		mtr)	/* in: mtr */
{
	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	btr_leaf_page_release(btr_pcur_get_block(cursor),
			      cursor->latch_mode, mtr);

	/* The cursor is detached from the page from now on. */
	cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
	cursor->latch_mode = BTR_NO_LATCHES;
}
/*************************************************************
Moves the persistent cursor to the first record on the next page. Releases the
latch on the current page, and bufferunfixes it. Note that there must not be
modifications on the current page, as then the x-latch can be released only in
mtr_commit. The next page is latched with the latch mode of the cursor before
the current page is released. */
UNIV_INTERN
void
btr_pcur_move_to_next_page(
/*=======================*/
btr_pcur_t* cursor, /* in: persistent cursor; must be on the
last record of the current page */
mtr_t* mtr) /* in: mtr */
{
ulint next_page_no;
ulint space;
ulint zip_size;
page_t* page;
buf_block_t* next_block;
page_t* next_page;
ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
ut_ad(btr_pcur_is_after_last_on_page(cursor));
/* Moving the cursor invalidates any previously stored position. */
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
page = btr_pcur_get_page(cursor);
next_page_no = btr_page_get_next(page, mtr);
space = buf_block_get_space(btr_pcur_get_block(cursor));
zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor));
/* The caller must not ask to move past the last page. */
ut_ad(next_page_no != FIL_NULL);
/* Latch the next page while the current one is still latched. */
next_block = btr_block_get(space, zip_size, next_page_no,
cursor->latch_mode, mtr);
next_page = buf_block_get_frame(next_block);
#ifdef UNIV_BTR_DEBUG
/* Both pages must use the same record format, and the next page
must point back to the current page. */
ut_a(page_is_comp(next_page) == page_is_comp(page));
ut_a(btr_page_get_prev(next_page, mtr)
== buf_block_get_page_no(btr_pcur_get_block(cursor)));
#endif /* UNIV_BTR_DEBUG */
next_block->check_index_page_at_flush = TRUE;
/* Only now release the page the cursor was on. */
btr_leaf_page_release(btr_pcur_get_block(cursor),
cursor->latch_mode, mtr);
page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor));
page_check_dir(next_page);
}
/*************************************************************
Moves the persistent cursor backward if it is on the first record of the page.
Commits mtr. Note that to prevent a possible deadlock, the operation
first stores the position of the cursor, commits mtr, acquires the necessary
latches and restores the cursor position again before returning. The
alphabetical position of the cursor is guaranteed to be sensible on
return, but it may happen that the cursor is not positioned on the last
record of any page, because the structure of the tree may have changed
during the time when the cursor had no latches. */
UNIV_INTERN
void
btr_pcur_move_backward_from_page(
/*=============================*/
	btr_pcur_t*	cursor,	/* in: persistent cursor, must be on the first
				record of the current page */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint		prev_page_no;
	page_t*		page;
	buf_block_t*	prev_block;
	ulint		latch_mode;
	ulint		latch_mode2;

	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
	ut_ad(btr_pcur_is_before_first_on_page(cursor));
	ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));

	latch_mode = cursor->latch_mode;

	/* The position is restored with the corresponding *_PREV latch
	mode, so that the previous page is latched as well. */
	if (latch_mode == BTR_SEARCH_LEAF) {

		latch_mode2 = BTR_SEARCH_PREV;

	} else if (latch_mode == BTR_MODIFY_LEAF) {

		latch_mode2 = BTR_MODIFY_PREV;
	} else {
		latch_mode2 = 0; /* To eliminate compiler warning */
		ut_error;
	}

	btr_pcur_store_position(cursor, mtr);

	/* Give up all latches before acquiring them again. */
	mtr_commit(mtr);

	mtr_start(mtr);

	btr_pcur_restore_position(latch_mode2, cursor, mtr);

	page = btr_pcur_get_page(cursor);

	prev_page_no = btr_page_get_prev(page, mtr);

	/* If there is no previous page, there is nothing to move to or to
	release. */
	if (prev_page_no != FIL_NULL) {
		if (btr_pcur_is_before_first_on_page(cursor)) {
			/* Still on the page infimum: release the current
			page and put the cursor after the last record of
			the previous page. */
			prev_block = btr_pcur_get_btr_cur(cursor)->left_block;

			btr_leaf_page_release(btr_pcur_get_block(cursor),
					      latch_mode, mtr);

			page_cur_set_after_last(
				prev_block, btr_pcur_get_page_cur(cursor));
		} else {
			/* The repositioned cursor did not end on an infimum
			record on a page. Cursor repositioning acquired a
			latch also on the previous page, but we do not need
			the latch: release it. */
			prev_block = btr_pcur_get_btr_cur(cursor)->left_block;

			btr_leaf_page_release(prev_block, latch_mode, mtr);
		}
	}

	/* The cursor holds the original latch mode again, and the position
	stored above no longer describes it. */
	cursor->latch_mode = latch_mode;

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*************************************************************
Steps the persistent cursor one record backward in the tree. When the
cursor is not before the first record of its page, the move happens within
the page; otherwise btr_pcur_move_backward_from_page() is used, unless the
cursor is already before the first record in the whole tree, in which case
it stays where it is. */
UNIV_INTERN
ibool
btr_pcur_move_to_prev(
/*==================*/
				/* out: TRUE if the cursor was not before first
				in tree */
	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
				function may release the page latch */
	mtr_t*		mtr)	/* in: mtr */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	/* Any stored position is stale once the cursor moves. */
	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	if (!btr_pcur_is_before_first_on_page(cursor)) {
		/* Simple case: stay on the same page. */
		btr_pcur_move_to_prev_on_page(cursor);

		return(TRUE);
	}

	if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
		/* Nothing precedes the cursor. */
		return(FALSE);
	}

	btr_pcur_move_backward_from_page(cursor, mtr);

	return(TRUE);
}
/******************************************************************
Opens a persistent cursor with btr_pcur_open() and, if mode is PAGE_CUR_G
or PAGE_CUR_GE, makes sure it ends up on a user record: when the search
left the cursor after the last record on a page, it is advanced with
btr_pcur_move_to_next_user_rec(). The modes PAGE_CUR_L and PAGE_CUR_LE are
not implemented yet: any mode other than PAGE_CUR_G and PAGE_CUR_GE ends in
ut_error. The latching mode must be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF. */
UNIV_INTERN
void
btr_pcur_open_on_user_rec(
/*======================*/
	dict_index_t*	index,		/* in: index */
	const dtuple_t*	tuple,		/* in: tuple on which search done */
	ulint		mode,		/* in: PAGE_CUR_L, ... */
	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
					BTR_MODIFY_LEAF */
	btr_pcur_t*	cursor,		/* in: memory buffer for persistent
					cursor */
	mtr_t*		mtr)		/* in: mtr */
{
	btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);

	if (mode != PAGE_CUR_GE && mode != PAGE_CUR_G) {
		ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L));

		/* Not implemented yet */

		ut_error;

		return;
	}

	if (btr_pcur_is_after_last_on_page(cursor)) {
		/* The search stopped on the page supremum: go on to the
		next user record. */
		btr_pcur_move_to_next_user_rec(cursor, mtr);
	}
}

1790
btr/btr0sea.c Normal file

File diff suppressed because it is too large Load Diff

664
buf/buf0buddy.c Normal file
View File

@@ -0,0 +1,664 @@
/******************************************************
Binary buddy allocator for compressed pages
(c) 2006 Innobase Oy
Created December 2006 by Marko Makela
*******************************************************/
#define THIS_MODULE
#include "buf0buddy.h"
#ifdef UNIV_NONINL
# include "buf0buddy.ic"
#endif
#undef THIS_MODULE
#include "buf0buf.h"
#include "buf0lru.h"
#include "buf0flu.h"
#include "page0zip.h"
/* Statistic counters */
#ifdef UNIV_DEBUG
/** Number of frames allocated from the buffer pool to the buddy system.
Protected by buf_pool_mutex. */
static ulint buf_buddy_n_frames;
#endif /* UNIV_DEBUG */
/** Statistics of the buddy system, indexed by block size.
Protected by buf_pool_mutex. */
UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
/**************************************************************************
Computes the address of the buddy of a block. The block must be aligned to
its size; the buddy is the neighbouring block of the same size, located
below the block if the size bit is set in the block address and above it
otherwise. */
UNIV_INLINE
byte*
buf_buddy_get(
/*==========*/
			/* out: the buddy relative of page */
	byte*	page,	/* in: compressed page */
	ulint	size)	/* in: page size in bytes */
{
	ut_ad(ut_is_2pow(size));
	ut_ad(size >= BUF_BUDDY_LOW);
	ut_ad(size < BUF_BUDDY_HIGH);
	ut_ad(!ut_align_offset(page, size));

	return((((ulint) page) & size) ? page - size : page + size);
}
/**************************************************************************
Puts a block at the head of buf_pool->zip_free[i]. In UNIV_DEBUG_VALGRIND
builds the previous head of the list is made accessible for the duration
of the list operation, and afterwards both it and the added block are
marked as freed memory. */
UNIV_INLINE
void
buf_buddy_add_to_free(
/*==================*/
	buf_page_t*	bpage,	/* in,own: block to be freed */
	ulint		i)	/* in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
	buf_page_t*	old_head = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

	if (old_head != NULL) {
		UNIV_MEM_VALID(old_head, BUF_BUDDY_LOW << i);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	/* The block must not already be the head of the list. */
	ut_ad(buf_pool->zip_free[i].start != bpage);
	UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);

#ifdef UNIV_DEBUG_VALGRIND
	if (old_head != NULL) {
		UNIV_MEM_FREE(old_head, BUF_BUDDY_LOW << i);
	}

	UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
}
/**************************************************************************
Unlinks a block from buf_pool->zip_free[i]. In UNIV_DEBUG_VALGRIND builds
the list neighbours of the block are made accessible while the list is
being modified and are marked as freed memory again afterwards. */
UNIV_INLINE
void
buf_buddy_remove_from_free(
/*=======================*/
	buf_page_t*	bpage,	/* in: block to be removed */
	ulint		i)	/* in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
	buf_page_t*	before	= UT_LIST_GET_PREV(list, bpage);
	buf_page_t*	after	= UT_LIST_GET_NEXT(list, bpage);

	if (before != NULL) {
		UNIV_MEM_VALID(before, BUF_BUDDY_LOW << i);
	}

	if (after != NULL) {
		UNIV_MEM_VALID(after, BUF_BUDDY_LOW << i);
	}

	/* Neighbours on a free list must be free blocks themselves. */
	ut_ad(!before || buf_page_get_state(before) == BUF_BLOCK_ZIP_FREE);
	ut_ad(!after || buf_page_get_state(after) == BUF_BLOCK_ZIP_FREE);
#endif /* UNIV_DEBUG_VALGRIND */

	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
	UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);

#ifdef UNIV_DEBUG_VALGRIND
	if (before != NULL) {
		UNIV_MEM_FREE(before, BUF_BUDDY_LOW << i);
	}

	if (after != NULL) {
		UNIV_MEM_FREE(after, BUF_BUDDY_LOW << i);
	}
#endif /* UNIV_DEBUG_VALGRIND */
}
/**************************************************************************
Try to allocate a block from buf_pool->zip_free[]. If the free list of the
requested size is empty, a block of the next bigger size is allocated
recursively and split in two: the lower half is returned and the upper half
is added to the free list of the requested size. */
static
void*
buf_buddy_alloc_zip(
/*================*/
			/* out: allocated block, or NULL
			if buf_pool->zip_free[] was empty */
	ulint	i)	/* in: index of buf_pool->zip_free[] */
{
	buf_page_t*	bpage;

	ut_ad(buf_pool_mutex_own());
	ut_a(i < BUF_BUDDY_SIZES);

#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
	/* Valgrind would complain about accessing free memory. */
	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]);
#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

	if (bpage) {
		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);

		buf_buddy_remove_from_free(bpage, i);
	} else if (i + 1 < BUF_BUDDY_SIZES) {
		/* Attempt to split. */
		bpage = buf_buddy_alloc_zip(i + 1);

		if (bpage) {
			/* The upper half of the bigger block becomes a
			free block of the requested size. */
			buf_page_t*	buddy = (buf_page_t*)
				(((char*) bpage) + (BUF_BUDDY_LOW << i));

			ut_ad(!buf_pool_contains_zip(buddy));
			ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
			buddy->state = BUF_BLOCK_ZIP_FREE;
			buf_buddy_add_to_free(buddy, i);
		}
	}

	if (bpage) {
		/* Fill the block with a pattern in debug builds. */
		ut_d(memset(bpage, ~i, BUF_BUDDY_LOW << i));

		/* The size of a block in slot i is BUF_BUDDY_LOW << i
		bytes; BUF_BUDDY_SIZES is the number of slots, not a size
		in bytes. Nothing is marked when no block was found. */
		UNIV_MEM_ALLOC(bpage, BUF_BUDDY_LOW << i);
	}

	return(bpage);
}
/**************************************************************************
Deallocate a buffer frame of UNIV_PAGE_SIZE. */
static
void
buf_buddy_block_free(
/*=================*/
void* buf) /* in: buffer frame to deallocate */
{
const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
buf_page_t* bpage;
buf_block_t* block;
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
((buf_block_t*) bpage)->frame == buf);
ut_a(bpage);
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
ut_ad(!bpage->in_page_hash);
ut_ad(bpage->in_zip_hash);
ut_d(bpage->in_zip_hash = FALSE);
HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
block = (buf_block_t*) bpage;
mutex_enter(&block->mutex);
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
ut_ad(buf_buddy_n_frames > 0);
ut_d(buf_buddy_n_frames--);
}
/**************************************************************************
Hands a buffer block over to the buddy allocator: the block state is set to
BUF_BLOCK_MEMORY and the block is inserted into buf_pool->zip_hash, from
where buf_buddy_block_free() finds it again. */
static
void
buf_buddy_block_register(
/*=====================*/
	buf_block_t*	block)	/* in: buffer frame to allocate */
{
	const ulint	zip_fold = BUF_POOL_ZIP_FOLD(block);

	ut_ad(buf_pool_mutex_own());
	ut_ad(!mutex_own(&buf_pool_zip_mutex));

	buf_block_set_state(block, BUF_BLOCK_MEMORY);

	/* The block must have a page-aligned frame and must not be in
	either hash table yet. */
	ut_a(block->frame);
	ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));
	ut_ad(!block->page.in_page_hash);
	ut_ad(!block->page.in_zip_hash);

	ut_d(block->page.in_zip_hash = TRUE);
	HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, zip_fold,
		    &block->page);

	ut_d(buf_buddy_n_frames++);
}
/**************************************************************************
Carves a block of size index i out of a free block of size index j >= i.
The block is halved repeatedly; after each halving the upper half is added
to the free list of its size, until the remaining lower part has the
requested size. The returned block starts at the start of buf. */
static
void*
buf_buddy_alloc_from(
/*=================*/
			/* out: allocated block */
	void*	buf,	/* in: a block that is free to use */
	ulint	i,	/* in: index of buf_pool->zip_free[] */
	ulint	j)	/* in: size of buf as an index
			of buf_pool->zip_free[] */
{
	ut_ad(j <= BUF_BUDDY_SIZES);
	ut_ad(j >= i);
	ut_ad(!ut_align_offset(buf, BUF_BUDDY_LOW << j));

	/* Add the unused parts of the block to the free lists. */
	while (j > i) {
		buf_page_t*	upper_half;
		ulint		half_size;

		j--;
		half_size = BUF_BUDDY_LOW << j;

		upper_half = (buf_page_t*) ((byte*) buf + half_size);
		ut_d(memset(upper_half, j, half_size));
		upper_half->state = BUF_BLOCK_ZIP_FREE;
#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
		/* Valgrind would complain about accessing free memory. */
		UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[j]);
#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
		buf_buddy_add_to_free(upper_half, j);
	}

	return(buf);
}
/**************************************************************************
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL.
The sources are tried in this order: the buddy free lists (only when
i < BUF_BUDDY_SIZES), the buf_pool->free list, and, if lru != NULL, a block
obtained with buf_LRU_get_free_block(). */
UNIV_INTERN
void*
buf_buddy_alloc_low(
/*================*/
			/* out: allocated block,
			possibly NULL if lru==NULL */
	ulint	i,	/* in: index of buf_pool->zip_free[],
			or BUF_BUDDY_SIZES */
	ibool*	lru)	/* in: pointer to a variable that will be assigned
			TRUE if storage was allocated from the LRU list
			and buf_pool_mutex was temporarily released,
			or NULL if the LRU list should not be used */
{
	void*		allocated = NULL;
	buf_block_t*	block;

	ut_ad(buf_pool_mutex_own());
	ut_ad(!mutex_own(&buf_pool_zip_mutex));

	if (i < BUF_BUDDY_SIZES) {
		/* Try to allocate from the buddy system. */
		allocated = buf_buddy_alloc_zip(i);
	}

	if (!allocated) {
		/* Try allocating from the buf_pool->free list. */
		block = buf_LRU_get_free_only();

		if (!block) {
			if (!lru) {

				return(NULL);
			}

			/* Try replacing an uncompressed page in the
			buffer pool. */
			buf_pool_mutex_exit();
			block = buf_LRU_get_free_block(0);
			*lru = TRUE;
			buf_pool_mutex_enter();
		}

		/* A whole frame was obtained: register it with the buddy
		system and carve the requested size out of it. */
		buf_buddy_block_register(block);

		allocated = buf_buddy_alloc_from(block->frame, i,
						 BUF_BUDDY_SIZES);
	}

	buf_buddy_stat[i].used++;

	return(allocated);
}
/**************************************************************************
Try to relocate the control block of a compressed page. Only blocks in
state BUF_BLOCK_ZIP_PAGE for which buf_page_can_relocate() holds are
relocated; the relocation itself is done by buf_relocate() while holding
buf_pool_zip_mutex. */
static
ibool
buf_buddy_relocate_block(
/*=====================*/
/* out: TRUE if relocated */
buf_page_t* bpage, /* in: block to relocate */
buf_page_t* dpage) /* in: free block to relocate to */
{
buf_page_t* b;
ut_ad(buf_pool_mutex_own());
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_FILE_PAGE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
/* None of these states is expected here. */
ut_error;
case BUF_BLOCK_ZIP_DIRTY:
/* Cannot relocate dirty pages. */
return(FALSE);
case BUF_BLOCK_ZIP_PAGE:
break;
}
mutex_enter(&buf_pool_zip_mutex);
if (!buf_page_can_relocate(bpage)) {
mutex_exit(&buf_pool_zip_mutex);
return(FALSE);
}
buf_relocate(bpage, dpage);
/* In debug builds, mark the old descriptor as free. */
ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
/* relocate buf_pool->zip_clean */
/* dpage is removed from the list and inserted again at the same
position: after its predecessor b, or at the head of the list if it
has no predecessor. */
b = UT_LIST_GET_PREV(list, dpage);
UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
if (b) {
UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
} else {
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
}
mutex_exit(&buf_pool_zip_mutex);
return(TRUE);
}
/**************************************************************************
Try to relocate a block. Blocks of at least PAGE_ZIP_MIN_SIZE bytes are
treated as compressed pages and are copied to dst; blocks in the slot of
sizeof(buf_page_t) are treated as buf_page_t objects and are relocated with
buf_buddy_relocate_block(). Blocks of any other size are not relocated.
On success the relocation statistics of slot i are updated. */
static
ibool
buf_buddy_relocate(
/*===============*/
/* out: TRUE if relocated */
void* src, /* in: block to relocate */
void* dst, /* in: free block to relocate to */
ulint i) /* in: index of buf_pool->zip_free[] */
{
buf_page_t* bpage;
const ulint size = BUF_BUDDY_LOW << i;
/* Start time, used for the relocated_usec statistic. */
ullint usec = ut_time_us(NULL);
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(!ut_align_offset(src, size));
ut_ad(!ut_align_offset(dst, size));
UNIV_MEM_ASSERT_W(dst, size);
/* We assume that all memory from buf_buddy_alloc()
is used for either compressed pages or buf_page_t
objects covering compressed pages. */
/* We look inside the allocated objects returned by
buf_buddy_alloc() and assume that anything of
PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
a valid space_id and page_no in the page header. Should the
fields be invalid, we will be unable to relocate the block.
We also assume that anything that fits sizeof(buf_page_t)
actually is a properly initialized buf_page_t object. */
if (size >= PAGE_ZIP_MIN_SIZE) {
/* This is a compressed page. */
mutex_t* mutex;
/* The src block may be split into smaller blocks,
some of which may be free. Thus, the
mach_read_from_4() calls below may attempt to read
from free memory. The memory is "owned" by the buddy
allocator (and it has been allocated from the buffer
pool), so there is nothing wrong about this. The
mach_read_from_4() calls here will only trigger bogus
Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
/* Look up the page descriptor by the space id and page number read
from the page header in src. */
bpage = buf_page_hash_get(
mach_read_from_4((const byte*) src
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID),
mach_read_from_4((const byte*) src
+ FIL_PAGE_OFFSET));
if (!bpage || bpage->zip.data != src) {
/* The block has probably been freshly
allocated by buf_LRU_get_free_block() but not
added to buf_pool->page_hash yet. Obviously,
it cannot be relocated. */
return(FALSE);
}
if (page_zip_get_size(&bpage->zip) != size) {
/* The block is of different size. We would
have to relocate all blocks covered by src.
For the sake of simplicity, give up. */
ut_ad(page_zip_get_size(&bpage->zip) < size);
return(FALSE);
}
/* The block must have been allocated, but it may
contain uninitialized data. */
UNIV_MEM_ASSERT_W(src, size);
mutex = buf_page_get_mutex(bpage);
mutex_enter(mutex);
if (buf_page_can_relocate(bpage)) {
/* Relocate the compressed page. */
ut_a(bpage->zip.data == src);
memcpy(dst, src, size);
bpage->zip.data = dst;
mutex_exit(mutex);
/* Common exit for a successful relocation; the buf_page_t branch
below also jumps here. */
success:
UNIV_MEM_INVALID(src, size);
{
buf_buddy_stat_t* buddy_stat
= &buf_buddy_stat[i];
buddy_stat->relocated++;
buddy_stat->relocated_usec
+= ut_time_us(NULL) - usec;
}
return(TRUE);
}
mutex_exit(mutex);
} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
/* This must be a buf_page_t object. */
UNIV_MEM_ASSERT_RW(src, size);
if (buf_buddy_relocate_block(src, dst)) {
goto success;
}
}
return(FALSE);
}
/**************************************************************************
Deallocate a block. The function first tries to merge the block with its
buddy, repeatedly, for as long as the buddy is found in the free list of
the same size. If the buddy is in use, it tries to make the buddy free by
relocating it to another free block of the same size, or to relocate the
buddy of such a free block into the block being freed. If none of this
succeeds, the block is added to the free list of its size. A block that
has grown to the size index BUF_BUDDY_SIZES is returned to the buffer pool
with buf_buddy_block_free(). */
UNIV_INTERN
void
buf_buddy_free_low(
/*===============*/
void* buf, /* in: block to be freed, must not be
pointed to by the buffer pool */
ulint i) /* in: index of buf_pool->zip_free[] */
{
buf_page_t* bpage;
buf_page_t* buddy;
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(i <= BUF_BUDDY_SIZES);
ut_ad(buf_buddy_stat[i].used > 0);
/* The usage counter is decremented once, for the size the block was
allocated with, before any recombination changes i. */
buf_buddy_stat[i].used--;
/* Entry point for each round: buf is a free block of size index i. */
recombine:
UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
if (i == BUF_BUDDY_SIZES) {
/* The block covers a whole frame: give it back. */
buf_buddy_block_free(buf);
return;
}
ut_ad(i < BUF_BUDDY_SIZES);
ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
ut_ad(!buf_pool_contains_zip(buf));
/* Try to combine adjacent blocks. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
if (buddy->state != BUF_BLOCK_ZIP_FREE) {
goto buddy_nonfree;
}
/* The field buddy->state can only be trusted for free blocks.
If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
it is in the free list. */
#endif /* !UNIV_DEBUG_VALGRIND */
/* Search the free list of this size for the buddy. */
for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
if (bpage == buddy) {
buddy_free:
/* The buddy is free: recombine */
buf_buddy_remove_from_free(bpage, i);
/* Reached also after a relocation has emptied the buddy; in that
case the buddy is not in any free list. */
buddy_free2:
ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
ut_ad(!buf_pool_contains_zip(buddy));
/* Continue with the merged block of the next bigger size, which
starts at the lower of the two addresses. */
i++;
buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
goto recombine;
}
/* The block being freed must not be in the free list already. */
ut_a(bpage != buf);
{
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
bpage = next;
}
}
#ifndef UNIV_DEBUG_VALGRIND
buddy_nonfree:
/* Valgrind would complain about accessing free memory. */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]));
#endif /* UNIV_DEBUG_VALGRIND */
/* The buddy is not free. Is there a free block of this size? */
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (bpage) {
/* Remove the block from the free list, because a successful
buf_buddy_relocate() will overwrite bpage->list. */
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
buf_buddy_remove_from_free(bpage, i);
/* Try to relocate the buddy of buf to the free block. */
if (buf_buddy_relocate(buddy, bpage, i)) {
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
goto buddy_free2;
}
/* The relocation failed: put the free block back. */
buf_buddy_add_to_free(bpage, i);
/* Try to relocate the buddy of the free block to buf. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
BUF_BUDDY_LOW << i);
#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
{
const buf_page_t* b;
/* The buddy must not be (completely) free, because
we always recombine adjacent free blocks.
(Parts of the buddy can be free in
buf_pool->zip_free[j] with j < i.)*/
for (b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
b; b = UT_LIST_GET_NEXT(list, b)) {
ut_a(b != buddy);
}
}
#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
if (buf_buddy_relocate(buddy, buf, i)) {
/* buf now holds the relocated data; the free block bpage takes
its place as the block to be merged with its emptied buddy. */
buf = bpage;
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
goto buddy_free;
}
}
/* Free the block to the buddy list. */
bpage = buf;
#ifdef UNIV_DEBUG
if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
/* This area has most likely been allocated for at
least one compressed-only block descriptor. Check
that there are no live objects in the area. This is
not a complete check: it may yield false positives as
well as false negatives. Also, due to buddy blocks
being recombined, it is possible (although unlikely)
that this branch is never reached. */
char* c;
# ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing
uninitialized memory. Besides, Valgrind performs a
more exhaustive check, at every memory access. */
const buf_page_t* b = buf;
const buf_page_t* const b_end = (buf_page_t*)
((char*) b + (BUF_BUDDY_LOW << i));
for (; b < b_end; b++) {
/* Avoid false positives (and cause false
negatives) by checking for b->space < 1000. */
if ((b->state == BUF_BLOCK_ZIP_PAGE
|| b->state == BUF_BLOCK_ZIP_DIRTY)
&& b->space > 0 && b->space < 1000) {
fprintf(stderr,
"buddy dirty %p %u (%u,%u) %p,%lu\n",
(void*) b,
b->state, b->space, b->offset,
buf, i);
}
}
# endif /* !UNIV_DEBUG_VALGRIND */
/* Scramble the block. This should make any pointers
invalid and trigger a segmentation violation. Because
the scrambling can be reversed, it may be possible to
track down the object pointing to the freed data by
dereferencing the unscrambled bpage->LRU or
bpage->list pointers. */
for (c = (char*) buf + (BUF_BUDDY_LOW << i);
c-- > (char*) buf; ) {
*c = ~*c ^ i;
}
} else {
/* Fill large blocks with a constant pattern. */
memset(bpage, i, BUF_BUDDY_LOW << i);
}
#endif /* UNIV_DEBUG */
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_add_to_free(bpage, i);
}

3745
buf/buf0buf.c Normal file

File diff suppressed because it is too large Load Diff

1296
buf/buf0flu.c Normal file

File diff suppressed because it is too large Load Diff

1825
buf/buf0lru.c Normal file

File diff suppressed because it is too large Load Diff

793
buf/buf0rea.c Normal file
View File

@@ -0,0 +1,793 @@
/******************************************************
The database buffer read
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "buf0rea.h"
#include "fil0fil.h"
#include "mtr0mtr.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "buf0lru.h"
#include "ibuf0ibuf.h"
#include "log0recv.h"
#include "trx0sys.h"
#include "os0file.h"
#include "srv0start.h"
/* Counters defined elsewhere and updated in this file:
srv_read_ahead_rnd is incremented once per random read-ahead batch,
srv_read_ahead_seq once per linear read-ahead batch, and
srv_buf_pool_reads by the number of requests queued by buf_read_page()
for the page the caller asked for. */
extern ulint srv_read_ahead_rnd;
extern ulint srv_read_ahead_seq;
extern ulint srv_buf_pool_reads;
/* The size in blocks of the area in which the random read-ahead algorithm
counts the accessed pages when deciding whether to read ahead */
#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA
/* There must be at least this many recently accessed pages of the area in
buf_pool to start a random read-ahead. NOTE: this macro expands to an
expression that refers to a variable named buf_read_ahead_random_area, which
must be in scope at the point of use (it is a local variable of
buf_read_ahead_random()). */
#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + buf_read_ahead_random_area / 8)
/* The linear read-ahead area size */
#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
/* The linear read-ahead threshold, as a fraction of the area size.
NOTE: the expansion is intentionally NOT parenthesized. It is used as
"area * LINEAR_AREA_THRESHOLD_COEF", i.e. "area * 5 / 8"; with parentheses
the integer division 5 / 8 would evaluate to 0. */
#define LINEAR_AREA_THRESHOLD_COEF 5 / 8
/* If the number of pending reads exceeds buf_pool->curr_size divided by the
number below, then read-ahead is not done: this is to prevent flooding the
buffer pool with i/o-fixed buffer blocks */
#define BUF_READ_AHEAD_PEND_LIMIT 2
/************************************************************************
Low-level function which posts a read request for one page, unless the page
is already in the buffer pool, in which case nothing is done. Sets the
io_fix flag and sets an exclusive lock on the buffer frame. For an
asynchronous request the flag is cleared and the x-lock released by an
i/o-handler thread; for a synchronous request the i/o is completed here. */
static
ulint
buf_read_page_low(
/*==============*/
			/* out: 1 if a read request was queued, 0 if the page
			already resided in buf_pool, or if the page is in
			the doublewrite buffer blocks in which case it is never
			read into the pool, or if the tablespace does not
			exist or is being dropped */
	ulint*	err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
			trying to read from a non-existent tablespace, or a
			tablespace which is just now being dropped */
	ibool	sync,	/* in: TRUE if synchronous aio is desired */
	ulint	mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ...,
			ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
			at read-ahead functions) */
	ulint	space,	/* in: space id */
	ulint	zip_size,/* in: compressed page size, or 0 */
	ibool	unzip,	/* in: TRUE=request uncompressed page */
	ib_int64_t tablespace_version, /* in: if the space memory object has
			this timestamp different from what we are giving here,
			treat the tablespace as dropped; this is a timestamp we
			use to stop dangling page reads from a tablespace
			which we have DISCARDed + IMPORTed back */
	ulint	offset)	/* in: page number */
{
	buf_page_t*	bpage;
	void*		dest;
	ulint		len;
	ulint		wake_later;

	*err = DB_SUCCESS;

	/* Separate the optional wake-later flag from the read mode */
	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
	mode &= ~OS_AIO_SIMULATED_WAKE_LATER;

	if (trx_doublewrite && space == TRX_SYS_SPACE) {
		const ibool	in_block1
			= offset >= trx_doublewrite->block1
			&& offset < trx_doublewrite->block1
			+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
		const ibool	in_block2
			= offset >= trx_doublewrite->block2
			&& offset < trx_doublewrite->block2
			+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;

		if (in_block1 || in_block2) {
			/* Doublewrite buffer pages are never read into
			the pool */
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Warning: trying to read"
				" doublewrite buffer page %lu\n",
				(ulong) offset);

			return(0);
		}
	}

	if (ibuf_bitmap_page(zip_size, offset)
	    || trx_sys_hdr_page(space, offset)) {

		/* The trx sys header is so low in the latching order that
		the i/o completion is not left to an asynchronous i/o
		thread. Ibuf bitmap pages are likewise always read
		synchronously so that they do not get involved in thread
		deadlocks. */

		sync = TRUE;
	}

	/* The call below also checks whether the tablespace does not exist
	or is being dropped; once the page has been initialized in the
	buffer pool for the read, DISCARD cannot proceed until the read has
	completed */

	bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
				       tablespace_version, offset);
	if (bpage == NULL) {

		return(0);
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Posting read request for page %lu, sync %lu\n",
			(ulong) offset,
			(ulong) sync);
	}
#endif

	ut_ad(buf_page_in_file(bpage));

	/* Choose the destination buffer and the number of bytes to read:
	the compressed page image when zip_size is nonzero, otherwise the
	uncompressed frame of the block */
	if (zip_size == 0) {
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);

		dest = ((buf_block_t*) bpage)->frame;
		len = UNIV_PAGE_SIZE;
	} else {
		dest = bpage->zip.data;
		len = zip_size;
	}

	*err = fil_io(OS_FILE_READ | wake_later,
		      sync, space, zip_size, offset, 0, len,
		      dest, bpage);

	ut_a(*err == DB_SUCCESS);

	if (sync) {
		/* A synchronous read has already finished when fil_io
		returns, so complete the i/o here */
		buf_page_io_complete(bpage);
	}

	return(1);
}
/************************************************************************
Applies a random read-ahead in buf_pool if at least a threshold number of
pages in the random read-ahead area have been accessed recently. Does not
read any page, not even the one at the position (space, offset), if the
read-ahead mechanism is not activated. NOTE 1: the calling thread may own
latches on pages: to avoid deadlocks this function must be written such that
it cannot end up waiting for these latches! NOTE 2: the calling thread must
want access to the page given: this rule is set to prevent unintended
read-aheads performed by ibuf routines, a situation which could result in a
deadlock if the OS does not support asynchronous i/o. */
static
ulint
buf_read_ahead_random(
/*==================*/
			/* out: number of page read requests issued; NOTE
			that if we read ibuf pages, it may happen that
			the page at the given page number does not get
			read even if we return a value > 0! */
	ulint	space,	/* in: space id */
	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
	ulint	offset)	/* in: page number of a page which the current thread
			wants to access */
{
	ib_int64_t	tablespace_version;
	ulint		recent_blocks	= 0;
	ulint		count		= 0;
	ulint		LRU_recent_limit;
	ulint		ibuf_mode;
	ulint		low, high;
	ulint		err;
	ulint		i;
	/* NOTE: BUF_READ_AHEAD_RANDOM_THRESHOLD refers to this variable by
	name, so it must keep this name */
	ulint		buf_read_ahead_random_area;

	if (srv_startup_is_before_trx_rollback_phase) {
		/* No read-ahead to avoid thread deadlocks */
		return(0);
	}

	if (ibuf_bitmap_page(zip_size, offset)
	    || trx_sys_hdr_page(space, offset)) {

		/* No read-ahead for an ibuf bitmap page or the trx sys
		header, as that could break the ibuf page access order */

		return(0);
	}

	/* Remember the tablespace version before we ask the tablespace size
	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
	do not try to read outside the bounds of the tablespace! */

	tablespace_version = fil_space_get_version(space);

	/* Compute the area [low, high) that contains the page, clipped to
	the size of the tablespace */
	buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;

	low = (offset / buf_read_ahead_random_area)
		* buf_read_ahead_random_area;
	high = (offset / buf_read_ahead_random_area + 1)
		* buf_read_ahead_random_area;

	if (high > fil_space_get_size(space)) {

		high = fil_space_get_size(space);
	}

	/* Get the minimum LRU_position field value for an initial segment
	of the LRU list, to determine which blocks have recently been added
	to the start of the list. */

	LRU_recent_limit = buf_LRU_get_recent_limit();

	buf_pool_mutex_enter();

	if (buf_pool->n_pend_reads
	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {

		/* Too many reads are pending already */
		buf_pool_mutex_exit();

		return(0);
	}

	/* Count the blocks of the area that have been accessed and reside
	near the start of the LRU list; stop counting as soon as the
	threshold is reached. */

	for (i = low; i < high; i++) {
		const buf_page_t*	bpage = buf_page_hash_get(space, i);

		if (!bpage
		    || !buf_page_is_accessed(bpage)
		    || buf_page_get_LRU_position(bpage)
		    <= LRU_recent_limit) {

			continue;
		}

		recent_blocks++;

		if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {

			break;
		}
	}

	buf_pool_mutex_exit();

	if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
		/* Do nothing */
		return(0);
	}

	/* Read all the suitable blocks within the area */

	ibuf_mode = ibuf_inside()
		? BUF_READ_IBUF_PAGES_ONLY
		: BUF_READ_ANY_PAGE;

	for (i = low; i < high; i++) {

		if (ibuf_bitmap_page(zip_size, i)) {

			continue;
		}

		/* It is only sensible to do read-ahead in the non-sync aio
		mode: hence FALSE as the sync parameter */

		count += buf_read_page_low(
			&err, FALSE,
			ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
			space, zip_size, FALSE,
			tablespace_version, i);

		if (err == DB_TABLESPACE_DELETED) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Warning: in random"
				" readahead trying to access\n"
				"InnoDB: tablespace %lu page %lu,\n"
				"InnoDB: but the tablespace does not"
				" exist or is just being dropped.\n",
				(ulong) space, (ulong) i);
		}
	}

	/* In simulated aio we wake the aio handler threads only after
	queuing all aio requests, in native aio the following call does
	nothing: */

	os_aio_simulated_wake_handler_threads();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && (count > 0)) {
		fprintf(stderr,
			"Random read-ahead space %lu offset %lu pages %lu\n",
			(ulong) space, (ulong) offset,
			(ulong) count);
	}
#endif /* UNIV_DEBUG */

	++srv_read_ahead_rnd;

	return(count);
}
/************************************************************************
High-level function which reads a page from a file to the buffer buf_pool
if it is not already there. Sets the io_fix flag and sets an exclusive lock
on the buffer frame. A random read-ahead is attempted first; the page
itself is then requested in the synchronous aio mode. */
UNIV_INTERN
ulint
buf_read_page(
/*==========*/
			/* out: number of page read requests issued: this can
			be > 1 if read-ahead occurred */
	ulint	space,	/* in: space id */
	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
	ulint	offset)	/* in: page number */
{
	ib_int64_t	version;
	ulint		n_ahead;
	ulint		n_read;
	ulint		err;

	version = fil_space_get_version(space);

	n_ahead = buf_read_ahead_random(space, zip_size, offset);

	/* The i/o is done in the synchronous aio mode to save thread
	switches: hence TRUE as the sync parameter */

	n_read = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
				   zip_size, FALSE, version, offset);

	srv_buf_pool_reads += n_read;

	if (err == DB_TABLESPACE_DELETED) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Error: trying to access"
			" tablespace %lu page no. %lu,\n"
			"InnoDB: but the tablespace does not exist"
			" or is just being dropped.\n",
			(ulong) space, (ulong) offset);
	}

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

	/* Increment number of I/O operations used for LRU policy. */
	buf_LRU_stat_inc_io();

	return(n_ahead + n_read);
}
/************************************************************************
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and enough pages in the area have been accessed in
the right order. Does not read any page if the read-ahead mechanism is not
activated. Note that the algorithm looks at the 'natural' adjacent successor
and predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
bufferfixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens, if these are not initialized to any
sensible value? No problem, before applying read-ahead we check that the
area to read is within the span of the space, if not, read-ahead is not
applied. An uninitialized value may result in a useless read operation, but
only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io. */
UNIV_INTERN
ulint
buf_read_ahead_linear(
/*==================*/
			/* out: number of page read requests issued */
	ulint	space,	/* in: space id */
	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
	ulint	offset)	/* in: page number of a page; NOTE: the current thread
			must want access to this page (see NOTE 3 above) */
{
	ib_int64_t	tablespace_version;
	buf_page_t*	bpage;
	buf_frame_t*	frame;
	buf_page_t*	pred_bpage	= NULL;
	ulint		pred_offset;
	ulint		succ_offset;
	ulint		count;
	int		asc_or_desc;
	ulint		new_offset;
	ulint		fail_count;
	ulint		ibuf_mode;
	ulint		low, high;
	ulint		err;
	ulint		i;
	const ulint	buf_read_ahead_linear_area
		= BUF_READ_AHEAD_LINEAR_AREA;

	if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
		/* No read-ahead to avoid thread deadlocks */
		return(0);
	}

	low  = (offset / buf_read_ahead_linear_area)
		* buf_read_ahead_linear_area;
	high = (offset / buf_read_ahead_linear_area + 1)
		* buf_read_ahead_linear_area;

	if ((offset != low) && (offset != high - 1)) {
		/* This is not a border page of the area: return */

		return(0);
	}

	if (ibuf_bitmap_page(zip_size, offset)
	    || trx_sys_hdr_page(space, offset)) {

		/* If it is an ibuf bitmap page or trx sys hdr, we do
		no read-ahead, as that could break the ibuf page access
		order */

		return(0);
	}

	/* Remember the tablespace version before we ask the tablespace size
	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
	do not try to read outside the bounds of the tablespace! */

	tablespace_version = fil_space_get_version(space);

	buf_pool_mutex_enter();

	if (high > fil_space_get_size(space)) {
		buf_pool_mutex_exit();
		/* The area is not whole, return */

		return(0);
	}

	if (buf_pool->n_pend_reads
	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
		buf_pool_mutex_exit();

		return(0);
	}

	/* Check that almost all pages in the area have been accessed; if
	offset == low, the accesses must be in a descending order, otherwise,
	in an ascending order. */

	asc_or_desc = 1;

	if (offset == low) {
		asc_or_desc = -1;
	}

	fail_count = 0;

	for (i = low; i < high; i++) {
		bpage = buf_page_hash_get(space, i);

		if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
			/* Not accessed */
			fail_count++;

			continue;
		}

		if (pred_bpage
		    && (ut_ulint_cmp(
				buf_page_get_LRU_position(bpage),
				buf_page_get_LRU_position(pred_bpage))
			!= asc_or_desc)) {
			/* Accesses not in the right order */

			fail_count++;
		}

		/* Remember the latest accessed page of the area, so that
		the next accessed page is compared against it. This must be
		done for every accessed page: if it were done only when an
		order violation is detected, pred_bpage would stay NULL
		forever and the order check above could never run. */
		pred_bpage = bpage;
	}

	/* NOTE: LINEAR_AREA_THRESHOLD_COEF expands to "5 / 8" without
	parentheses, so the right-hand side is area * 5 / 8 */
	if (fail_count > buf_read_ahead_linear_area
	    * LINEAR_AREA_THRESHOLD_COEF) {
		/* Too many failures: return */

		buf_pool_mutex_exit();

		return(0);
	}

	/* If we got this far, we know that enough pages in the area have
	been accessed in the right order: linear read-ahead can be sensible */

	bpage = buf_page_hash_get(space, offset);

	if (bpage == NULL) {
		buf_pool_mutex_exit();

		return(0);
	}

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_PAGE:
		frame = bpage->zip.data;
		break;
	case BUF_BLOCK_FILE_PAGE:
		frame = ((buf_block_t*) bpage)->frame;
		break;
	default:
		ut_error;
		break;
	}

	/* Read the natural predecessor and successor page addresses from
	the page; NOTE that because the calling thread may have an x-latch
	on the page, we do not acquire an s-latch on the page, this is to
	prevent deadlocks. Even if we read values which are nonsense, the
	algorithm will work. */

	pred_offset = fil_page_get_prev(frame);
	succ_offset = fil_page_get_next(frame);

	buf_pool_mutex_exit();

	if ((offset == low) && (succ_offset == offset + 1)) {

		/* This is ok, we can continue */
		new_offset = pred_offset;

	} else if ((offset == high - 1) && (pred_offset == offset - 1)) {

		/* This is ok, we can continue */
		new_offset = succ_offset;
	} else {
		/* Successor or predecessor not in the right order */

		return(0);
	}

	/* Compute the area that contains the page to continue from */
	low  = (new_offset / buf_read_ahead_linear_area)
		* buf_read_ahead_linear_area;
	high = (new_offset / buf_read_ahead_linear_area + 1)
		* buf_read_ahead_linear_area;

	if ((new_offset != low) && (new_offset != high - 1)) {
		/* This is not a border page of the area: return */

		return(0);
	}

	if (high > fil_space_get_size(space)) {
		/* The area is not whole, return */

		return(0);
	}

	/* If we got this far, read-ahead can be sensible: do it */

	if (ibuf_inside()) {
		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
	} else {
		ibuf_mode = BUF_READ_ANY_PAGE;
	}

	count = 0;

	/* Since Windows XP seems to schedule the i/o handler thread
	very eagerly, and consequently it does not wait for the
	full read batch to be posted, we use special heuristics here */

	os_aio_simulated_put_read_threads_to_sleep();

	for (i = low; i < high; i++) {
		/* It is only sensible to do read-ahead in the non-sync
		aio mode: hence FALSE as the sync parameter */

		if (!ibuf_bitmap_page(zip_size, i)) {
			count += buf_read_page_low(
				&err, FALSE,
				ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
				space, zip_size, FALSE, tablespace_version, i);
			if (err == DB_TABLESPACE_DELETED) {
				ut_print_timestamp(stderr);
				fprintf(stderr,
					"  InnoDB: Warning: in"
					" linear readahead trying to access\n"
					"InnoDB: tablespace %lu page %lu,\n"
					"InnoDB: but the tablespace does not"
					" exist or is just being dropped.\n",
					(ulong) space, (ulong) i);
			}
		}
	}

	/* In simulated aio we wake the aio handler threads only after
	queuing all aio requests, in native aio the following call does
	nothing: */

	os_aio_simulated_wake_handler_threads();

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && (count > 0)) {
		fprintf(stderr,
			"LINEAR read-ahead space %lu offset %lu pages %lu\n",
			(ulong) space, (ulong) offset, (ulong) count);
	}
#endif /* UNIV_DEBUG */

	/* Read ahead is considered one I/O operation for the purpose of
	LRU policy decision. */
	buf_LRU_stat_inc_io();

	++srv_read_ahead_seq;
	return(count);
}
/************************************************************************
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. */
UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
	ibool		sync,		/* in: TRUE if the caller
					wants this function to wait
					for the highest address page
					to get read in, before this
					function returns */
	const ulint*	space_ids,	/* in: array of space ids */
	const ib_int64_t* space_versions,/* in: the spaces must have
					this version number
					(timestamp), otherwise we
					discard the read; we use this
					to cancel reads if DISCARD +
					IMPORT may have changed the
					tablespace size */
	const ulint*	page_nos,	/* in: array of page numbers
					to read, with the highest page
					number the last in the
					array */
	ulint		n_stored)	/* in: number of elements
					in the arrays */
{
	ulint	i;

	ut_ad(!ibuf_inside());
#ifdef UNIV_IBUF_DEBUG
	ut_a(n_stored < UNIV_PAGE_SIZE);
#endif

	/* Wait until the number of pending reads has dropped below the
	read-ahead limit */
	while (buf_pool->n_pend_reads
	       > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
		os_thread_sleep(500000);
	}

	for (i = 0; i < n_stored; i++) {
		ulint	zip_size = fil_space_get_zip_size(space_ids[i]);
		ulint	err;
		ibool	deleted;

		if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
			/* The tablespace was not found: no read is
			attempted */
			deleted = TRUE;
		} else {
			/* Only the last page of the batch is read
			synchronously, and only if the caller asked for it */
			buf_read_page_low(&err, sync && (i + 1 == n_stored),
					  BUF_READ_ANY_PAGE, space_ids[i],
					  zip_size, TRUE, space_versions[i],
					  page_nos[i]);

			deleted = (err == DB_TABLESPACE_DELETED);
		}

		if (UNIV_UNLIKELY(deleted)) {
			/* We have deleted or are deleting the single-table
			tablespace: remove the entries for that page */

			ibuf_merge_or_delete_for_page(NULL, space_ids[i],
						      page_nos[i],
						      zip_size, FALSE);
		}
	}

	os_aio_simulated_wake_handler_threads();

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Ibuf merge read-ahead space %lu pages %lu\n",
			(ulong) space_ids[0], (ulong) n_stored);
	}
#endif /* UNIV_DEBUG */
}
/************************************************************************
Issues read requests for pages which recovery wants to read in. If the
compressed page size of the tablespace cannot be determined
(fil_space_get_zip_size() returns ULINT_UNDEFINED), no reads are issued. */
UNIV_INTERN
void
buf_read_recv_pages(
/*================*/
	ibool		sync,		/* in: TRUE if the caller
					wants this function to wait
					for the highest address page
					to get read in, before this
					function returns */
	ulint		space,		/* in: space id */
	ulint		zip_size,	/* in: compressed page size in
					bytes, or 0; NOTE: the value passed
					in is ignored and replaced by the
					result of fil_space_get_zip_size() */
	const ulint*	page_nos,	/* in: array of page numbers
					to read, with the highest page
					number the last in the
					array */
	ulint		n_stored)	/* in: number of page numbers
					in the array */
{
	ib_int64_t	tablespace_version;
	ulint		count;
	ulint		err;
	ulint		i;

	zip_size = fil_space_get_zip_size(space);

	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
		/* The tablespace is not known: do nothing. Without this
		check the undefined value would be passed on as a compressed
		page size to buf_read_page_low(); the same condition is
		checked in buf_read_ibuf_merge_pages(). */

		return;
	}

	tablespace_version = fil_space_get_version(space);

	for (i = 0; i < n_stored; i++) {

		count = 0;

		os_aio_print_debug = FALSE;

		/* Throttle: wait while the pending reads occupy at least
		half of the frames that recovery considers free. Each round
		sleeps 0.5 s; after more than 100 rounds a diagnostic is
		printed on every further round. */
		while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {

			os_aio_simulated_wake_handler_threads();
			os_thread_sleep(500000);

			count++;

			if (count > 100) {
				fprintf(stderr,
					"InnoDB: Error: InnoDB has waited for"
					" 50 seconds for pending\n"
					"InnoDB: reads to the buffer pool to"
					" be finished.\n"
					"InnoDB: Number of pending reads %lu,"
					" pending pread calls %lu\n",
					(ulong) buf_pool->n_pend_reads,
					(ulong)os_file_n_pending_preads);

				os_aio_print_debug = TRUE;
			}
		}

		os_aio_print_debug = FALSE;

		if ((i + 1 == n_stored) && sync) {
			/* The last page is read synchronously on request */
			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
					  zip_size, TRUE, tablespace_version,
					  page_nos[i]);
		} else {
			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
					  | OS_AIO_SIMULATED_WAKE_LATER,
					  space, zip_size, TRUE,
					  tablespace_version, page_nos[i]);
		}
	}

	os_aio_simulated_wake_handler_threads();

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Recovery applies read-ahead pages %lu\n",
			(ulong) n_stored);
	}
#endif /* UNIV_DEBUG */
}

9
compile-innodb Executable file
View File

@@ -0,0 +1,9 @@
#! /bin/sh
# Optimized build with debug symbols (-g) and the innobase plugin.
# SETUP.sh and FINISH.sh are sourced from the directory of this script;
# presumably SETUP.sh defines the $pentium_* / $fast_cflags / $static_link
# variables used below and FINISH.sh consumes extra_flags / extra_configs
# (TODO confirm against those scripts).
# "$0" is quoted so that a script path containing whitespace still works.
path=`dirname "$0"`
. "$path/SETUP.sh"
extra_flags="$pentium_cflags $fast_cflags -g"
extra_configs="$pentium_configs $static_link --with-plugins=innobase"
. "$path/FINISH.sh"

9
compile-innodb-debug Executable file
View File

@@ -0,0 +1,9 @@
#! /bin/sh
# Debug build with the innobase plugin. The arguments of this script are
# forwarded to SETUP.sh, followed by --with-debug=full.
# SETUP.sh and FINISH.sh are sourced from the directory of this script;
# presumably SETUP.sh defines the $pentium_* / $debug_* variables used below
# and FINISH.sh consumes extra_flags / extra_configs (TODO confirm against
# those scripts).
# "$0" and "$@" are quoted so that a path or an argument containing
# whitespace is not split into several words.
path=`dirname "$0"`
. "$path/SETUP.sh" "$@" --with-debug=full
extra_flags="$pentium_cflags $debug_cflags"
extra_configs="$pentium_configs $debug_configs --with-plugins=innobase"
. "$path/FINISH.sh"

737
data/data0data.c Normal file
View File

@@ -0,0 +1,737 @@
/************************************************************************
SQL data field and tuple
(c) 1994-1996 Innobase Oy
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#include "data0data.h"
#ifdef UNIV_NONINL
#include "data0data.ic"
#endif
#include "rem0rec.h"
#include "rem0cmp.h"
#include "page0page.h"
#include "page0zip.h"
#include "dict0dict.h"
#include "btr0cur.h"
#include <ctype.h>
#ifdef UNIV_DEBUG
/* Debug-only sentinel: data pointers of tuple fields are initialized to
point here for error checking, so that a field whose data was never set can
be recognized by its address. */
UNIV_INTERN byte data_error;
/* Debug-only accumulator: dtuple_validate() adds every byte of every
non-NULL field to this variable, so that the compiler cannot optimize away
the reads that are meant to touch all of the field data. */
UNIV_INTERN ulint data_dummy;
#endif /* UNIV_DEBUG */
/*************************************************************************
Tests whether the length and the contents of a data field are byte for byte
equal to the given length and data. Two SQL NULL values are equal. */
UNIV_INTERN
ibool
dfield_data_is_binary_equal(
/*========================*/
				/* out: TRUE if equal */
	const dfield_t*	field,	/* in: field */
	ulint		len,	/* in: data length or UNIV_SQL_NULL */
	const byte*	data)	/* in: data */
{
	if (dfield_get_len(field) != len) {
		/* Different lengths, or exactly one of the two is NULL */
		return(FALSE);
	}

	/* Equal lengths: both are NULL, or the bytes must match */
	return(len == UNIV_SQL_NULL
	       || memcmp(dfield_get_data(field), data, len) == 0);
}
/****************************************************************
Compare two data tuples, respecting the collation of character fields.
A tuple with fewer fields is always less than a tuple with more fields;
tuples with the same number of fields are compared field by field. */
UNIV_INTERN
int
dtuple_coll_cmp(
/*============*/
				/* out: 1, 0 , -1 if tuple1 is greater, equal,
				less, respectively, than tuple2 */
	const dtuple_t*	tuple1,	/* in: tuple 1 */
	const dtuple_t*	tuple2)	/* in: tuple 2 */
{
	ulint	n1;
	ulint	n2;
	ulint	pos;

	ut_ad(tuple1 && tuple2);
	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
	ut_ad(dtuple_check_typed(tuple1));
	ut_ad(dtuple_check_typed(tuple2));

	n1 = dtuple_get_n_fields(tuple1);
	n2 = dtuple_get_n_fields(tuple2);

	if (n1 < n2) {

		return(-1);
	}

	if (n1 > n2) {

		return(1);
	}

	/* Same number of fields: the first differing field decides */
	for (pos = 0; pos < n1; pos++) {
		int	res = cmp_dfield_dfield(
			dtuple_get_nth_field(tuple1, pos),
			dtuple_get_nth_field(tuple2, pos));

		if (res != 0) {

			return(res);
		}
	}

	return(0);
}
/*************************************************************************
Sets number of fields used in a tuple. Normally this is set in
dtuple_create, but if you want later to set it smaller, you can use this.
The number of fields used in comparisons is set to the same value. */
UNIV_INTERN
void
dtuple_set_n_fields(
/*================*/
	dtuple_t*	tuple,		/* in: tuple */
	ulint		n_fields)	/* in: number of fields */
{
	ut_ad(tuple);

	tuple->n_fields_cmp = n_fields;
	tuple->n_fields = n_fields;
}
/**************************************************************
Checks that a data field is typed. */
static
ibool
dfield_check_typed_no_assert(
/*=========================*/
/* out: TRUE if ok */
const dfield_t* field) /* in: data field */
{
if (dfield_get_type(field)->mtype > DATA_MYSQL
|| dfield_get_type(field)->mtype < DATA_VARCHAR) {
fprintf(stderr,
"InnoDB: Error: data field type %lu, len %lu\n",
(ulong) dfield_get_type(field)->mtype,
(ulong) dfield_get_len(field));
return(FALSE);
}
return(TRUE);
}
/**************************************************************
Checks that a data tuple is typed: it must not have more than
REC_MAX_N_FIELDS fields and every field must be typed. On failure the
contents of the tuple are printed to stderr. */
UNIV_INTERN
ibool
dtuple_check_typed_no_assert(
/*=========================*/
				/* out: TRUE if ok */
	const dtuple_t*	tuple)	/* in: tuple */
{
	ibool	ok	= TRUE;
	ulint	pos;

	if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
		fprintf(stderr,
			"InnoDB: Error: index entry has %lu fields\n",
			(ulong) dtuple_get_n_fields(tuple));

		ok = FALSE;
	} else {
		/* Stop at the first field that fails the check */
		for (pos = 0; pos < dtuple_get_n_fields(tuple); pos++) {

			if (!dfield_check_typed_no_assert(
				    dtuple_get_nth_field(tuple, pos))) {
				ok = FALSE;

				break;
			}
		}
	}

	if (!ok) {
		/* Dump the offending tuple */
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, tuple);
		putc('\n', stderr);
	}

	return(ok);
}
/**************************************************************
Checks that a data field is typed, that is, that its main type lies in the
range DATA_VARCHAR ... DATA_MYSQL. Prints a diagnostic and asserts an error
if not. */
UNIV_INTERN
ibool
dfield_check_typed(
/*===============*/
				/* out: TRUE if ok */
	const dfield_t*	field)	/* in: data field */
{
	const ulong	mtype = (ulong) dfield_get_type(field)->mtype;

	if (mtype < DATA_VARCHAR || mtype > DATA_MYSQL) {

		fprintf(stderr,
			"InnoDB: Error: data field type %lu, len %lu\n",
			mtype,
			(ulong) dfield_get_len(field));

		ut_error;
	}

	return(TRUE);
}
/**************************************************************
Checks that every field of a data tuple is typed. Asserts an error if
not. */
UNIV_INTERN
ibool
dtuple_check_typed(
/*===============*/
				/* out: TRUE if ok */
	const dtuple_t*	tuple)	/* in: tuple */
{
	ulint	pos	= 0;

	while (pos < dtuple_get_n_fields(tuple)) {
		const dfield_t*	fld = dtuple_get_nth_field(tuple, pos);

		ut_a(dfield_check_typed(fld));

		pos++;
	}

	return(TRUE);
}
#ifdef UNIV_DEBUG
/**************************************************************
Validates the consistency of a tuple which must be complete, i.e,
all fields must have been set. Every byte of every non-NULL field is read,
and the tuple is checked to be typed. */
UNIV_INTERN
ibool
dtuple_validate(
/*============*/
				/* out: TRUE if ok */
	const dtuple_t*	tuple)	/* in: tuple */
{
	const dfield_t*	field;
	const byte*	data;
	const byte*	end;
	ulint		n_fields;
	ulint		len;
	ulint		i;

	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);

	n_fields = dtuple_get_n_fields(tuple);

	/* All the data of each field is dereferenced in order to test
	for memory traps */

	for (i = 0; i < n_fields; i++) {

		field = dtuple_get_nth_field(tuple, i);
		len = dfield_get_len(field);

		if (dfield_is_null(field)) {
			/* A NULL field has no data to read */
			continue;
		}

		data = dfield_get_data(field);
		UNIV_MEM_ASSERT_RW(data, len);

		for (end = data + len; data < end; data++) {
			/* Accumulate into a global so that the compiler
			cannot optimize out these reads */
			data_dummy += *data;
		}
	}

	ut_a(dtuple_check_typed(tuple));

	return(TRUE);
}
#endif /* UNIV_DEBUG */
/*****************************************************************
Pretty prints a dfield value to stderr according to its data type. Only
DATA_CHAR, DATA_VARCHAR and 4-byte DATA_INT values are supported; any other
main type asserts an error. */
UNIV_INTERN
void
dfield_print(
/*=========*/
	const dfield_t*	dfield)	/* in: dfield */
{
	const byte*	data;
	const byte*	end;
	ulint		len;
	ulint		mtype;

	len = dfield_get_len(dfield);
	data = dfield_get_data(dfield);

	if (dfield_is_null(dfield)) {
		fputs("NULL", stderr);

		return;
	}

	mtype = dtype_get_mtype(dfield_get_type(dfield));

	if (mtype == DATA_CHAR || mtype == DATA_VARCHAR) {

		/* Non-printable characters are shown as spaces */
		for (end = data + len; data < end; data++) {
			int	c = *data;

			if (isprint(c)) {
				putc(c, stderr);
			} else {
				putc(' ', stderr);
			}
		}

		if (dfield_is_ext(dfield)) {
			fputs("(external)", stderr);
		}
	} else if (mtype == DATA_INT) {

		ut_a(len == 4); /* only works for 32-bit integers */
		fprintf(stderr, "%d", (int)mach_read_from_4(data));
	} else {
		ut_error;
	}
}
/*****************************************************************
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
const dfield_t* dfield) /* in: dfield */
{
const byte* data;
ulint len;
ulint prtype;
ulint i;
ibool print_also_hex;
len = dfield_get_len(dfield);
data = dfield_get_data(dfield);
if (dfield_is_null(dfield)) {
fputs("NULL", stderr);
return;
}
prtype = dtype_get_prtype(dfield_get_type(dfield));
switch (dtype_get_mtype(dfield_get_type(dfield))) {
dulint id;
case DATA_INT:
switch (len) {
ulint val;
case 1:
val = mach_read_from_1(data);
if (!(prtype & DATA_UNSIGNED)) {
val &= ~0x80;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
}
break;
case 2:
val = mach_read_from_2(data);
if (!(prtype & DATA_UNSIGNED)) {
val &= ~0x8000;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
}
break;
case 3:
val = mach_read_from_3(data);
if (!(prtype & DATA_UNSIGNED)) {
val &= ~0x800000;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
}
break;
case 4:
val = mach_read_from_4(data);
if (!(prtype & DATA_UNSIGNED)) {
val &= ~0x80000000;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
}
break;
case 6:
id = mach_read_from_6(data);
fprintf(stderr, "{%lu %lu}",
ut_dulint_get_high(id),
ut_dulint_get_low(id));
break;
case 7:
id = mach_read_from_7(data);
fprintf(stderr, "{%lu %lu}",
ut_dulint_get_high(id),
ut_dulint_get_low(id));
break;
case 8:
id = mach_read_from_8(data);
fprintf(stderr, "{%lu %lu}",
ut_dulint_get_high(id),
ut_dulint_get_low(id));
break;
default:
goto print_hex;
}
break;
case DATA_SYS:
switch (prtype & DATA_SYS_PRTYPE_MASK) {
case DATA_TRX_ID:
id = mach_read_from_6(data);
fprintf(stderr, "trx_id " TRX_ID_FMT,
TRX_ID_PREP_PRINTF(id));
break;
case DATA_ROLL_PTR:
id = mach_read_from_7(data);
fprintf(stderr, "roll_ptr {%lu %lu}",
ut_dulint_get_high(id), ut_dulint_get_low(id));
break;
case DATA_ROW_ID:
id = mach_read_from_6(data);
fprintf(stderr, "row_id {%lu %lu}",
ut_dulint_get_high(id), ut_dulint_get_low(id));
break;
default:
id = mach_dulint_read_compressed(data);
fprintf(stderr, "mix_id {%lu %lu}",
ut_dulint_get_high(id), ut_dulint_get_low(id));
}
break;
case DATA_CHAR:
case DATA_VARCHAR:
print_also_hex = FALSE;
for (i = 0; i < len; i++) {
int c = *data++;
if (!isprint(c)) {
print_also_hex = TRUE;
fprintf(stderr, "\\x%02x", (unsigned char) c);
} else {
putc(c, stderr);
}
}
if (dfield_is_ext(dfield)) {
fputs("(external)", stderr);
}
if (!print_also_hex) {
break;
}
data = dfield_get_data(dfield);
/* fall through */
case DATA_BINARY:
default:
print_hex:
fputs(" Hex: ",stderr);
for (i = 0; i < len; i++) {
fprintf(stderr, "%02lx", (ulint) *data++);
}
if (dfield_is_ext(dfield)) {
fputs("(external)", stderr);
}
}
}
/*****************************************************************
Print a dfield value using ut_print_buf. */
static
void
dfield_print_raw(
/*=============*/
FILE* f, /* in: output stream */
const dfield_t* dfield) /* in: dfield */
{
ulint len = dfield_get_len(dfield);
if (!dfield_is_null(dfield)) {
ulint print_len = ut_min(len, 1000);
ut_print_buf(f, dfield_get_data(dfield), print_len);
if (len != print_len) {
fprintf(f, "(total %lu bytes%s)",
(ulong) len,
dfield_is_ext(dfield) ? ", external" : "");
}
} else {
fputs(" SQL NULL", f);
}
}
/**************************************************************
The following function prints the contents of a tuple. */
UNIV_INTERN
void
dtuple_print(
/*=========*/
FILE* f, /* in: output stream */
const dtuple_t* tuple) /* in: tuple */
{
ulint n_fields;
ulint i;
n_fields = dtuple_get_n_fields(tuple);
fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);
for (i = 0; i < n_fields; i++) {
fprintf(f, " %lu:", (ulong) i);
dfield_print_raw(f, dtuple_get_nth_field(tuple, i));
putc(';', f);
}
putc('\n', f);
ut_ad(dtuple_validate(tuple));
}
/******************************************************************
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index.
Fields are shortened one at a time, the one with the largest savings
first, until page_zip_rec_needs_ext() returns false for the converted
size of the entry. */
UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
/* out, own: created big record vector,
NULL if we are not able to shorten
the entry enough, i.e., if there are
too many fixed-length or short fields
in entry, or if the index is not
clustered */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in/out: index entry */
ulint* n_ext) /* in/out: number of
externally stored columns */
{
mem_heap_t* heap;
big_rec_t* vector;
dfield_t* dfield;
dict_field_t* ifield;
ulint size;
ulint n_fields;
ulint local_len;
ulint local_prefix_len;
/* Only entries of a clustered index are converted. */
if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
return(NULL);
}
/* local_len is the number of bytes a shortened column keeps in
the record: the BLOB pointer plus an optional data prefix. */
if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
/* up to MySQL 5.1: store a 768-byte prefix locally */
local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
} else {
/* new-format table: do not store any BLOB prefix locally */
local_len = BTR_EXTERN_FIELD_REF_SIZE;
}
ut_a(dtuple_check_typed_no_assert(entry));
size = rec_get_converted_size(index, entry, *n_ext);
/* Diagnostics only: a suspiciously large tuple is reported on
stderr and processing continues. */
if (UNIV_UNLIKELY(size > 1000000000)) {
fprintf(stderr,
"InnoDB: Warning: tuple size very big: %lu\n",
(ulong) size);
fputs("InnoDB: Tuple contents: ", stderr);
dtuple_print(stderr, entry);
putc('\n', stderr);
}
/* The vector, its field array and every locally stored column
part allocated below come from this one heap, which is handed
over to the vector (vector->heap). */
heap = mem_heap_create(size + dtuple_get_n_fields(entry)
* sizeof(big_rec_field_t) + 1000);
vector = mem_heap_alloc(heap, sizeof(big_rec_t));
vector->heap = heap;
vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
* sizeof(big_rec_field_t));
/* Decide which fields to shorten: the algorithm is to look for
a variable-length field that yields the biggest savings when
stored externally */
n_fields = 0;
while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
*n_ext),
dict_table_is_comp(index->table),
dict_table_zip_size(index->table))) {
ulint i;
ulint longest = 0;
ulint longest_i = ULINT_MAX;
byte* data;
big_rec_field_t* b;
/* Scan only the fields after the first
dict_index_get_n_unique_in_tree(index) ones; the leading
fields are never moved. */
for (i = dict_index_get_n_unique_in_tree(index);
i < dtuple_get_n_fields(entry); i++) {
ulint savings;
dfield = dtuple_get_nth_field(entry, i);
ifield = dict_index_get_nth_field(index, i);
/* Skip fixed-length, NULL, externally stored,
or short columns */
if (ifield->fixed_len
|| dfield_is_null(dfield)
|| dfield_is_ext(dfield)
|| dfield_get_len(dfield) <= local_len
|| dfield_get_len(dfield)
<= BTR_EXTERN_FIELD_REF_SIZE * 2) {
goto skip_field;
}
savings = dfield_get_len(dfield) - local_len;
/* Check that there would be savings */
if (longest >= savings) {
goto skip_field;
}
/* Best candidate so far; on equal savings the
earlier field is kept. */
longest_i = i;
longest = savings;
skip_field:
continue;
}
if (!longest) {
/* Cannot shorten more */
/* NOTE(review): fields shortened in earlier passes
of the while loop already point into heap (see
dfield_set_data() below) and *n_ext has already been
incremented for them; after this free the caller
presumably must not use entry any more -- confirm
against the callers. */
mem_heap_free(heap);
return(NULL);
}
/* Move data from field longest_i to big rec vector.
We store the first bytes locally to the record. Then
we can calculate all ordering fields in all indexes
from locally stored data. */
dfield = dtuple_get_nth_field(entry, longest_i);
/* ifield is not read again in this function after this
assignment. */
ifield = dict_index_get_nth_field(index, longest_i);
/* Number of data bytes that stay in the record in front of
the BLOB pointer (0 when local_len is just the pointer). */
local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;
b = &vector->fields[n_fields];
b->field_no = longest_i;
/* The vector entry refers to the tail of the original
column buffer; no data is copied for it. */
b->len = dfield_get_len(dfield) - local_prefix_len;
b->data = (char*) dfield_get_data(dfield) + local_prefix_len;
/* Allocate the locally stored part of the column. */
data = mem_heap_alloc(heap, local_len);
/* Copy the local prefix. */
memcpy(data, dfield_get_data(dfield), local_prefix_len);
/* Clear the extern field reference (BLOB pointer). */
memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
#if 0
/* The following would fail the Valgrind checks in
page_cur_insert_rec_low() and page_cur_insert_rec_zip().
The BLOB pointers in the record will be initialized after
the record and the BLOBs have been written. */
UNIV_MEM_ALLOC(data + local_prefix_len,
BTR_EXTERN_FIELD_REF_SIZE);
#endif
/* Point the field at the shortened local copy and flag it as
externally stored. */
dfield_set_data(dfield, data, local_len);
dfield_set_ext(dfield);
n_fields++;
(*n_ext)++;
ut_ad(n_fields < dtuple_get_n_fields(entry));
}
vector->n_fields = n_fields;
return(vector);
}
/******************************************************************
Undoes dtuple_convert_big_rec(): every field listed in the vector gets
back a data pointer and a length that cover both the locally stored
prefix and the part that was moved to the vector. To ensure that the
fields in entry can accommodate the data, the vector must have been
created from this entry with dtuple_convert_big_rec. The heap of the
vector is freed at the end. */
UNIV_INTERN
void
dtuple_convert_back_big_rec(
/*========================*/
	dict_index_t*	index __attribute__((unused)),	/* in: index */
	dtuple_t*	entry,	/* in: entry whose data was put to vector */
	big_rec_t*	vector)	/* in, own: big rec vector; it is
				freed in this function */
{
	ulint	n;

	for (n = 0; n < vector->n_fields; n++) {
		big_rec_field_t*	moved = &vector->fields[n];
		dfield_t*		dfield;
		ulint			local_len;

		dfield = dtuple_get_nth_field(entry, moved->field_no);
		local_len = dfield_get_len(dfield);

		ut_ad(dfield_is_ext(dfield));
		ut_ad(local_len >= BTR_EXTERN_FIELD_REF_SIZE);

		/* What remains after dropping the BLOB pointer is the
		length of the locally stored prefix. */
		local_len -= BTR_EXTERN_FIELD_REF_SIZE;

		ut_ad(local_len <= DICT_MAX_INDEX_COL_LEN);

		/* The moved part starts local_len bytes into the
		original buffer, so stepping back that far yields the
		start of the whole column. */
		dfield_set_data(dfield,
				(char*) moved->data - local_len,
				moved->len + local_len);
	}

	mem_heap_free(vector->heap);
}

284
data/data0type.c Normal file
View File

@@ -0,0 +1,284 @@
/******************************************************
Data types
(c) 1996 Innobase Oy
Created 1/16/1996 Heikki Tuuri
*******************************************************/
#include "data0type.h"
#ifdef UNIV_NONINL
#include "data0type.ic"
#endif
/**********************************************************************
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.
This is a declaration only; the function is not defined in this file.
NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
this function, you MUST change also the prototype here! */
UNIV_INTERN
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
/* out: number of bytes occupied by the first
n characters */
ulint charset_id, /* in: character set id */
ulint prefix_len, /* in: prefix length in bytes of the index
(this has to be divided by mbmaxlen to get the
number of CHARACTERS n in the prefix) */
ulint data_len, /* in: length of the string in bytes */
const char* str); /* in: character string */
/* At database startup we store the default charset-collation number of
this MySQL installation in this global variable. If we have < 4.1.2 format
column definitions, or records in the insert buffer, we use this
charset-collation code for them. The variable is only defined here; it is
not assigned anywhere in this file. */
UNIV_INTERN ulint data_mysql_default_charset_coll;
/*************************************************************************
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes
the characters in the string occupy. When every character has the same
width (mbminlen == mbmaxlen) the answer is simply the smaller of
prefix_len and data_len; otherwise the charset-aware
innobase_get_at_most_n_mbchars() is consulted. */
UNIV_INTERN
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
					/* out: length of the prefix,
					in bytes */
	ulint		prtype,		/* in: precise type */
	ulint		mbminlen,	/* in: minimum length of a
					multi-byte character */
	ulint		mbmaxlen,	/* in: maximum length of a
					multi-byte character */
	ulint		prefix_len,	/* in: length of the requested
					prefix, in characters, multiplied by
					dtype_get_mbmaxlen(dtype) */
	ulint		data_len,	/* in: length of str (in bytes) */
	const char*	str)		/* in: the string whose prefix
					length is being determined */
{
#ifndef UNIV_HOTBACKUP
	ut_a(data_len != UNIV_SQL_NULL);
	ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen));

	if (mbminlen == mbmaxlen) {
		/* Fixed-width characters: bytes can be counted
		without looking at the string. */
		return(prefix_len < data_len ? prefix_len : data_len);
	}

	/* Variable-width characters: the string has to be examined
	by the charset-specific code. */
	ut_a(!(prefix_len % mbmaxlen));

	return(innobase_get_at_most_n_mbchars(
		       dtype_get_charset_coll(prtype),
		       prefix_len, data_len, str));
#else /* UNIV_HOTBACKUP */
	/* This function depends on MySQL code that is not included in
	InnoDB Hot Backup builds. Besides, this function should never
	be called in InnoDB Hot Backup. */
	ut_error;
#endif /* UNIV_HOTBACKUP */
}
/*************************************************************************
Tells whether a main data type is a string type. Every main type code up
to and including DATA_BLOB counts as a string type (so a BLOB is one too),
and so do DATA_MYSQL and DATA_VARMYSQL. */
UNIV_INTERN
ibool
dtype_is_string_type(
/*=================*/
			/* out: TRUE if string type */
	ulint	mtype)	/* in: InnoDB main data type code: DATA_CHAR, ... */
{
	return((mtype <= DATA_BLOB
		|| mtype == DATA_MYSQL
		|| mtype == DATA_VARMYSQL)
	       ? TRUE
	       : FALSE);
}
/*************************************************************************
Tells whether a type is a binary string type: DATA_FIXBINARY and
DATA_BINARY always are, DATA_BLOB only when the DATA_BINARY_TYPE flag is
set in the precise type. Note that for tables created with < 4.0.14, we do
not know if a DATA_BLOB column is a BLOB or a TEXT column. For those
DATA_BLOB columns this function currently returns FALSE. */
UNIV_INTERN
ibool
dtype_is_binary_string_type(
/*========================*/
			/* out: TRUE if binary string type */
	ulint	mtype,	/* in: main data type */
	ulint	prtype)	/* in: precise type */
{
	switch (mtype) {
	case DATA_FIXBINARY:
	case DATA_BINARY:
		return(TRUE);
	case DATA_BLOB:
		/* A BLOB is binary only if it is flagged as such. */
		return((prtype & DATA_BINARY_TYPE) ? TRUE : FALSE);
	default:
		return(FALSE);
	}
}
/*************************************************************************
Tells whether a type is a non-binary string type, i.e. a type for which
dtype_is_string_type is TRUE and dtype_is_binary_string_type is FALSE.
Note that for tables created with < 4.0.14, we do not know if a DATA_BLOB
column is a BLOB or a TEXT column. For those DATA_BLOB columns this
function currently returns TRUE. */
UNIV_INTERN
ibool
dtype_is_non_binary_string_type(
/*============================*/
			/* out: TRUE if non-binary string type */
	ulint	mtype,	/* in: main data type */
	ulint	prtype)	/* in: precise type */
{
	if (dtype_is_string_type(mtype) != TRUE) {
		/* Not a string at all. */
		return(FALSE);
	}

	if (dtype_is_binary_string_type(mtype, prtype) != FALSE) {
		/* A string, but a binary one. */
		return(FALSE);
	}

	return(TRUE);
}
/*************************************************************************
Combines a < 4.1.2 format precise type with a charset-collation code:
the code is placed above the low 16 bits, which hold the old precise
type. Both inputs are range-checked with ut_a. */
UNIV_INTERN
ulint
dtype_form_prtype(
/*==============*/
	ulint	old_prtype,	/* in: the MySQL type code and the flags
				DATA_BINARY_TYPE etc. */
	ulint	charset_coll)	/* in: MySQL charset-collation code */
{
	ulint	coll_part;

	ut_a(old_prtype < 256 * 256);
	ut_a(charset_coll < 256);

	coll_part = charset_coll << 16;

	return(coll_part + old_prtype);
}
/*************************************************************************
Validates a data type structure. Every check is a ut_a assertion, so the
function either returns TRUE or fails an assertion; it never returns
FALSE. */
UNIV_INTERN
ibool
dtype_validate(
/*===========*/
/* out: TRUE if ok */
const dtype_t* type) /* in: type struct to validate */
{
ut_a(type);
/* The main type must lie in the range DATA_VARCHAR..DATA_MYSQL. */
ut_a(type->mtype >= DATA_VARCHAR);
ut_a(type->mtype <= DATA_MYSQL);
if (type->mtype == DATA_SYS) {
/* For a system column the masked precise type must be below
DATA_N_SYS_COLS. */
ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
}
ut_a(type->mbminlen <= type->mbmaxlen);
return(TRUE);
}
/*************************************************************************
Prints a data type structure to stderr: first the name of the main type
(or its numeric code if it has no name here), then either the precise
type (for DATA_SYS, DATA_VARCHAR and DATA_CHAR) or the DATA_UNSIGNED,
DATA_BINARY_TYPE and DATA_NOT_NULL flags that are set, and finally the
length. For the row id, roll pointer and trx id precise types the
printed length is the matching DATA_..._LEN constant rather than
type->len. */
UNIV_INTERN
void
dtype_print(
/*========*/
	const dtype_t*	type)	/* in: type */
{
	const char*	mtype_name;
	ulint		mtype;
	ulint		prtype;
	ulint		len;

	ut_a(type);

	mtype = type->mtype;
	prtype = type->prtype;

	switch (mtype) {
	case DATA_VARCHAR:
		mtype_name = "DATA_VARCHAR";
		break;
	case DATA_CHAR:
		mtype_name = "DATA_CHAR";
		break;
	case DATA_BINARY:
		mtype_name = "DATA_BINARY";
		break;
	case DATA_FIXBINARY:
		mtype_name = "DATA_FIXBINARY";
		break;
	case DATA_BLOB:
		mtype_name = "DATA_BLOB";
		break;
	case DATA_INT:
		mtype_name = "DATA_INT";
		break;
	case DATA_MYSQL:
		mtype_name = "DATA_MYSQL";
		break;
	case DATA_SYS:
		mtype_name = "DATA_SYS";
		break;
	default:
		mtype_name = NULL;
		break;
	}

	if (mtype_name != NULL) {
		fputs(mtype_name, stderr);
	} else {
		/* No symbolic name known: print the numeric code. */
		fprintf(stderr, "type %lu", (ulong) mtype);
	}

	len = type->len;

	if (mtype != DATA_SYS
	    && mtype != DATA_VARCHAR
	    && mtype != DATA_CHAR) {

		/* For the other main types the precise type is printed
		as a set of flags. */
		if (prtype & DATA_UNSIGNED) {
			fputs(" DATA_UNSIGNED", stderr);
		}

		if (prtype & DATA_BINARY_TYPE) {
			fputs(" DATA_BINARY_TYPE", stderr);
		}

		if (prtype & DATA_NOT_NULL) {
			fputs(" DATA_NOT_NULL", stderr);
		}
	} else {
		const char*	prtype_name = NULL;

		putc(' ', stderr);

		/* The system column types also override the length
		that is printed at the end. */
		if (prtype == DATA_ROW_ID) {
			prtype_name = "DATA_ROW_ID";
			len = DATA_ROW_ID_LEN;
		} else if (prtype == DATA_ROLL_PTR) {
			prtype_name = "DATA_ROLL_PTR";
			len = DATA_ROLL_PTR_LEN;
		} else if (prtype == DATA_TRX_ID) {
			prtype_name = "DATA_TRX_ID";
			len = DATA_TRX_ID_LEN;
		} else if (prtype == DATA_ENGLISH) {
			prtype_name = "DATA_ENGLISH";
		}

		if (prtype_name != NULL) {
			fputs(prtype_name, stderr);
		} else {
			fprintf(stderr, "prtype %lu", (ulong) prtype);
		}
	}

	fprintf(stderr, " len %lu", (ulong) len);
}

441
dict/dict0boot.c Normal file
View File

@@ -0,0 +1,441 @@
/******************************************************
Data dictionary creation and booting
(c) 1996 Innobase Oy
Created 4/18/1996 Heikki Tuuri
*******************************************************/
#include "dict0boot.h"
#ifdef UNIV_NONINL
#include "dict0boot.ic"
#endif
#include "dict0crea.h"
#include "btr0btr.h"
#include "dict0load.h"
#include "dict0load.h"
#include "trx0trx.h"
#include "srv0srv.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "log0recv.h"
#include "os0file.h"
/**************************************************************************
Fetches page DICT_HDR_PAGE_NO of space DICT_HDR_SPACE with an x-latch in
the given mini-transaction and returns a pointer to the dictionary header
inside that page. */
UNIV_INTERN
dict_hdr_t*
dict_hdr_get(
/*=========*/
			/* out: pointer to the dictionary header,
			page x-latched */
	mtr_t*	mtr)	/* in: mtr */
{
	buf_block_t*	hdr_block;
	dict_hdr_t*	hdr;

	hdr_block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO,
				 RW_X_LATCH, mtr);

	/* The header lives at offset DICT_HDR within the page frame. */
	hdr = buf_block_get_frame(hdr_block) + DICT_HDR;

#ifdef UNIV_SYNC_DEBUG
	buf_block_dbg_add_level(hdr_block, SYNC_DICT_HEADER);
#endif /* UNIV_SYNC_DEBUG */

	return(hdr);
}
/**************************************************************************
Returns a new table or index id. The counter stored in the dictionary
header at offset 'type' is incremented by one within a mini-transaction
of its own, and the incremented value is returned. */
UNIV_INTERN
dulint
dict_hdr_get_new_id(
/*================*/
			/* out: the new id */
	ulint	type)	/* in: DICT_HDR_TABLE_ID or DICT_HDR_INDEX_ID
			(checked with ut_ad) */
{
	mtr_t		mtr;
	dict_hdr_t*	counter;
	dulint		new_id;

	ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID));

	mtr_start(&mtr);

	/* Address of the requested counter inside the header. */
	counter = dict_hdr_get(&mtr) + type;

	new_id = ut_dulint_add(mtr_read_dulint(counter, &mtr), 1);

	mlog_write_dulint(counter, new_id, &mtr);

	mtr_commit(&mtr);

	return(new_id);
}
/**************************************************************************
Stores the current value of the in-memory row id counter
(dict_sys->row_id) in the DICT_HDR_ROW_ID field of the dictionary header
page, using a mini-transaction of its own. The caller must own
dict_sys->mutex. */
UNIV_INTERN
void
dict_hdr_flush_row_id(void)
/*=======================*/
{
	mtr_t	mtr;
	dulint	row_id;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	/* Take the value before the mini-transaction is started. */
	row_id = dict_sys->row_id;

	mtr_start(&mtr);

	mlog_write_dulint(dict_hdr_get(&mtr) + DICT_HDR_ROW_ID,
			  row_id, &mtr);

	mtr_commit(&mtr);
}
/*********************************************************************
Creates the file page for the dictionary header. This function is
called only at the database creation. It initializes the id counters
and creates the B-tree roots of the basic system tables, storing each
root page number in the header. If a btr_create() call returns FIL_NULL
the function returns FALSE immediately; whatever has been written to the
header up to that point is left as it is. */
static
ibool
dict_hdr_create(
/*============*/
/* out: TRUE if succeed */
mtr_t* mtr) /* in: mtr */
{
buf_block_t* block;
dict_hdr_t* dict_header;
ulint root_page_no;
ut_ad(mtr);
/* Create the dictionary header file block in a new, allocated file
segment in the system tablespace */
block = fseg_create(DICT_HDR_SPACE, 0,
DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
/* The page allocated for the segment must be the page that
dict_hdr_get() reads. */
ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block));
dict_header = dict_hdr_get(mtr);
/* Start counting row, table, index, and tree ids from
DICT_HDR_FIRST_ID */
mlog_write_dulint(dict_header + DICT_HDR_ROW_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
mlog_write_dulint(dict_header + DICT_HDR_TABLE_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
/* Obsolete, but the field must be initialized anyway. Note that the
value written is DICT_HDR_FIRST_ID, like for the counters above, not
0. */
mlog_write_dulint(dict_header + DICT_HDR_MIX_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
/* Create the B-tree roots for the clustered indexes of the basic
system tables */
/*--------------------------*/
/* Root of the tree with id DICT_TABLES_ID */
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, 0, DICT_TABLES_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
/* Root of the tree with id DICT_TABLE_IDS_ID; this is the only one
created without the DICT_CLUSTERED flag */
root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0,
DICT_TABLE_IDS_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
/* Root of the tree with id DICT_COLUMNS_ID */
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, 0, DICT_COLUMNS_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
/* Root of the tree with id DICT_INDEXES_ID */
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, 0, DICT_INDEXES_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
/* Root of the tree with id DICT_FIELDS_ID */
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, 0, DICT_FIELDS_ID,
srv_sys->dummy_ind1, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
return(TRUE);
}
/*********************************************************************
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created.
It builds the in-memory definitions of SYS_TABLES, SYS_COLUMNS,
SYS_INDEXES and SYS_FIELDS and of their indexes, reading each index root
page number from the dictionary header, then initializes the insert
buffer and loads the remaining index definitions of the system tables.
dict_sys->mutex is held from shortly after the start until the end of
the function. */
UNIV_INTERN
void
dict_boot(void)
/*===========*/
{
dict_table_t* table;
dict_index_t* index;
dict_hdr_t* dict_hdr;
mem_heap_t* heap;
mtr_t mtr;
ulint error;
/* This mini-transaction stays open until all root page numbers have
been read from the header; it is committed after the last index has
been added to the cache. */
mtr_start(&mtr);
/* Create the hash tables etc. */
dict_init();
/* Scratch heap handed to dict_mem_table_add_col() and
dict_table_add_to_cache(); it is emptied after each table and freed
after the last one. */
heap = mem_heap_create(450);
mutex_enter(&(dict_sys->mutex));
/* Get the dictionary header */
dict_hdr = dict_hdr_get(&mtr);
/* Because we only write new row ids to disk-based data structure
(dictionary header) when it is divisible by
DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
the latest value of the row id counter. Therefore we advance
the counter at the database startup to avoid overlapping values.
Note that when a user after database startup first time asks for
a new row id, then because the counter is now divisible by
..._MARGIN, it will immediately be updated to the disk-based
header. */
dict_sys->row_id = ut_dulint_add(
ut_dulint_align_up(mtr_read_dulint(dict_hdr + DICT_HDR_ROW_ID,
&mtr),
DICT_HDR_ROW_ID_WRITE_MARGIN),
DICT_HDR_ROW_ID_WRITE_MARGIN);
/* Insert into the dictionary cache the descriptions of the basic
system tables */
/*-------------------------*/
/* SYS_TABLES: the 8 passed as n_cols matches the 8 columns added
below */
table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0);
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
/* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
/* TYPE is either DICT_TABLE_ORDINARY, or (TYPE & DICT_TF_COMPACT)
and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
table->id = DICT_TABLES_ID;
dict_table_add_to_cache(table, heap);
dict_sys->sys_tables = table;
mem_heap_empty(heap);
/* Clustered index of SYS_TABLES on NAME; its root page number comes
from the DICT_HDR_TABLES field of the header */
index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 1);
dict_mem_index_add_field(index, "NAME", 0);
index->id = DICT_TABLES_ID;
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_TABLES,
MLOG_4BYTES, &mtr));
ut_a(error == DB_SUCCESS);
/*-------------------------*/
/* Unique index of SYS_TABLES on ID (created without
DICT_CLUSTERED) */
index = dict_mem_index_create("SYS_TABLES", "ID_IND",
DICT_HDR_SPACE, DICT_UNIQUE, 1);
dict_mem_index_add_field(index, "ID", 0);
index->id = DICT_TABLE_IDS_ID;
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_TABLE_IDS,
MLOG_4BYTES, &mtr));
ut_a(error == DB_SUCCESS);
/*-------------------------*/
/* SYS_COLUMNS: 7 columns */
table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4);
table->id = DICT_COLUMNS_ID;
dict_table_add_to_cache(table, heap);
dict_sys->sys_columns = table;
mem_heap_empty(heap);
/* Clustered index of SYS_COLUMNS on (TABLE_ID, POS) */
index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "TABLE_ID", 0);
dict_mem_index_add_field(index, "POS", 0);
index->id = DICT_COLUMNS_ID;
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_COLUMNS,
MLOG_4BYTES, &mtr));
ut_a(error == DB_SUCCESS);
/*-------------------------*/
/* SYS_INDEXES: 7 columns */
table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
/* Compile-time checks that the DICT_SYS_INDEXES_..._FIELD constants
agree with the order in which the TYPE, SPACE and PAGE_NO columns
were added above. */
/* The '+ 2' below comes from the 2 system fields */
#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
#endif
#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2
#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2"
#endif
#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
#endif
table->id = DICT_INDEXES_ID;
dict_table_add_to_cache(table, heap);
dict_sys->sys_indexes = table;
mem_heap_empty(heap);
/* Clustered index of SYS_INDEXES on (TABLE_ID, ID) */
index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "TABLE_ID", 0);
dict_mem_index_add_field(index, "ID", 0);
index->id = DICT_INDEXES_ID;
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_INDEXES,
MLOG_4BYTES, &mtr));
ut_a(error == DB_SUCCESS);
/*-------------------------*/
/* SYS_FIELDS: 3 columns */
table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0);
table->id = DICT_FIELDS_ID;
dict_table_add_to_cache(table, heap);
dict_sys->sys_fields = table;
/* Last table: the scratch heap is freed instead of emptied. */
mem_heap_free(heap);
/* Clustered index of SYS_FIELDS on (INDEX_ID, POS) */
index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "INDEX_ID", 0);
dict_mem_index_add_field(index, "POS", 0);
index->id = DICT_FIELDS_ID;
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_FIELDS,
MLOG_4BYTES, &mtr));
ut_a(error == DB_SUCCESS);
/* The header is not accessed through dict_hdr after this point. */
mtr_commit(&mtr);
/*-------------------------*/
/* Initialize the insert buffer table and index for each tablespace */
ibuf_init_at_db_start();
/* Load definitions of other indexes on system tables */
dict_load_sys_table(dict_sys->sys_tables);
dict_load_sys_table(dict_sys->sys_columns);
dict_load_sys_table(dict_sys->sys_indexes);
dict_load_sys_table(dict_sys->sys_fields);
mutex_exit(&(dict_sys->mutex));
}
/*********************************************************************
Inserts the basic system table data into themselves in the database
creation. Currently a placeholder: the body is empty, so calling this
function has no effect. */
static
void
dict_insert_initial_data(void)
/*==========================*/
{
/* Does nothing yet */
}
/*********************************************************************
Creates and initializes the data dictionary at the database creation. */
UNIV_INTERN
void
dict_create(void)
/*=============*/
{
mtr_t mtr;
mtr_start(&mtr);
dict_hdr_create(&mtr);
mtr_commit(&mtr);
dict_boot();
dict_insert_initial_data();
}

1502
dict/dict0crea.c Normal file

File diff suppressed because it is too large Load Diff

4601
dict/dict0dict.c Normal file

File diff suppressed because it is too large Load Diff

1443
dict/dict0load.c Normal file

File diff suppressed because it is too large Load Diff

291
dict/dict0mem.c Normal file
View File

@@ -0,0 +1,291 @@
/**********************************************************************
Data dictionary memory object creation
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
#include "dict0mem.h"
#ifdef UNIV_NONINL
#include "dict0mem.ic"
#endif
#include "rem0rec.h"
#include "data0type.h"
#include "mach0data.h"
#include "dict0dict.h"
#include "que0que.h"
#include "pars0pars.h"
#include "lock0lock.h"
#define DICT_HEAP_SIZE 100 /* initial memory heap size when
creating a table or index object */
/**************************************************************************
Creates a table memory object. The object itself, the copy of the table
name, the column array and the auto-increment lock are all allocated from
a heap of their own, which is stored in table->heap. The column array has
room for n_cols columns plus DATA_N_SYS_COLS system columns. */
UNIV_INTERN
dict_table_t*
dict_mem_table_create(
/*==================*/
					/* out, own: table object */
	const char*	name,		/* in: table name */
	ulint		space,		/* in: space where the clustered index of
					the table is placed; this parameter is
					ignored if the table is made a member of
					a cluster */
	ulint		n_cols,		/* in: number of columns */
	ulint		flags)		/* in: table flags */
{
	dict_table_t*	table;
	mem_heap_t*	heap;

	ut_ad(name);

	/* No bit at or above position DICT_TF_BITS may be set in flags.
	The mask is built from an unsigned all-ones value: left-shifting
	the negative int ~0 is undefined behaviour in C, while the
	unsigned shift yields the same mask and is well defined. */
	ut_a(!(flags & (~(ulint) 0 << DICT_TF_BITS)));

	heap = mem_heap_create(DICT_HEAP_SIZE);

	/* Zero-filled, so every member not set below starts as 0/NULL. */
	table = mem_heap_zalloc(heap, sizeof(dict_table_t));

	table->heap = heap;

	table->flags = (unsigned int) flags;
	table->name = mem_heap_strdup(heap, name);
	table->space = (unsigned int) space;
	table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS);

	table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
				     * sizeof(dict_col_t));

	table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size());

	mutex_create(&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);

	/* The actual increment value will be set by MySQL, we simply
	default to 1 here.*/
	table->autoinc_increment = 1;

#ifdef UNIV_DEBUG
	table->magic_n = DICT_TABLE_MAGIC_N;
#endif /* UNIV_DEBUG */
	return(table);
}
/********************************************************************
Frees a table memory object: the auto-increment mutex is destroyed and
then the heap of the table, which holds the object itself, is freed. */
UNIV_INTERN
void
dict_mem_table_free(
/*================*/
	dict_table_t*	table)		/* in: table */
{
	mem_heap_t*	table_heap;

	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	table_heap = table->heap;

	mutex_free(&(table->autoinc_mutex));

	/* Must come last: table itself lives in this heap. */
	mem_heap_free(table_heap);
}
/********************************************************************
Append 'name' to 'col_names' (@see dict_table_t::col_names). The names
are stored back to back, each terminated by a NUL byte. A new array is
allocated from heap and the old names plus the new one are copied into
it; the old array is neither modified nor freed. */
static
const char*
dict_add_col_name(
/*==============*/
					/* out: new column names array */
	const char*	col_names,	/* in: existing column names, or
					NULL */
	ulint		cols,		/* in: number of existing columns */
	const char*	name,		/* in: new column name */
	mem_heap_t*	heap)		/* in: heap */
{
	ulint	prev_bytes = 0;
	ulint	name_bytes;
	char*	joined;

	ut_ad(!cols == !col_names);

	if (col_names != NULL) {
		/* Walk over the 'cols' existing names to find where
		the array ends. */
		const char*	end = col_names;
		ulint		left;

		for (left = cols; left > 0; left--) {
			end += strlen(end) + 1;
		}

		prev_bytes = end - col_names;
	}

	/* The terminating NUL of the new name is copied as well. */
	name_bytes = strlen(name) + 1;

	joined = mem_heap_alloc(heap, prev_bytes + name_bytes);

	if (prev_bytes > 0) {
		memcpy(joined, col_names, prev_bytes);
	}

	memcpy(joined + prev_bytes, name, name_bytes);

	return(joined);
}
/**************************************************************************
Adds a column definition to a table. The column takes the next free
position (table->n_def, which is incremented). If a name is given it is
appended to table->col_names; the names array is allocated from 'heap',
except when this column makes n_def equal to n_cols, in which case it is
allocated from table->heap. */
UNIV_INTERN
void
dict_mem_table_add_col(
/*===================*/
	dict_table_t*	table,	/* in: table */
	mem_heap_t*	heap,	/* in: temporary memory heap, or NULL */
	const char*	name,	/* in: column name, or NULL */
	ulint		mtype,	/* in: main datatype */
	ulint		prtype,	/* in: precise type */
	ulint		len)	/* in: precision */
{
	dict_col_t*	col;
	ulint		pos;
	ulint		min_char_len;
	ulint		max_char_len;

	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
	ut_ad(!heap == !name);

	pos = table->n_def;
	table->n_def++;

	if (name != NULL) {
		if (UNIV_UNLIKELY(table->n_def == table->n_cols)) {
			/* This is the last column: build the names array
			in the heap of the table itself. */
			heap = table->heap;
		}

		if (UNIV_LIKELY(pos != 0)
		    && UNIV_UNLIKELY(table->col_names == NULL)) {
			/* All preceding column names are empty. */
			table->col_names = mem_heap_zalloc(heap,
							   table->n_def);
		}

		table->col_names = dict_add_col_name(table->col_names,
						     pos, name, heap);
	}

	col = dict_table_get_nth_col(table, pos);

	col->ind = (unsigned int) pos;
	col->ord_part = 0;

	col->mtype = (unsigned int) mtype;
	col->prtype = (unsigned int) prtype;
	col->len = (unsigned int) len;

	/* The character length limits follow from the type. */
	dtype_get_mblen(mtype, prtype, &min_char_len, &max_char_len);

	col->mbminlen = (unsigned int) min_char_len;
	col->mbmaxlen = (unsigned int) max_char_len;
}
/**************************************************************************
Creates an index memory object. The object, a copy of the index name and
the field array are allocated from a heap of their own, stored in
index->heap. The table name is NOT copied: only the pointer passed in is
stored. */
UNIV_INTERN
dict_index_t*
dict_mem_index_create(
/*==================*/
					/* out, own: index object */
	const char*	table_name,	/* in: table name */
	const char*	index_name,	/* in: index name */
	ulint		space,		/* in: space where the index tree is
					placed, ignored if the index is of
					the clustered type */
	ulint		type,		/* in: DICT_UNIQUE,
					DICT_CLUSTERED, ... ORed */
	ulint		n_fields)	/* in: number of fields */
{
	mem_heap_t*	heap;
	dict_index_t*	index;

	ut_ad(table_name && index_name);

	heap = mem_heap_create(DICT_HEAP_SIZE);

	/* Zero-filled, so every member not set below starts as 0/NULL. */
	index = mem_heap_zalloc(heap, sizeof(dict_index_t));

	index->heap = heap;
#ifdef UNIV_DEBUG
	index->magic_n = DICT_INDEX_MAGIC_N;
#endif /* UNIV_DEBUG */

	index->table_name = table_name;
	index->type = type;
	index->space = (unsigned int) space;
	index->n_fields = (unsigned int) n_fields;

	index->name = mem_heap_strdup(heap, index_name);

	/* The extra byte prevents allocation of an empty mem block
	when n_fields is 0. */
	index->fields = mem_heap_alloc(heap, n_fields
				       * sizeof(dict_field_t) + 1);

	return(index);
}
/**************************************************************************
Creates a foreign constraint memory object. The struct is allocated
zero-filled from a heap of its own, and only its heap member is set. */
UNIV_INTERN
dict_foreign_t*
dict_mem_foreign_create(void)
/*=========================*/
					/* out, own: foreign constraint struct */
{
	mem_heap_t*	own_heap;
	dict_foreign_t*	constraint;

	own_heap = mem_heap_create(100);

	constraint = mem_heap_zalloc(own_heap, sizeof(dict_foreign_t));
	constraint->heap = own_heap;

	return(constraint);
}
/**************************************************************************
Adds a field definition to an index: the next free slot of the field
array is filled in and index->n_def is incremented. NOTE: does not take a
copy of the column name if the field is a column. The memory occupied
by the column name may be released only after publishing the index. */
UNIV_INTERN
void
dict_mem_index_add_field(
/*=====================*/
	dict_index_t*	index,		/* in: index */
	const char*	name,		/* in: column name */
	ulint		prefix_len)	/* in: 0 or the column prefix length
					in a MySQL index like
					INDEX (textcol(25)) */
{
	dict_field_t*	slot;
	ulint		pos;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	/* Count the field first and fetch its slot afterwards, so that
	the position is already below n_def when it is looked up. */
	pos = index->n_def;
	index->n_def++;

	slot = dict_index_get_nth_field(index, pos);

	slot->name = name;
	slot->prefix_len = (unsigned int) prefix_len;
}
/**************************************************************************
Releases an index memory object by freeing the memory heap stored in it. */
UNIV_INTERN
void
dict_mem_index_free(
/*================*/
	dict_index_t*	index)	/* in: index */
{
	mem_heap_t*	index_heap;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	index_heap = index->heap;

	mem_heap_free(index_heap);
}

48
dyn/dyn0dyn.c Normal file
View File

@@ -0,0 +1,48 @@
/******************************************************
The dynamically allocated array
(c) 1996 Innobase Oy
Created 2/5/1996 Heikki Tuuri
*******************************************************/
#include "dyn0dyn.h"
#ifdef UNIV_NONINL
#include "dyn0dyn.ic"
#endif
/****************************************************************
Appends a fresh block to a dyn array. If the array has no heap yet, the
block list is initialized with the array itself as the first element and a
heap is created. The block that was last so far is flagged as full. */
UNIV_INTERN
dyn_block_t*
dyn_array_add_block(
/*================*/
				/* out: created block */
	dyn_array_t*	arr)	/* in: dyn array */
{
	dyn_block_t*	last;
	dyn_block_t*	fresh;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);

	if (!arr->heap) {
		/* First extension: set up the list and the heap */
		UT_LIST_INIT(arr->base);
		UT_LIST_ADD_FIRST(list, arr->base, arr);

		arr->heap = mem_heap_create(sizeof(dyn_block_t));
	}

	/* Nothing more is stored in the old last block */
	last = dyn_array_get_last_block(arr);
	last->used |= DYN_BLOCK_FULL_FLAG;

	fresh = mem_heap_alloc(arr->heap, sizeof(dyn_block_t));
	fresh->used = 0;

	UT_LIST_ADD_LAST(list, arr->base, fresh);

	return(fresh);
}

835
eval/eval0eval.c Normal file
View File

@@ -0,0 +1,835 @@
/******************************************************
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
(c) 1997 Innobase Oy
Created 12/29/1997 Heikki Tuuri
*******************************************************/
#include "eval0eval.h"
#ifdef UNIV_NONINL
#include "eval0eval.ic"
#endif
#include "data0data.h"
#include "row0sel.h"
/* The RND function seed */
static ulint eval_rnd = 128367121;
/* Dummy address used when we should allocate a buffer of size 0 in
the function below */
static byte eval_dummy;
/*********************************************************************
Gives the val field of a query graph node a buffer of the requested size,
taken from global dynamic memory. A buffer the node already owns is freed
first. For size 0 no memory is allocated; the address of eval_dummy is used
instead. NOTE that the memory must be explicitly freed when the query graph
is freed, and that this is the only function where dynamic memory should be
allocated for a query node val field. */
UNIV_INTERN
byte*
eval_node_alloc_val_buf(
/*====================*/
				/* out: pointer to allocated buffer */
	que_node_t*	node,	/* in: query graph node; sets the val field
				data field to point to the new buffer, and
				len field equal to size */
	ulint		size)	/* in: buffer size */
{
	dfield_t*	val;
	byte*		old_buf;
	byte*		new_buf;

	ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
	      || que_node_get_type(node) == QUE_NODE_FUNC);

	val = que_node_get_val(node);
	old_buf = dfield_get_data(val);

	/* The dummy address was never allocated, so it must not be freed */
	if (old_buf != NULL && old_buf != &eval_dummy) {
		mem_free(old_buf);
	}

	if (size != 0) {
		new_buf = mem_alloc(size);
	} else {
		new_buf = &eval_dummy;
	}

	que_node_set_val_buf_size(node, size);
	dfield_set_data(val, new_buf, size);

	return(new_buf);
}
/*********************************************************************
Releases the value buffer of a query graph node if the recorded buffer size
is nonzero, that is, if it was allocated from global dynamic memory by
eval_node_alloc_val_buf. The freeing for pushed column values is done in
sel_col_prefetch_buf_free. */
UNIV_INTERN
void
eval_node_free_val_buf(
/*===================*/
	que_node_t*	node)	/* in: query graph node */
{
	byte*	data;

	ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
	      || que_node_get_type(node) == QUE_NODE_FUNC);

	data = dfield_get_data(que_node_get_val(node));

	if (que_node_get_val_buf_size(node) == 0) {
		/* Nothing was allocated for this node */
		return;
	}

	ut_a(data);
	mem_free(data);
}
/*********************************************************************
Evaluates a comparison node: the values of the two arguments are compared
with cmp_dfield_dfield and the outcome, interpreted according to the
comparison operator, is stored as the ibool value of the node. */
UNIV_INTERN
ibool
eval_cmp(
/*=====*/
					/* out: the result of the
					comparison */
	func_node_t*	cmp_node)	/* in: comparison node */
{
	que_node_t*	lhs;
	que_node_t*	rhs;
	int		res;
	int		func;
	ibool		val;

	ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC);

	lhs = cmp_node->args;
	rhs = que_node_get_next(lhs);

	res = cmp_dfield_dfield(que_node_get_val(lhs),
				que_node_get_val(rhs));
	func = cmp_node->func;

	/* The tests below treat res as being exactly -1, 0 or 1 */
	switch (func) {
	case '=':
		val = (res == 0) ? TRUE : FALSE;
		break;
	case '<':
		val = (res == -1) ? TRUE : FALSE;
		break;
	case PARS_LE_TOKEN:
		val = (res == 1) ? FALSE : TRUE;
		break;
	case PARS_NE_TOKEN:
		val = (res == 0) ? FALSE : TRUE;
		break;
	case PARS_GE_TOKEN:
		val = (res == -1) ? FALSE : TRUE;
		break;
	default:
		ut_ad(func == '>');
		val = (res == 1) ? TRUE : FALSE;
		break;
	}

	eval_node_set_ibool_val(cmp_node, val);

	return(val);
}
/*********************************************************************
Evaluates a logical operation node (AND, OR or NOT) and stores the result
as the ibool value of the node. Any other operator is a fatal error. */
UNIV_INLINE
void
eval_logical(
/*=========*/
	func_node_t*	logical_node)	/* in: logical operation node */
{
	que_node_t*	first;
	que_node_t*	second;
	ibool		val1;
	ibool		val2 = 0;	/* remove warning */
	ibool		val = 0;	/* remove warning */

	ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC);

	first = logical_node->args;
	/* There is no second argument if the operator is 'NOT' */
	second = que_node_get_next(first);

	val1 = eval_node_get_ibool_val(first);

	if (second != NULL) {
		val2 = eval_node_get_ibool_val(second);
	}

	switch (logical_node->func) {
	case PARS_AND_TOKEN:
		val = val1 & val2;
		break;
	case PARS_OR_TOKEN:
		val = val1 | val2;
		break;
	case PARS_NOT_TOKEN:
		val = TRUE - val1;
		break;
	default:
		ut_error;
	}

	eval_node_set_ibool_val(logical_node, val);
}
/*********************************************************************
Evaluates an arithmetic operation node ('+', binary or unary '-', '*' or
'/') on integer values and stores the result as the integer value of the
node. */
UNIV_INLINE
void
eval_arith(
/*=======*/
	func_node_t*	arith_node)	/* in: arithmetic operation node */
{
	que_node_t*	arg1;
	que_node_t*	arg2;
	lint		lhs;
	lint		rhs = 0;	/* remove warning */
	lint		result;
	int		func;

	ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC);

	arg1 = arith_node->args;
	/* arg2 is NULL if func is unary '-' */
	arg2 = que_node_get_next(arg1);

	lhs = eval_node_get_int_val(arg1);

	if (arg2 != NULL) {
		rhs = eval_node_get_int_val(arg2);
	}

	func = arith_node->func;

	switch (func) {
	case '+':
		result = lhs + rhs;
		break;
	case '-':
		/* Subtraction with two operands, negation with one */
		result = (arg2 != NULL) ? (lhs - rhs) : -lhs;
		break;
	case '*':
		result = lhs * rhs;
		break;
	default:
		ut_ad(func == '/');
		result = lhs / rhs;
		break;
	}

	eval_node_set_int_val(arith_node, result);
}
/*********************************************************************
Evaluates an aggregate operation node: the integer value of the node is
advanced by 1 for COUNT, or by the integer value of the argument for SUM. */
UNIV_INLINE
void
eval_aggregate(
/*===========*/
	func_node_t*	node)	/* in: aggregate operation node */
{
	lint	total;
	lint	increment;
	int	func;

	ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);

	total = eval_node_get_int_val(node);
	func = node->func;

	if (func != PARS_COUNT_TOKEN) {
		ut_ad(func == PARS_SUM_TOKEN);

		increment = eval_node_get_int_val(node->args);
	} else {
		increment = 1;
	}

	eval_node_set_int_val(node, total + increment);
}
/*********************************************************************
Evaluates a predefined function node where the function is not relevant
in benchmarks: PRINTF, ASSERT, RND and RND_STR. Any other function is a
fatal error. */
static
void
eval_predefined_2(
/*==============*/
	func_node_t*	func_node)	/* in: predefined function node */
{
	que_node_t*	arg;
	que_node_t*	arg1;
	que_node_t*	arg2 = 0; /* remove warning (??? bug ???) */
	lint		int_val;
	byte*		data;
	ulint		len1;
	ulint		len2;
	ulint		i;

	ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);

	arg1 = func_node->args;

	if (arg1 != NULL) {
		arg2 = que_node_get_next(arg1);
	}

	switch (func_node->func) {
	case PARS_PRINTF_TOKEN:
		/* Print every argument, then end the line */
		for (arg = arg1; arg != NULL; arg = que_node_get_next(arg)) {
			dfield_print(que_node_get_val(arg));
		}

		putc('\n', stderr);
		break;

	case PARS_ASSERT_TOKEN:
		if (!eval_node_get_ibool_val(arg1)) {
			fputs("SQL assertion fails in a stored procedure!\n",
			      stderr);
		}

		ut_a(eval_node_get_ibool_val(arg1));

		/* This function, or more precisely, a debug procedure,
		returns no value */
		break;

	case PARS_RND_TOKEN:
		/* A value from the closed range [len1, len2], derived
		from the current seed */
		len1 = (ulint)eval_node_get_int_val(arg1);
		len2 = (ulint)eval_node_get_int_val(arg2);

		ut_ad(len2 >= len1);

		int_val = (lint) len1;

		if (len2 > len1) {
			int_val = (lint) (len1
					  + (eval_rnd % (len2 - len1 + 1)));
		}

		eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);

		eval_node_set_int_val(func_node, int_val);
		break;

	case PARS_RND_STR_TOKEN:
		/* A string of len1 bytes, each one of 'a', 'b' or 'c' */
		len1 = (ulint)eval_node_get_int_val(arg1);

		data = eval_node_ensure_val_buf(func_node, len1);

		for (i = 0; i < len1; i++) {
			data[i] = (byte)(97 + (eval_rnd % 3));

			eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
		}
		break;

	default:
		ut_error;
	}
}
/*********************************************************************
Evaluates a notfound-function node: the result is TRUE if the select node
associated with the cursor argument is in the state SEL_NODE_NO_MORE_ROWS.
For a literal argument (checked in debug builds to start with "SQL") the
last select node of the query graph is used. */
UNIV_INLINE
void
eval_notfound(
/*==========*/
	func_node_t*	func_node)	/* in: function node */
{
	sym_node_t*	cursor;
	sel_node_t*	sel_node;

	ut_ad(func_node->func == PARS_NOTFOUND_TOKEN);

	cursor = func_node->args;

	ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL);

	if (cursor->token_type != SYM_LIT) {
		sel_node = cursor->alias->cursor_def;
	} else {
		ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)),
				"SQL", 3) == 0);

		sel_node = cursor->sym_table->query_graph->last_sel_node;
	}

	eval_node_set_ibool_val(func_node,
				(sel_node->state == SEL_NODE_NO_MORE_ROWS)
				? TRUE : FALSE);
}
/*********************************************************************
Evaluates a substr-function node. No data is copied: the value of the node
is made to point into the data of the first argument, starting at the
offset given by the second argument and with the length given by the
third. */
UNIV_INLINE
void
eval_substr(
/*========*/
	func_node_t*	func_node)	/* in: function node */
{
	que_node_t*	str_arg;
	que_node_t*	pos_arg;
	que_node_t*	len_arg;
	byte*		base;
	ulint		offset;
	ulint		length;

	ut_ad(func_node->func == PARS_SUBSTR_TOKEN);

	str_arg = func_node->args;
	pos_arg = que_node_get_next(str_arg);
	len_arg = que_node_get_next(pos_arg);

	base = dfield_get_data(que_node_get_val(str_arg));
	offset = (ulint)eval_node_get_int_val(pos_arg);
	length = (ulint)eval_node_get_int_val(len_arg);

	dfield_set_data(que_node_get_val(func_node), base + offset, length);
}
/*********************************************************************
Evaluates a replstr-procedure node: copies the first len2 bytes of the
second argument over the data of the first argument, starting at offset
len1, where len1 and len2 are the third and fourth arguments. It is a fatal
error if either data field is too short for the copy. */
static
void
eval_replstr(
/*=========*/
	func_node_t*	func_node)	/* in: function node */
{
	que_node_t*	arg1;
	que_node_t*	arg2;
	que_node_t*	arg3;
	que_node_t*	arg4;
	dfield_t*	target;
	dfield_t*	source;
	byte*		dst;
	byte*		src;
	ulint		offset;
	ulint		n_bytes;

	arg1 = func_node->args;

	ut_ad(que_node_get_type(arg1) == QUE_NODE_SYMBOL);

	arg2 = que_node_get_next(arg1);
	arg3 = que_node_get_next(arg2);
	arg4 = que_node_get_next(arg3);

	target = que_node_get_val(arg1);
	source = que_node_get_val(arg2);

	dst = dfield_get_data(target);
	src = dfield_get_data(source);

	offset = (ulint)eval_node_get_int_val(arg3);
	n_bytes = (ulint)eval_node_get_int_val(arg4);

	/* Both the written and the read range must fit in the fields */
	if (dfield_get_len(target) < offset + n_bytes
	    || dfield_get_len(source) < n_bytes) {

		ut_error;
	}

	ut_memcpy(dst + offset, src, n_bytes);
}
/*********************************************************************
Evaluates an instr-function node: searches the data of the first argument
for the first occurrence of the data of the second argument. The integer
value of the node is set to the 1-based position of the match, or to 0 if
there is none. An empty search string is a fatal error. */
static
void
eval_instr(
/*=======*/
	func_node_t*	func_node)	/* in: function node */
{
	que_node_t*	arg1;
	dfield_t*	haystack_field;
	dfield_t*	needle_field;
	byte*		haystack;
	byte*		needle;
	ulint		haystack_len;
	ulint		needle_len;
	byte		first_char;
	lint		int_val;
	ulint		i;
	ulint		j;

	arg1 = func_node->args;

	haystack_field = que_node_get_val(arg1);
	needle_field = que_node_get_val(que_node_get_next(arg1));

	haystack = dfield_get_data(haystack_field);
	needle = dfield_get_data(needle_field);

	haystack_len = dfield_get_len(haystack_field);
	needle_len = dfield_get_len(needle_field);

	if (needle_len == 0) {
		ut_error;
	}

	first_char = needle[0];

	/* Stays 0 unless a match is found */
	int_val = 0;

	for (i = 0; i < haystack_len; i++) {

		if (haystack[i] != first_char) {

			continue;
		}

		if (i + needle_len > haystack_len) {
			/* Not enough bytes left for a full match here
			or at any later position */
			break;
		}

		/* The first character is matched; compare the rest */
		for (j = 1; j < needle_len; j++) {
			if (haystack[i + j] != needle[j]) {

				break;
			}
		}

		if (j == needle_len) {
			int_val = i + 1;

			break;
		}
	}

	eval_node_set_int_val(func_node, int_val);
}
/*********************************************************************
Evaluates a binary-to-number function node: the data of the argument, at
most 4 bytes long, is copied into the 4-byte value of the node. Shorter
data is right-aligned in a zero-filled buffer first. Data longer than
4 bytes is a fatal error. */
UNIV_INLINE
void
eval_binary_to_number(
/*==================*/
	func_node_t*	func_node)	/* in: function node */
{
	dfield_t*	arg_field;
	byte*		src;
	byte*		four_bytes;
	ulint		src_len;
	ulint		padded;

	arg_field = que_node_get_val(func_node->args);

	src = dfield_get_data(arg_field);
	src_len = dfield_get_len(arg_field);

	if (src_len > 4) {
		ut_error;
	}

	if (src_len != 4) {
		/* Use the first 4 bytes of a zeroed local variable as
		the buffer; the data goes to the end of those 4 bytes */
		padded = 0;
		four_bytes = (byte*)&padded;

		ut_memcpy(four_bytes + (4 - src_len), src, src_len);
	} else {
		four_bytes = src;
	}

	eval_node_copy_and_alloc_val(func_node, four_bytes, 4);
}
/*********************************************************************
Evaluates a concat-function node: the data of all the arguments is copied,
in order, into the value buffer of the node. The buffer is first ensured to
hold the sum of the argument lengths. */
static
void
eval_concat(
/*========*/
	func_node_t*	func_node)	/* in: function node */
{
	que_node_t*	arg;
	dfield_t*	piece;
	byte*		out;
	ulint		total_len = 0;
	ulint		offset = 0;
	ulint		piece_len;

	/* First pass: how many bytes are needed in all */
	for (arg = func_node->args; arg != NULL;
	     arg = que_node_get_next(arg)) {

		total_len += dfield_get_len(que_node_get_val(arg));
	}

	out = eval_node_ensure_val_buf(func_node, total_len);

	/* Second pass: copy the pieces one after another */
	for (arg = func_node->args; arg != NULL;
	     arg = que_node_get_next(arg)) {

		piece = que_node_get_val(arg);
		piece_len = dfield_get_len(piece);

		ut_memcpy(out + offset, dfield_get_data(piece), piece_len);

		offset += piece_len;
	}
}
/*********************************************************************
Evaluates a to-binary function node. If the first argument is an integer,
the second argument gives a length in bytes (at most 4, otherwise a fatal
error), and the value of the node is made to point at the last that many
bytes of the 4-byte integer data. If the first argument is of some other
type, the value of the node is made to point at the argument data with the
argument length. No data is copied in either case. */
UNIV_INLINE
void
eval_to_binary(
/*===========*/
	func_node_t*	func_node)	/* in: function node */
{
	que_node_t*	arg1;
	que_node_t*	arg2;
	dfield_t*	result;
	byte*		src;
	ulint		n_bytes;

	arg1 = func_node->args;

	src = dfield_get_data(que_node_get_val(arg1));
	result = que_node_get_val(func_node);

	if (dtype_get_mtype(que_node_get_data_type(arg1)) == DATA_INT) {

		arg2 = que_node_get_next(arg1);

		n_bytes = (ulint)eval_node_get_int_val(arg2);

		if (n_bytes > 4) {

			ut_error;
		}

		/* Skip the leading bytes of the 4-byte integer */
		dfield_set_data(result, src + (4 - n_bytes), n_bytes);
	} else {
		n_bytes = dfield_get_len(que_node_get_val(arg1));

		dfield_set_data(result, src, n_bytes);
	}
}
/*********************************************************************
Evaluates a predefined function node: LENGTH, TO_CHAR, TO_NUMBER and
SYSDATE are handled here, everything else is passed on to
eval_predefined_2. */
UNIV_INLINE
void
eval_predefined(
/*============*/
	func_node_t*	func_node)	/* in: function node */
{
	que_node_t*	arg1;
	lint		int_val;
	byte*		data;

	arg1 = func_node->args;

	switch (func_node->func) {
	case PARS_LENGTH_TOKEN:
		int_val = (lint)dfield_get_len(que_node_get_val(arg1));
		break;

	case PARS_TO_CHAR_TOKEN: {
		/* Convert number to character string as a
		signed decimal integer. */

		ulint	uint_val;
		ulint	rest;
		int	int_len;
		int	tmp;

		int_val = eval_node_get_int_val(arg1);

		/* Absolute value of the number, and the space needed
		for a minus sign */
		if (int_val < 0) {
			uint_val = ((ulint) -int_val - 1) + 1;
			int_len = 1;
		} else {
			uint_val = (ulint) int_val;
			int_len = 0;
		}

		/* One byte per decimal digit */
		for (rest = uint_val; rest > 0; rest /= 10) {
			int_len++;
		}

		if (int_val == 0) {
			int_len = 1; /* the number 0 occupies 1 byte */
		}

		/* allocate the string, including a terminating NUL
		character that is not counted in the field length */
		data = eval_node_ensure_val_buf(func_node, int_len + 1);
		data[int_len] = 0;

		if (int_val == 0) {
			data[0] = '0';
		} else {
			if (int_val < 0) {
				data[0] = '-'; /* preceding minus sign */
			}

			/* Write the digits from the last one backwards */
			for (tmp = int_len; uint_val > 0; uint_val /= 10) {
				data[--tmp] = (byte)
					('0' + (byte)(uint_val % 10));
			}
		}

		dfield_set_len(que_node_get_val(func_node), int_len);

		return;
	}

	case PARS_TO_NUMBER_TOKEN:
		int_val = atoi((char*)
			       dfield_get_data(que_node_get_val(arg1)));
		break;

	case PARS_SYSDATE_TOKEN:
		int_val = (lint)ut_time();
		break;

	default:
		eval_predefined_2(func_node);

		return;
	}

	eval_node_set_int_val(func_node, int_val);
}
/*********************************************************************
Evaluates a function node: every argument is evaluated first, then the
evaluator matching the class (and, for predefined functions, the function
token) of the node is called. An SQL null argument value is a fatal error
except for comparisons, notfound and printf. */
UNIV_INTERN
void
eval_func(
/*======*/
	func_node_t*	func_node)	/* in: function node */
{
	que_node_t*	arg;
	ulint		class;
	ulint		func;
	ibool		null_allowed;

	ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);

	class = func_node->class;
	func = func_node->func;

	/* The functions are not defined for SQL null argument
	values, except for eval_cmp, notfound and printf */
	null_allowed = (class == PARS_FUNC_CMP)
		|| (func == PARS_NOTFOUND_TOKEN)
		|| (func == PARS_PRINTF_TOKEN);

	/* Evaluate first the argument list */
	for (arg = func_node->args; arg != NULL;
	     arg = que_node_get_next(arg)) {

		eval_exp(arg);

		if (!null_allowed
		    && dfield_is_null(que_node_get_val(arg))) {

			ut_error;
		}
	}

	switch (class) {
	case PARS_FUNC_CMP:
		eval_cmp(func_node);
		break;

	case PARS_FUNC_ARITH:
		eval_arith(func_node);
		break;

	case PARS_FUNC_AGGREGATE:
		eval_aggregate(func_node);
		break;

	case PARS_FUNC_PREDEFINED:
		switch (func) {
		case PARS_NOTFOUND_TOKEN:
			eval_notfound(func_node);
			break;
		case PARS_SUBSTR_TOKEN:
			eval_substr(func_node);
			break;
		case PARS_REPLSTR_TOKEN:
			eval_replstr(func_node);
			break;
		case PARS_INSTR_TOKEN:
			eval_instr(func_node);
			break;
		case PARS_BINARY_TO_NUMBER_TOKEN:
			eval_binary_to_number(func_node);
			break;
		case PARS_CONCAT_TOKEN:
			eval_concat(func_node);
			break;
		case PARS_TO_BINARY_TOKEN:
			eval_to_binary(func_node);
			break;
		default:
			eval_predefined(func_node);
			break;
		}
		break;

	default:
		ut_ad(class == PARS_FUNC_LOGICAL);

		eval_logical(func_node);
		break;
	}
}

278
eval/eval0proc.c Normal file
View File

@@ -0,0 +1,278 @@
/******************************************************
Executes SQL stored procedures and their control structures
(c) 1998 Innobase Oy
Created 1/20/1998 Heikki Tuuri
*******************************************************/
#include "eval0proc.h"
#ifdef UNIV_NONINL
#include "eval0proc.ic"
#endif
/**************************************************************************
Performs an execution step of an if-statement node. When control arrives
from the parent node, the conditions are evaluated and the statement list
of the first branch that applies is chosen. When no branch applies, the
chosen statement list is NULL, or control comes back from a statement list,
execution continues at the parent node. */
UNIV_INTERN
que_thr_t*
if_step(
/*====*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	if_node_t*	node;
	elsif_node_t*	elsif_node;
	que_node_t*	next = NULL;

	ut_ad(thr);

	node = thr->run_node;
	ut_ad(que_node_get_type(node) == QUE_NODE_IF);

	if (thr->prev_node != que_node_get_parent(node)) {
		/* A statement list was finished: move to the next
		statement */
		ut_ad(que_node_get_next(thr->prev_node) == NULL);
	} else {
		/* Evaluate the condition */
		eval_exp(node->cond);

		if (eval_node_get_ibool_val(node->cond)) {
			/* The condition evaluated to TRUE: start execution
			from the first statement in the statement list */
			next = node->stat_list;

		} else if (node->else_part) {
			next = node->else_part;

		} else {
			/* Try the elsif conditions, if any, in order */
			for (elsif_node = node->elsif_list;
			     elsif_node != NULL;
			     elsif_node = que_node_get_next(elsif_node)) {

				eval_exp(elsif_node->cond);

				if (eval_node_get_ibool_val(
					    elsif_node->cond)) {

					next = elsif_node->stat_list;

					break;
				}
			}
		}
	}

	if (next == NULL) {
		next = que_node_get_parent(node);
	}

	thr->run_node = next;

	return(thr);
}
/**************************************************************************
Performs an execution step of a while-statement node: the condition is
evaluated, and execution continues in the statement list if it holds, or at
the parent node otherwise. */
UNIV_INTERN
que_thr_t*
while_step(
/*=======*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	while_node_t*	node;
	ibool		cond_holds;

	ut_ad(thr);

	node = thr->run_node;
	ut_ad(que_node_get_type(node) == QUE_NODE_WHILE);

	ut_ad((thr->prev_node == que_node_get_parent(node))
	      || (que_node_get_next(thr->prev_node) == NULL));

	/* Evaluate the condition */
	eval_exp(node->cond);
	cond_holds = eval_node_get_ibool_val(node->cond);

	/* TRUE: start execution from the first statement in the
	statement list; FALSE: leave the loop */
	thr->run_node = cond_holds
		? node->stat_list
		: que_node_get_parent(node);

	return(thr);
}
/**************************************************************************
Performs an execution step of an assignment statement node: the value
expression is evaluated and its value is copied to the alias of the
assigned variable. Execution then continues at the parent node. */
UNIV_INTERN
que_thr_t*
assign_step(
/*========*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	assign_node_t*	node;
	que_node_t*	value;

	ut_ad(thr);

	node = thr->run_node;
	ut_ad(que_node_get_type(node) == QUE_NODE_ASSIGNMENT);

	/* Evaluate the value to assign */
	value = node->val;
	eval_exp(value);

	eval_node_copy_val(node->var->alias, value);

	thr->run_node = que_node_get_parent(node);

	return(thr);
}
/**************************************************************************
Performs an execution step of a for-loop node. When control arrives from
the parent node, the loop limits are evaluated and the loop variable starts
from the start limit. When a statement of the body has been run, the next
statement of the body is run if there is one; otherwise the loop variable
is incremented. The body is entered while the loop variable does not exceed
the end value. */
UNIV_INTERN
que_thr_t*
for_step(
/*=====*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	for_node_t*	node;
	que_node_t*	parent;
	lint		loop_var_value;

	ut_ad(thr);

	node = thr->run_node;

	ut_ad(que_node_get_type(node) == QUE_NODE_FOR);

	parent = que_node_get_parent(node);

	if (thr->prev_node == parent) {
		/* Initialize the loop */
		eval_exp(node->loop_start_limit);
		eval_exp(node->loop_end_limit);

		loop_var_value = eval_node_get_int_val(node->loop_start_limit);

		node->loop_end_value
			= (int) eval_node_get_int_val(node->loop_end_limit);
	} else {
		/* Move to the next statement */
		thr->run_node = que_node_get_next(thr->prev_node);

		if (thr->run_node != NULL) {

			return(thr);
		}

		/* The body is finished: increment the value of loop_var */
		loop_var_value = 1 + eval_node_get_int_val(node->loop_var);
	}

	/* Check if we should do another loop */
	if (loop_var_value <= node->loop_end_value) {
		eval_node_set_int_val(node->loop_var, loop_var_value);

		thr->run_node = node->stat_list;
	} else {
		/* Enough loops done */
		thr->run_node = parent;
	}

	return(thr);
}
/**************************************************************************
Performs an execution step of an exit statement node: execution continues
at the parent of the loop node that contains the exit statement. */
UNIV_INTERN
que_thr_t*
exit_step(
/*======*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	exit_node_t*	node;
	que_node_t*	loop_node;

	ut_ad(thr);

	node = thr->run_node;
	ut_ad(que_node_get_type(node) == QUE_NODE_EXIT);

	/* A loop is left by making the parent of the loop node the next
	node to run, so look up the loop that contains this statement. */
	loop_node = que_node_get_containing_loop_node(node);

	/* This fails if an EXIT statement is used outside of a loop. */
	ut_a(loop_node);

	thr->run_node = que_node_get_parent(loop_node);

	return(thr);
}
/**************************************************************************
Performs an execution step of a return-statement node: walks up the parent
chain from the return node until a node of type QUE_NODE_PROC is found, and
continues execution at the parent of that node. It is a fatal error if the
chain ends before a procedure node is found. */
UNIV_INTERN
que_thr_t*
return_step(
/*========*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	return_node_t*	node;
	que_node_t*	parent;

	ut_ad(thr);

	node = thr->run_node;

	ut_ad(que_node_get_type(node) == QUE_NODE_RETURN);

	parent = node;

	while (que_node_get_type(parent) != QUE_NODE_PROC) {

		parent = que_node_get_parent(parent);

		/* Check before the next que_node_get_type() call reads
		the node: a check placed after the loop could never fire,
		because a NULL parent would already have been
		dereferenced by the loop condition. */
		ut_a(parent);
	}

	thr->run_node = que_node_get_parent(parent);

	return(thr);
}

4822
fil/fil0fil.c Normal file

File diff suppressed because it is too large Load Diff

4267
fsp/fsp0fsp.c Normal file

File diff suppressed because it is too large Load Diff

14
fut/fut0fut.c Normal file
View File

@@ -0,0 +1,14 @@
/**********************************************************************
File-based utilities
(c) 1995 Innobase Oy
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
#include "fut0fut.h"
#ifdef UNIV_NONINL
#include "fut0fut.ic"
#endif

513
fut/fut0lst.c Normal file
View File

@@ -0,0 +1,513 @@
/**********************************************************************
File-based list utilities
(c) 1995 Innobase Oy
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#include "fut0lst.h"
#ifdef UNIV_NONINL
#include "fut0lst.ic"
#endif
#include "buf0buf.h"
#include "page0page.h"
/************************************************************************
Adds a node to an empty list: the first and last fields of the base node
are set to the address of the node, the prev and next fields of the node
are set to the null address, and the length stored in the base node is
incremented. The length read from the base node must be 0; this is checked
with ut_a. */
static
void
flst_add_to_empty(
/*==============*/
flst_base_node_t* base, /* in: pointer to base node of
empty list */
flst_node_t* node, /* in: node to add */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
fil_addr_t node_addr;
ulint len;
ut_ad(mtr && base && node);
ut_ad(base != node);
/* Debug checks: the memo of the mtr must contain the pages of both
base and node with MTR_MEMO_PAGE_X_FIX */
ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
len = flst_get_len(base, mtr);
ut_a(len == 0);
/* Translate the node pointer to a space id and a file address */
buf_ptr_get_fsp_addr(node, &space, &node_addr);
/* Update first and last fields of base node */
flst_write_addr(base + FLST_FIRST, node_addr, mtr);
flst_write_addr(base + FLST_LAST, node_addr, mtr);
/* Set prev and next fields of node to add */
flst_write_addr(node + FLST_PREV, fil_addr_null, mtr);
flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr);
/* Update len of base node */
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
/************************************************************************
Adds a node as the last node in a list. An empty list is handled by
flst_add_to_empty; otherwise the node is inserted after the current last
node with flst_insert_after. */
UNIV_INTERN
void
flst_add_last(
/*==========*/
	flst_base_node_t*	base,	/* in: pointer to base node of list */
	flst_node_t*		node,	/* in: node to add */
	mtr_t*			mtr)	/* in: mini-transaction handle */
{
	ulint		space;
	ulint		zip_size;
	ulint		len;
	fil_addr_t	node_addr;
	fil_addr_t	last_addr;
	flst_node_t*	last_node;

	ut_ad(mtr && base && node);
	ut_ad(base != node);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));

	len = flst_get_len(base, mtr);
	last_addr = flst_get_last(base, mtr);

	buf_ptr_get_fsp_addr(node, &space, &node_addr);

	if (len == 0) {
		/* Nothing to insert after */
		flst_add_to_empty(base, node, mtr);

		return;
	}

	if (last_addr.page != node_addr.page) {
		/* The last node is on another page: get a pointer to it
		with an x-latch request */
		zip_size = fil_space_get_zip_size(space);

		last_node = fut_get_ptr(space, zip_size, last_addr,
					RW_X_LATCH, mtr);
	} else {
		/* Same page as the new node: compute the pointer from
		the page start and the byte offset */
		last_node = page_align(node) + last_addr.boffset;
	}

	flst_insert_after(base, last_node, node, mtr);
}
/************************************************************************
Adds a node as the first node in a list. An empty list is handled by
flst_add_to_empty; otherwise the node is inserted before the current first
node with flst_insert_before. */
UNIV_INTERN
void
flst_add_first(
/*===========*/
	flst_base_node_t*	base,	/* in: pointer to base node of list */
	flst_node_t*		node,	/* in: node to add */
	mtr_t*			mtr)	/* in: mini-transaction handle */
{
	ulint		space;
	ulint		zip_size;
	ulint		len;
	fil_addr_t	node_addr;
	fil_addr_t	first_addr;
	flst_node_t*	first_node;

	ut_ad(mtr && base && node);
	ut_ad(base != node);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));

	len = flst_get_len(base, mtr);
	first_addr = flst_get_first(base, mtr);

	buf_ptr_get_fsp_addr(node, &space, &node_addr);

	if (len == 0) {
		/* Nothing to insert before */
		flst_add_to_empty(base, node, mtr);

		return;
	}

	if (first_addr.page != node_addr.page) {
		/* The first node is on another page: get a pointer to
		it with an x-latch request */
		zip_size = fil_space_get_zip_size(space);

		first_node = fut_get_ptr(space, zip_size, first_addr,
					 RW_X_LATCH, mtr);
	} else {
		/* Same page as the new node: compute the pointer from
		the page start and the byte offset */
		first_node = page_align(node) + first_addr.boffset;
	}

	flst_insert_before(base, node, first_node, mtr);
}
/************************************************************************
Inserts a node after another in a list. The prev and next fields of the
new node (node2) are set to node1 and to the old successor of node1. The
old successor, if any, gets node2 as its prev; otherwise the last field of
the base node is set to node2. Finally the next field of node1 is set to
node2 and the length stored in the base node is incremented. */
UNIV_INTERN
void
flst_insert_after(
/*==============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node1, /* in: node to insert after */
flst_node_t* node2, /* in: node to add */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
fil_addr_t node1_addr;
fil_addr_t node2_addr;
flst_node_t* node3;
fil_addr_t node3_addr;
ulint len;
ut_ad(mtr && node1 && node2 && base);
ut_ad(base != node1);
ut_ad(base != node2);
ut_ad(node2 != node1);
ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains_page(mtr, node1, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
/* Both calls store into the same space variable; the value left in
it is the one obtained for node2 */
buf_ptr_get_fsp_addr(node1, &space, &node1_addr);
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
/* node3 denotes the node that currently follows node1, if any */
node3_addr = flst_get_next_addr(node1, mtr);
/* Set prev and next fields of node2 */
flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
if (!fil_addr_is_null(node3_addr)) {
/* Update prev field of node3 */
ulint zip_size = fil_space_get_zip_size(space);
node3 = fut_get_ptr(space, zip_size,
node3_addr, RW_X_LATCH, mtr);
flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
} else {
/* node1 was last in list: update last field in base */
flst_write_addr(base + FLST_LAST, node2_addr, mtr);
}
/* Set next field of node1 */
flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
/* Update len of base node */
len = flst_get_len(base, mtr);
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
/************************************************************************
Inserts a node before another in a list. The prev and next fields of the
new node (node2) are set to the old predecessor of node3 and to node3. The
old predecessor, if any, gets node2 as its next; otherwise the first field
of the base node is set to node2. Finally the prev field of node3 is set to
node2 and the length stored in the base node is incremented. */
UNIV_INTERN
void
flst_insert_before(
/*===============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: node to insert */
flst_node_t* node3, /* in: node to insert before */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
flst_node_t* node1;
fil_addr_t node1_addr;
fil_addr_t node2_addr;
fil_addr_t node3_addr;
ulint len;
ut_ad(mtr && node2 && node3 && base);
ut_ad(base != node2);
ut_ad(base != node3);
ut_ad(node2 != node3);
ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains_page(mtr, node3, MTR_MEMO_PAGE_X_FIX));
/* Both calls store into the same space variable; the value left in
it is the one obtained for node3 */
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
buf_ptr_get_fsp_addr(node3, &space, &node3_addr);
/* node1 denotes the node that currently precedes node3, if any */
node1_addr = flst_get_prev_addr(node3, mtr);
/* Set prev and next fields of node2 */
flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
if (!fil_addr_is_null(node1_addr)) {
ulint zip_size = fil_space_get_zip_size(space);
/* Update next field of node1 */
node1 = fut_get_ptr(space, zip_size, node1_addr,
RW_X_LATCH, mtr);
flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
} else {
/* node3 was first in list: update first field in base */
flst_write_addr(base + FLST_FIRST, node2_addr, mtr);
}
/* Set prev field of node3 */
flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
/* Update len of base node */
len = flst_get_len(base, mtr);
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
/************************************************************************
Removes a node (node2) from a list. The predecessor (node1) and the
successor (node3) of the node are linked to each other; where one of them
does not exist, the first or the last field of the base node is updated
instead. The length stored in the base node is decremented. The fields of
the removed node itself are not written. */
UNIV_INTERN
void
flst_remove(
/*========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: node to remove */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
ulint zip_size;
flst_node_t* node1;
fil_addr_t node1_addr;
fil_addr_t node2_addr;
flst_node_t* node3;
fil_addr_t node3_addr;
ulint len;
ut_ad(mtr && node2 && base);
ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
zip_size = fil_space_get_zip_size(space);
node1_addr = flst_get_prev_addr(node2, mtr);
node3_addr = flst_get_next_addr(node2, mtr);
if (!fil_addr_is_null(node1_addr)) {
/* Update next field of node1 */
if (node1_addr.page == node2_addr.page) {
/* Same page as node2: compute the pointer from the page
start and the byte offset */
node1 = page_align(node2) + node1_addr.boffset;
} else {
node1 = fut_get_ptr(space, zip_size,
node1_addr, RW_X_LATCH, mtr);
}
ut_ad(node1 != node2);
flst_write_addr(node1 + FLST_NEXT, node3_addr, mtr);
} else {
/* node2 was first in list: update first field in base */
flst_write_addr(base + FLST_FIRST, node3_addr, mtr);
}
if (!fil_addr_is_null(node3_addr)) {
/* Update prev field of node3 */
if (node3_addr.page == node2_addr.page) {
/* Same page as node2: compute the pointer from the page
start and the byte offset */
node3 = page_align(node2) + node3_addr.boffset;
} else {
node3 = fut_get_ptr(space, zip_size,
node3_addr, RW_X_LATCH, mtr);
}
ut_ad(node2 != node3);
flst_write_addr(node3 + FLST_PREV, node1_addr, mtr);
} else {
/* node2 was last in list: update last field in base */
flst_write_addr(base + FLST_LAST, node1_addr, mtr);
}
/* Update len of base node */
len = flst_get_len(base, mtr);
ut_ad(len > 0);
mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr);
}
/************************************************************************
Cuts off the tail of the list, including the node given. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. The predecessor of the given node
becomes the last node of the list; if there is no predecessor, the first
field of the base node is set to the null address. The removed nodes
themselves are not written. */
UNIV_INTERN
void
flst_cut_end(
/*=========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: first node to remove */
ulint n_nodes,/* in: number of nodes to remove,
must be >= 1 */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
flst_node_t* node1;
fil_addr_t node1_addr;
fil_addr_t node2_addr;
ulint len;
ut_ad(mtr && node2 && base);
ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
ut_ad(n_nodes > 0);
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
/* node1 denotes the node that precedes node2, if any */
node1_addr = flst_get_prev_addr(node2, mtr);
if (!fil_addr_is_null(node1_addr)) {
/* Update next field of node1 */
if (node1_addr.page == node2_addr.page) {
/* Same page as node2: compute the pointer from the page
start and the byte offset */
node1 = page_align(node2) + node1_addr.boffset;
} else {
node1 = fut_get_ptr(space,
fil_space_get_zip_size(space),
node1_addr, RW_X_LATCH, mtr);
}
flst_write_addr(node1 + FLST_NEXT, fil_addr_null, mtr);
} else {
/* node2 was first in list: update the field in base */
flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
}
/* The last field of base gets the address of node1, which is the
null address if node2 was first in list */
flst_write_addr(base + FLST_LAST, node1_addr, mtr);
/* Update len of base node */
len = flst_get_len(base, mtr);
ut_ad(len >= n_nodes);
mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
}
/************************************************************************
Cuts off the tail of the list, not including the given node. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. The given node becomes the last
node of the list. If n_nodes is 0, nothing is written. */
UNIV_INTERN
void
flst_truncate_end(
/*==============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: first node not to remove */
ulint n_nodes,/* in: number of nodes to remove */
mtr_t* mtr) /* in: mini-transaction handle */
{
fil_addr_t node2_addr;
ulint len;
ulint space;
ut_ad(mtr && node2 && base);
ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
if (n_nodes == 0) {
/* Nothing to remove: in debug builds, check that node2 has no
successor */
ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr)));
return;
}
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
/* Update next field of node2 */
flst_write_addr(node2 + FLST_NEXT, fil_addr_null, mtr);
/* node2 is now the last node of the list */
flst_write_addr(base + FLST_LAST, node2_addr, mtr);
/* Update len of base node */
len = flst_get_len(base, mtr);
ut_ad(len >= n_nodes);
mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
}
/************************************************************************
Validates a file-based list: starting from the first node, the next
pointers are followed as many times as the length stored in the base node
says, after which the address reached must be null; the same is then done
from the last node along the prev pointers. A violation fails a ut_a
assertion, so the function returns only if the list is ok. */
UNIV_INTERN
ibool
flst_validate(
/*==========*/
/* out: TRUE if ok */
const flst_base_node_t* base, /* in: pointer to base node of list */
mtr_t* mtr1) /* in: mtr */
{
ulint space;
ulint zip_size;
const flst_node_t* node;
fil_addr_t node_addr;
fil_addr_t base_addr;
ulint len;
ulint i;
mtr_t mtr2;
ut_ad(base);
ut_ad(mtr_memo_contains_page(mtr1, base, MTR_MEMO_PAGE_X_FIX));
/* We use two mini-transaction handles: the first is used to
lock the base node, and prevent other threads from modifying the
list. The second is used to traverse the list. We cannot run the
second mtr without committing it at times, because if the list
is long, then the x-locked pages could fill the buffer resulting
in a deadlock. */
/* Find out the space id */
buf_ptr_get_fsp_addr(base, &space, &base_addr);
zip_size = fil_space_get_zip_size(space);
len = flst_get_len(base, mtr1);
/* Forward pass: follow the next pointers len times */
node_addr = flst_get_first(base, mtr1);
for (i = 0; i < len; i++) {
mtr_start(&mtr2);
node = fut_get_ptr(space, zip_size,
node_addr, RW_X_LATCH, &mtr2);
node_addr = flst_get_next_addr(node, &mtr2);
mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
becoming full */
}
/* After len steps we must have run off the end of the list */
ut_a(fil_addr_is_null(node_addr));
/* Backward pass: follow the prev pointers len times */
node_addr = flst_get_last(base, mtr1);
for (i = 0; i < len; i++) {
mtr_start(&mtr2);
node = fut_get_ptr(space, zip_size,
node_addr, RW_X_LATCH, &mtr2);
node_addr = flst_get_prev_addr(node, &mtr2);
mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
becoming full */
}
/* After len steps we must have run off the start of the list */
ut_a(fil_addr_is_null(node_addr));
return(TRUE);
}
/************************************************************************
Writes a short description of a file-based list to stderr: the location of
the base node (space id, page number, byte offset within the page) and the
list length stored in it. */
UNIV_INTERN
void
flst_print(
/*=======*/
	const flst_base_node_t*	base,	/* in: pointer to base node of list */
	mtr_t*			mtr)	/* in: mtr */
{
	ulint			n_nodes;
	const buf_frame_t*	base_frame;

	ut_ad(base && mtr);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));

	n_nodes = flst_get_len(base, mtr);

	/* The frame of the page that contains the base node */
	base_frame = page_align((byte*) base);

	fprintf(stderr,
		"FILE-BASED LIST:\n"
		"Base node in space %lu page %lu byte offset %lu; len %lu\n",
		(ulong) page_get_space_id(base_frame),
		(ulong) page_get_page_no(base_frame),
		(ulong) page_offset(base),
		(ulong) n_nodes);
}

409
ha/ha0ha.c Normal file
View File

@@ -0,0 +1,409 @@
/************************************************************************
The hash table with external chains
(c) 1994-1997 Innobase Oy
Created 8/22/1994 Heikki Tuuri
*************************************************************************/
#include "ha0ha.h"
#ifdef UNIV_NONINL
#include "ha0ha.ic"
#endif
#ifdef UNIV_DEBUG
# include "buf0buf.h"
#endif /* UNIV_DEBUG */
#ifdef UNIV_SYNC_DEBUG
# include "btr0sea.h"
#endif /* UNIV_SYNC_DEBUG */
#include "page0page.h"
/*****************************************************************
Creates a hash table with external chains. The table gets at least n array
cells; hash_create() picks the actual cell count. With n_mutexes == 0 the
table gets one node heap and no mutexes, otherwise it gets n_mutexes mutexes
and one node heap per mutex. */
UNIV_INTERN
hash_table_t*
ha_create_func(
/*===========*/
				/* out, own: created table */
	ulint	n,		/* in: number of array cells */
#ifdef UNIV_SYNC_DEBUG
	ulint	mutex_level,	/* in: level of the mutexes in the latching
				order: this is used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
	ulint	n_mutexes)	/* in: number of mutexes to protect the
				hash table: must be a power of 2, or 0 */
{
	ulint		i;
	hash_table_t*	table = hash_create(n);

#ifdef UNIV_DEBUG
	table->adaptive = TRUE;
#endif /* UNIV_DEBUG */

	/* A MEM_HEAP_BTR_SEARCH type heap may in principle fail to be
	created, but in this situation it never should: that is why the
	results are checked with assertions only. */

	if (n_mutexes != 0) {
		hash_create_mutexes(table, n_mutexes, mutex_level);

		/* One node heap for each mutex */
		table->heaps = mem_alloc(n_mutexes * sizeof(void*));

		for (i = 0; i < n_mutexes; i++) {
			table->heaps[i] = mem_heap_create_in_btr_search(4096);
			ut_a(table->heaps[i]);
		}
	} else {
		/* No mutexes: a single heap serves the whole table */
		table->heap = mem_heap_create_in_btr_search(
			ut_min(4096, MEM_MAX_ALLOC_IN_BUF));
		ut_a(table->heap);
	}

	return(table);
}
/*****************************************************************
Frees the per-mutex node heaps of a hash table and resets every cell of the
table to an empty chain. */
UNIV_INTERN
void
ha_clear(
/*=====*/
	hash_table_t*	table)	/* in, own: hash table */
{
	ulint	heap_no;
	ulint	cell_no;
	ulint	n_heaps;
	ulint	n_cells;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
#endif /* UNIV_SYNC_DEBUG */

	/* There is one heap per mutex: release them all */
	n_heaps = table->n_mutexes;
	heap_no = 0;

	while (heap_no < n_heaps) {
		mem_heap_free(table->heaps[heap_no]);
		heap_no++;
	}

	/* Detach the chains from the cells */
	n_cells = hash_get_n_cells(table);
	cell_no = 0;

	while (cell_no < n_cells) {
		hash_get_nth_cell(table, cell_no)->node = NULL;
		cell_no++;
	}
}
/*****************************************************************
Stores a data pointer in a hash table under a fold value. When the chain
already holds a node with the same fold value, that node is redirected to
the new data and nothing is allocated; otherwise a new node is allocated and
appended to the end of the chain. */
UNIV_INTERN
ibool
ha_insert_for_fold_func(
/*====================*/
				/* out: TRUE if succeed, FALSE if no more
				memory could be allocated */
	hash_table_t*	table,	/* in: hash table */
	ulint		fold,	/* in: folded value of data; if a node with
				the same fold value already exists, it is
				updated to point to data, and no new
				node is created! */
#ifdef UNIV_DEBUG
	buf_block_t*	block,	/* in: buffer block containing the data */
#endif /* UNIV_DEBUG */
	void*		data)	/* in: data, must not be NULL */
{
	hash_cell_t*	cell;
	ha_node_t*	new_node;
	ha_node_t*	prev_node;
	ulint		cell_no;

	ut_ad(table && data);
	ut_ad(block->frame == page_align(data));
	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));

	cell_no = hash_calc_hash(fold, table);
	cell = hash_get_nth_cell(table, cell_no);

	/* First pass: look for an existing node with this fold value */
	for (prev_node = cell->node;
	     prev_node != NULL;
	     prev_node = prev_node->next) {

		if (prev_node->fold != fold) {

			continue;
		}

#ifdef UNIV_DEBUG
		if (table->adaptive) {
			/* Move the pointer count from the old block
			to the new one */
			buf_block_t* prev_block = prev_node->block;
			ut_a(prev_block->frame
			     == page_align(prev_node->data));
			ut_a(prev_block->n_pointers > 0);
			prev_block->n_pointers--;
			block->n_pointers++;
		}

		prev_node->block = block;
#endif /* UNIV_DEBUG */
		prev_node->data = data;

		return(TRUE);
	}

	/* No such node: a new chain node is needed */
	new_node = mem_heap_alloc(hash_get_heap(table, fold),
				  sizeof(ha_node_t));

	if (new_node == NULL) {
		/* The heap is of the btr search type and cannot hand out
		more memory right now: give up */
		ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);

		return(FALSE);
	}

	ha_node_set_data(new_node, block, data);

#ifdef UNIV_DEBUG
	if (table->adaptive) {
		block->n_pointers++;
	}
#endif /* UNIV_DEBUG */

	new_node->fold = fold;
	new_node->next = NULL;

	/* Second pass: hook the new node to the end of the chain */
	prev_node = cell->node;

	if (prev_node == NULL) {
		cell->node = new_node;
	} else {
		while (prev_node->next != NULL) {
			prev_node = prev_node->next;
		}

		prev_node->next = new_node;
	}

	return(TRUE);
}
/***************************************************************
Deletes a hash node. In debug builds of an adaptive table the node's block
is first checked to really contain the node's data, and the pointer count of
the block is decremented. The node is then removed with
HASH_DELETE_AND_COMPACT. */
UNIV_INTERN
void
ha_delete_hash_node(
/*================*/
	hash_table_t*	table,		/* in: hash table */
	ha_node_t*	del_node)	/* in: node to be deleted */
{
#ifdef UNIV_DEBUG
	if (table->adaptive) {
		/* This must be a comparison: an assignment here would
		overwrite block->frame and would only fail if the
		assigned value were NULL. */
		ut_a(del_node->block->frame == page_align(del_node->data));
		ut_a(del_node->block->n_pointers > 0);
		del_node->block->n_pointers--;
	}
#endif /* UNIV_DEBUG */

	HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
}
/*****************************************************************
Removes from a hash table the node that was stored under the given fold
value and points to the given data. The node must exist: a missing node
trips an assertion. */
UNIV_INTERN
void
ha_delete(
/*======*/
	hash_table_t*	table,	/* in: hash table */
	ulint		fold,	/* in: folded value of data */
	void*		data)	/* in: data, must not be NULL and must exist
				in the hash table */
{
	ha_node_t*	node = NULL;

	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));

	/* Locate the node first, then hand it over for removal */
	node = ha_search_with_data(table, fold, data);
	ut_a(node);

	ha_delete_hash_node(table, node);
}
/*************************************************************
Searches a hash table for the node that points to the given data and, if
such a node exists, makes it point to new_data instead. Nothing happens when
no node is found. */
UNIV_INTERN
void
ha_search_and_update_if_found_func(
/*===============================*/
	hash_table_t*	table,	/* in: hash table */
	ulint		fold,	/* in: folded value of the searched data */
	void*		data,	/* in: pointer to the data */
#ifdef UNIV_DEBUG
	buf_block_t*	new_block,/* in: block containing new_data */
#endif
	void*		new_data)/* in: new pointer to the data */
{
	ha_node_t*	node;

	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
	ut_ad(new_block->frame == page_align(new_data));

	node = ha_search_with_data(table, fold, data);

	if (node == NULL) {
		/* Not in the table: nothing to update */

		return;
	}

#ifdef UNIV_DEBUG
	if (table->adaptive) {
		/* Move the pointer count to the new block */
		ut_a(node->block->n_pointers > 0);
		node->block->n_pointers--;
		new_block->n_pointers++;
	}

	node->block = new_block;
#endif /* UNIV_DEBUG */
	node->data = new_data;
}
/*********************************************************************
Deletes, from the hash chain selected by fold, every node whose data
pointer lies within the given page. */
UNIV_INTERN
void
ha_remove_all_nodes_to_page(
/*========================*/
	hash_table_t*	table,	/* in: hash table */
	ulint		fold,	/* in: fold value */
	const page_t*	page)	/* in: buffer page */
{
	ha_node_t*	node;

	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));

	node = ha_chain_get_first(table, fold);

	while (node) {
		if (page_align(ha_node_get_data(node)) != page) {
			/* Points elsewhere: keep it and move on */
			node = ha_chain_get_next(node);

			continue;
		}

		ha_delete_hash_node(table, node);

		/* The deletion may compact the heap of nodes and thereby
		move other nodes, so the scan has to be restarted from
		the beginning of the chain */
		node = ha_chain_get_first(table, fold);
	}

#ifdef UNIV_DEBUG
	/* Make sure that no node pointing to the page is left */
	for (node = ha_chain_get_first(table, fold);
	     node;
	     node = ha_chain_get_next(node)) {

		ut_a(page_align(ha_node_get_data(node)) != page);
	}
#endif
}
/*****************************************************************
Checks the cells start_index..end_index (both inclusive) of a hash table:
the fold value of every node in a chain must hash to the number of the cell
that owns the chain. Each mismatch is reported to stderr. */
UNIV_INTERN
ibool
ha_validate(
/*========*/
					/* out: TRUE if ok */
	hash_table_t*	table,		/* in: hash table */
	ulint		start_index,	/* in: start index */
	ulint		end_index)	/* in: end index */
{
	ha_node_t*	node;
	ulint		i;
	ibool		ok	= TRUE;

	ut_a(start_index <= end_index);
	ut_a(start_index < hash_get_n_cells(table));
	ut_a(end_index < hash_get_n_cells(table));

	for (i = start_index; i <= end_index; i++) {

		for (node = hash_get_nth_cell(table, i)->node;
		     node;
		     node = node->next) {

			if (hash_calc_hash(node->fold, table) == i) {
				/* The node is in the right cell */

				continue;
			}

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"InnoDB: Error: hash table node"
				" fold value %lu does not\n"
				"InnoDB: match the cell number %lu.\n",
				(ulong) node->fold, (ulong) i);

			ok = FALSE;
		}
	}

	return(ok);
}
/*****************************************************************
Prints the size of a hash table and the number of non-empty cells in it.
For a table that has a single node heap and no heap array, the number of
buffers held by that heap is printed as well. */
UNIV_INTERN
void
ha_print_info(
/*==========*/
	FILE*		file,	/* in: file where to print */
	hash_table_t*	table)	/* in: hash table */
{
	ulint	n_cells	= hash_get_n_cells(table);
	ulint	n_used	= 0;
	ulint	n_bufs;
	ulint	i;

	/* Count the cells that have a chain attached */
	for (i = 0; i < n_cells; i++) {
		if (hash_get_nth_cell(table, i)->node) {
			n_used++;
		}
	}

	fprintf(file,
		"Hash table size %lu, used cells %lu",
		(ulong) n_cells, (ulong) n_used);

	if (table->heaps != NULL || table->heap == NULL) {
		/* Not a single-heap table: no heap figures to report */

		return;
	}

	/* This figure is meant for the adaptive hash index: the number
	of buffer frames reserved by the node heap */
	n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;

	if (table->heap->free_block) {
		n_bufs++;
	}

	fprintf(file, ", node heap has %lu buffer(s)\n",
		(ulong) n_bufs);
}

166
ha/ha0storage.c Normal file
View File

@@ -0,0 +1,166 @@
/******************************************************
Hash storage.
Provides a data structure that stores chunks of data in
its own storage, avoiding duplicates.
(c) 2007 Innobase Oy
Created September 22, 2007 Vasil Dimov
*******************************************************/
#include "univ.i"
#include "ha0storage.h"
#include "hash0hash.h"
#include "mem0mem.h"
#include "ut0rnd.h"
#ifdef UNIV_NONINL
#include "ha0storage.ic"
#endif
/***********************************************************************
Looks up a data chunk in a storage. A stored chunk matches when it has the
same length and the same bytes as the given data. Returns a pointer to the
stored copy, or NULL when there is no matching chunk. */
static
const void*
ha_storage_get(
/*===========*/
	ha_storage_t*	storage,	/* in: hash storage */
	const void*	data,		/* in: data to check for */
	ulint		data_len)	/* in: data length */
{
	ulint			fold;
	ha_storage_node_t*	node;

	/* Fold the data once here, so that the HASH_SEARCH macro does not
	invoke ut_fold_binary() repeatedly */
	fold = ut_fold_binary(data, data_len);

#define IS_FOUND	\
	node->data_len == data_len && memcmp(node->data, data, data_len) == 0

	HASH_SEARCH(
		next,			/* node->"next" */
		storage->hash,		/* the hash table */
		fold,			/* key */
		ha_storage_node_t*,	/* type of node->next */
		node,			/* auxiliary variable */
		IS_FOUND);		/* search criteria */

	/* node is NULL when the search found nothing */
	return(node != NULL ? node->data : NULL);
}
/***********************************************************************
Stores a copy of a data chunk in the storage and returns a pointer to the
copy. Two chunks are equal if len1 == len2 and
memcmp(data1, data2, len1) == 0; when an equal chunk is already stored, a
pointer to that one is returned and nothing is added. When the chunk is not
stored yet and adding data_len bytes would make the storage larger than
"memlim", nothing is added and NULL is returned. A "memlim" of 0 means
"no limit". */
UNIV_INTERN
const void*
ha_storage_put_memlim(
/*==================*/
	ha_storage_t*	storage,	/* in/out: hash storage */
	const void*	data,		/* in: data to store */
	ulint		data_len,	/* in: data length */
	ulint		memlim)		/* in: memory limit to obey */
{
	const void*		existing;
	void*			chunk;
	byte*			payload;
	ha_storage_node_t*	node;
	ulint			fold;

	/* An equal chunk may be stored already */
	existing = ha_storage_get(storage, data, data_len);

	if (existing != NULL) {

		return(existing);
	}

	/* Not stored yet: see whether data_len more bytes are allowed */
	if (memlim > 0
	    && ha_storage_get_size(storage) + data_len > memlim) {

		return(NULL);
	}

	/* The node struct and the bytes of the chunk share one contiguous
	allocation: the node comes first, the bytes right after it */
	chunk = mem_heap_alloc(storage->heap,
			       sizeof(ha_storage_node_t) + data_len);

	node = (ha_storage_node_t*) chunk;
	payload = (byte*) chunk + sizeof(*node);

	memcpy(payload, data, data_len);

	node->data = payload;
	node->data_len = data_len;

	/* Fold the data once here, so that the HASH_INSERT macro does not
	invoke ut_fold_binary() repeatedly */
	fold = ut_fold_binary(data, data_len);

	HASH_INSERT(
		ha_storage_node_t,	/* type used in the hash chain */
		next,			/* node->"next" */
		storage->hash,		/* the hash table */
		fold,			/* key */
		node);			/* add this data to the hash */

	/* The returned bytes must not be modified by the caller: that
	would spoil the hash table */
	return(payload);
}
#ifdef UNIV_COMPILE_TEST_FUNCS

/***********************************************************************
Self-test of the hash storage. Stores 256 different 1024-byte chunks, then
stores the same chunks again in reverse order and checks that every second
put returns the pointer obtained from the first one. The outcome is
reported to stderr. */
void
test_ha_storage(void)
{
	ha_storage_t*	storage;
	char		buf[1024];
	int		i;
	const void*	stored[256];
	const void*	p;

	storage = ha_storage_create(0, 0);

	/* First round: every chunk is new and gets stored */
	for (i = 0; i < 256; i++) {

		memset(buf, i, sizeof(buf));
		stored[i] = ha_storage_put(storage, buf, sizeof(buf));
	}

	/* ha_storage_empty(&storage); */

	/* Second round: every chunk is a duplicate, so the pointer of
	the first round must come back */
	for (i = 255; i >= 0; i--) {

		memset(buf, i, sizeof(buf));
		p = ha_storage_put(storage, buf, sizeof(buf));

		if (p != stored[i]) {

			fprintf(stderr, "ha_storage_put() returned %p "
				"instead of %p, i=%d\n", p, stored[i], i);

			/* Release the storage on the failure path too,
			so that it is not leaked */
			ha_storage_free(storage);

			return;
		}
	}

	fprintf(stderr, "all ok\n");

	ha_storage_free(storage);
}

#endif /* UNIV_COMPILE_TEST_FUNCS */

149
ha/hash0hash.c Normal file
View File

@@ -0,0 +1,149 @@
/******************************************************
The simple hash table utility
(c) 1997 Innobase Oy
Created 5/20/1997 Heikki Tuuri
*******************************************************/
#include "hash0hash.h"
#ifdef UNIV_NONINL
#include "hash0hash.ic"
#endif
#include "mem0mem.h"
/****************************************************************
Acquires the hash table mutex that hash_get_mutex() returns for the given
fold value. */
UNIV_INTERN
void
hash_mutex_enter(
/*=============*/
	hash_table_t*	table,	/* in: hash table */
	ulint		fold)	/* in: fold */
{
	mutex_t*	fold_mutex = hash_get_mutex(table, fold);

	mutex_enter(fold_mutex);
}
/****************************************************************
Releases the hash table mutex that hash_get_mutex() returns for the given
fold value. */
UNIV_INTERN
void
hash_mutex_exit(
/*============*/
	hash_table_t*	table,	/* in: hash table */
	ulint		fold)	/* in: fold */
{
	mutex_t*	fold_mutex = hash_get_mutex(table, fold);

	mutex_exit(fold_mutex);
}
/****************************************************************
Acquires every mutex of a hash table, going through the mutex array in
ascending index order. */
UNIV_INTERN
void
hash_mutex_enter_all(
/*=================*/
	hash_table_t*	table)	/* in: hash table */
{
	ulint	mutex_no = 0;

	while (mutex_no < table->n_mutexes) {

		mutex_enter(&table->mutexes[mutex_no]);
		mutex_no++;
	}
}
/****************************************************************
Releases every mutex of a hash table, going through the mutex array in
ascending index order. */
UNIV_INTERN
void
hash_mutex_exit_all(
/*================*/
	hash_table_t*	table)	/* in: hash table */
{
	ulint	mutex_no = 0;

	while (mutex_no < table->n_mutexes) {

		mutex_exit(&table->mutexes[mutex_no]);
		mutex_no++;
	}
}
/*****************************************************************
Allocates and initializes a hash table. The number of cells is the value
that ut_find_prime() returns for n. The new table has no mutexes and no
heaps, and its cell array is cleared. */
UNIV_INTERN
hash_table_t*
hash_create(
/*========*/
			/* out, own: created table */
	ulint	n)	/* in: number of array cells */
{
	hash_table_t*	table;
	ulint		n_cells;

	n_cells = ut_find_prime(n);

	table = mem_alloc(sizeof(hash_table_t));

	/* The cell array is a separate allocation */
	table->array = ut_malloc(sizeof(hash_cell_t) * n_cells);
	table->n_cells = n_cells;

#ifdef UNIV_DEBUG
	table->adaptive = FALSE;
#endif /* UNIV_DEBUG */

	/* Mutexes and heaps are attached later, if at all */
	table->mutexes = NULL;
	table->n_mutexes = 0;
	table->heap = NULL;
	table->heaps = NULL;

	table->magic_n = HASH_TABLE_MAGIC_N;

	/* Reset the cells of the freshly allocated array */
	hash_table_clear(table);

	return(table);
}
/*****************************************************************
Releases the cell array of a hash table and the table struct itself. The
table must not have a mutex array. */
UNIV_INTERN
void
hash_table_free(
/*============*/
	hash_table_t*	table)	/* in, own: hash table */
{
	hash_cell_t*	cells;

	ut_a(table->mutexes == NULL);

	/* Release the cell array first, then the struct that pointed
	to it */
	cells = table->array;

	ut_free(cells);
	mem_free(table);
}
/*****************************************************************
Allocates an array of n_mutexes mutexes for a hash table, creates each of
them and records the count in the table. */
UNIV_INTERN
void
hash_create_mutexes_func(
/*=====================*/
	hash_table_t*	table,		/* in: hash table */
#ifdef UNIV_SYNC_DEBUG
	ulint		sync_level,	/* in: latching order level of the
					mutexes: used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
	ulint		n_mutexes)	/* in: number of mutexes, must be a
					power of 2 */
{
	ulint	i = 0;

	ut_a(n_mutexes > 0);
	ut_a(ut_is_2pow(n_mutexes));

	table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));

	while (i < n_mutexes) {
		mutex_create(table->mutexes + i, sync_level);
		i++;
	}

	/* Publish the count only after all mutexes have been created */
	table->n_mutexes = n_mutexes;
}

9639
handler/ha_innodb.cc Normal file

File diff suppressed because it is too large Load Diff

267
handler/ha_innodb.h Normal file
View File

@@ -0,0 +1,267 @@
/* Copyright (C) 2000-2005 MySQL AB && Innobase Oy
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
This file is based on ha_berkeley.h of MySQL distribution
This file defines the Innodb handler: the interface between MySQL and
Innodb
*/
#ifdef USE_PRAGMA_INTERFACE
#pragma interface /* gcc class implementation */
#endif
/* Share structure referenced by ha_innobase handler objects through their
"share" member.
NOTE(review): presumably one instance exists per open table and is shared by
all handlers opened on that table, with use_count counting them and mutex
protecting the struct -- confirm against ha_innodb.cc. */
typedef struct st_innobase_share {
THR_LOCK lock;
pthread_mutex_t mutex;
char *table_name;
/* table_name_length: presumably the length of table_name in bytes --
TODO confirm; use_count: see the note above */
uint table_name_length,use_count;
} INNOBASE_SHARE;
struct dict_index_struct;
struct row_prebuilt_struct;
typedef struct dict_index_struct dict_index_t;
typedef struct row_prebuilt_struct row_prebuilt_t;
/* The class defining a handle to an Innodb table */
/* It derives from the MySQL "handler" class. Only declarations are given
here; the member functions are defined elsewhere. Everything before the
"public:" label is private, because this is a class and not a struct. */
class ha_innobase: public handler
{
row_prebuilt_t* prebuilt; /* prebuilt struct in InnoDB, used
to save CPU time with prebuilt data
structures*/
THD* user_thd; /* the thread handle of the user
currently using the handle; this is
set in external_lock function */
THR_LOCK_DATA lock;
INNOBASE_SHARE *share;
uchar* upd_buff; /* buffer used in updates */
uchar* key_val_buff; /* buffer used in converting
search key values from MySQL format
to Innodb format */
ulong upd_and_key_val_buff_len;
/* the length of each of the previous
two buffers */
Table_flags int_table_flags;
uint primary_key;
ulong start_of_scan; /* this is set to 1 when we are
starting a table scan but have not
yet fetched any row, else 0 */
uint last_match_mode;/* match mode of the latest search:
ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
or undefined */
uint num_write_row; /* number of write_row() calls */
/* Private helper functions */
uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
const uchar* record);
inline void update_thd(THD* thd);
void update_thd();
int change_active_index(uint keynr);
int general_fetch(uchar* buf, uint direction, uint match_mode);
/* Private auto-increment helpers */
int innobase_read_and_init_auto_inc(ulonglong* ret);
ulong innobase_autoinc_lock();
ulong innobase_set_max_autoinc(ulonglong auto_inc);
ulong innobase_reset_autoinc(ulonglong auto_inc);
ulong innobase_get_auto_increment(ulonglong* value);
dict_index_t* innobase_get_index(uint keynr);
/* Init values for the class: */
public:
ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
~ha_innobase();
/*
Get the row type from the storage engine. If this method returns
ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
*/
enum row_type get_row_type() const;
const char* table_type() const;
const char* index_type(uint key_number);
const char** bas_ext() const;
Table_flags table_flags() const;
ulong index_flags(uint idx, uint part, bool all_parts) const;
uint max_supported_keys() const;
uint max_supported_key_length() const;
uint max_supported_key_part_length() const;
const key_map* keys_to_use_for_scanning();
int open(const char *name, int mode, uint test_if_locked);
int close(void);
double scan_time();
double read_time(uint index, uint ranges, ha_rows rows);
/* Row modification */
int write_row(uchar * buf);
int update_row(const uchar * old_data, uchar * new_data);
int delete_row(const uchar * buf);
bool was_semi_consistent_read();
void try_semi_consistent_read(bool yes);
void unlock_row();
#ifdef ROW_MERGE_IS_INDEX_USABLE
/** Check if an index can be used by this transaction.
* @param keynr key number to check
* @return true if available, false if the index
* does not contain old records that exist
* in the read view of this transaction */
bool is_index_available(uint keynr);
#endif /* ROW_MERGE_IS_INDEX_USABLE */
/* Index access */
int index_init(uint index, bool sorted);
int index_end();
int index_read(uchar * buf, const uchar * key,
uint key_len, enum ha_rkey_function find_flag);
int index_read_idx(uchar * buf, uint index, const uchar * key,
uint key_len, enum ha_rkey_function find_flag);
int index_read_last(uchar * buf, const uchar * key, uint key_len);
int index_next(uchar * buf);
int index_next_same(uchar * buf, const uchar *key, uint keylen);
int index_prev(uchar * buf);
int index_first(uchar * buf);
int index_last(uchar * buf);
/* Table scan and positioned access */
int rnd_init(bool scan);
int rnd_end();
int rnd_next(uchar *buf);
int rnd_pos(uchar * buf, uchar *pos);
void position(const uchar *record);
int info(uint);
int analyze(THD* thd,HA_CHECK_OPT* check_opt);
int optimize(THD* thd,HA_CHECK_OPT* check_opt);
int discard_or_import_tablespace(my_bool discard);
int extra(enum ha_extra_function operation);
int reset();
int external_lock(THD *thd, int lock_type);
int transactional_table_lock(THD *thd, int lock_type);
int start_stmt(THD *thd, thr_lock_type lock_type);
/* NOTE(review): position() is declared a second time here, taking a
non-const uchar*; an overload taking const uchar* is declared further
up. Confirm that both overloads are intended. */
void position(uchar *record);
ha_rows records_in_range(uint inx, key_range *min_key, key_range
*max_key);
ha_rows estimate_rows_upper_bound();
void update_create_info(HA_CREATE_INFO* create_info);
/* Table-level DDL and maintenance */
int create(const char *name, register TABLE *form,
HA_CREATE_INFO *create_info);
int delete_all_rows();
int delete_table(const char *name);
int rename_table(const char* from, const char* to);
int check(THD* thd, HA_CHECK_OPT* check_opt);
char* update_table_comment(const char* comment);
/* Foreign keys */
char* get_foreign_key_create_info();
int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
bool can_switch_engines();
uint referenced_by_foreign_key();
void free_foreign_key_create_info(char* str);
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
enum thr_lock_type lock_type);
void init_table_handle_for_HANDLER();
virtual void get_auto_increment(ulonglong offset, ulonglong increment,
ulonglong nb_desired_values,
ulonglong *first_value,
ulonglong *nb_reserved_values);
int reset_auto_increment(ulonglong value);
virtual bool get_error_message(int error, String *buf);
uint8 table_cache_type();
/*
ask handler about permission to cache table during query registration
*/
my_bool register_query_cache_table(THD *thd, char *table_key,
uint key_length,
qc_engine_callback *call_back,
ulonglong *engine_data);
static char *get_mysql_bin_log_name();
static ulonglong get_mysql_bin_log_pos();
bool primary_key_is_clustered();
int cmp_ref(const uchar *ref1, const uchar *ref2);
/** Fast index creation (smart ALTER TABLE) @see handler0alter.cc @{ */
int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
int prepare_drop_index(TABLE *table_arg, uint *key_num,
uint num_of_keys);
int final_drop_index(TABLE *table_arg);
/** @} */
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
};
/* Some accessor functions which the InnoDB plugin needs, but which
can not be added to mysql/plugin.h as part of the public interface;
the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */
#ifndef INNODB_COMPATIBILITY_HOOKS
#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS
#endif
/* Accessor functions with C linkage that InnoDB calls; they are only
declared here. */
extern "C" {
/* NOTE(review): presumably returns the character set of the given thread --
confirm against the definition in the MySQL server code. */
struct charset_info_st *thd_charset(MYSQL_THD thd);
/* NOTE(review): returns a char**, presumably a pointer to the thread's
current query string pointer -- confirm against the definition. */
char **thd_query(MYSQL_THD thd);
/** Get the file name of the MySQL binlog.
* @return the name of the binlog file
*/
const char* mysql_bin_log_file_name(void);
/** Get the current position of the MySQL binlog.
* @return byte offset from the beginning of the binlog
*/
ulonglong mysql_bin_log_file_pos(void);
/**
Check if a user thread is a replication slave thread
@param thd user thread
@retval 0 the user thread is not a replication slave thread
@retval 1 the user thread is a replication slave thread
*/
int thd_slave_thread(const MYSQL_THD thd);
/**
Check if a user thread is running a non-transactional update
@param thd user thread
@retval 0 the user thread is not running a non-transactional update
@retval 1 the user thread is running a non-transactional update
*/
int thd_non_transactional_update(const MYSQL_THD thd);
/**
Get the user thread's binary logging format
@param thd user thread
@return Value to be used as index into the binlog_format_names array
*/
int thd_binlog_format(const MYSQL_THD thd);
/**
Mark transaction to rollback and mark error as fatal to a sub-statement.
@param thd Thread handle
@param all TRUE <=> rollback main transaction.
*/
void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
}
typedef struct trx_struct trx_t;
/************************************************************************
Converts an InnoDB error code to a MySQL error code and also tells MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock. Only the declaration is given here; the function has
C linkage. */
extern "C"
int
convert_error_code_to_mysql(
/*========================*/
/* out: MySQL error code */
int error, /* in: InnoDB error code */
ulint flags, /* in: InnoDB table flags, or 0 */
MYSQL_THD thd); /* in: user thread handle or NULL */

1166
handler/handler0alter.cc Normal file

File diff suppressed because it is too large Load Diff

1559
handler/i_s.cc Normal file

File diff suppressed because it is too large Load Diff

20
handler/i_s.h Normal file
View File

@@ -0,0 +1,20 @@
/******************************************************
InnoDB INFORMATION SCHEMA tables interface to MySQL.
(c) 2007 Innobase Oy
Created July 18, 2007 Vasil Dimov
*******************************************************/
#ifndef i_s_h
#define i_s_h
/* Plugin descriptors of the InnoDB INFORMATION SCHEMA tables. They are only
declared here.
NOTE(review): presumably defined in handler/i_s.cc, one descriptor per
INFORMATION SCHEMA table -- confirm. */
extern struct st_mysql_plugin i_s_innodb_trx;
extern struct st_mysql_plugin i_s_innodb_locks;
extern struct st_mysql_plugin i_s_innodb_lock_waits;
extern struct st_mysql_plugin i_s_innodb_cmp;
extern struct st_mysql_plugin i_s_innodb_cmp_reset;
extern struct st_mysql_plugin i_s_innodb_cmpmem;
extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
#endif /* i_s_h */

38
handler/mysql_addons.cc Normal file
View File

@@ -0,0 +1,38 @@
/******************************************************
This file contains functions that need to be added to
MySQL code but have not been added yet.
Whenever you add a function here submit a MySQL bug
report (feature request) with the implementation. Then
write the bug number in the comment before the
function in this file.
When MySQL commits the function it can be deleted from
here. In a perfect world this file exists but is empty.
(c) 2007 Innobase Oy
Created November 07, 2007 Vasil Dimov
*******************************************************/
#ifndef MYSQL_SERVER
#define MYSQL_SERVER
#endif /* MYSQL_SERVER */
#include <mysql_priv.h>
#include "mysql_addons.h"
#include "univ.i"
/***********************************************************************
Returns the thread_id member of a THD object that is passed in as an
untyped pointer.
http://bugs.mysql.com/30930 */
extern "C" UNIV_INTERN
unsigned long
ib_thd_get_thread_id(
/*=================*/
				/* out: THD::thread_id */
	const void*	thd)	/* in: THD */
{
	/* The caller only has an untyped pointer: view it as a THD */
	THD*	mysql_thd = (THD*) thd;

	return((unsigned long) mysql_thd->thread_id);
}

3724
ibuf/ibuf0ibuf.c Normal file

File diff suppressed because it is too large Load Diff

479
include/btr0btr.h Normal file
View File

@@ -0,0 +1,479 @@
/******************************************************
The B-tree
(c) 1994-1996 Innobase Oy
Created 6/2/1994 Heikki Tuuri
*******************************************************/
#ifndef btr0btr_h
#define btr0btr_h
#include "univ.i"
#include "dict0dict.h"
#include "data0data.h"
#include "page0cur.h"
#include "rem0rec.h"
#include "mtr0mtr.h"
#include "btr0types.h"
/* Maximum record size which can be stored on a page, without using the
special big record storage structure */
/* That is, 200 bytes less than half of a page. */
#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
/* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as
such; none of the tree operations avoid producing trees bigger than this. It
is instead a "max depth that other code must work with", useful for e.g.
fixed-size arrays that must store some information about each level in a
tree. In other words: if a B-tree with bigger depth than this is
encountered, it is not acceptable for it to lead to mysterious memory
corruption, but it is acceptable for the program to die with a clear assert
failure. */
#define BTR_MAX_LEVELS 100
/* Latching modes for btr_cur_search_to_nth_level(). */
/* The first three are aliases of RW_S_LATCH, RW_X_LATCH and RW_NO_LATCH;
the remaining four are plain numeric codes defined here. */
#define BTR_SEARCH_LEAF RW_S_LATCH
#define BTR_MODIFY_LEAF RW_X_LATCH
#define BTR_NO_LATCHES RW_NO_LATCH
#define BTR_MODIFY_TREE 33
#define BTR_CONT_MODIFY_TREE 34
#define BTR_SEARCH_PREV 35
#define BTR_MODIFY_PREV 36
/* The three flags below are distinct single bits (512, 1024, 2048), all
larger than the numeric mode codes above, so they can be ORed to a mode. */
/* If this is ORed to the latch mode, it means that the search tuple will be
inserted to the index, at the searched position */
#define BTR_INSERT 512
/* This flag ORed to latch mode says that we do the search in query
optimization */
#define BTR_ESTIMATE 1024
/* This flag ORed to latch mode says that we can ignore possible
UNIQUE definition on secondary indexes when we decide if we can use the
insert buffer to speed up inserts */
#define BTR_IGNORE_SEC_UNIQUE 2048
/******************************************************************
Gets the root node of a tree and x-latches it. */
UNIV_INTERN
page_t*
btr_root_get(
/*=========*/
/* out: root page, x-latched */
dict_index_t* index, /* in: index tree */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
buf_block_t*
btr_block_get(
/*==========*/
ulint space, /* in: space id */
ulint zip_size, /* in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no, /* in: page number */
ulint mode, /* in: latch mode */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
page_t*
btr_page_get(
/*=========*/
ulint space, /* in: space id */
ulint zip_size, /* in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no, /* in: page number */
ulint mode, /* in: latch mode */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Gets the index id field of a page. */
UNIV_INLINE
dulint
btr_page_get_index_id(
/*==================*/
/* out: index id */
const page_t* page); /* in: index page */
/************************************************************
Gets the node level field in an index page. */
UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
/* out: level, leaf level == 0 */
const page_t* page); /* in: index page */
/************************************************************
Gets the node level field in an index page. */
UNIV_INLINE
ulint
btr_page_get_level(
/*===============*/
/* out: level, leaf level == 0 */
const page_t* page, /* in: index page */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************
Gets the next index page number. */
UNIV_INLINE
ulint
btr_page_get_next(
/*==============*/
/* out: next page number */
const page_t* page, /* in: index page */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************
Gets the previous index page number. */
UNIV_INLINE
ulint
btr_page_get_prev(
/*==============*/
/* out: prev page number */
const page_t* page, /* in: index page */
mtr_t* mtr); /* in: mini-transaction handle */
/*****************************************************************
Gets pointer to the previous user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor. */
UNIV_INTERN
rec_t*
btr_get_prev_user_rec(
/*==================*/
/* out: previous user record, NULL if there is none */
rec_t* rec, /* in: record on leaf level */
mtr_t* mtr); /* in: mtr holding a latch on the page, and if
needed, also to the previous page */
/*****************************************************************
Gets pointer to the next user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor. */
UNIV_INTERN
rec_t*
btr_get_next_user_rec(
/*==================*/
/* out: next user record, NULL if there is none */
rec_t* rec, /* in: record on leaf level */
mtr_t* mtr); /* in: mtr holding a latch on the page, and if
needed, also to the next page */
/******************************************************************
Releases the latch on a leaf page and buffer-unfixes it. */
UNIV_INLINE
void
btr_leaf_page_release(
/*==================*/
buf_block_t* block, /* in: buffer block */
ulint latch_mode, /* in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Gets the child node file address in a node pointer. */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
/* out: child node address */
const rec_t* rec, /* in: node pointer record */
const ulint* offsets);/* in: array returned by rec_get_offsets() */
/****************************************************************
Creates the root node for a new index tree. */
UNIV_INTERN
ulint
btr_create(
/*=======*/
/* out: page number of the created root,
FIL_NULL if did not succeed */
ulint type, /* in: type of the index */
ulint space, /* in: space where created */
ulint zip_size,/* in: compressed page size in bytes
or 0 for uncompressed pages */
dulint index_id,/* in: index id */
dict_index_t* index, /* in: index */
mtr_t* mtr); /* in: mini-transaction handle */
/****************************************************************
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */
UNIV_INTERN
void
btr_free_but_not_root(
/*==================*/
ulint space, /* in: space where created */
ulint zip_size, /* in: compressed page size in bytes
or 0 for uncompressed pages */
ulint root_page_no); /* in: root page number */
/****************************************************************
Frees the B-tree root page. The rest of the tree MUST already have been freed. */
UNIV_INTERN
void
btr_free_root(
/*==========*/
ulint space, /* in: space where created */
ulint zip_size, /* in: compressed page size in bytes
or 0 for uncompressed pages */
ulint root_page_no, /* in: root page number */
mtr_t* mtr); /* in: a mini-transaction which has already
been started */
/*****************************************************************
Makes tree one level higher by splitting the root, and inserts
the tuple. It is assumed that mtr contains an x-latch on the tree.
NOTE that the operation of this function must always succeed,
we cannot reverse it: therefore enough free disk space must be
guaranteed to be available before this function is called. */
UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
/* out: inserted record */
btr_cur_t* cursor, /* in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
const dtuple_t* tuple, /* in: tuple to insert */
ulint n_ext, /* in: number of externally stored columns */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Reorganizes an index page.
IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
page of a non-clustered index, the caller must update the insert
buffer free bits in the same mini-transaction in such a way that the
modification will be redo-logged. */
UNIV_INTERN
ibool
btr_page_reorganize(
/*================*/
/* out: TRUE on success, FALSE on failure */
buf_block_t* block, /* in: page to be reorganized */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Decides if the page should be split at the convergence point of
inserts converging to left. */
UNIV_INTERN
ibool
btr_page_get_split_rec_to_left(
/*===========================*/
/* out: TRUE if split recommended */
btr_cur_t* cursor, /* in: cursor at which to insert */
rec_t** split_rec);/* out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
/*****************************************************************
Decides if the page should be split at the convergence point of
inserts converging to right. */
UNIV_INTERN
ibool
btr_page_get_split_rec_to_right(
/*============================*/
/* out: TRUE if split recommended */
btr_cur_t* cursor, /* in: cursor at which to insert */
rec_t** split_rec);/* out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
/*****************************************************************
Splits an index page to halves and inserts the tuple. It is assumed
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
is released within this function! NOTE that the operation of this
function must always succeed, we cannot reverse it: therefore
enough free disk space must be guaranteed to be available before
this function is called. */
UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
/* out: inserted record; NOTE: the tree
x-latch is released! NOTE: 2 free disk
pages must be available! */
btr_cur_t* cursor, /* in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
const dtuple_t* tuple, /* in: tuple to insert */
ulint n_ext, /* in: number of externally stored columns */
mtr_t* mtr); /* in: mtr */
/***********************************************************
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
UNIV_INTERN
void
btr_insert_on_non_leaf_level(
/*=========================*/
dict_index_t* index, /* in: index */
ulint level, /* in: level, must be > 0 */
dtuple_t* tuple, /* in: the record to be inserted */
mtr_t* mtr); /* in: mtr */
/********************************************************************
Sets a record as the predefined minimum record. */
UNIV_INTERN
void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /* in/out: record */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes on the upper level the node pointer to a page. */
UNIV_INTERN
void
btr_node_ptr_delete(
/*================*/
dict_index_t* index, /* in: index tree */
buf_block_t* block, /* in: page whose node pointer is deleted */
mtr_t* mtr); /* in: mtr */
#ifdef UNIV_DEBUG
/****************************************************************
Checks that the node pointer to a page is appropriate. */
UNIV_INTERN
ibool
btr_check_node_ptr(
/*===============*/
/* out: TRUE */
dict_index_t* index, /* in: index tree */
buf_block_t* block, /* in: index page */
mtr_t* mtr); /* in: mtr */
#endif /* UNIV_DEBUG */
/*****************************************************************
Tries to merge the page first to the left immediate brother if such a
brother exists, and the node pointers to the current page and to the
brother reside on the same page. If the left brother does not satisfy these
conditions, looks at the right brother. If the page is the only one on that
level, lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to
the brothers, if they exist. */
UNIV_INTERN
ibool
btr_compress(
/*=========*/
/* out: TRUE on success */
btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
the page must not be empty: in record delete
use btr_discard_page if the page would become
empty */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
be used for the root page, which is allowed to be empty. */
UNIV_INTERN
void
btr_discard_page(
/*=============*/
btr_cur_t* cursor, /* in: cursor on the page to discard: not on
the root page */
mtr_t* mtr); /* in: mtr */
/********************************************************************
Parses the redo log record for setting an index record as the predefined
minimum record. */
UNIV_INTERN
byte*
btr_parse_set_min_rec_mark(
/*=======================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
ulint comp, /* in: nonzero=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/***************************************************************
Parses a redo log record of reorganizing a page. */
UNIV_INTERN
byte*
btr_parse_page_reorganize(
/*======================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
buf_block_t* block, /* in: page to be reorganized, or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/******************************************************************
Gets the number of pages in a B-tree. */
UNIV_INTERN
ulint
btr_get_size(
/*=========*/
/* out: number of pages */
dict_index_t* index, /* in: index */
ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
/******************************************************************
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents! */
UNIV_INTERN
buf_block_t*
btr_page_alloc(
/*===========*/
/* out: new allocated block, x-latched;
NULL if out of space */
dict_index_t* index, /* in: index tree */
ulint hint_page_no, /* in: hint of a good page */
byte file_direction, /* in: direction where a possible
page split is made */
ulint level, /* in: level where the page is placed
in the tree */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
UNIV_INTERN
void
btr_page_free(
/*==========*/
dict_index_t* index, /* in: index tree */
buf_block_t* block, /* in: block to be freed, x-latched */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Frees a file page used in an index tree. Can also be used for BLOB
external storage pages, because the page level 0 can be given as an
argument. */
UNIV_INTERN
void
btr_page_free_low(
/*==============*/
dict_index_t* index, /* in: index tree */
buf_block_t* block, /* in: block to be freed, x-latched */
ulint level, /* in: page level */
mtr_t* mtr); /* in: mtr */
#ifdef UNIV_BTR_PRINT
/*****************************************************************
Prints size info of a B-tree. */
UNIV_INTERN
void
btr_print_size(
/*===========*/
dict_index_t* index); /* in: index tree */
/******************************************************************
Prints directories and other info of all nodes in the index. */
UNIV_INTERN
void
btr_print_index(
/*============*/
dict_index_t* index, /* in: index */
ulint width); /* in: print this many entries from start
and end */
#endif /* UNIV_BTR_PRINT */
/****************************************************************
Checks the size and number of fields in a record based on the definition of
the index. */
UNIV_INTERN
ibool
btr_index_rec_validate(
/*===================*/
/* out: TRUE if ok */
const rec_t* rec, /* in: index record */
const dict_index_t* index, /* in: index */
ibool dump_on_error); /* in: TRUE if the function
should print hex dump of record
and page on error */
/******************************************************************
Checks the consistency of an index tree. */
UNIV_INTERN
ibool
btr_validate_index(
/*===============*/
/* out: TRUE if ok */
dict_index_t* index, /* in: index */
trx_t* trx); /* in: transaction or NULL */
#define BTR_N_LEAF_PAGES 1
#define BTR_TOTAL_SIZE 2
#ifndef UNIV_NONINL
#include "btr0btr.ic"
#endif
#endif

285
include/btr0btr.ic Normal file
View File

@@ -0,0 +1,285 @@
/******************************************************
The B-tree
(c) 1994-1996 Innobase Oy
Created 6/2/1994 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "page0zip.h"
#define BTR_MAX_NODE_LEVEL 50 /* used in debug checking */
/******************************************************************
Fetches the buffer block of a page with buf_page_get(). When compiled
with UNIV_SYNC_DEBUG, the block is additionally registered at the
SYNC_TREE_NODE latching order level, unless the page was requested with
RW_NO_LATCH. */
UNIV_INLINE
buf_block_t*
btr_block_get(
/*==========*/
				/* out: block returned by buf_page_get() */
	ulint	space,		/* in: space id */
	ulint	zip_size,	/* in: compressed page size in bytes
				or 0 for uncompressed pages */
	ulint	page_no,	/* in: page number */
	ulint	mode,		/* in: latch mode */
	mtr_t*	mtr)		/* in: mtr */
{
	buf_block_t*	fetched = buf_page_get(space, zip_size, page_no,
					       mode, mtr);

#ifdef UNIV_SYNC_DEBUG
	/* Only declare a latching order level when a latch was requested */
	if (mode != RW_NO_LATCH) {
		buf_block_dbg_add_level(fetched, SYNC_TREE_NODE);
	}
#endif

	return(fetched);
}
/******************************************************************
Gets a buffer page and declares its latching order level. This is a thin
wrapper: it forwards all arguments to btr_block_get() and hands the
resulting block to buf_block_get_frame(). */
UNIV_INLINE
page_t*
btr_page_get(
/*=========*/
/* out: whatever buf_block_get_frame() yields for
the block fetched by btr_block_get() */
ulint space, /* in: space id */
ulint zip_size, /* in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no, /* in: page number */
ulint mode, /* in: latch mode */
mtr_t* mtr) /* in: mtr */
{
/* NOTE(review): btr_block_get() is passed directly as the argument of
buf_block_get_frame(). If buf_block_get_frame() is a macro that
evaluates its argument more than once, the page would be fetched more
than once -- confirm against its definition before restructuring. */
return(buf_block_get_frame(btr_block_get(space, zip_size, page_no,
mode, mtr)));
}
/******************************************************************
Stores an index id in the PAGE_INDEX_ID field of the page header. If a
compressed page descriptor is given, the 8 bytes are written to the
uncompressed frame and then passed to page_zip_write_header(); otherwise
the field is written with mlog_write_dulint(). */
UNIV_INLINE
void
btr_page_set_index_id(
/*==================*/
	page_t*		page,	/* in: page to be created */
	page_zip_des_t*	page_zip,/* in: compressed page whose uncompressed
				part will be updated, or NULL */
	dulint		id,	/* in: index id */
	mtr_t*		mtr)	/* in: mtr */
{
	/* Both branches operate on the same header field */
	page_t*	id_field = page + (PAGE_HEADER + PAGE_INDEX_ID);

	if (UNIV_LIKELY_NULL(page_zip)) {
		mach_write_to_8(id_field, id);
		page_zip_write_header(page_zip, id_field, 8, mtr);

		return;
	}

	mlog_write_dulint(id_field, id, mtr);
}
/******************************************************************
Reads the 8-byte PAGE_INDEX_ID field from the header of an index page. */
UNIV_INLINE
dulint
btr_page_get_index_id(
/*==================*/
				/* out: index id */
	const page_t*	page)	/* in: index page */
{
	const page_t*	id_field = page + PAGE_HEADER + PAGE_INDEX_ID;

	return(mach_read_from_8(id_field));
}
/************************************************************
Reads the 2-byte PAGE_LEVEL field from the header of an index page.
Debug assertions check that the page pointer is set and that the value
read does not exceed BTR_MAX_NODE_LEVEL. */
UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
				/* out: level, leaf level == 0 */
	const page_t*	page)	/* in: index page */
{
	const page_t*	level_field;
	ulint		level;

	ut_ad(page);

	level_field = page + PAGE_HEADER + PAGE_LEVEL;
	level = mach_read_from_2(level_field);

	ut_ad(level <= BTR_MAX_NODE_LEVEL);

	return(level);
}
/************************************************************
Returns the node level of an index page. The mini-transaction is only
inspected by a debug assertion; the value itself comes from
btr_page_get_level_low(). */
UNIV_INLINE
ulint
btr_page_get_level(
/*===============*/
				/* out: level, leaf level == 0 */
	const page_t*	page,	/* in: index page */
	mtr_t*		mtr __attribute__((unused)))
				/* in: mini-transaction handle */
{
	ulint	level;

	ut_ad(page && mtr);

	level = btr_page_get_level_low(page);

	return(level);
}
/************************************************************
Stores the node level in the PAGE_LEVEL field of an index page header.
If a compressed page descriptor is given, the 2 bytes are written to the
uncompressed frame and then passed to page_zip_write_header(); otherwise
the field is written with mlog_write_ulint() as MLOG_2BYTES. */
UNIV_INLINE
void
btr_page_set_level(
/*===============*/
	page_t*		page,	/* in: index page */
	page_zip_des_t*	page_zip,/* in: compressed page whose uncompressed
				part will be updated, or NULL */
	ulint		level,	/* in: level, leaf level == 0 */
	mtr_t*		mtr)	/* in: mini-transaction handle */
{
	page_t*	level_field;

	ut_ad(page && mtr);
	ut_ad(level <= BTR_MAX_NODE_LEVEL);

	/* Both branches operate on the same header field */
	level_field = page + (PAGE_HEADER + PAGE_LEVEL);

	if (UNIV_LIKELY_NULL(page_zip)) {
		mach_write_to_2(level_field, level);
		page_zip_write_header(page_zip, level_field, 2, mtr);

		return;
	}

	mlog_write_ulint(level_field, level, MLOG_2BYTES, mtr);
}
/************************************************************
Reads the 4-byte FIL_PAGE_NEXT field of an index page. A debug assertion
checks that the mini-transaction memo contains the page with either an
X-fix or an S-fix. */
UNIV_INLINE
ulint
btr_page_get_next(
/*==============*/
				/* out: next page number */
	const page_t*	page,	/* in: index page */
	mtr_t*		mtr __attribute__((unused)))
				/* in: mini-transaction handle */
{
	ulint	next_page_no;

	ut_ad(page && mtr);
	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));

	next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);

	return(next_page_no);
}
/************************************************************
Stores a page number in the FIL_PAGE_NEXT field of an index page. If a
compressed page descriptor is given, the 4 bytes are written to the
uncompressed frame and then passed to page_zip_write_header(); otherwise
the field is written with mlog_write_ulint() as MLOG_4BYTES. */
UNIV_INLINE
void
btr_page_set_next(
/*==============*/
	page_t*		page,	/* in: index page */
	page_zip_des_t*	page_zip,/* in: compressed page whose uncompressed
				part will be updated, or NULL */
	ulint		next,	/* in: next page number */
	mtr_t*		mtr)	/* in: mini-transaction handle */
{
	page_t*	next_field;

	ut_ad(page && mtr);

	/* Both branches operate on the same field */
	next_field = page + FIL_PAGE_NEXT;

	if (UNIV_LIKELY_NULL(page_zip)) {
		mach_write_to_4(next_field, next);
		page_zip_write_header(page_zip, next_field, 4, mtr);

		return;
	}

	mlog_write_ulint(next_field, next, MLOG_4BYTES, mtr);
}
/************************************************************
Reads the 4-byte FIL_PAGE_PREV field of an index page. The
mini-transaction is only inspected by a debug assertion. */
UNIV_INLINE
ulint
btr_page_get_prev(
/*==============*/
				/* out: prev page number */
	const page_t*	page,	/* in: index page */
	mtr_t*		mtr __attribute__((unused)))
				/* in: mini-transaction handle */
{
	ulint	prev_page_no;

	ut_ad(page && mtr);

	prev_page_no = mach_read_from_4(page + FIL_PAGE_PREV);

	return(prev_page_no);
}
/************************************************************
Stores a page number in the FIL_PAGE_PREV field of an index page. If a
compressed page descriptor is given, the 4 bytes are written to the
uncompressed frame and then passed to page_zip_write_header(); otherwise
the field is written with mlog_write_ulint() as MLOG_4BYTES. */
UNIV_INLINE
void
btr_page_set_prev(
/*==============*/
	page_t*		page,	/* in: index page */
	page_zip_des_t*	page_zip,/* in: compressed page whose uncompressed
				part will be updated, or NULL */
	ulint		prev,	/* in: previous page number */
	mtr_t*		mtr)	/* in: mini-transaction handle */
{
	page_t*	prev_field;

	ut_ad(page && mtr);

	/* Both branches operate on the same field */
	prev_field = page + FIL_PAGE_PREV;

	if (UNIV_LIKELY_NULL(page_zip)) {
		mach_write_to_4(prev_field, prev);
		page_zip_write_header(page_zip, prev_field, 4, mtr);

		return;
	}

	mlog_write_ulint(prev_field, prev, MLOG_4BYTES, mtr);
}
/******************************************************************
Extracts the child page number from a node pointer record. The number is
read as a 4-byte value from the last field of the record. If the value
read is 0, a diagnostic message is written to stderr and the page
containing the record is printed with buf_page_print(); the value is
returned to the caller in either case. */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
				/* out: child node address */
	const rec_t*	rec,	/* in: node pointer record */
	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
{
	const byte*	child_field;
	ulint		len;
	ulint		last_field_no;
	ulint		child_page_no;

	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));

	/* The child address is in the last field */
	last_field_no = rec_offs_n_fields(offsets) - 1;
	child_field = rec_get_nth_field(rec, offsets, last_field_no, &len);

	ut_ad(len == 4);

	child_page_no = mach_read_from_4(child_field);

	if (UNIV_UNLIKELY(child_page_no == 0)) {
		ulong	rec_offset = (ulong) page_offset(rec);

		fprintf(stderr,
			"InnoDB: a nonsensical page number 0"
			" in a node ptr record at offset %lu\n",
			rec_offset);
		buf_page_print(page_align(rec), 0);
	}

	return(child_page_no);
}
/******************************************************************
Releases the latch on a leaf page and buffer-unfixes it by removing the
matching entry from the mini-transaction memo: MTR_MEMO_PAGE_S_FIX for
BTR_SEARCH_LEAF, MTR_MEMO_PAGE_X_FIX otherwise. Debug assertions check
that the latch mode is one of the two leaf modes and that the memo does
not contain the block as MTR_MEMO_MODIFY. */
UNIV_INLINE
void
btr_leaf_page_release(
/*==================*/
	buf_block_t*	block,		/* in: buffer block */
	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
					BTR_MODIFY_LEAF */
	mtr_t*		mtr)		/* in: mtr */
{
	ulint	fix_type;

	ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
	ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));

	/* Pick the memo entry type that corresponds to the latch mode */
	if (latch_mode == BTR_SEARCH_LEAF) {
		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	mtr_memo_release(mtr, block, fix_type);
}

724
include/btr0cur.h Normal file
View File

@@ -0,0 +1,724 @@
/******************************************************
The index tree cursor
(c) 1994-1996 Innobase Oy
Created 10/16/1994 Heikki Tuuri
*******************************************************/
#ifndef btr0cur_h
#define btr0cur_h
#include "univ.i"
#include "dict0dict.h"
#include "data0data.h"
#include "page0cur.h"
#include "btr0types.h"
#include "que0types.h"
#include "row0types.h"
#include "ha0ha.h"
/* Mode flags for btr_cur operations; these can be ORed */
#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
update vector or inserted entry */
#define BTR_CUR_ADAPT
#define BTR_CUR_HASH_ADAPT
#ifdef UNIV_DEBUG
/*************************************************************
Returns the page cursor component of a tree cursor. */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
/*=================*/
/* out: pointer to page cursor
component */
const btr_cur_t* cursor);/* in: tree cursor */
#else /* UNIV_DEBUG */
# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
#endif /* UNIV_DEBUG */
/*************************************************************
Returns the buffer block on which the tree cursor is positioned. */
UNIV_INLINE
buf_block_t*
btr_cur_get_block(
/*==============*/
/* out: pointer to buffer block */
btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the record pointer of a tree cursor. */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
/* out: pointer to record */
btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the compressed page on which the tree cursor is positioned. */
UNIV_INLINE
page_zip_des_t*
btr_cur_get_page_zip(
/*=================*/
/* out: pointer to compressed page,
or NULL if the page is not compressed */
btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Invalidates a tree cursor by setting record pointer to NULL. */
UNIV_INLINE
void
btr_cur_invalidate(
/*===============*/
btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the page of a tree cursor. */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
/* out: pointer to page */
btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the index of a cursor. */
UNIV_INLINE
dict_index_t*
btr_cur_get_index(
/*==============*/
/* out: index */
btr_cur_t* cursor);/* in: B-tree cursor */
/*************************************************************
Positions a tree cursor at a given record. */
UNIV_INLINE
void
btr_cur_position(
/*=============*/
dict_index_t* index, /* in: index */
rec_t* rec, /* in: record in tree */
buf_block_t* block, /* in: buffer block of rec */
btr_cur_t* cursor);/* in: cursor */
/************************************************************************
Searches an index tree and positions a tree cursor on a given level.
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
to node pointer page number fields on the upper levels of the tree!
Note that if mode is PAGE_CUR_LE, which is used in inserts, then
cursor->up_match and cursor->low_match both will have sensible values.
If mode is PAGE_CUR_GE, then up_match will have a sensible value. */
UNIV_INTERN
void
btr_cur_search_to_nth_level(
/*========================*/
dict_index_t* index, /* in: index */
ulint level, /* in: the tree level of search */
const dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in
tuple must be set so that it cannot get
compared to the node ptr page number field! */
ulint mode, /* in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be PAGE_CUR_LE,
not PAGE_CUR_GE, as the latter may end up on
the previous page of the record! Inserts
should always be made using PAGE_CUR_LE to
search the position! */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
BTR_INSERT and BTR_ESTIMATE;
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
NOTE that if has_search_latch
is != 0, we may not have a latch set
on the cursor page, we assume
the caller uses his search latch
to protect the record! */
btr_cur_t* cursor, /* in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
ulint has_search_latch,/* in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
mtr_t* mtr); /* in: mtr */
/*********************************************************************
Opens a cursor at either end of an index. */
UNIV_INTERN
void
btr_cur_open_at_index_side(
/*=======================*/
ibool from_left, /* in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /* in: index */
ulint latch_mode, /* in: latch mode */
btr_cur_t* cursor, /* in: cursor */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INTERN
void
btr_cur_open_at_rnd_pos(
/*====================*/
dict_index_t* index, /* in: index */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /* in/out: B-tree cursor */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to perform an insert to a page in an index tree, next to cursor.
It is assumed that mtr holds an x-latch on the page. The operation does
not succeed if there is too little space on the page. If there is just
one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record. */
UNIV_INTERN
ulint
btr_cur_optimistic_insert(
/*======================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_FAIL, or error number */
ulint flags, /* in: undo logging and locking flags: if not
zero, the parameters index and thr should be
specified */
btr_cur_t* cursor, /* in: cursor on page after which to insert;
cursor stays valid */
dtuple_t* entry, /* in/out: entry to insert */
rec_t** rec, /* out: pointer to inserted record if
succeed */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
ulint n_ext, /* in: number of externally stored columns */
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr); /* in: mtr; if this function returns
DB_SUCCESS on a leaf page of a secondary
index in a compressed tablespace, the
mtr must be committed before latching
any further pages */
/*****************************************************************
Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist. */
UNIV_INTERN
ulint
btr_cur_pessimistic_insert(
/*=======================*/
/* out: DB_SUCCESS or error number */
ulint flags, /* in: undo logging and locking flags: if not
zero, the parameter thr should be
specified; if no undo logging is specified,
then the caller must have reserved enough
free extents in the file space so that the
insertion will certainly succeed */
btr_cur_t* cursor, /* in: cursor after which to insert;
cursor stays valid */
dtuple_t* entry, /* in/out: entry to insert */
rec_t** rec, /* out: pointer to inserted record if
succeed */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
ulint n_ext, /* in: number of externally stored columns */
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Updates a record when the update causes no size changes in its fields. */
UNIV_INTERN
ulint
btr_cur_update_in_place(
/*====================*/
/* out: DB_SUCCESS or error number */
ulint flags, /* in: undo logging and locking flags */
btr_cur_t* cursor, /* in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
const upd_t* update, /* in: update vector */
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr; must be committed before
latching any further pages */
/*****************************************************************
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
so that tree compression is recommended. */
UNIV_INTERN
ulint
btr_cur_optimistic_update(
/*======================*/
/* out: DB_SUCCESS, or DB_OVERFLOW if the
updated record does not fit, DB_UNDERFLOW
if the page would become too empty, or
DB_ZIP_OVERFLOW if there is not enough
space left on the compressed page */
ulint flags, /* in: undo logging and locking flags */
btr_cur_t* cursor, /* in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
const upd_t* update, /* in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr; must be committed before
latching any further pages */
/*****************************************************************
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
update is made on the leaf level, then, to avoid deadlocks, mtr must also
own x-latches on the brothers of the page, if those brothers exist. */
UNIV_INTERN
ulint
btr_cur_pessimistic_update(
/*=======================*/
/* out: DB_SUCCESS or error code */
ulint flags, /* in: undo logging, locking, and rollback
flags */
btr_cur_t* cursor, /* in: cursor on the record to update */
mem_heap_t** heap, /* in/out: pointer to memory heap, or NULL */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
const upd_t* update, /* in: update vector; this is also allowed
to contain trx id and roll ptr fields, but
the values in the update vector have no effect */
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr; must be committed before
latching any further pages */
/***************************************************************
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
of the deleting transaction, and in the roll ptr field pointer to the
undo log record created. */
UNIV_INTERN
ulint
btr_cur_del_mark_set_clust_rec(
/*===========================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
number */
ulint flags, /* in: undo logging and locking flags */
btr_cur_t* cursor, /* in: cursor */
ibool val, /* in: value to set */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
/***************************************************************
Sets a secondary index record delete mark to TRUE or FALSE. */
UNIV_INTERN
ulint
btr_cur_del_mark_set_sec_rec(
/*=========================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
number */
ulint flags, /* in: locking flag */
btr_cur_t* cursor, /* in: cursor */
ibool val, /* in: value to set */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
/***************************************************************
Sets a secondary index record delete mark to FALSE. This function is
only used by the insert buffer insert merge mechanism. */
UNIV_INTERN
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
rec_t* rec, /* in/out: record to delete unmark */
page_zip_des_t* page_zip, /* in/out: compressed page
corresponding to rec, or NULL
when the tablespace is
uncompressed */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done! */
UNIV_INTERN
ibool
btr_cur_compress_if_useful(
/*=======================*/
/* out: TRUE if compression occurred */
btr_cur_t* cursor, /* in: cursor on the page to compress;
cursor does not stay valid if compression
occurs */
mtr_t* mtr); /* in: mtr */
/***********************************************************
Removes the record on which the tree cursor is positioned. It is assumed
that the mtr has an x-latch on the page where the cursor is positioned,
but no latch on the whole tree. */
UNIV_INTERN
ibool
btr_cur_optimistic_delete(
/*======================*/
/* out: TRUE if success, i.e., the page
did not become too empty */
btr_cur_t* cursor, /* in: cursor on the record to delete;
cursor stays valid: if deletion succeeds,
on function exit it points to the successor
of the deleted record */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Removes the record on which the tree cursor is positioned. Tries
to compress the page if its fillfactor drops below a threshold
or if it is the only page on the level. It is assumed that mtr holds
an x-latch on the tree and on the cursor page. To avoid deadlocks,
mtr must also own x-latches to brothers of page, if those brothers
exist. */
UNIV_INTERN
ibool
btr_cur_pessimistic_delete(
/*=======================*/
/* out: TRUE if compression occurred */
ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
these may actually grow in size */
ibool has_reserved_extents, /* in: TRUE if the
caller has already reserved enough free
extents so that he knows that the operation
will succeed */
btr_cur_t* cursor, /* in: cursor on the record to delete;
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
ibool in_rollback,/* in: TRUE if called in rollback */
mtr_t* mtr); /* in: mtr */
/***************************************************************
Parses a redo log record of updating a record in-place. */
UNIV_INTERN
byte*
btr_cur_parse_update_in_place(
/*==========================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index); /* in: index corresponding to page */
/********************************************************************
Parses the redo log record for delete marking or unmarking of a clustered
index record. */
UNIV_INTERN
byte*
btr_cur_parse_del_mark_set_clust_rec(
/*=================================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index); /* in: index corresponding to page */
/********************************************************************
Parses the redo log record for delete marking or unmarking of a secondary
index record. */
UNIV_INTERN
byte*
btr_cur_parse_del_mark_set_sec_rec(
/*===============================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip);/* in/out: compressed page, or NULL */
/***********************************************************************
Estimates the number of rows in a given index range. */
UNIV_INTERN
ib_int64_t
btr_estimate_n_rows_in_range(
/*=========================*/
/* out: estimated number of rows */
dict_index_t* index, /* in: index */
const dtuple_t* tuple1, /* in: range start, may also be empty tuple */
ulint mode1, /* in: search mode for range start */
const dtuple_t* tuple2, /* in: range end, may also be empty tuple */
ulint mode2); /* in: search mode for range end */
/***********************************************************************
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
UNIV_INTERN
void
btr_estimate_number_of_different_key_vals(
/*======================================*/
dict_index_t* index); /* in: index */
/***********************************************************************
Marks not updated extern fields as not-owned by this record. The ownership
is transferred to the updated record which is inserted elsewhere in the
index tree. In purge only the owner of externally stored field is allowed
to free the field. */
UNIV_INTERN
void
btr_cur_mark_extern_inherited_fields(
/*=================================*/
page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
part will be updated, or NULL */
rec_t* rec, /* in/out: record in a clustered index */
dict_index_t* index, /* in: index of the page */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
const upd_t* update, /* in: update vector */
mtr_t* mtr); /* in: mtr, or NULL if not logged */
/***********************************************************************
The complement of the previous function: in an update, an entry may inherit
some externally stored fields from a record. We must mark them as inherited
in the entry, so that they are not freed in a rollback. */
UNIV_INTERN
void
btr_cur_mark_dtuple_inherited_extern(
/*=================================*/
dtuple_t* entry, /* in/out: updated entry to be
inserted to clustered index */
const upd_t* update); /* in: update vector */
/***********************************************************************
Marks all extern fields in a dtuple as owned by the record. */
UNIV_INTERN
void
btr_cur_unmark_dtuple_extern_fields(
/*================================*/
dtuple_t* entry); /* in/out: clustered index entry */
/***********************************************************************
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree. */
UNIV_INTERN
ulint
btr_store_big_rec_extern_fields(
/*============================*/
/* out: DB_SUCCESS or error */
dict_index_t* index, /* in: index of rec; the index tree
MUST be X-latched */
buf_block_t* rec_block, /* in/out: block containing rec */
rec_t* rec, /* in: record */
const ulint* offsets, /* in: rec_get_offsets(rec, index);
the "external storage" flags in offsets
will not correspond to rec when
this function returns */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
mtr_t* local_mtr); /* in: mtr containing the latch to
rec and to the tree */
/***********************************************************************
Frees the space in an externally stored field to the file space
management, if the field in data owns the externally stored field.
In a rollback we may have the additional condition that the field must
not be inherited.
NOTE(review): 'data' in these comments presumably refers to the field
pointed to by field_ref -- confirm. */
UNIV_INTERN
void
btr_free_externally_stored_field(
/*=============================*/
dict_index_t* index, /* in: index of the data, the index
tree MUST be X-latched; if the tree
height is 1, then also the root page
must be X-latched! (this is relevant
in the case this function is called
from purge where 'data' is located on
an undo log page, not an index
page) */
byte* field_ref, /* in/out: field reference */
const rec_t* rec, /* in: record containing field_ref, for
page_zip_write_blob_ptr(), or NULL */
const ulint* offsets, /* in: rec_get_offsets(rec, index),
or NULL */
page_zip_des_t* page_zip, /* in: compressed page corresponding
to rec, or NULL if rec == NULL */
ulint i, /* in: field number of field_ref;
ignored if rec == NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
mtr_t* local_mtr); /* in: mtr containing the latch to
data and an X-latch to the index
tree */
/***********************************************************************
Copies the prefix of an externally stored field of a record. The
clustered index record must be protected by a lock or a page latch. */
UNIV_INTERN
ulint
btr_copy_externally_stored_field_prefix(
/*====================================*/
/* out: the length of the copied field */
byte* buf, /* out: the field, or a prefix of it */
ulint len, /* in: length of buf, in bytes */
ulint zip_size,/* in: nonzero=compressed BLOB page size,
zero for uncompressed BLOBs */
const byte* data, /* in: 'internally' stored part of the
field containing also the reference to
the external part; must be protected by
a lock or a page latch */
ulint local_len);/* in: length of data, in bytes */
/***********************************************************************
Copies an externally stored field of a record to mem heap. */
UNIV_INTERN
byte*
btr_rec_copy_externally_stored_field(
/*=================================*/
/* out: the field copied to heap */
const rec_t* rec, /* in: record in a clustered index;
must be protected by a lock or a page latch */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint zip_size,/* in: nonzero=compressed BLOB page size,
zero for uncompressed BLOBs */
ulint no, /* in: field number */
ulint* len, /* out: length of the field */
mem_heap_t* heap); /* in: mem heap */
/***********************************************************************
Flags the data tuple fields that are marked as extern storage in the
update vector. We use this function to remember which fields we must
mark as extern storage in a record inserted for an update. */
UNIV_INTERN
ulint
btr_push_update_extern_fields(
/*==========================*/
/* out: number of flagged external columns */
dtuple_t* tuple, /* in/out: data tuple */
const upd_t* update, /* in: update vector */
mem_heap_t* heap) /* in: memory heap */
__attribute__((nonnull));
/*######################################################################*/
/* In the pessimistic delete, if the page data size drops below this
limit, merging it to a neighbor is tried */
#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2)
/* A slot in the path array, in which we store info on a search path down
the tree. Each slot contains data on a single level of the tree; the end of
the array is marked by a slot whose nth_rec is ULINT_UNDEFINED. */
typedef struct btr_path_struct btr_path_t;
struct btr_path_struct{
ulint nth_rec; /* index of the record
where the page cursor stopped on
this level (index in alphabetical
order); the value ULINT_UNDEFINED
denotes the end of the array */
ulint n_recs; /* number of records on the page */
};
#define BTR_PATH_ARRAY_N_SLOTS 250 /* size of path array (in slots) */
/* The tree cursor: the definition appears here only for the compiler
to know the struct size! */
struct btr_cur_struct {
dict_index_t* index; /* index where positioned */
page_cur_t page_cur; /* page cursor */
buf_block_t* left_block; /* this field is used to store
a pointer to the left neighbor
page, in the cases
BTR_SEARCH_PREV and
BTR_MODIFY_PREV */
/*------------------------------*/
que_thr_t* thr; /* this field is only used when
btr_cur_search_... is called for an
index entry insertion: the calling
query thread is passed here to be
used in the insert buffer */
/*------------------------------*/
/* The following fields are used in btr_cur_search... to pass
information: */
ulint flag; /* the search method that was used:
BTR_CUR_HASH, BTR_CUR_HASH_FAIL,
BTR_CUR_BINARY, or
BTR_CUR_INSERT_TO_IBUF */
ulint tree_height; /* Tree height if the search is done
for a pessimistic insert or update
operation */
ulint up_match; /* If the search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record to the right of
the cursor record after
btr_cur_search_...;
for the mode PAGE_CUR_GE, the matched
fields to the first user record AT THE
CURSOR or to the right of it;
NOTE that the up_match and low_match
values may exceed the correct values
for comparison to the adjacent user
record if that record is on a
different leaf page! (See the note in
row_ins_duplicate_key.) */
ulint up_bytes; /* number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
defined after the search */
ulint low_match; /* if search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record AT THE CURSOR or
to the left of it after
btr_cur_search_...;
NOT defined for PAGE_CUR_GE or any
other search modes; see also the NOTE
in up_match! */
ulint low_bytes; /* number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
defined after the search.
NOTE(review): this wording is identical
to that of up_bytes; presumably "to the
left" is meant here -- confirm */
ulint n_fields; /* prefix length used in a hash
search if hash_node != NULL.
NOTE(review): no hash_node member is
visible in this struct -- confirm what
this condition refers to */
ulint n_bytes; /* hash prefix bytes if hash_node !=
NULL (see the review note on
n_fields) */
ulint fold; /* fold value used in the search if
flag is BTR_CUR_HASH */
/*------------------------------*/
btr_path_t* path_arr; /* in estimating the number of
rows in range, we store in this array
information of the path through
the tree */
};
/* Values for the flag documenting the used search method */
#define BTR_CUR_HASH 1 /* successful shortcut using the hash
index */
#define BTR_CUR_HASH_FAIL 2 /* failure using hash, success using
binary search: the misleading hash
reference is stored in the field
hash_node, and might be necessary to
update */
#define BTR_CUR_BINARY 3 /* success using the binary search */
#define BTR_CUR_INSERT_TO_IBUF 4 /* performed the intended insert to
the insert buffer */
/* If pessimistic delete fails because of lack of file space,
there is still a good chance of success a little later: try this many times,
and sleep this many microseconds in between */
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
#define BTR_CUR_RETRY_SLEEP_TIME 50000
/* The reference in a field for which data is stored on a different page.
The reference is at the end of the 'locally' stored part of the field.
'Locally' means storage in the index record.
We store locally a long enough prefix of each column so that we can determine
the ordering parts of each index record without looking into the externally
stored part. */
/*--------------------------------------*/
#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */
#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */
#define BTR_EXTERN_OFFSET 8 /* offset of BLOB header
on that page */
#define BTR_EXTERN_LEN 12 /* 8 bytes containing the
length of the externally
stored part of the BLOB.
The 2 highest bits are
reserved to the flags below. */
/*--------------------------------------*/
/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */
/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte
at lowest address) is set to 1 if this field does not 'own' the externally
stored field; only the owner field is allowed to free the field in purge!
If the 2nd highest bit is 1 then it means that the externally stored field
was inherited from an earlier version of the row. In rollback we are not
allowed to free an inherited external field. */
#define BTR_EXTERN_OWNER_FLAG 128
#define BTR_EXTERN_INHERITED_FLAG 64
extern ulint btr_cur_n_non_sea;
extern ulint btr_cur_n_sea;
extern ulint btr_cur_n_non_sea_old;
extern ulint btr_cur_n_sea_old;
#ifndef UNIV_NONINL
#include "btr0cur.ic"
#endif
#endif

184
include/btr0cur.ic Normal file
View File

@@ -0,0 +1,184 @@
/******************************************************
The index tree cursor
(c) 1994-1996 Innobase Oy
Created 10/16/1994 Heikki Tuuri
*******************************************************/
#include "btr0btr.h"
#ifdef UNIV_DEBUG
/*************************************************************
Gives access to the page cursor embedded in a tree cursor. The constness
of the argument is cast away, so the result is a writable pointer. */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
/*=================*/
					/* out: pointer to the page cursor
					component */
	const btr_cur_t*	cursor)	/* in: tree cursor */
{
	btr_cur_t*	writable_cursor = (btr_cur_t*) cursor;

	return(&writable_cursor->page_cur);
}
#endif /* UNIV_DEBUG */
/*************************************************************
Looks up the buffer block of the page cursor that belongs to the given
tree cursor. */
UNIV_INLINE
buf_block_t*
btr_cur_get_block(
/*==============*/
				/* out: pointer to buffer block */
	btr_cur_t*	cursor)	/* in: tree cursor */
{
	page_cur_t*	page_cursor = btr_cur_get_page_cur(cursor);

	return(page_cur_get_block(page_cursor));
}
/*************************************************************
Fetches the record pointer held by the page cursor of a tree cursor. */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
				/* out: pointer to record */
	btr_cur_t*	cursor)	/* in: tree cursor */
{
	page_cur_t*	page_cursor = &cursor->page_cur;

	return(page_cur_get_rec(page_cursor));
}
/*************************************************************
Fetches the compressed page descriptor of the buffer block on which the
tree cursor is positioned. */
UNIV_INLINE
page_zip_des_t*
btr_cur_get_page_zip(
/*=================*/
				/* out: pointer to compressed page,
				or NULL if the page is not compressed */
	btr_cur_t*	cursor)	/* in: tree cursor */
{
	buf_block_t*	block = btr_cur_get_block(cursor);

	return(buf_block_get_page_zip(block));
}
/*************************************************************
Invalidates a tree cursor by invalidating its embedded page cursor. */
UNIV_INLINE
void
btr_cur_invalidate(
/*===============*/
	btr_cur_t*	cursor)	/* in: tree cursor */
{
	page_cur_t*	page_cursor = &cursor->page_cur;

	page_cur_invalidate(page_cursor);
}
/*************************************************************
Finds the page of a tree cursor by aligning the record pointer of its
page cursor with page_align(). */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
				/* out: pointer to page */
	btr_cur_t*	cursor)	/* in: tree cursor */
{
	rec_t*	rec = page_cur_get_rec(&cursor->page_cur);

	return(page_align(rec));
}
/*************************************************************
Reads the index member of a B-tree cursor. */
UNIV_INLINE
dict_index_t*
btr_cur_get_index(
/*==============*/
				/* out: index */
	btr_cur_t*	cursor)	/* in: B-tree cursor */
{
	dict_index_t*	index = cursor->index;

	return(index);
}
/*************************************************************
Places a tree cursor on a given record: stores the index in the cursor and
positions the embedded page cursor on the record and its block. */
UNIV_INLINE
void
btr_cur_position(
/*=============*/
	dict_index_t*	index,	/* in: index */
	rec_t*		rec,	/* in: record in tree */
	buf_block_t*	block,	/* in: buffer block of rec */
	btr_cur_t*	cursor)	/* out: cursor */
{
	page_cur_t*	page_cursor;

	/* The record must lie within the frame of the given block. */
	ut_ad(page_align(rec) == block->frame);

	cursor->index = index;

	page_cursor = btr_cur_get_page_cur(cursor);
	page_cur_position(rec, block, page_cursor);
}
/*************************************************************************
Tells whether it makes sense to compress the index page on which a btr
cursor is placed. The caller's mtr is expected to hold an x-fix on the
cursor block (checked with a debug assertion). */
UNIV_INLINE
ibool
btr_cur_compress_recommendation(
/*============================*/
				/* out: TRUE if compression is recommended */
	btr_cur_t*	cursor,	/* in: btr cursor */
	mtr_t*		mtr)	/* in: mtr */
{
	page_t*	page;

	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
				MTR_MEMO_PAGE_X_FIX));

	page = btr_cur_get_page(cursor);

	if (page_get_data_size(page) >= BTR_CUR_PAGE_COMPRESS_LIMIT
	    && (btr_page_get_next(page, mtr) != FIL_NULL
		|| btr_page_get_prev(page, mtr) != FIL_NULL)) {

		/* The page data size has not dropped below the
		predefined limit AND the page has at least one neighbor
		on its level: no compression is recommended. */

		return(FALSE);
	}

	/* Either the page fillfactor has dropped below the predefined
	minimum value or the level in the B-tree contains just one page:
	we recommend compression if this is not the root page. */

	return(dict_index_get_page(cursor->index)
	       != page_get_page_no(page));
}
/*************************************************************************
Tells whether the record under the cursor can be removed without tree
compression becoming necessary (or recommended). The caller's mtr is
expected to hold an x-fix on the cursor block (checked with a debug
assertion). */
UNIV_INLINE
ibool
btr_cur_can_delete_without_compress(
/*================================*/
				/* out: TRUE if can be deleted without
				recommended compression */
	btr_cur_t*	cursor,	/* in: btr cursor */
	ulint		rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/
	mtr_t*		mtr)	/* in: mtr */
{
	page_t*	page;

	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
				MTR_MEMO_PAGE_X_FIX));

	page = btr_cur_get_page(cursor);

	/* rec_size is documented as the size of the record under the
	cursor, which lives on this page, so the subtraction below is
	not expected to wrap around. */

	if (page_get_data_size(page) - rec_size >= BTR_CUR_PAGE_COMPRESS_LIMIT
	    && (btr_page_get_next(page, mtr) != FIL_NULL
		|| btr_page_get_prev(page, mtr) != FIL_NULL)
	    && page_get_n_recs(page) >= 2) {

		/* After the delete the page stays at or above the
		predefined limit, it has a neighbor on its level, and it
		will not become empty. */

		return(TRUE);
	}

	/* The page fillfactor will drop below a predefined minimum value,
	OR the level in the B-tree contains just one page, OR the page
	will become empty: compression would be recommended, unless this
	is the root page. */

	return(dict_index_get_page(cursor->index)
	       == page_get_page_no(page));
}

529
include/btr0pcur.h Normal file
View File

@@ -0,0 +1,529 @@
/******************************************************
The index tree persistent cursor
(c) 1996 Innobase Oy
Created 2/23/1996 Heikki Tuuri
*******************************************************/
#ifndef btr0pcur_h
#define btr0pcur_h
#include "univ.i"
#include "dict0dict.h"
#include "data0data.h"
#include "mtr0mtr.h"
#include "page0cur.h"
#include "btr0cur.h"
#include "btr0btr.h"
#include "btr0types.h"
/* Relative positions for a stored cursor position */
#define BTR_PCUR_ON 1
#define BTR_PCUR_BEFORE 2
#define BTR_PCUR_AFTER 3
/* Note that if the tree is not empty, btr_pcur_store_position does not
use the following, but only uses the above three alternatives, where the
position is stored relative to a specific record: this makes implementation
of a scroll cursor easier */
#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */
#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */
/******************************************************************
Allocates memory for a persistent cursor object and initializes the cursor. */
UNIV_INTERN
btr_pcur_t*
btr_pcur_create_for_mysql(void);
/*============================*/
/* out, own: persistent cursor */
/******************************************************************
Frees the memory for a persistent cursor object. */
UNIV_INTERN
void
btr_pcur_free_for_mysql(
/*====================*/
btr_pcur_t* cursor); /* in, own: persistent cursor */
/******************************************************************
Copies the stored position of a pcur to another pcur. */
UNIV_INTERN
void
btr_pcur_copy_stored_position(
/*==========================*/
btr_pcur_t* pcur_receive, /* in: pcur which will receive the
position info */
btr_pcur_t* pcur_donate); /* in: pcur from which the info is
copied */
/******************************************************************
Sets the old_rec_buf field to NULL. */
UNIV_INLINE
void
btr_pcur_init(
/*==========*/
btr_pcur_t* pcur); /* in: persistent cursor */
/******************************************************************
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
btr_pcur_open(
/*==========*/
dict_index_t* index, /* in: index */
const dtuple_t* tuple, /* in: tuple on which search done */
ulint mode, /* in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page from the
record! */
ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Opens a persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
void
btr_pcur_open_with_no_init(
/*=======================*/
dict_index_t* index, /* in: index */
const dtuple_t* tuple, /* in: tuple on which search done */
ulint mode, /* in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page of the
record! */
ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...;
NOTE that if has_search_latch != 0 then
we maybe do not acquire a latch on the cursor
page, but assume that the caller uses his
btr search latch to protect the record! */
btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
ulint has_search_latch,/* in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
mtr_t* mtr); /* in: mtr */
/*********************************************************************
Opens a persistent cursor at either end of an index. */
UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
ibool from_left, /* in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /* in: index */
ulint latch_mode, /* in: latch mode */
btr_pcur_t* pcur, /* in: cursor */
ibool do_init, /* in: TRUE if should be initialized */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Gets the up_match value for a pcur after a search. */
UNIV_INLINE
ulint
btr_pcur_get_up_match(
/*==================*/
/* out: number of matched fields at the cursor
or to the right if search mode was PAGE_CUR_GE,
otherwise undefined */
btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
/******************************************************************
Gets the low_match value for a pcur after a search. */
UNIV_INLINE
ulint
btr_pcur_get_low_match(
/*===================*/
/* out: number of matched fields at the cursor
or to the left if search mode was PAGE_CUR_LE,
otherwise undefined */
btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
/******************************************************************
If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
user record satisfying the search condition, in the case PAGE_CUR_L or
PAGE_CUR_LE, on the last user record. If no such user record exists, then
in the first case sets the cursor after last in tree, and in the latter case
before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
UNIV_INTERN
void
btr_pcur_open_on_user_rec(
/*======================*/
dict_index_t* index, /* in: index */
const dtuple_t* tuple, /* in: tuple on which search done */
ulint mode, /* in: PAGE_CUR_L, ... */
ulint latch_mode, /* in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /* in: memory buffer for persistent
cursor */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INLINE
void
btr_pcur_open_at_rnd_pos(
/*=====================*/
dict_index_t* index, /* in: index */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in/out: B-tree pcur */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Frees the possible old_rec_buf buffer of a persistent cursor and sets the
latch mode of the persistent cursor to BTR_NO_LATCHES. */
UNIV_INLINE
void
btr_pcur_close(
/*===========*/
btr_pcur_t* cursor); /* in: persistent cursor */
/******************************************************************
The position of the cursor is stored by taking an initial segment of the
record the cursor is positioned on, before, or after, and copying it to the
cursor data structure, or just setting a flag if the cursor is before the
first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
page where the cursor is positioned must not be empty if the index tree is
not totally empty! */
UNIV_INTERN
void
btr_pcur_store_position(
/*====================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Restores the stored position of a persistent cursor bufferfixing the page and
obtaining the specified latches. If the cursor position was saved when the
(1) cursor was positioned on a user record: this function restores the position
to the last record LESS OR EQUAL to the stored record;
(2) cursor was positioned on a page infimum record: restores the position to
the last record LESS than the user record which was the successor of the page
infimum;
(3) cursor was positioned on the page supremum: restores to the first record
GREATER than the user record which was the predecessor of the supremum.
(4) cursor was positioned before the first or after the last in an empty tree:
restores to before first or after the last in the tree. */
UNIV_INTERN
ibool
btr_pcur_restore_position(
/*======================*/
/* out: TRUE if the cursor position
was stored when it was on a user record
and it can be restored on a user record
whose ordering fields are identical to
the ones of the original user record */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in: detached persistent cursor */
mtr_t* mtr); /* in: mtr */
/******************************************************************
If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF,
releases the page latch and bufferfix reserved by the cursor.
NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
made by the current mini-transaction to the data protected by the
cursor latch, as then the latch must not be released until mtr_commit. */
UNIV_INTERN
void
btr_pcur_release_leaf(
/*==================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Gets the rel_pos field for a cursor whose position has been stored. */
UNIV_INLINE
ulint
btr_pcur_get_rel_pos(
/*=================*/
/* out: BTR_PCUR_ON, ... */
const btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Sets the mtr field for a pcur. */
UNIV_INLINE
void
btr_pcur_set_mtr(
/*=============*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in, own: mtr */
/*************************************************************
Gets the mtr field for a pcur. */
UNIV_INLINE
mtr_t*
btr_pcur_get_mtr(
/*=============*/
/* out: mtr */
btr_pcur_t* cursor); /* in: persistent cursor */
/******************************************************************
Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
to the page where pcur is positioned, this can be used instead of
btr_pcur_release_leaf. Function btr_pcur_store_position should be used
before calling this, if restoration of cursor is wanted later. */
UNIV_INLINE
void
btr_pcur_commit(
/*============*/
btr_pcur_t* pcur); /* in: persistent cursor */
/******************************************************************
Differs from btr_pcur_commit in that we can specify the mtr to commit. */
UNIV_INLINE
void
btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr to commit */
/******************************************************************
Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
UNIV_INLINE
ibool
btr_pcur_is_detached(
/*=================*/
/* out: TRUE if detached */
btr_pcur_t* pcur); /* in: persistent cursor */
/*************************************************************
Moves the persistent cursor to the next record in the tree. If no records are
left, the cursor stays 'after last in tree'. */
UNIV_INLINE
ibool
btr_pcur_move_to_next(
/*==================*/
/* out: TRUE if the cursor was not after last
in tree */
btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the previous record in the tree. If no records
are left, the cursor stays 'before first in tree'. */
UNIV_INTERN
ibool
btr_pcur_move_to_prev(
/*==================*/
/* out: TRUE if the cursor was not before first
in tree */
btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the last record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_last_on_page(
/*==========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the next user record in the tree. If no user
records are left, the cursor ends up 'after last in tree'. */
UNIV_INLINE
ibool
btr_pcur_move_to_next_user_rec(
/*===========================*/
/* out: TRUE if the cursor moved forward,
ending on a user record */
btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the first record on the next page.
Releases the latch on the current page, and bufferunfixes it.
Note that there must not be modifications on the current page,
as then the x-latch can be released only in mtr_commit. */
UNIV_INTERN
void
btr_pcur_move_to_next_page(
/*=======================*/
btr_pcur_t* cursor, /* in: persistent cursor; must be on the
last record of the current page */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor backward if it is on the first record
of the page. Releases the latch on the current page, and bufferunfixes
it. Note that to prevent a possible deadlock, the operation first
stores the position of the cursor, releases the leaf latch, acquires
necessary latches and restores the cursor position again before returning.
The alphabetical position of the cursor is guaranteed to be sensible
on return, but it may happen that the cursor is not positioned on the
last record of any page, because the structure of the tree may have
changed while the cursor had no latches. */
UNIV_INTERN
void
btr_pcur_move_backward_from_page(
/*=============================*/
btr_pcur_t* cursor, /* in: persistent cursor, must be on the
first record of the current page */
mtr_t* mtr); /* in: mtr */
#ifdef UNIV_DEBUG
/*************************************************************
Returns the btr cursor component of a persistent cursor. */
UNIV_INLINE
btr_cur_t*
btr_pcur_get_btr_cur(
/*=================*/
/* out: pointer to
btr cursor component */
const btr_pcur_t* cursor); /* in: persistent cursor */
/*************************************************************
Returns the page cursor component of a persistent cursor. */
UNIV_INLINE
page_cur_t*
btr_pcur_get_page_cur(
/*==================*/
/* out: pointer to
page cursor component */
const btr_pcur_t* cursor); /* in: persistent cursor */
#else /* UNIV_DEBUG */
# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur)
# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur)
#endif /* UNIV_DEBUG */
/*************************************************************
Returns the page of a persistent cursor. */
UNIV_INLINE
page_t*
btr_pcur_get_page(
/*==============*/
/* out: pointer to the page */
btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Returns the buffer block of a persistent cursor. */
UNIV_INLINE
buf_block_t*
btr_pcur_get_block(
/*===============*/
/* out: pointer to the block */
btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Returns the record of a persistent cursor. */
UNIV_INLINE
rec_t*
btr_pcur_get_rec(
/*=============*/
/* out: pointer to the record */
btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Checks if the persistent cursor is on a user record. */
UNIV_INLINE
ibool
btr_pcur_is_on_user_rec(
/*====================*/
const btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Checks if the persistent cursor is after the last user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_on_page(
/*===========================*/
const btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Checks if the persistent cursor is before the first user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_on_page(
/*=============================*/
const btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Checks if the persistent cursor is before the first user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_in_tree(
/*=============================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Checks if the persistent cursor is after the last user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_in_tree(
/*===========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the next record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_next_on_page(
/*==========================*/
btr_pcur_t* cursor);/* in/out: persistent cursor */
/*************************************************************
Moves the persistent cursor to the previous record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_prev_on_page(
/*==========================*/
btr_pcur_t* cursor);/* in/out: persistent cursor */
/* The persistent B-tree cursor structure. This is used mainly for SQL
selects, updates, and deletes. It embeds a B-tree cursor and adds the
bookkeeping (old_rec, rel_pos, block_when_stored, modify_clock) that
describes a stored cursor position. */
struct btr_pcur_struct{
btr_cur_t btr_cur; /* a B-tree cursor */
ulint latch_mode; /* see the TODO note at the definition of
BTR_PCUR_IS_POSITIONED below!
BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
BTR_MODIFY_TREE, or BTR_NO_LATCHES,
depending on the latching state of
the page and tree where the cursor is
positioned; the last value means that
the cursor is not currently positioned:
we say then that the cursor is
detached; it can be restored to
attached if the old position was
stored in old_rec */
ulint old_stored; /* BTR_PCUR_OLD_STORED
or BTR_PCUR_OLD_NOT_STORED; tells
whether old_rec holds a stored
position */
rec_t* old_rec; /* if cursor position is stored,
contains an initial segment of the
latest record cursor was positioned
either on, before, or after */
ulint old_n_fields; /* number of fields in old_rec */
ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
BTR_PCUR_AFTER, depending on whether
cursor was on, before, or after the
old_rec record */
buf_block_t* block_when_stored;/* buffer block when the position was
stored */
ib_uint64_t modify_clock; /* the modify clock value of the
buffer block when the cursor position
was stored */
ulint pos_state; /* see the TODO note at the definition of
BTR_PCUR_IS_POSITIONED below!
BTR_PCUR_IS_POSITIONED,
BTR_PCUR_WAS_POSITIONED,
BTR_PCUR_NOT_POSITIONED */
ulint search_mode; /* PAGE_CUR_G, ... */
trx_t* trx_if_known; /* the transaction, if we know it;
otherwise this field is not defined;
can ONLY BE USED in error prints in
fatal assertion failures! */
/*-----------------------------*/
/* NOTE that the following fields may possess dynamically allocated
memory which should be freed if not needed anymore! btr_pcur_close()
releases old_rec_buf with mem_free(). */
mtr_t* mtr; /* NULL, or this field may contain
a mini-transaction which holds the
latch on the cursor page */
byte* old_rec_buf; /* NULL, or a dynamically allocated
buffer for old_rec */
ulint buf_size; /* old_rec_buf size if old_rec_buf
is not NULL */
};
/* Values of the pos_state field of btr_pcur_struct */
#define BTR_PCUR_IS_POSITIONED 1997660512 /* TODO: currently, the state
can be BTR_PCUR_IS_POSITIONED,
though it really should be
BTR_PCUR_WAS_POSITIONED,
because we have no obligation
to commit the cursor with
mtr; similarly latch_mode may
be out of date. This can
lead to problems if btr_pcur
is not used the right way;
all current code should be
ok. */
#define BTR_PCUR_WAS_POSITIONED 1187549791
#define BTR_PCUR_NOT_POSITIONED 1328997689
/* Values of the old_stored field of btr_pcur_struct */
#define BTR_PCUR_OLD_STORED 908467085
#define BTR_PCUR_OLD_NOT_STORED 122766467
#ifndef UNIV_NONINL
#include "btr0pcur.ic"
#endif
#endif

640
include/btr0pcur.ic Normal file
View File

@@ -0,0 +1,640 @@
/******************************************************
The index tree persistent cursor
(c) 1996 Innobase Oy
Created 2/23/1996 Heikki Tuuri
*******************************************************/
/*************************************************************
Reads the rel_pos field of a persistent cursor whose position has been
stored. ut_ad assertions check that the cursor has a stored old record
(old_stored == BTR_PCUR_OLD_STORED) and that it is, or has been,
positioned. */
UNIV_INLINE
ulint
btr_pcur_get_rel_pos(
/*=================*/
					/* out: BTR_PCUR_ON, ... */
	const btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	ulint	rel_pos;

	ut_ad(cursor != NULL);
	ut_ad(cursor->old_rec != NULL);
	ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED
	      || cursor->pos_state == BTR_PCUR_WAS_POSITIONED);

	rel_pos = cursor->rel_pos;

	return(rel_pos);
}
/*************************************************************
Stores a mini-transaction pointer in the mtr field of a persistent
cursor. */
UNIV_INLINE
void
btr_pcur_set_mtr(
/*=============*/
	btr_pcur_t*	cursor,	/* in/out: persistent cursor */
	mtr_t*		mtr)	/* in, own: mtr */
{
	ut_ad(cursor != NULL);

	cursor->mtr = mtr;
}
/*************************************************************
Reads the mtr field of a persistent cursor. */
UNIV_INLINE
mtr_t*
btr_pcur_get_mtr(
/*=============*/
				/* out: mtr stored in the cursor */
	btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	mtr_t*	mtr;

	ut_ad(cursor != NULL);

	mtr = cursor->mtr;

	return(mtr);
}
#ifdef UNIV_DEBUG
/*************************************************************
Gives access to the B-tree cursor embedded in a persistent cursor.
This function form is compiled only under UNIV_DEBUG. */
UNIV_INLINE
btr_cur_t*
btr_pcur_get_btr_cur(
/*=================*/
					/* out: pointer to
					btr cursor component */
	const btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	/* The cast drops the const qualifier inherited from the
	parameter: the return type is a non-const pointer. */
	return((btr_cur_t*) &cursor->btr_cur);
}
/*************************************************************
Gives access to the page cursor of the B-tree cursor embedded in a
persistent cursor. This function form is compiled only under
UNIV_DEBUG. */
UNIV_INLINE
page_cur_t*
btr_pcur_get_page_cur(
/*==================*/
					/* out: pointer to page cursor
					component */
	const btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	btr_cur_t*	btr_cur = btr_pcur_get_btr_cur(cursor);

	return(btr_cur_get_page_cur(btr_cur));
}
#endif /* UNIV_DEBUG */
/*************************************************************
Fetches the page on which a persistent cursor is positioned. The cursor
must be in state BTR_PCUR_IS_POSITIONED (checked with ut_ad). */
UNIV_INLINE
page_t*
btr_pcur_get_page(
/*==============*/
				/* out: pointer to the page */
	btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	btr_cur_t*	btr_cur;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);

	btr_cur = btr_pcur_get_btr_cur(cursor);

	return(btr_cur_get_page(btr_cur));
}
/*************************************************************
Fetches the buffer block on which a persistent cursor is positioned.
The cursor must be in state BTR_PCUR_IS_POSITIONED (checked with
ut_ad). */
UNIV_INLINE
buf_block_t*
btr_pcur_get_block(
/*===============*/
				/* out: pointer to the block */
	btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	btr_cur_t*	btr_cur;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);

	btr_cur = btr_pcur_get_btr_cur(cursor);

	return(btr_cur_get_block(btr_cur));
}
/*************************************************************
Fetches the record on which a persistent cursor is positioned. The
cursor must be positioned and must not be detached, i.e. its latch mode
must differ from BTR_NO_LATCHES (both checked with ut_ad). */
UNIV_INLINE
rec_t*
btr_pcur_get_rec(
/*=============*/
				/* out: pointer to the record */
	btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	btr_cur_t*	btr_cur;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	btr_cur = btr_pcur_get_btr_cur(cursor);

	return(btr_cur_get_rec(btr_cur));
}
/******************************************************************
Reads the up_match value that a search left in the B-tree cursor of a
persistent cursor. The cursor must be, or have been, positioned and
up_match must not be ULINT_UNDEFINED (checked with ut_ad). */
UNIV_INLINE
ulint
btr_pcur_get_up_match(
/*==================*/
				/* out: number of matched fields at the cursor
				or to the right if search mode was PAGE_CUR_GE,
				otherwise undefined */
	btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
	      || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(btr_pcur_get_btr_cur(cursor)->up_match != ULINT_UNDEFINED);

	return(btr_pcur_get_btr_cur(cursor)->up_match);
}
/******************************************************************
Reads the low_match value that a search left in the B-tree cursor of a
persistent cursor. The cursor must be, or have been, positioned and
low_match must not be ULINT_UNDEFINED (checked with ut_ad). */
UNIV_INLINE
ulint
btr_pcur_get_low_match(
/*===================*/
				/* out: number of matched fields at the cursor
				or to the right if search mode was PAGE_CUR_LE,
				otherwise undefined */
	btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
	      || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(btr_pcur_get_btr_cur(cursor)->low_match != ULINT_UNDEFINED);

	return(btr_pcur_get_btr_cur(cursor)->low_match);
}
/*************************************************************
Tells whether the persistent cursor stands after the last user record
on its page. The cursor must be positioned and latched (checked with
ut_ad). */
UNIV_INLINE
ibool
btr_pcur_is_after_last_on_page(
/*===========================*/
					/* out: result of
					page_cur_is_after_last() for the
					page cursor */
	const btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	const page_cur_t*	page_cur;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	page_cur = btr_pcur_get_page_cur(cursor);

	return(page_cur_is_after_last(page_cur));
}
/*************************************************************
Tells whether the persistent cursor stands before the first user record
on its page. The cursor must be positioned and latched (checked with
ut_ad). */
UNIV_INLINE
ibool
btr_pcur_is_before_first_on_page(
/*=============================*/
					/* out: result of
					page_cur_is_before_first() for the
					page cursor */
	const btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	const page_cur_t*	page_cur;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	page_cur = btr_pcur_get_page_cur(cursor);

	return(page_cur_is_before_first(page_cur));
}
/*************************************************************
Tells whether the persistent cursor stands on a user record, that is,
neither before the first nor after the last user record of its page.
The cursor must be positioned and latched (checked with ut_ad). */
UNIV_INLINE
ibool
btr_pcur_is_on_user_rec(
/*====================*/
					/* out: TRUE if on a user record,
					FALSE otherwise */
	const btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	if (btr_pcur_is_before_first_on_page(cursor)) {

		return(FALSE);
	}

	if (btr_pcur_is_after_last_on_page(cursor)) {

		return(FALSE);
	}

	return(TRUE);
}
/*************************************************************
Tells whether the persistent cursor stands before the first user record
of the whole index tree: its page must have no predecessor page and the
page cursor must be before the first record on that page. The cursor
must be positioned and latched (checked with ut_ad). */
UNIV_INLINE
ibool
btr_pcur_is_before_first_in_tree(
/*=============================*/
				/* out: FALSE if the page has a previous
				page, else the result of
				page_cur_is_before_first() */
	btr_pcur_t*	cursor,	/* in: persistent cursor */
	mtr_t*		mtr)	/* in: mtr */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) == FIL_NULL) {
		/* No previous page: the answer depends only on the
		position within this page */
		return(page_cur_is_before_first(
			       btr_pcur_get_page_cur(cursor)));
	}

	return(FALSE);
}
/*************************************************************
Tells whether the persistent cursor stands after the last user record
of the whole index tree: its page must have no successor page and the
page cursor must be after the last record on that page. The cursor must
be positioned and latched (checked with ut_ad). */
UNIV_INLINE
ibool
btr_pcur_is_after_last_in_tree(
/*===========================*/
				/* out: FALSE if the page has a next page,
				else the result of
				page_cur_is_after_last() */
	btr_pcur_t*	cursor,	/* in: persistent cursor */
	mtr_t*		mtr)	/* in: mtr */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) == FIL_NULL) {
		/* No next page: the answer depends only on the
		position within this page */
		return(page_cur_is_after_last(
			       btr_pcur_get_page_cur(cursor)));
	}

	return(FALSE);
}
/*************************************************************
Advances the persistent cursor by one record within its current page
and marks any stored position as no longer valid
(old_stored = BTR_PCUR_OLD_NOT_STORED). */
UNIV_INLINE
void
btr_pcur_move_to_next_on_page(
/*==========================*/
	btr_pcur_t*	cursor)	/* in/out: persistent cursor */
{
	page_cur_t*	page_cur;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	page_cur = btr_pcur_get_page_cur(cursor);
	page_cur_move_to_next(page_cur);

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*************************************************************
Steps the persistent cursor back by one record within its current page
and marks any stored position as no longer valid
(old_stored = BTR_PCUR_OLD_NOT_STORED). */
UNIV_INLINE
void
btr_pcur_move_to_prev_on_page(
/*==========================*/
	btr_pcur_t*	cursor)	/* in/out: persistent cursor */
{
	page_cur_t*	page_cur;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	page_cur = btr_pcur_get_page_cur(cursor);
	page_cur_move_to_prev(page_cur);

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*************************************************************
Moves the persistent cursor to the end of its current page by calling
page_cur_set_after_last(), and marks any stored position as no longer
valid (old_stored = BTR_PCUR_OLD_NOT_STORED). */
UNIV_INLINE
void
btr_pcur_move_to_last_on_page(
/*==========================*/
	btr_pcur_t*	cursor,	/* in/out: persistent cursor */
	mtr_t*		mtr)	/* in: mtr; not used by this function */
{
	buf_block_t*	block;
	page_cur_t*	page_cur;

	UT_NOT_USED(mtr);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	block = btr_pcur_get_block(cursor);
	page_cur = btr_pcur_get_page_cur(cursor);

	page_cur_set_after_last(block, page_cur);

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*************************************************************
Advances the persistent cursor until it stands on a user record,
crossing to following pages when the end of a page is reached. If no
user records are left, the cursor ends up 'after last in tree'. Any
stored position is marked as no longer valid. */
UNIV_INLINE
ibool
btr_pcur_move_to_next_user_rec(
/*===========================*/
				/* out: TRUE if the cursor moved forward,
				ending on a user record */
	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
				function may release the page latch */
	mtr_t*		mtr)	/* in: mtr */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	for (;;) {
		if (btr_pcur_is_after_last_on_page(cursor)) {

			if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
				/* End of the index reached */
				return(FALSE);
			}

			btr_pcur_move_to_next_page(cursor, mtr);
		} else {
			btr_pcur_move_to_next_on_page(cursor);
		}

		if (btr_pcur_is_on_user_rec(cursor)) {

			return(TRUE);
		}

		/* Not on a user record yet: take another step */
	}
}
/*************************************************************
Advances the persistent cursor by one record in the tree, moving to the
next page when the cursor is already after the last record of its page.
If no records are left, the cursor stays 'after last in tree'. Any
stored position is marked as no longer valid. */
UNIV_INLINE
ibool
btr_pcur_move_to_next(
/*==================*/
				/* out: TRUE if the cursor was not after last
				in tree */
	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
				function may release the page latch */
	mtr_t*		mtr)	/* in: mtr */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	if (!btr_pcur_is_after_last_on_page(cursor)) {
		/* Common case: there is still a record on this page */
		btr_pcur_move_to_next_on_page(cursor);

		return(TRUE);
	}

	if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {

		return(FALSE);
	}

	btr_pcur_move_to_next_page(cursor, mtr);

	return(TRUE);
}
/******************************************************************
Commits the mini-transaction stored in the pcur (pcur->mtr) and detaches
the cursor: the latch mode becomes BTR_NO_LATCHES and the position state
BTR_PCUR_WAS_POSITIONED. If there have been modifications to the page
where pcur is positioned, this can be used instead of
btr_pcur_release_leaf. Function btr_pcur_store_position should be used
before calling this, if restoration of cursor is wanted later. */
UNIV_INLINE
void
btr_pcur_commit(
/*============*/
	btr_pcur_t*	pcur)	/* in/out: persistent cursor; must be in
				state BTR_PCUR_IS_POSITIONED (ut_a) */
{
	mtr_t*	mtr;

	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);

	pcur->latch_mode = BTR_NO_LATCHES;

	mtr = pcur->mtr;
	mtr_commit(mtr);

	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/******************************************************************
Commits the given mini-transaction and detaches the persistent cursor:
latch_mode is set to BTR_NO_LATCHES and pos_state to
BTR_PCUR_WAS_POSITIONED. Differs from btr_pcur_commit in that the mtr
to commit is passed in explicitly instead of being read from
pcur->mtr. */
UNIV_INLINE
void
btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /* in/out: persistent cursor; must be in state
BTR_PCUR_IS_POSITIONED */
mtr_t* mtr) /* in: mtr to commit */
{
/* Checked with ut_a rather than the ut_ad used by most other functions
in this file */
ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
pcur->latch_mode = BTR_NO_LATCHES;
mtr_commit(mtr);
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/******************************************************************
Marks a persistent cursor as detached without committing any
mini-transaction: the position state becomes BTR_PCUR_WAS_POSITIONED
and the latch mode BTR_NO_LATCHES. */
UNIV_INLINE
void
btr_pcur_detach(
/*============*/
	btr_pcur_t*	pcur)	/* in/out: persistent cursor; must be in
				state BTR_PCUR_IS_POSITIONED (ut_a) */
{
	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);

	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
	pcur->latch_mode = BTR_NO_LATCHES;
}
/******************************************************************
Tells whether a persistent cursor is detached, i.e. whether its latch
mode is BTR_NO_LATCHES. */
UNIV_INLINE
ibool
btr_pcur_is_detached(
/*=================*/
				/* out: TRUE if detached */
	btr_pcur_t*	pcur)	/* in: persistent cursor */
{
	return(pcur->latch_mode == BTR_NO_LATCHES ? TRUE : FALSE);
}
/******************************************************************
Resets the stored-position fields of a persistent cursor: old_rec and
old_rec_buf are set to NULL and old_stored to BTR_PCUR_OLD_NOT_STORED.
No other field is touched, and a previously allocated old_rec_buf is
not freed here. */
UNIV_INLINE
void
btr_pcur_init(
/*==========*/
	btr_pcur_t*	pcur)	/* in/out: persistent cursor */
{
	pcur->old_rec = NULL;
	pcur->old_rec_buf = NULL;
	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/******************************************************************
Initializes a persistent cursor with btr_pcur_init() and positions it in
an index tree by searching for a tuple. The cursor should be closed
with btr_pcur_close. */
UNIV_INLINE
void
btr_pcur_open(
/*==========*/
	dict_index_t*	index,	/* in: index */
	const dtuple_t*	tuple,	/* in: tuple on which search done */
	ulint		mode,	/* in: PAGE_CUR_L, ...;
				NOTE that if the search is made using a unique
				prefix of a record, mode should be
				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
				may end up on the previous page from the
				record! */
	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ... */
	btr_pcur_t*	cursor,	/* in: memory buffer for persistent cursor */
	mtr_t*		mtr)	/* in: mtr */
{
	/* Reset the stored-position bookkeeping first */
	btr_pcur_init(cursor);

	cursor->search_mode = mode;
	cursor->latch_mode = latch_mode;

	/* Position the embedded tree cursor; the caller is treated as
	holding no btr_search_latch (has_search_latch argument is 0) */
	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
				    btr_pcur_get_btr_cur(cursor), 0, mtr);

	cursor->trx_if_known = NULL;
	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
}
/******************************************************************
Positions a persistent cursor in an index tree by searching for a
tuple, without calling btr_pcur_init() on the cursor first. */
UNIV_INLINE
void
btr_pcur_open_with_no_init(
/*=======================*/
	dict_index_t*	index,	/* in: index */
	const dtuple_t*	tuple,	/* in: tuple on which search done */
	ulint		mode,	/* in: PAGE_CUR_L, ...;
				NOTE that if the search is made using a unique
				prefix of a record, mode should be
				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
				may end up on the previous page of the
				record! */
	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ...;
				NOTE that if has_search_latch != 0 then
				we maybe do not acquire a latch on the cursor
				page, but assume that the caller uses his
				btr search latch to protect the record! */
	btr_pcur_t*	cursor,	/* in: memory buffer for persistent cursor */
	ulint		has_search_latch,/* in: latch mode the caller
				currently has on btr_search_latch:
				RW_S_LATCH, or 0 */
	mtr_t*		mtr)	/* in: mtr */
{
	cursor->search_mode = mode;
	cursor->latch_mode = latch_mode;

	/* Position the embedded tree cursor */
	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
				    btr_pcur_get_btr_cur(cursor),
				    has_search_latch, mtr);

	/* The cursor is now positioned and has no stored old position */
	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
	cursor->trx_if_known = NULL;
}
/*********************************************************************
Positions a persistent cursor at the low or the high end of an index.
The search mode recorded in the cursor is PAGE_CUR_G when opening from
the left and PAGE_CUR_L otherwise. */
UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
	ibool		from_left,	/* in: TRUE if open to the low end,
					FALSE if to the high end */
	dict_index_t*	index,		/* in: index */
	ulint		latch_mode,	/* in: latch mode */
	btr_pcur_t*	pcur,		/* in/out: cursor */
	ibool		do_init,	/* in: TRUE if btr_pcur_init()
					should be called on pcur */
	mtr_t*		mtr)		/* in: mtr */
{
	pcur->latch_mode = latch_mode;
	pcur->search_mode = from_left ? PAGE_CUR_G : PAGE_CUR_L;

	if (do_init) {
		btr_pcur_init(pcur);
	}

	btr_cur_open_at_index_side(from_left, index, latch_mode,
				   btr_pcur_get_btr_cur(pcur), mtr);

	/* The cursor is now positioned and has no stored old position */
	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
	pcur->pos_state = BTR_PCUR_IS_POSITIONED;
	pcur->trx_if_known = NULL;
}
/**************************************************************************
Initializes a persistent cursor and positions it at a randomly chosen
position within a B-tree. The search mode recorded in the cursor is
PAGE_CUR_G. */
UNIV_INLINE
void
btr_pcur_open_at_rnd_pos(
/*=====================*/
	dict_index_t*	index,		/* in: index */
	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
	btr_pcur_t*	cursor,		/* in/out: B-tree pcur */
	mtr_t*		mtr)		/* in: mtr */
{
	/* Record the requested modes, then reset the stored-position
	bookkeeping */
	cursor->search_mode = PAGE_CUR_G;
	cursor->latch_mode = latch_mode;

	btr_pcur_init(cursor);

	btr_cur_open_at_rnd_pos(index, latch_mode,
				btr_pcur_get_btr_cur(cursor), mtr);

	/* The cursor is now positioned and has no stored old position */
	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
	cursor->trx_if_known = NULL;
}
/******************************************************************
Closes a persistent cursor: frees the old_rec_buf buffer if one was
allocated, clears the page cursor and the stored position, and leaves
the cursor with latch mode BTR_NO_LATCHES in state
BTR_PCUR_NOT_POSITIONED. */
UNIV_INLINE
void
btr_pcur_close(
/*===========*/
	btr_pcur_t*	cursor)	/* in/out: persistent cursor */
{
	if (cursor->old_rec_buf != NULL) {
		mem_free(cursor->old_rec_buf);

		cursor->old_rec_buf = NULL;
	}

	/* Forget the stored position; old_rec is cleared whether or
	not a buffer was allocated for it */
	cursor->old_rec = NULL;
	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	/* Clear the embedded page cursor */
	cursor->btr_cur.page_cur.block = NULL;
	cursor->btr_cur.page_cur.rec = NULL;

	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
	cursor->latch_mode = BTR_NO_LATCHES;
	cursor->trx_if_known = NULL;
}

265
include/btr0sea.h Normal file
View File

@@ -0,0 +1,265 @@
/************************************************************************
The index tree adaptive search
(c) 1996 Innobase Oy
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#ifndef btr0sea_h
#define btr0sea_h
#include "univ.i"
#include "rem0rec.h"
#include "dict0dict.h"
#include "btr0types.h"
#include "mtr0mtr.h"
#include "ha0ha.h"
/*********************************************************************
Creates and initializes the adaptive search system at a database start. */
UNIV_INTERN
void
btr_search_sys_create(
/*==================*/
ulint hash_size); /* in: hash index hash table size */
/************************************************************************
Disable the adaptive hash search system and empty the index. */
UNIV_INTERN
void
btr_search_disable(void);
/*====================*/
/************************************************************************
Enable the adaptive hash search system. */
UNIV_INTERN
void
btr_search_enable(void);
/*====================*/
/************************************************************************
Returns search info for an index. */
UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
/* out: search info; search mutex reserved */
dict_index_t* index); /* in: index */
/*********************************************************************
Creates and initializes a search info struct. */
UNIV_INTERN
btr_search_t*
btr_search_info_create(
/*===================*/
/* out, own: search info struct */
mem_heap_t* heap); /* in: heap where created */
/*************************************************************************
Updates the search info. */
UNIV_INLINE
void
btr_search_info_update(
/*===================*/
dict_index_t* index, /* in: index of the cursor */
btr_cur_t* cursor);/* in: cursor which was just positioned */
/**********************************************************************
Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
both have sensible values. */
UNIV_INTERN
ibool
btr_search_guess_on_hash(
/*=====================*/
/* out: TRUE if succeeded */
dict_index_t* index, /* in: index */
btr_search_t* info, /* in: index search info */
const dtuple_t* tuple, /* in: logical record */
ulint mode, /* in: PAGE_CUR_L, ... */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /* out: tree cursor */
ulint has_search_latch,/* in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, RW_X_LATCH, or 0 */
mtr_t* mtr); /* in: mtr */
/************************************************************************
Moves or deletes hash entries for moved records. If new_page is already hashed,
then the hash index for page, if any, is dropped. If new_page is not hashed,
and page is hashed, then a new hash index is built to new_page with the same
parameters as page (this often happens when a page is split). */
UNIV_INTERN
void
btr_search_move_or_delete_hash_entries(
/*===================================*/
buf_block_t* new_block, /* in: records are copied
to this page */
buf_block_t* block, /* in: index page from which
records were copied, and the
copied records will be deleted
from this page */
dict_index_t* index); /* in: record descriptor */
/************************************************************************
Drops a page hash index. */
UNIV_INTERN
void
btr_search_drop_page_hash_index(
/*============================*/
buf_block_t* block); /* in: block containing index page,
s- or x-latched, or an index page
for which we know that
block->buf_fix_count == 0 */
/************************************************************************
Drops a page hash index when a page is freed from a fseg to the file system.
Drops possible hash index if the page happens to be in the buffer pool. */
UNIV_INTERN
void
btr_search_drop_page_hash_when_freed(
/*=================================*/
ulint space, /* in: space id */
ulint zip_size, /* in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no); /* in: page number */
/************************************************************************
Updates the page hash index when a single record is inserted on a page. */
UNIV_INTERN
void
btr_search_update_hash_node_on_insert(
/*==================================*/
btr_cur_t* cursor);/* in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
/************************************************************************
Updates the page hash index when a single record is inserted on a page. */
UNIV_INTERN
void
btr_search_update_hash_on_insert(
/*=============================*/
btr_cur_t* cursor);/* in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
/************************************************************************
Updates the page hash index when a single record is deleted from a page. */
UNIV_INTERN
void
btr_search_update_hash_on_delete(
/*=============================*/
btr_cur_t* cursor);/* in: cursor which was positioned on the
record to delete using btr_cur_search_...,
the record is not yet deleted */
/************************************************************************
Validates the search system. */
UNIV_INTERN
ibool
btr_search_validate(void);
/*======================*/
/* out: TRUE if ok */
/* Flag: has the search system been disabled? */
extern ibool btr_search_disabled;
/* The search info struct in an index. The fields fall into three groups:
unlatched heuristics counters, the recommended hash prefix parameters,
and optional statistics/debug fields that exist only when
UNIV_SEARCH_PERF_STAT or UNIV_DEBUG is defined. */
struct btr_search_struct{
/* The following fields are not protected by any latch.
Unfortunately, this means that they must be aligned to
the machine word, i.e., they cannot be turned into bit-fields. */
buf_block_t* root_guess;/* the root page frame as of the last time
it was fetched, or NULL */
ulint hash_analysis; /* when this exceeds BTR_SEARCH_HASH_ANALYSIS,
the hash analysis starts; this is reset if no
success noticed */
ibool last_hash_succ; /* TRUE if the last search would have
succeeded, or did succeed, using the hash
index; NOTE that the value here is not exact:
it is not calculated for every search, and the
calculation itself is not always accurate! */
ulint n_hash_potential;
/* number of consecutive searches
which would have succeeded, or did succeed,
using the hash index;
the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
/*----------------------*/
ulint n_fields; /* recommended prefix length for hash search:
number of full fields */
ulint n_bytes; /* recommended prefix: number of bytes in
an incomplete field;
see also BTR_PAGE_MAX_REC_SIZE */
ibool left_side; /* TRUE or FALSE, depending on whether
the leftmost record of several records with
the same prefix should be indexed in the
hash index */
/*----------------------*/
#ifdef UNIV_SEARCH_PERF_STAT
ulint n_hash_succ; /* number of successful hash searches thus
far */
ulint n_hash_fail; /* number of failed hash searches */
ulint n_patt_succ; /* number of successful pattern searches thus
far */
ulint n_searches; /* number of searches */
#endif /* UNIV_SEARCH_PERF_STAT */
#ifdef UNIV_DEBUG
ulint magic_n; /* magic number; NOTE(review): presumably
expected to equal BTR_SEARCH_MAGIC_N defined
below - confirm against the code that sets
and checks it */
# define BTR_SEARCH_MAGIC_N 1112765
#endif /* UNIV_DEBUG */
};
/* The hash index system: the container of the hash table used by the
adaptive search. A pointer to the instance is declared as btr_search_sys. */
typedef struct btr_search_sys_struct btr_search_sys_t;
struct btr_search_sys_struct{
hash_table_t* hash_index; /* the hash index; according to the
comment on btr_search_latch, the hash index is
protected by that latch */
};
extern btr_search_sys_t* btr_search_sys;
/* The latch protecting the adaptive search system: this latch protects the
(1) hash index;
(2) columns of a record to which we have a pointer in the hash index;
but does NOT protect:
(3) next record offset field in a record;
(4) next or previous records on the same page.
Bear in mind (3) and (4) when using the hash index.
*/
extern rw_lock_t* btr_search_latch_temp;
#define btr_search_latch (*btr_search_latch_temp)
#ifdef UNIV_SEARCH_PERF_STAT
extern ulint btr_search_n_succ;
extern ulint btr_search_n_hash_fail;
#endif /* UNIV_SEARCH_PERF_STAT */
/* After change in n_fields or n_bytes in info, this many rounds are waited
before starting the hash analysis again: this is to save CPU time when there
is no hope in building a hash index. */
#define BTR_SEARCH_HASH_ANALYSIS 17
/* Limit of consecutive searches for trying a search shortcut on the search
pattern */
#define BTR_SEARCH_ON_PATTERN_LIMIT 3
/* Limit of consecutive searches for trying a search shortcut using the hash
index */
#define BTR_SEARCH_ON_HASH_LIMIT 3
/* We do this many searches before trying to keep the search latch over calls
from MySQL. If we notice someone waiting for the latch, we again set this
much timeout. This is to reduce contention. */
#define BTR_SEA_TIMEOUT 10000
#ifndef UNIV_NONINL
#include "btr0sea.ic"
#endif
#endif

67
include/btr0sea.ic Normal file
View File

@@ -0,0 +1,67 @@
/************************************************************************
The index tree adaptive search
(c) 1996 Innobase Oy
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#include "dict0mem.h"
#include "btr0cur.h"
#include "buf0buf.h"
/*************************************************************************
Updates the search info. */
UNIV_INTERN
void
btr_search_info_update_slow(
/*========================*/
btr_search_t* info, /* in/out: search info */
btr_cur_t* cursor);/* in: cursor which was just positioned */
/************************************************************************
Looks up the adaptive search info attached to an index. The function only
reads index->search_info; it acquires no latch or mutex itself.
NOTE(review): the previous comment said "search mutex reserved" -
presumably that is an obligation of the caller; confirm. */
UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
				/* out: search info of the index */
	dict_index_t*	index)	/* in: index; asserted to be non-NULL
				with ut_ad() */
{
	btr_search_t*	search_info;

	ut_ad(index);

	search_info = index->search_info;

	return(search_info);
}
/*************************************************************************
Updates the search info of an index after a cursor has been positioned.
Every call increments info->hash_analysis; the slow-path update
btr_search_info_update_slow() is invoked only once that counter has
reached BTR_SEARCH_HASH_ANALYSIS. */
UNIV_INLINE
void
btr_search_info_update(
/*===================*/
	dict_index_t*	index,	/* in: index of the cursor */
	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
{
	btr_search_t*	info;

#ifdef UNIV_SYNC_DEBUG
	/* The calling thread must hold btr_search_latch in neither mode. */
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	info = btr_search_get_info(index);

	info->hash_analysis++;

	if (info->hash_analysis >= BTR_SEARCH_HASH_ANALYSIS) {
		/* The threshold has been reached: hand over to the
		slow path. */
		ut_ad(cursor->flag != BTR_CUR_HASH);

		btr_search_info_update_slow(info, cursor);
	}

	/* Below the threshold there is nothing to do. */
}

31
include/btr0types.h Normal file
View File

@@ -0,0 +1,31 @@
/************************************************************************
The index tree general types
(c) 1996 Innobase Oy
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#ifndef btr0types_h
#define btr0types_h
#include "univ.i"
#include "rem0types.h"
#include "page0types.h"
typedef struct btr_pcur_struct btr_pcur_t;
typedef struct btr_cur_struct btr_cur_t;
typedef struct btr_search_struct btr_search_t;
/* The size of a reference to data stored on a different page.
The reference is stored at the end of the prefix of the field
in the index record. */
#define BTR_EXTERN_FIELD_REF_SIZE 20
/* A BLOB field reference full of zero, for use in assertions and tests.
Initially, BLOB field references are set to zero, in
dtuple_convert_big_rec(). */
extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
#endif

73
include/buf0buddy.h Normal file
View File

@@ -0,0 +1,73 @@
/******************************************************
Binary buddy allocator for compressed pages
(c) 2006 Innobase Oy
Created December 2006 by Marko Makela
*******************************************************/
#ifndef buf0buddy_h
#define buf0buddy_h
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE
#endif
#include "univ.i"
#include "buf0types.h"
/**************************************************************************
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex. */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
/* out: allocated block,
possibly NULL if lru == NULL */
ulint size, /* in: block size, up to UNIV_PAGE_SIZE */
ibool* lru) /* in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
/**************************************************************************
Release a block. */
UNIV_INLINE
void
buf_buddy_free(
/*===========*/
void* buf, /* in: block to be freed, must not be
pointed to by the buffer pool */
ulint size) /* in: block size, up to UNIV_PAGE_SIZE */
__attribute__((nonnull));
/** Statistics of buddy blocks of a given size. One such record exists
per element of the array buf_buddy_stat[], which is declared with
BUF_BUDDY_SIZES + 1 elements and indexed by block size. */
struct buf_buddy_stat_struct {
/** Number of blocks allocated from the buddy system. */
ulint used;
/** Number of blocks relocated by the buddy system. */
ib_uint64_t relocated;
/** Total duration of block relocations, in microseconds. */
ib_uint64_t relocated_usec;
};
/** Shorthand type name for struct buf_buddy_stat_struct. */
typedef struct buf_buddy_stat_struct buf_buddy_stat_t;
/** Statistics of the buddy system, indexed by block size.
Protected by buf_pool_mutex. */
extern buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
#ifndef UNIV_NONINL
# include "buf0buddy.ic"
#endif
#endif /* buf0buddy_h */

112
include/buf0buddy.ic Normal file
View File

@@ -0,0 +1,112 @@
/******************************************************
Binary buddy allocator for compressed pages
(c) 2006 Innobase Oy
Created December 2006 by Marko Makela
*******************************************************/
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE
#endif
#include "buf0buf.h"
#include "buf0buddy.h"
#include "ut0ut.h"
#include "sync0sync.h"
/**************************************************************************
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL. */
UNIV_INTERN
void*
buf_buddy_alloc_low(
/*================*/
/* out: allocated block,
possibly NULL if lru==NULL */
ulint i, /* in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
ibool* lru) /* in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
/**************************************************************************
Deallocate a block. */
UNIV_INTERN
void
buf_buddy_free_low(
/*===============*/
void* buf, /* in: block to be freed, must not be
pointed to by the buffer pool */
ulint i) /* in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
__attribute__((nonnull));
/**************************************************************************
Maps a block size to the index of buf_pool->zip_free[]. Starting from
BUF_BUDDY_LOW (slot 0), the candidate size is doubled until it is no
longer smaller than the requested size; the number of doublings is the
slot. */
UNIV_INLINE
ulint
buf_buddy_get_slot(
/*===============*/
			/* out: index of buf_pool->zip_free[],
			or BUF_BUDDY_SIZES */
	ulint	size)	/* in: block size */
{
	ulint	slot		= 0;
	ulint	slot_size	= BUF_BUDDY_LOW;

	while (slot_size < size) {
		/* The next slot holds blocks twice as big. */
		slot_size <<= 1;
		slot++;
	}

	ut_ad(slot <= BUF_BUDDY_SIZES);

	return(slot);
}
/**************************************************************************
Allocates a block of the given size by translating the size to a slot of
buf_pool->zip_free[] and delegating to buf_buddy_alloc_low().
The calling thread must hold buf_pool_mutex and must not hold
buf_pool_zip_mutex or any block->mutex. The buf_pool_mutex may only be
released and reacquired if lru != NULL. Use this function only for
allocating compressed page frames or control blocks (buf_page_t).
Allocated control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex. */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
			/* out: allocated block,
			possibly NULL if lru == NULL */
	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
	ibool*	lru)	/* in: pointer to a variable that will be assigned
			TRUE if storage was allocated from the LRU list
			and buf_pool_mutex was temporarily released,
			or NULL if the LRU list should not be used */
{
	ulint	slot;

	ut_ad(buf_pool_mutex_own());

	slot = buf_buddy_get_slot(size);

	return(buf_buddy_alloc_low(slot, lru));
}
/**************************************************************************
Deallocates a block of the given size by translating the size to a slot
of buf_pool->zip_free[] and delegating to buf_buddy_free_low(). The
calling thread is asserted to own buf_pool_mutex. */
UNIV_INLINE
void
buf_buddy_free(
/*===========*/
	void*	buf,	/* in: block to be freed, must not be
			pointed to by the buffer pool */
	ulint	size)	/* in: block size, up to UNIV_PAGE_SIZE */
{
	ulint	slot;

	ut_ad(buf_pool_mutex_own());

	slot = buf_buddy_get_slot(size);

	buf_buddy_free_low(buf, slot);
}
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
#endif

1397
include/buf0buf.h Normal file

File diff suppressed because it is too large Load Diff

1058
include/buf0buf.ic Normal file

File diff suppressed because it is too large Load Diff

141
include/buf0flu.h Normal file
View File

@@ -0,0 +1,141 @@
/******************************************************
The database buffer pool flush algorithm
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0flu_h
#define buf0flu_h
#include "univ.i"
#include "buf0types.h"
#include "ut0byte.h"
#include "mtr0types.h"
/************************************************************************
Inserts a modified block into the flush list. */
UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
buf_page_t* bpage); /* in: block which is modified */
/************************************************************************
Remove a block from the flush list of modified blocks. */
UNIV_INTERN
void
buf_flush_remove(
/*=============*/
buf_page_t* bpage); /* in: pointer to the block in question */
/************************************************************************
Updates the flush system data structures when a write is completed. */
UNIV_INTERN
void
buf_flush_write_complete(
/*=====================*/
buf_page_t* bpage); /* in: pointer to the block in question */
/*************************************************************************
Flushes pages from the end of the LRU list if there is too small
a margin of replaceable pages there. */
UNIV_INTERN
void
buf_flush_free_margin(void);
/*=======================*/
/************************************************************************
Initializes a page for writing to the tablespace. */
UNIV_INTERN
void
buf_flush_init_for_writing(
/*=======================*/
byte* page, /* in/out: page */
void* page_zip_, /* in/out: compressed page, or NULL */
ib_uint64_t newest_lsn); /* in: newest modification lsn
to the page */
/***********************************************************************
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages! */
UNIV_INTERN
ulint
buf_flush_batch(
/*============*/
/* out: number of blocks for which the
write request was queued;
ULINT_UNDEFINED if there was a flush
of the same type already running */
enum buf_flush flush_type, /* in: BUF_FLUSH_LRU or
BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
then the caller must not own any
latches on pages */
ulint min_n, /* in: wished minimum number of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
ib_uint64_t lsn_limit); /* in: in the case of BUF_FLUSH_LIST, all
blocks whose oldest_modification is
smaller than this should be flushed
(if their number does not exceed
min_n); otherwise ignored */
/**********************************************************************
Waits until a flush batch of the given type ends */
UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
enum buf_flush type); /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
/************************************************************************
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it is not
already in it. */
UNIV_INLINE
void
buf_flush_note_modification(
/*========================*/
buf_block_t* block, /* in: block which is modified */
mtr_t* mtr); /* in: mtr */
/************************************************************************
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /* in: block which is modified */
ib_uint64_t start_lsn, /* in: start lsn of the first mtr in a
set of mtr's */
ib_uint64_t end_lsn); /* in: end lsn of the last mtr in the
set of mtr's */
/************************************************************************
Returns TRUE if the file page block is immediately suitable for replacement,
i.e., transition FILE_PAGE => NOT_USED allowed. */
UNIV_INTERN
ibool
buf_flush_ready_for_replace(
/*========================*/
/* out: TRUE if can replace immediately */
buf_page_t* bpage); /* in: buffer control block, must be
buf_page_in_file(bpage) and in the LRU list */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************
Validates the flush list. */
UNIV_INTERN
ibool
buf_flush_validate(void);
/*====================*/
/* out: TRUE if ok */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/* When buf_flush_free_margin is called, it tries to make this many blocks
available to replacement in the free list and at the end of the LRU list (to
make sure that a read-ahead batch can be read efficiently in a single
sweep). */
#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
#ifndef UNIV_NONINL
#include "buf0flu.ic"
#endif
#endif

97
include/buf0flu.ic Normal file
View File

@@ -0,0 +1,97 @@
/******************************************************
The database buffer pool flush algorithm
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "buf0buf.h"
#include "mtr0mtr.h"
/************************************************************************
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_page_t* bpage); /* in: block which is modified */
/************************************************************************
Records that a mini-transaction has modified a block. To be called at
mini-transaction commit if a page was modified in the mtr. The block gets
mtr->end_lsn as its newest modification lsn; if its oldest modification
lsn is still zero, it is set to mtr->start_lsn and the block is put to
the list of modified blocks. */
UNIV_INLINE
void
buf_flush_note_modification(
/*========================*/
	buf_block_t*	block,	/* in: block which is modified */
	mtr_t*		mtr)	/* in: mtr */
{
	/* Preconditions on the block and on the latches held */
	ut_ad(block);
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(buf_pool_mutex_own());

	/* Preconditions on the mini-transaction */
	ut_ad(mtr->start_lsn != 0);
	ut_ad(mtr->modifications);
	ut_ad(block->page.newest_modification <= mtr->end_lsn);

	block->page.newest_modification = mtr->end_lsn;

	if (block->page.oldest_modification != 0) {
		/* The block carries an earlier modification already:
		it is not inserted into the flush list again. */
		ut_ad(block->page.oldest_modification <= mtr->start_lsn);
	} else {
		/* First modification noted for this block */
		block->page.oldest_modification = mtr->start_lsn;
		ut_ad(block->page.oldest_modification != 0);

		buf_flush_insert_into_flush_list(&block->page);
	}

	srv_buf_pool_write_requests++;
}
/************************************************************************
Records that recovery has modified a buffer page. The update is made
while holding buf_pool_mutex, which this function acquires and releases
itself. If the oldest modification lsn of the block is still zero, it is
set to start_lsn and the block is inserted into the flush list with
buf_flush_insert_sorted_into_flush_list(). */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
	buf_block_t*	block,		/* in: block which is modified */
	ib_uint64_t	start_lsn,	/* in: start lsn of the first mtr in a
					set of mtr's */
	ib_uint64_t	end_lsn)	/* in: end lsn of the last mtr in the
					set of mtr's */
{
	/* Preconditions on the block and on the latch held */
	ut_ad(block);
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	buf_pool_mutex_enter();

	ut_ad(block->page.newest_modification <= end_lsn);

	block->page.newest_modification = end_lsn;

	if (block->page.oldest_modification != 0) {
		/* The block carries an earlier modification already:
		it is not inserted into the flush list again. */
		ut_ad(block->page.oldest_modification <= start_lsn);
	} else {
		/* First modification noted for this block */
		block->page.oldest_modification = start_lsn;
		ut_ad(block->page.oldest_modification != 0);

		buf_flush_insert_sorted_into_flush_list(&block->page);
	}

	buf_pool_mutex_exit();
}

243
include/buf0lru.h Normal file
View File

@@ -0,0 +1,243 @@
/******************************************************
The database buffer pool LRU replacement algorithm
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0lru_h
#define buf0lru_h
#include "univ.i"
#include "ut0byte.h"
#include "buf0types.h"
/** The return type of buf_LRU_free_block(). Only BUF_LRU_FREED (the
value 0) reports that the block was freed; the other two values tell
why it was not. */
enum buf_lru_free_block_status {
/** freed */
BUF_LRU_FREED = 0,
/** not freed because the caller asked to remove the
uncompressed frame but the control block cannot be
relocated */
BUF_LRU_CANNOT_RELOCATE,
/** not freed because of some other reason */
BUF_LRU_NOT_FREED
};
/**********************************************************************
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
operation, as flushed pages from non-unique non-clustered indexes are here
taken out of the buffer pool, and their inserts redirected to the insert
buffer. Otherwise, the flushed blocks could get modified again before read
operations need new buffer blocks, and the i/o work done in flushing would be
wasted. */
UNIV_INTERN
void
buf_LRU_try_free_flushed_blocks(void);
/*==================================*/
/**********************************************************************
Returns TRUE if less than 25 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
pool for their locks. */
UNIV_INTERN
ibool
buf_LRU_buf_pool_running_out(void);
/*==============================*/
/* out: TRUE if less than 25 % of buffer pool
left */
/*#######################################################################
These are low-level functions
#########################################################################*/
/* Minimum LRU list length for which the LRU_old pointer is defined */
#define BUF_LRU_OLD_MIN_LEN 80
#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
/**********************************************************************
Invalidates all pages belonging to a given tablespace when we are deleting
the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
what guarantees that it will not try to read in pages after this operation has
completed? */
UNIV_INTERN
void
buf_LRU_invalidate_tablespace(
/*==========================*/
ulint id); /* in: space id */
/**********************************************************************
Gets the minimum LRU_position field for the blocks in an initial segment
(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
guaranteed to be precise, because the ulint_clock may wrap around. */
UNIV_INTERN
ulint
buf_LRU_get_recent_limit(void);
/*==========================*/
/* out: the limit; zero if could not determine it */
/************************************************************************
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
UNIV_INTERN
void
buf_LRU_insert_zip_clean(
/*=====================*/
buf_page_t* bpage); /* in: pointer to the block in question */
/**********************************************************************
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well. If this function
returns BUF_LRU_FREED, it will not temporarily release
buf_pool_mutex. */
UNIV_INTERN
enum buf_lru_free_block_status
buf_LRU_free_block(
/*===============*/
/* out: BUF_LRU_FREED if freed,
BUF_LRU_CANNOT_RELOCATE or
BUF_LRU_NOT_FREED otherwise. */
buf_page_t* bpage, /* in: block to be freed */
ibool zip, /* in: TRUE if should remove also the
compressed page of an uncompressed page */
ibool* buf_pool_mutex_released);
/* in: pointer to a variable that will
be assigned TRUE if buf_pool_mutex
was temporarily released, or NULL */
/**********************************************************************
Try to free a replaceable block. */
UNIV_INTERN
ibool
buf_LRU_search_and_free_block(
/*==========================*/
/* out: TRUE if found and freed */
ulint n_iterations); /* in: how many times this has been called
repeatedly without result: a high value means
that we should search farther; if
n_iterations < 10, then we search
n_iterations / 10 * buf_pool->curr_size
pages from the end of the LRU list; if
n_iterations < 5, then we will also search
n_iterations / 5 of the unzip_LRU list. */
/**********************************************************************
Returns a free block from the buf_pool. The block is taken off the
free list. If the free list is empty, returns NULL. */
UNIV_INTERN
buf_block_t*
buf_LRU_get_free_only(void);
/*=======================*/
/* out: a free control block, or NULL
if the free list is empty */
/**********************************************************************
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, blocks are moved from the end of the
LRU list to the free list. */
UNIV_INTERN
buf_block_t*
buf_LRU_get_free_block(
/*===================*/
/* out: the free control block,
in state BUF_BLOCK_READY_FOR_USE */
ulint zip_size); /* in: compressed page size in bytes,
or 0 if uncompressed tablespace */
/**********************************************************************
Puts a block back to the free list. */
UNIV_INTERN
void
buf_LRU_block_free_non_file_page(
/*=============================*/
buf_block_t* block); /* in: block, must not contain a file page */
/**********************************************************************
Adds a block to the LRU list. */
UNIV_INTERN
void
buf_LRU_add_block(
/*==============*/
buf_page_t* bpage, /* in: control block */
ibool old); /* in: TRUE if should be put to the old
blocks in the LRU list, else put to the
start; if the LRU list is very short, added to
the start regardless of this parameter */
/**********************************************************************
Adds a block to the LRU list of decompressed zip pages. */
UNIV_INTERN
void
buf_unzip_LRU_add_block(
/*====================*/
buf_block_t* block, /* in: control block */
ibool old); /* in: TRUE if should be put to the end
of the list, else put to the start */
/**********************************************************************
Moves a block to the start of the LRU list. */
UNIV_INTERN
void
buf_LRU_make_block_young(
/*=====================*/
buf_page_t* bpage); /* in: control block */
/**********************************************************************
Moves a block to the end of the LRU list. */
UNIV_INTERN
void
buf_LRU_make_block_old(
/*===================*/
buf_page_t* bpage); /* in: control block */
/************************************************************************
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. */
UNIV_INTERN
void
buf_LRU_stat_update(void);
/*=====================*/
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**************************************************************************
Validates the LRU list. */
UNIV_INTERN
ibool
buf_LRU_validate(void);
/*==================*/
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**************************************************************************
Prints the LRU list. */
UNIV_INTERN
void
buf_LRU_print(void);
/*===============*/
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
/**********************************************************************
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
and page_zip_decompress() operations. Based on the statistics we decide
if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
/** Statistics for selecting the LRU list for eviction. Two instances
are declared: buf_LRU_stat_cur (current counters) and buf_LRU_stat_sum
(running sum of past values). */
struct buf_LRU_stat_struct
{
ulint io; /**< Counter of buffer pool I/O operations;
buf_LRU_stat_inc_io() increments it in
buf_LRU_stat_cur. */
ulint unzip; /**< Counter of page_zip_decompress operations;
buf_LRU_stat_inc_unzip() increments it in
buf_LRU_stat_cur. */
};
/** Shorthand type name for struct buf_LRU_stat_struct. */
typedef struct buf_LRU_stat_struct buf_LRU_stat_t;
/** Current operation counters. Not protected by any mutex.
Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t buf_LRU_stat_cur;
/** Running sum of past values of buf_LRU_stat_cur.
Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */
extern buf_LRU_stat_t buf_LRU_stat_sum;
/************************************************************************
Increments the I/O counter in buf_LRU_stat_cur. */
#define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++
/************************************************************************
Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++
#ifndef UNIV_NONINL
#include "buf0lru.ic"
#endif
#endif

8
include/buf0lru.ic Normal file
View File

@@ -0,0 +1,8 @@
/******************************************************
The database buffer replacement algorithm
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/

120
include/buf0rea.h Normal file
View File

@@ -0,0 +1,120 @@
/******************************************************
The database buffer read
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0rea_h
#define buf0rea_h
#include "univ.i"
#include "buf0types.h"
/************************************************************************
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread. Does a random read-ahead if it seems
sensible. */
UNIV_INTERN
ulint
buf_read_page(
/*==========*/
/* out: number of page read requests issued: this can
be > 1 if read-ahead occurred */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size in bytes, or 0 */
ulint offset);/* in: page number */
/************************************************************************
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
bufferfixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens, if these are not initialized to any
sensible value? No problem, before applying read-ahead we check that the
area to read is within the span of the space, if not, read-ahead is not
applied. An uninitialized value may result in a useless read operation, but
only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io. */
UNIV_INTERN
ulint
buf_read_ahead_linear(
/*==================*/
/* out: number of page read requests issued */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size in bytes, or 0 */
ulint offset);/* in: page number of a page; NOTE: the current thread
must want access to this page (see NOTE 3 above) */
/************************************************************************
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. */
UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
ibool sync, /* in: TRUE if the caller
wants this function to wait
for the highest address page
to get read in, before this
function returns */
const ulint* space_ids, /* in: array of space ids */
const ib_int64_t* space_versions,/* in: the spaces must have
this version number
(timestamp), otherwise we
discard the read; we use this
to cancel reads if DISCARD +
IMPORT may have changed the
tablespace size */
const ulint* page_nos, /* in: array of page numbers
to read, with the highest page
number the last in the
array */
ulint n_stored); /* in: number of elements
in the arrays */
/************************************************************************
Issues read requests for pages which recovery wants to read in. */
UNIV_INTERN
void
buf_read_recv_pages(
/*================*/
ibool sync, /* in: TRUE if the caller
wants this function to wait
for the highest address page
to get read in, before this
function returns */
ulint space, /* in: space id */
ulint zip_size, /* in: compressed page size in
bytes, or 0 */
const ulint* page_nos, /* in: array of page numbers
to read, with the highest page
number the last in the
array */
ulint n_stored); /* in: number of page numbers
in the array */
/* The size in pages of the area which the read-ahead algorithms read if
invoked */
#define BUF_READ_AHEAD_AREA \
ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
/* Modes used in read-ahead */
#define BUF_READ_IBUF_PAGES_ONLY 131
#define BUF_READ_ANY_PAGE 132
#endif

54
include/buf0types.h Normal file
View File

@@ -0,0 +1,54 @@
/******************************************************
The database buffer pool global types for the directory
(c) 1995 Innobase Oy
Created 11/17/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0types_h
#define buf0types_h
typedef struct buf_page_struct buf_page_t;
typedef struct buf_block_struct buf_block_t;
typedef struct buf_chunk_struct buf_chunk_t;
typedef struct buf_pool_struct buf_pool_t;
/* The type used for a buffer frame */
typedef byte buf_frame_t;
/* Flags for flush types. The enumerators are consecutive, starting from 0,
so BUF_FLUSH_N_TYPES is not a flush type itself but the number of flush
types defined before it. */
enum buf_flush {
BUF_FLUSH_LRU = 0,
BUF_FLUSH_SINGLE_PAGE,
BUF_FLUSH_LIST,
BUF_FLUSH_N_TYPES /* index of last element + 1, i.e., the count
of the flush types above */
};
/* Flags for io_fix types: the kind of I/O, if any, that is pending.
The values are consecutive, starting from 0. */
enum buf_io_fix {
BUF_IO_NONE = 0, /**< no pending I/O */
BUF_IO_READ, /**< read pending */
BUF_IO_WRITE /**< write pending */
};
/* Parameters of binary buddy system for compressed pages (buf0buddy.h) */
#if UNIV_WORD_SIZE <= 4 /* 32-bit system */
# define BUF_BUDDY_LOW_SHIFT 6
#else /* 64-bit system */
# define BUF_BUDDY_LOW_SHIFT 7
#endif
#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT)
/* minimum block size in the binary
buddy system; must be at least
sizeof(buf_page_t) */
#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
/* number of buddy sizes */
/* twice the maximum block size of the buddy system;
the underlying memory is aligned by this amount:
this must be equal to UNIV_PAGE_SIZE */
#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
#endif

464
include/data0data.h Normal file
View File

@@ -0,0 +1,464 @@
/************************************************************************
SQL data field and tuple
(c) 1994-1996 Innobase Oy
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#ifndef data0data_h
#define data0data_h
#include "univ.i"
#include "data0types.h"
#include "data0type.h"
#include "mem0mem.h"
#include "dict0types.h"
typedef struct big_rec_struct big_rec_t;
#ifdef UNIV_DEBUG
/*************************************************************************
Gets pointer to the type struct of SQL data field. */
UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
/* out: pointer to the type struct */
const dfield_t* field); /* in: SQL data field */
/*************************************************************************
Gets pointer to the data in a field. */
UNIV_INLINE
void*
dfield_get_data(
/*============*/
/* out: pointer to data */
const dfield_t* field); /* in: field */
#else /* UNIV_DEBUG */
# define dfield_get_type(field) (&(field)->type)
# define dfield_get_data(field) ((field)->data)
#endif /* UNIV_DEBUG */
/*************************************************************************
Sets the type struct of SQL data field. */
UNIV_INLINE
void
dfield_set_type(
/*============*/
dfield_t* field, /* in: SQL data field */
dtype_t* type); /* in: pointer to data type struct */
/*************************************************************************
Gets length of field data. */
UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
/* out: length of data; UNIV_SQL_NULL if
SQL null data */
const dfield_t* field); /* in: field */
/*************************************************************************
Sets length in a field. */
UNIV_INLINE
void
dfield_set_len(
/*===========*/
dfield_t* field, /* in: field */
ulint len); /* in: length or UNIV_SQL_NULL */
/*************************************************************************
Determines if a field is SQL NULL */
UNIV_INLINE
ulint
dfield_is_null(
/*===========*/
/* out: nonzero if SQL null data */
const dfield_t* field); /* in: field */
/*************************************************************************
Determines if a field is externally stored */
UNIV_INLINE
ulint
dfield_is_ext(
/*==========*/
/* out: nonzero if externally stored */
const dfield_t* field); /* in: field */
/*************************************************************************
Sets the "external storage" flag */
UNIV_INLINE
void
dfield_set_ext(
/*===========*/
dfield_t* field); /* in/out: field */
/*************************************************************************
Sets pointer to the data and length in a field. */
UNIV_INLINE
void
dfield_set_data(
/*============*/
dfield_t* field, /* in: field */
const void* data, /* in: data */
ulint len); /* in: length or UNIV_SQL_NULL */
/*************************************************************************
Sets a data field to SQL NULL. */
UNIV_INLINE
void
dfield_set_null(
/*============*/
dfield_t* field); /* in/out: field */
/**************************************************************************
Writes an SQL null field full of zeros. */
UNIV_INLINE
void
data_write_sql_null(
/*================*/
byte* data, /* in: pointer to a buffer of size len */
ulint len); /* in: SQL null size in bytes */
/*************************************************************************
Copies the data and len fields. */
UNIV_INLINE
void
dfield_copy_data(
/*=============*/
dfield_t* field1, /* out: field to copy to */
const dfield_t* field2);/* in: field to copy from */
/*************************************************************************
Copies a data field to another. */
UNIV_INLINE
void
dfield_copy(
/*========*/
dfield_t* field1, /* out: field to copy to */
const dfield_t* field2);/* in: field to copy from */
/*************************************************************************
Copies the data pointed to by a data field. */
UNIV_INLINE
void
dfield_dup(
/*=======*/
dfield_t* field, /* in/out: data field */
mem_heap_t* heap); /* in: memory heap where allocated */
/*************************************************************************
Tests if data length and content is equal for two dfields. */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
/*==========================*/
/* out: TRUE if equal */
const dfield_t* field1, /* in: field */
const dfield_t* field2);/* in: field */
/*************************************************************************
Tests if dfield data length and content is equal to the given. */
UNIV_INTERN
ibool
dfield_data_is_binary_equal(
/*========================*/
/* out: TRUE if equal */
const dfield_t* field, /* in: field */
ulint len, /* in: data length or UNIV_SQL_NULL */
const byte* data); /* in: data */
/*************************************************************************
Gets number of fields in a data tuple. */
UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
/* out: number of fields */
const dtuple_t* tuple); /* in: tuple */
#ifdef UNIV_DEBUG
/*************************************************************************
Gets nth field of a tuple. */
UNIV_INLINE
dfield_t*
dtuple_get_nth_field(
/*=================*/
/* out: nth field */
const dtuple_t* tuple, /* in: tuple */
ulint n); /* in: index of field */
#else /* UNIV_DEBUG */
# define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n))
#endif /* UNIV_DEBUG */
/*************************************************************************
Gets info bits in a data tuple. */
UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
/* out: info bits */
const dtuple_t* tuple); /* in: tuple */
/*************************************************************************
Sets info bits in a data tuple. */
UNIV_INLINE
void
dtuple_set_info_bits(
/*=================*/
dtuple_t* tuple, /* in: tuple */
ulint info_bits); /* in: info bits */
/*************************************************************************
Gets number of fields used in record comparisons. */
UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
/* out: number of fields used in comparisons
in rem0cmp.* */
const dtuple_t* tuple); /* in: tuple */
/*************************************************************************
Sets number of fields used in record comparisons. */
UNIV_INLINE
void
dtuple_set_n_fields_cmp(
/*====================*/
dtuple_t* tuple, /* in: tuple */
ulint n_fields_cmp); /* in: number of fields used in
comparisons in rem0cmp.* */
/**************************************************************
Creates a data tuple to a memory heap. The default value for number
of fields used in record comparisons for this tuple is n_fields. */
UNIV_INLINE
dtuple_t*
dtuple_create(
/*==========*/
/* out, own: created tuple */
mem_heap_t* heap, /* in: memory heap where the tuple
is created */
ulint n_fields); /* in: number of fields */
/**************************************************************
Wrap data fields in a tuple. The default value for number
of fields used in record comparisons for this tuple is n_fields. */
UNIV_INLINE
const dtuple_t*
dtuple_from_fields(
/*===============*/
/* out: data tuple */
dtuple_t* tuple, /* in: storage for data tuple */
const dfield_t* fields, /* in: fields */
ulint n_fields); /* in: number of fields */
/*************************************************************************
Sets number of fields used in a tuple. Normally this is set in
dtuple_create, but if you want later to set it smaller, you can use this. */
UNIV_INTERN
void
dtuple_set_n_fields(
/*================*/
dtuple_t* tuple, /* in: tuple */
ulint n_fields); /* in: number of fields */
/*************************************************************************
Copies a data tuple to another. This is a shallow copy; if a deep copy
is desired, dfield_dup() will have to be invoked on each field. */
UNIV_INLINE
dtuple_t*
dtuple_copy(
/*========*/
/* out, own: copy of tuple */
const dtuple_t* tuple, /* in: tuple to copy from */
mem_heap_t* heap); /* in: memory heap
where the tuple is created */
/**************************************************************
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted. Neither
is possible space in externally stored parts of the field. */
UNIV_INLINE
ulint
dtuple_get_data_size(
/*=================*/
/* out: sum of data lens */
const dtuple_t* tuple); /* in: typed data tuple */
/*************************************************************************
Computes the number of externally stored fields in a data tuple. */
UNIV_INLINE
ulint
dtuple_get_n_ext(
/*=============*/
/* out: number of externally stored fields */
const dtuple_t* tuple); /* in: tuple */
/****************************************************************
Compare two data tuples, respecting the collation of character fields. */
UNIV_INTERN
int
dtuple_coll_cmp(
/*============*/
/* out: 1, 0, -1 if tuple1 is greater than,
equal to, or less than tuple2, respectively */
const dtuple_t* tuple1, /* in: tuple 1 */
const dtuple_t* tuple2);/* in: tuple 2 */
/****************************************************************
Folds a prefix given as the number of fields of a tuple. */
UNIV_INLINE
ulint
dtuple_fold(
/*========*/
/* out: the folded value */
const dtuple_t* tuple, /* in: the tuple */
ulint n_fields,/* in: number of complete fields to fold */
ulint n_bytes,/* in: number of bytes to fold in an
incomplete last field */
dulint tree_id)/* in: index tree id */
__attribute__((pure));
/***********************************************************************
Sets types of fields binary in a tuple. */
UNIV_INLINE
void
dtuple_set_types_binary(
/*====================*/
dtuple_t* tuple, /* in: data tuple */
ulint n); /* in: number of fields to set */
/**************************************************************************
Checks if a dtuple contains an SQL null value. */
UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
/* out: TRUE if some field is SQL null */
const dtuple_t* tuple); /* in: dtuple */
/**************************************************************
Checks that a data field is typed. Asserts an error if not. */
UNIV_INTERN
ibool
dfield_check_typed(
/*===============*/
/* out: TRUE if ok */
const dfield_t* field); /* in: data field */
/**************************************************************
Checks that a data tuple is typed. Asserts an error if not. */
UNIV_INTERN
ibool
dtuple_check_typed(
/*===============*/
/* out: TRUE if ok */
const dtuple_t* tuple); /* in: tuple */
/**************************************************************
Checks that a data tuple is typed. */
UNIV_INTERN
ibool
dtuple_check_typed_no_assert(
/*=========================*/
/* out: TRUE if ok */
const dtuple_t* tuple); /* in: tuple */
#ifdef UNIV_DEBUG
/**************************************************************
Validates the consistency of a tuple which must be complete, i.e.,
all fields must have been set. */
UNIV_INTERN
ibool
dtuple_validate(
/*============*/
/* out: TRUE if ok */
const dtuple_t* tuple); /* in: tuple */
#endif /* UNIV_DEBUG */
/*****************************************************************
Pretty prints a dfield value according to its data type. */
UNIV_INTERN
void
dfield_print(
/*=========*/
const dfield_t* dfield);/* in: dfield */
/*****************************************************************
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
const dfield_t* dfield); /* in: dfield */
/**************************************************************
The following function prints the contents of a tuple. */
UNIV_INTERN
void
dtuple_print(
/*=========*/
FILE* f, /* in: output stream */
const dtuple_t* tuple); /* in: tuple */
/******************************************************************
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index. */
UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
/* out, own: created big record vector,
NULL if we are not able to shorten
the entry enough, i.e., if there are
too many fixed-length or short fields
in entry or the index is clustered */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in/out: index entry */
ulint* n_ext); /* in/out: number of
externally stored columns */
/******************************************************************
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
from entry with dtuple_convert_big_rec. */
UNIV_INTERN
void
dtuple_convert_back_big_rec(
/*========================*/
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: entry whose data was put to vector */
big_rec_t* vector);/* in, own: big rec vector; it is
freed in this function */
/******************************************************************
Frees the memory in a big rec vector. */
UNIV_INLINE
void
dtuple_big_rec_free(
/*================*/
big_rec_t* vector); /* in, own: big rec vector; it is
freed in this function */
/*######################################################################*/
/* Structure for an SQL data field: a pointer to the data, its length,
an "externally stored" flag and the data type. The field does not own the
data buffer; dfield_copy() and dfield_copy_data() copy only the pointer. */
struct dfield_struct{
void* data; /* pointer to data */
unsigned ext:1; /* TRUE=externally stored, FALSE=local;
set by dfield_set_ext(), cleared by
dfield_set_len() and dfield_set_data() */
unsigned len:32; /* data length; UNIV_SQL_NULL if SQL null;
note that this is a 32-bit bit-field */
dtype_t type; /* type of data */
};
/* Structure for a data tuple: a header followed (logically) by an array
of n_fields data fields. */
struct dtuple_struct {
ulint info_bits; /* info bits of an index record:
the default is 0; this field is used
if an index record is built from
a data tuple */
ulint n_fields; /* number of fields in dtuple */
ulint n_fields_cmp; /* number of fields which should
be used in comparison services
of rem0cmp.*; the index search
is performed by comparing only these
fields, others are ignored; the
default value in dtuple creation is
the same value as n_fields */
dfield_t* fields; /* pointer to the array of n_fields
fields; dtuple_create() places the
array right after this struct in the
same allocation, dtuple_from_fields()
points it to caller-supplied fields */
UT_LIST_NODE_T(dtuple_t) tuple_list;
/* data tuples can be linked into a
list using this field */
#ifdef UNIV_DEBUG
ulint magic_n; /* set to DATA_TUPLE_MAGIC_N when the
tuple is created; checked by debug
assertions */
# define DATA_TUPLE_MAGIC_N 65478679
#endif /* UNIV_DEBUG */
};
/* A slot for a field in a big rec vector: identifies one field of the
record and describes the data stored for it in the vector */
typedef struct big_rec_field_struct big_rec_field_t;
struct big_rec_field_struct {
ulint field_no; /* field number in record */
ulint len; /* stored data length in bytes */
const void* data; /* pointer to the stored data */
};
/* Storage format for overflow data in a big record, that is, a record
which needs external storage of data fields. The vector is released as a
whole by dtuple_big_rec_free(), which frees 'heap'. */
struct big_rec_struct {
mem_heap_t* heap; /* memory heap from which allocated */
ulint n_fields; /* number of stored fields, i.e., the
number of slots in 'fields' */
big_rec_field_t* fields; /* stored fields */
};
#ifndef UNIV_NONINL
#include "data0data.ic"
#endif
#endif

592
include/data0data.ic Normal file
View File

@@ -0,0 +1,592 @@
/************************************************************************
SQL data field and tuple
(c) 1994-1996 Innobase Oy
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#include "mem0mem.h"
#include "ut0rnd.h"
#ifdef UNIV_DEBUG
extern byte data_error;
/*************************************************************************
Returns the address of the type struct embedded in an SQL data field.
This function form is compiled only in debug builds. */
UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
				/* out: pointer to the type struct */
	const dfield_t*	field)	/* in: SQL data field */
{
	const dtype_t*	type;

	ut_ad(field);

	type = &field->type;

	/* The field is taken as const, but the caller is handed a
	non-const pointer into it: cast the constness away. */
	return((dtype_t*) type);
}
#endif /* UNIV_DEBUG */
/*************************************************************************
Sets the type of an SQL data field by copying the given type struct
into the field; the field keeps no reference to 'type' afterwards. */
UNIV_INLINE
void
dfield_set_type(
/*============*/
	dfield_t*	field,	/* in: SQL data field */
	dtype_t*	type)	/* in: pointer to data type struct */
{
	ut_ad(field);
	ut_ad(type);

	field->type = *type;
}
#ifdef UNIV_DEBUG
/*************************************************************************
Returns the data pointer of a field. This function form is compiled only
in debug builds, where it also asserts that a non-NULL field does not
still point to the data_error marker installed by dtuple_create(). */
UNIV_INLINE
void*
dfield_get_data(
/*============*/
				/* out: pointer to data */
	const dfield_t*	field)	/* in: field */
{
	ut_ad(field);

	/* Fail if the field claims to hold a value but its data pointer
	was never replaced. */
	ut_ad(!(field->len != UNIV_SQL_NULL
		&& field->data == &data_error));

	return(field->data);
}
#endif /* UNIV_DEBUG */
/*************************************************************************
Returns the data length stored in a field. In debug builds asserts that
a non-NULL field does not still point to the data_error marker. */
UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
				/* out: length of data; UNIV_SQL_NULL if
				SQL null data */
	const dfield_t*	field)	/* in: field */
{
	ulint	len;

	ut_ad(field);
	ut_ad(!(field->len != UNIV_SQL_NULL
		&& field->data == &data_error));

	len = field->len;

	return(len);
}
/*************************************************************************
Sets the length of a field. As a side effect the "externally stored"
flag of the field is cleared; the data pointer is left untouched. */
UNIV_INLINE
void
dfield_set_len(
/*===========*/
	dfield_t*	field,	/* in: field */
	ulint		len)	/* in: length or UNIV_SQL_NULL */
{
	ut_ad(field);

#ifdef UNIV_VALGRIND_DEBUG
	/* Check the current data buffer for the new length, unless the
	field is being set to SQL NULL. */
	if (len != UNIV_SQL_NULL) {
		UNIV_MEM_ASSERT_RW(field->data, len);
	}
#endif /* UNIV_VALGRIND_DEBUG */

	field->len = len;
	field->ext = 0;
}
/*************************************************************************
Tells whether a field holds SQL NULL, i.e., whether its length is
UNIV_SQL_NULL. */
UNIV_INLINE
ulint
dfield_is_null(
/*===========*/
				/* out: nonzero if SQL null data */
	const dfield_t*	field)	/* in: field */
{
	ulint	is_null;

	ut_ad(field);

	is_null = (field->len == UNIV_SQL_NULL);

	return(is_null);
}
/*************************************************************************
Tells whether a field is flagged as externally stored. The result is
wrapped in UNIV_UNLIKELY, marking the flag as not normally set. */
UNIV_INLINE
ulint
dfield_is_ext(
/*==========*/
				/* out: nonzero if externally stored */
	const dfield_t*	field)	/* in: field */
{
	ulint	ext;

	ut_ad(field);

	ext = field->ext;

	return(UNIV_UNLIKELY(ext));
}
/*************************************************************************
Sets the "external storage" flag of a field to 1. Note that
dfield_set_len() and dfield_set_data() clear the flag again, so it has to
be set after the length or the data of the field has been assigned. */
UNIV_INLINE
void
dfield_set_ext(
/*===========*/
dfield_t* field) /* in/out: field whose flag is set */
{
ut_ad(field);
field->ext = 1;
}
/*************************************************************************
Points a field to the given data and sets its length. Only the pointer
is stored, the data is not copied. As a side effect the "externally
stored" flag of the field is cleared. */
UNIV_INLINE
void
dfield_set_data(
/*============*/
	dfield_t*	field,	/* in: field */
	const void*	data,	/* in: data */
	ulint		len)	/* in: length or UNIV_SQL_NULL */
{
	ut_ad(field);

#ifdef UNIV_VALGRIND_DEBUG
	/* Check the new data buffer, unless the field is being set to
	SQL NULL. */
	if (len != UNIV_SQL_NULL) {
		UNIV_MEM_ASSERT_RW(data, len);
	}
#endif /* UNIV_VALGRIND_DEBUG */

	field->len = len;
	field->ext = 0;
	/* The field stores a non-const pointer: cast the constness away. */
	field->data = (void*) data;
}
/*************************************************************************
Sets a data field to SQL NULL: the data pointer becomes NULL and the
length UNIV_SQL_NULL. As this goes through dfield_set_data(), the
"externally stored" flag of the field is cleared as well. */
UNIV_INLINE
void
dfield_set_null(
/*============*/
dfield_t* field) /* in/out: field */
{
dfield_set_data(field, NULL, UNIV_SQL_NULL);
}
/*************************************************************************
Copies the data pointer, the length and the "externally stored" flag
from one field to another. The type of the target field is not changed
and the data itself is not duplicated. */
UNIV_INLINE
void
dfield_copy_data(
/*=============*/
	dfield_t*	field1,	/* out: field to copy to */
	const dfield_t*	field2)	/* in: field to copy from */
{
	ut_ad(field1);
	ut_ad(field2);

	field1->ext = field2->ext;
	field1->len = field2->len;
	field1->data = field2->data;
}
/*************************************************************************
Copies a data field to another by struct assignment: the data pointer,
the length, the "externally stored" flag and the type are all copied.
The data itself is not duplicated, so both fields point to the same
buffer afterwards; use dfield_dup() to get a private copy of the data. */
UNIV_INLINE
void
dfield_copy(
/*========*/
dfield_t* field1, /* out: field to copy to */
const dfield_t* field2) /* in: field to copy from */
{
*field1 = *field2;
}
/*************************************************************************
Replaces the data pointer of a field with a copy of the data allocated
from the given memory heap. A field that is SQL NULL is left as is. */
UNIV_INLINE
void
dfield_dup(
/*=======*/
	dfield_t*	field,	/* in/out: data field */
	mem_heap_t*	heap)	/* in: memory heap where allocated */
{
	if (dfield_is_null(field)) {
		/* No data to duplicate. */
		return;
	}

	UNIV_MEM_ASSERT_RW(field->data, field->len);

	field->data = mem_heap_dup(heap, field->data, field->len);
}
/*************************************************************************
Tests whether two fields have the same length and, unless both are
SQL NULL, byte-for-byte the same data. Types are not compared. */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
/*==========================*/
				/* out: TRUE if equal */
	const dfield_t*	field1,	/* in: field */
	const dfield_t*	field2)	/* in: field */
{
	ulint	len	= field1->len;
	ibool	equal;

	equal = (len == field2->len);

	/* Two SQL NULLs are equal without looking at the data pointers. */
	if (equal && len != UNIV_SQL_NULL) {
		equal = (memcmp(field1->data, field2->data, len) == 0);
	}

	return(equal);
}
/*************************************************************************
Returns the info bits stored in a data tuple. */
UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
				/* out: info bits */
	const dtuple_t*	tuple)	/* in: tuple */
{
	ulint	info_bits;

	ut_ad(tuple);

	info_bits = tuple->info_bits;

	return(info_bits);
}
/*************************************************************************
Sets info bits in a data tuple. The given value replaces the previous
info bits as a whole; it is not OR'ed into them. */
UNIV_INLINE
void
dtuple_set_info_bits(
/*=================*/
dtuple_t* tuple, /* in: tuple */
ulint info_bits) /* in: new info bits */
{
ut_ad(tuple);
tuple->info_bits = info_bits;
}
/*************************************************************************
Returns the number of fields of a tuple that are used in record
comparisons. */
UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
				/* out: number of fields used in comparisons
				in rem0cmp.* */
	const dtuple_t*	tuple)	/* in: tuple */
{
	ulint	n_cmp;

	ut_ad(tuple);

	n_cmp = tuple->n_fields_cmp;

	return(n_cmp);
}
/*************************************************************************
Sets the number of fields of a tuple that are used in record
comparisons. The value may not exceed the number of fields in the tuple;
this is checked by a debug assertion only. */
UNIV_INLINE
void
dtuple_set_n_fields_cmp(
/*====================*/
	dtuple_t*	tuple,		/* in: tuple */
	ulint		n_fields_cmp)	/* in: number of fields used in
					comparisons in rem0cmp.* */
{
	ut_ad(tuple);
	ut_ad(tuple->n_fields >= n_fields_cmp);

	tuple->n_fields_cmp = n_fields_cmp;
}
/*************************************************************************
Returns the number of fields in a data tuple. */
UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
				/* out: number of fields */
	const dtuple_t*	tuple)	/* in: tuple */
{
	ulint	n_fields;

	ut_ad(tuple);

	n_fields = tuple->n_fields;

	return(n_fields);
}
#ifdef UNIV_DEBUG
/*************************************************************************
Returns a pointer to the nth field of a tuple. This function form is
compiled only in debug builds, where it asserts that n is in range. */
UNIV_INLINE
dfield_t*
dtuple_get_nth_field(
/*=================*/
				/* out: nth field */
	const dtuple_t*	tuple,	/* in: tuple */
	ulint		n)	/* in: index of field */
{
	ut_ad(tuple);
	ut_ad(tuple->n_fields > n);

	/* Only the tuple header is const here; the field array it points
	to is not, so a non-const field pointer can be returned. */
	return(&tuple->fields[n]);
}
#endif /* UNIV_DEBUG */
/**************************************************************
Creates a data tuple in a memory heap. The tuple header and its field
array are carved out of one allocation, the array directly following the
header. Info bits default to 0 and the number of fields used in record
comparisons defaults to n_fields. The fields themselves are not
initialized, except that debug builds fill them with error markers. */
UNIV_INLINE
dtuple_t*
dtuple_create(
/*==========*/
				/* out, own: created tuple */
	mem_heap_t*	heap,	/* in: memory heap where the tuple
				is created */
	ulint		n_fields) /* in: number of fields */
{
	dtuple_t*	new_tuple;
#ifdef UNIV_DEBUG
	ulint		field_no;
#endif /* UNIV_DEBUG */

	ut_ad(heap);

	new_tuple = (dtuple_t*) mem_heap_alloc(
		heap, sizeof(*new_tuple) + n_fields * sizeof(dfield_t));

	/* The field array starts right behind the tuple header. */
	new_tuple->fields = (dfield_t*) (new_tuple + 1);
	new_tuple->n_fields = n_fields;
	new_tuple->n_fields_cmp = n_fields;
	new_tuple->info_bits = 0;

#ifdef UNIV_DEBUG
	new_tuple->magic_n = DATA_TUPLE_MAGIC_N;

	/* In the debug version, initialize fields to an error value */
	for (field_no = 0; field_no < n_fields; field_no++) {
		dfield_t*	field;

		field = dtuple_get_nth_field(new_tuple, field_no);

		dfield_set_len(field, UNIV_SQL_NULL);
		field->data = &data_error;
		dfield_get_type(field)->mtype = DATA_ERROR;
	}

	UNIV_MEM_INVALID(new_tuple->fields,
			 n_fields * sizeof *new_tuple->fields);
#endif

	return(new_tuple);
}
/**************************************************************
Wraps an existing array of data fields in a tuple, using caller-supplied
storage for the tuple header. The fields are not copied: the tuple just
points to them. Info bits are set to 0 and the number of fields used in
record comparisons is set to n_fields. */
UNIV_INLINE
const dtuple_t*
dtuple_from_fields(
/*===============*/
				/* out: data tuple */
	dtuple_t*	tuple,	/* in: storage for data tuple */
	const dfield_t*	fields,	/* in: fields */
	ulint		n_fields) /* in: number of fields */
{
	/* The header stores a non-const pointer; the tuple as a whole is
	handed back as const. */
	tuple->fields = (dfield_t*) fields;
	tuple->n_fields_cmp = n_fields;
	tuple->n_fields = n_fields;
	tuple->info_bits = 0;

	ut_d(tuple->magic_n = DATA_TUPLE_MAGIC_N);

	return(tuple);
}
/*************************************************************************
Copies a data tuple to another. This is a shallow copy: the new fields
point to the same data buffers as the original ones; if a deep copy is
desired, dfield_dup() will have to be invoked on each field.
NOTE: only the fields are copied. The info bits and the number of fields
used in comparisons of the new tuple are the defaults assigned by
dtuple_create(), not the values of the original tuple. */
UNIV_INLINE
dtuple_t*
dtuple_copy(
/*========*/
				/* out, own: copy of tuple */
	const dtuple_t*	tuple,	/* in: tuple to copy from */
	mem_heap_t*	heap)	/* in: memory heap
				where the tuple is created */
{
	ulint		n	= dtuple_get_n_fields(tuple);
	dtuple_t*	copy	= dtuple_create(heap, n);
	ulint		field_no = 0;

	while (field_no < n) {
		dfield_t*	to;
		const dfield_t*	from;

		to = dtuple_get_nth_field(copy, field_no);
		from = dtuple_get_nth_field(tuple, field_no);

		dfield_copy(to, from);

		field_no++;
	}

	return(copy);
}
/**************************************************************
Returns the sum of the data lengths of the fields of a tuple. A field
that is SQL NULL is counted with the SQL null size of its type. The space
occupied by the field structs or the tuple struct is not counted. Neither
is possible space in externally stored parts of the field. */
UNIV_INLINE
ulint
dtuple_get_data_size(
/*=================*/
				/* out: sum of data lengths */
	const dtuple_t*	tuple)	/* in: typed data tuple */
{
	ulint	total	= 0;
	ulint	n;
	ulint	field_no;

	ut_ad(tuple);
	ut_ad(dtuple_check_typed(tuple));
	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);

	n = tuple->n_fields;

	for (field_no = 0; field_no < n; field_no++) {
		const dfield_t*	field	= dtuple_get_nth_field(tuple,
							       field_no);
		ulint		len	= dfield_get_len(field);

		total += (len != UNIV_SQL_NULL)
			? len
			: dtype_get_sql_null_size(dfield_get_type(field));
	}

	return(total);
}
/*************************************************************************
Computes the number of externally stored fields in a data tuple, i.e.,
the number of fields whose "externally stored" flag is set. */
UNIV_INLINE
ulint
dtuple_get_n_ext(
/*=============*/
				/* out: number of externally stored fields */
	const dtuple_t*	tuple)	/* in: tuple */
{
	ulint	n_ext		= 0;
	ulint	n_fields;
	ulint	i;

	/* Validate the tuple before it is dereferenced for the first
	time; reading tuple->n_fields in a declaration initializer would
	make the NULL assertion below useless. */
	ut_ad(tuple);
	ut_ad(dtuple_check_typed(tuple));
	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);

	n_fields = tuple->n_fields;

	for (i = 0; i < n_fields; i++) {
		/* The flag is a 1-bit field, so it adds 0 or 1. */
		n_ext += dtuple_get_nth_field(tuple, i)->ext;
	}

	return(n_ext);
}
/***********************************************************************
Sets the type of the first n fields of a tuple to DATA_BINARY, passing 0
for the two remaining arguments of dtype_set(). */
UNIV_INLINE
void
dtuple_set_types_binary(
/*====================*/
	dtuple_t*	tuple,	/* in: data tuple */
	ulint		n)	/* in: number of fields to set */
{
	ulint	field_no;

	for (field_no = 0; field_no < n; field_no++) {
		dtype_set(dfield_get_type(
				  dtuple_get_nth_field(tuple, field_no)),
			  DATA_BINARY, 0, 0);
	}
}
/****************************************************************
Folds a prefix of a tuple, given as a number of complete fields plus a
number of bytes of the field following them, into a single value. The
fold starts from the folded index tree id. Fields that are SQL NULL do
not contribute to the fold. */
UNIV_INLINE
ulint
dtuple_fold(
/*========*/
				/* out: the folded value */
	const dtuple_t*	tuple,	/* in: the tuple */
	ulint		n_fields,/* in: number of complete fields to fold */
	ulint		n_bytes,/* in: number of bytes to fold in an
				incomplete last field */
	dulint		tree_id)/* in: index tree id */
{
	const dfield_t*	field;
	const byte*	data;
	ulint		len;
	ulint		fold;
	ulint		field_no;

	ut_ad(tuple);
	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
	ut_ad(dtuple_check_typed(tuple));

	fold = ut_fold_dulint(tree_id);

	/* The complete fields */
	for (field_no = 0; field_no < n_fields; field_no++) {
		field = dtuple_get_nth_field(tuple, field_no);
		data = (const byte*) dfield_get_data(field);
		len = dfield_get_len(field);

		if (len == UNIV_SQL_NULL) {
			continue;
		}

		fold = ut_fold_ulint_pair(fold, ut_fold_binary(data, len));
	}

	if (n_bytes == 0) {
		/* No incomplete last field to fold */
		return(fold);
	}

	/* The incomplete field is the one right after the complete ones;
	at most n_bytes of it are folded. */
	field = dtuple_get_nth_field(tuple, n_fields);
	data = (const byte*) dfield_get_data(field);
	len = dfield_get_len(field);

	if (len != UNIV_SQL_NULL) {
		fold = ut_fold_ulint_pair(
			fold,
			ut_fold_binary(data, len > n_bytes ? n_bytes : len));
	}

	return(fold);
}
/**************************************************************************
Writes an SQL null field full of zeros: the first len bytes of the
buffer are set to 0. */
UNIV_INLINE
void
data_write_sql_null(
/*================*/
byte* data, /* in: pointer to a buffer of size len;
the buffer contents are overwritten */
ulint len) /* in: SQL null size in bytes */
{
memset(data, 0, len);
}
/**************************************************************************
Checks if a dtuple contains an SQL null value. All fields of the tuple
are examined, not only those used in record comparisons. */
UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
				/* out: TRUE if some field is SQL null */
	const dtuple_t*	tuple)	/* in: dtuple */
{
	ulint	n_fields	= dtuple_get_n_fields(tuple);
	ulint	field_no	= 0;

	while (field_no < n_fields) {
		if (dfield_is_null(dtuple_get_nth_field(tuple, field_no))) {
			/* The first SQL NULL found settles it. */
			return(TRUE);
		}

		field_no++;
	}

	return(FALSE);
}
/******************************************************************
Frees the memory in a big rec vector by freeing the memory heap of the
vector. The vector must not be used after this call.
NOTE(review): presumably the vector struct and its field slots are
allocated from vector->heap, so no separate free is needed -- confirm
against dtuple_convert_big_rec(). */
UNIV_INLINE
void
dtuple_big_rec_free(
/*================*/
big_rec_t* vector) /* in, own: big rec vector; it is
freed in this function */
{
mem_heap_free(vector->heap);
}

455
include/data0type.h Normal file
View File

@@ -0,0 +1,455 @@
/******************************************************
Data types
(c) 1996 Innobase Oy
Created 1/16/1996 Heikki Tuuri
*******************************************************/
#ifndef data0type_h
#define data0type_h
#include "univ.i"
extern ulint data_mysql_default_charset_coll;
#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
#define DATA_MYSQL_BINARY_CHARSET_COLL 63
/* SQL data type struct */
typedef struct dtype_struct dtype_t;
/*-------------------------------------------*/
/* The 'MAIN TYPE' of a column; every code must be <= DATA_MTYPE_MAX */
#define DATA_VARCHAR 1 /* character varying of the
			latin1_swedish_ci charset-collation; note
			that the MySQL format for this, DATA_BINARY,
			DATA_VARMYSQL, is also affected by whether the
			'precise type' contains
			DATA_MYSQL_TRUE_VARCHAR */
#define DATA_CHAR 2 /* fixed length character of the
			latin1_swedish_ci charset-collation */
#define DATA_FIXBINARY 3 /* binary string of fixed length */
#define DATA_BINARY 4 /* binary string */
#define DATA_BLOB 5 /* binary large object, or a TEXT type;
			if (prtype & DATA_BINARY_TYPE) == 0, then this
			is actually a TEXT column (or a BLOB created
			with < 4.0.14; since column prefix indexes
			came only in 4.0.14, the missing flag in BLOBs
			created before that does not cause any harm) */
#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
#define DATA_SYS 8 /* system column */
/* Data types >= DATA_FLOAT must be compared using the whole field, not as
binary strings */
#define DATA_FLOAT 9
#define DATA_DOUBLE 10
#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */
#define DATA_VARMYSQL 12 /* any charset varying length char */
#define DATA_MYSQL 13 /* any charset fixed length char */
/* NOTE that 4.1.1 used DATA_MYSQL and
DATA_VARMYSQL for all character sets, and the
charset-collation for tables created with it
can also be latin1_swedish_ci */
#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size()
			requires the values are <= 63; the readers
			in data0type.ic extract the main type with
			buf[0] & 63 */
/*-------------------------------------------*/
/* The 'PRECISE TYPE' of a column */
/*
Tables created by a MySQL user have the following convention:
- In the least significant byte in the precise type we store the MySQL type
code (not applicable for system columns).
- In the second least significant byte we OR flags DATA_NOT_NULL,
DATA_UNSIGNED, DATA_BINARY_TYPE.
- In the third least significant byte of the precise type of string types we
store the MySQL charset-collation code. In DATA_BLOB columns created with
< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
problem, though.
Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
precise type, since the charset was always the default charset of the MySQL
installation. If the stored charset code is 0 in the system table SYS_COLUMNS
of InnoDB, that means that the default charset of this MySQL installation
should be used.
When loading a table definition from the system tables to the InnoDB data
dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
if the stored charset-collation is 0, and if that is the case and the type is
a non-binary string, replace that 0 by the default charset-collation code of
this MySQL installation. In short, in old tables, the charset-collation code
in the system tables on disk can be 0, but in in-memory data structures
(dtype_t), the charset-collation code is always != 0 for non-binary string
types.
In new tables, in binary string types, the charset-collation code is the
MySQL code for the 'binary charset', that is, != 0.
For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
InnoDB performs all comparisons internally, without resorting to the MySQL
comparison functions. This is to save CPU time.
InnoDB's own internal system tables have different precise types for their
columns, and for them the precise type is usually not used at all.
*/
#define DATA_ENGLISH 4 /* English language character string: this
			is a relic from pre-MySQL time and only used
			for InnoDB's own system tables */
#define DATA_ERROR 111 /* another relic from pre-MySQL time */
#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
			type code (the least significant byte) from
			the precise type */
#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
			format true VARCHAR */
/* Precise data types for system columns and the length of those columns;
NOTE: the values must run from 0 up in the order given! All codes must
be less than 256 */
#define DATA_ROW_ID 0 /* row id: a dulint */
#define DATA_ROW_ID_LEN 6 /* stored length for row id, in bytes */
#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
#define DATA_TRX_ID_LEN 6 /* stored length for transaction id, in bytes */
#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */
#define DATA_ROLL_PTR_LEN 7 /* stored length for roll pointer, in bytes */
#define DATA_N_SYS_COLS 3 /* number of system columns defined above */
#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
/* Flags ORed to the precise data type; they occupy the second least
significant byte of the precise type */
#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
			the column is declared as NOT NULL */
#define DATA_UNSIGNED 512 /* this is ORed to the precise type when
			we have an unsigned integer type */
#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character
			string, this is ORed to the precise type:
			this only holds for tables created with
			>= MySQL-4.0.14 */
/* #define DATA_NONLATIN1 2048 This is a relic from < 4.1.2 and < 5.0.1.
			In earlier versions this was set for some
			BLOB columns.
*/
#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data
			type when the column is true VARCHAR where
			MySQL uses 2 bytes to store the data len;
			for shorter VARCHARs MySQL uses only 1 byte */
/*-------------------------------------------*/
/* This many bytes we need to store the type information affecting the
alphabetical order for a single field and decide the storage size of an
SQL null; this is the buffer size of the < 4.1.x storage format read by
dtype_read_for_order_and_null_size() */
#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4
/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
store the charset-collation number; one byte is left unused, though */
#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
/*************************************************************************
Gets the MySQL type code from a dtype. The code is the least significant
byte of the precise type. */
UNIV_INLINE
ulint
dtype_get_mysql_type(
/*=================*/
/* out: MySQL type code; this is NOT an InnoDB
type code! */
const dtype_t* type); /* in: type struct */
/*************************************************************************
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes
the characters in the string occupy. */
UNIV_INTERN
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
/* out: length of the prefix,
in bytes */
ulint prtype, /* in: precise type */
ulint mbminlen, /* in: minimum length of a
multi-byte character, in bytes */
ulint mbmaxlen, /* in: maximum length of a
multi-byte character, in bytes */
ulint prefix_len, /* in: length of the requested
prefix, in characters, multiplied by
dtype_get_mbmaxlen(dtype) */
ulint data_len, /* in: length of str (in bytes) */
const char* str); /* in: the string whose prefix
length is being determined */
/*************************************************************************
Checks if a data main type is a string type. Also a BLOB is considered a
string type. */
UNIV_INTERN
ibool
dtype_is_string_type(
/*=================*/
/* out: TRUE if string type */
ulint mtype); /* in: InnoDB main data type code: DATA_CHAR, ... */
/*************************************************************************
Checks if a type is a binary string type. Note that for tables created with
< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
those DATA_BLOB columns this function currently returns FALSE. */
UNIV_INTERN
ibool
dtype_is_binary_string_type(
/*========================*/
/* out: TRUE if binary string type */
ulint mtype, /* in: main data type */
ulint prtype);/* in: precise type */
/*************************************************************************
Checks if a type is a non-binary string type. That is, dtype_is_string_type is
TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
For those DATA_BLOB columns this function currently returns TRUE. */
UNIV_INTERN
ibool
dtype_is_non_binary_string_type(
/*============================*/
/* out: TRUE if non-binary string type */
ulint mtype, /* in: main data type */
ulint prtype);/* in: precise type */
/*************************************************************************
Sets a data type structure. Besides storing the three given values, this
also computes the mbminlen and mbmaxlen members of the struct. */
UNIV_INLINE
void
dtype_set(
/*======*/
dtype_t* type, /* out: type struct to init */
ulint mtype, /* in: main data type; must be <= DATA_MTYPE_MAX */
ulint prtype, /* in: precise type */
ulint len); /* in: precision of type */
/*************************************************************************
Copies a data type structure. */
UNIV_INLINE
void
dtype_copy(
/*=======*/
dtype_t* type1, /* out: type struct to copy to */
const dtype_t* type2); /* in: type struct to copy from */
/*************************************************************************
Gets the SQL main data type. */
UNIV_INLINE
ulint
dtype_get_mtype(
/*============*/
/* out: main data type code (DATA_VARCHAR, ...) */
const dtype_t* type); /* in: type struct */
/*************************************************************************
Gets the precise data type. */
UNIV_INLINE
ulint
dtype_get_prtype(
/*=============*/
/* out: precise type */
const dtype_t* type); /* in: type struct */
/*************************************************************************
Compute the mbminlen and mbmaxlen members of a data type structure.
Both outputs are 0 if mtype is not a string type. */
UNIV_INLINE
void
dtype_get_mblen(
/*============*/
ulint mtype, /* in: main type */
ulint prtype, /* in: precise type (and collation) */
ulint* mbminlen, /* out: minimum length of a
multi-byte character */
ulint* mbmaxlen); /* out: maximum length of a
multi-byte character */
/*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. The code is
the third least significant byte of the precise type. */
UNIV_INLINE
ulint
dtype_get_charset_coll(
/*===================*/
/* out: MySQL charset-collation code */
ulint prtype);/* in: precise data type */
/*************************************************************************
Forms a precise type from the < 4.1.2 format precise type plus the
charset-collation code. */
UNIV_INTERN
ulint
dtype_form_prtype(
/*==============*/
/* out: precise type including the charset-collation code */
ulint old_prtype, /* in: the MySQL type code and the flags
DATA_BINARY_TYPE etc. */
ulint charset_coll); /* in: MySQL charset-collation code */
/*************************************************************************
Determines if a MySQL string type is a subset of UTF-8. This function
may return false negatives, in case further character-set collation
codes are introduced in MySQL later. */
UNIV_INLINE
ibool
dtype_is_utf8(
/*==========*/
/* out: TRUE if a subset of UTF-8 */
ulint prtype);/* in: precise data type */
/*************************************************************************
Gets the type length. */
UNIV_INLINE
ulint
dtype_get_len(
/*==========*/
/* out: the len member of the type struct */
const dtype_t* type); /* in: type struct */
/*************************************************************************
Gets the minimum length of a character, in bytes. */
UNIV_INLINE
ulint
dtype_get_mbminlen(
/*===============*/
/* out: minimum length of a char, in bytes,
or 0 if this is not a character type */
const dtype_t* type); /* in: type */
/*************************************************************************
Gets the maximum length of a character, in bytes. */
UNIV_INLINE
ulint
dtype_get_mbmaxlen(
/*===============*/
/* out: maximum length of a char, in bytes,
or 0 if this is not a character type */
const dtype_t* type); /* in: type */
/*************************************************************************
Gets the padding character code for the type. */
UNIV_INLINE
ulint
dtype_get_pad_char(
/*===============*/
/* out: padding character code (0x20, space), or
ULINT_UNDEFINED if no padding specified */
ulint mtype, /* in: main type */
ulint prtype); /* in: precise type */
/***************************************************************************
Returns the size of a fixed size data type, 0 if not a fixed size type. */
UNIV_INLINE
ulint
dtype_get_fixed_size_low(
/*=====================*/
/* out: fixed size in bytes, or 0 */
ulint mtype, /* in: main type */
ulint prtype, /* in: precise type */
ulint len, /* in: length */
ulint mbminlen, /* in: minimum length of a multibyte char */
ulint mbmaxlen); /* in: maximum length of a multibyte char */
/***************************************************************************
Returns the minimum size of a data type. */
UNIV_INLINE
ulint
dtype_get_min_size_low(
/*===================*/
/* out: minimum size in bytes */
ulint mtype, /* in: main type */
ulint prtype, /* in: precise type */
ulint len, /* in: length */
ulint mbminlen, /* in: minimum length of a multibyte char */
ulint mbmaxlen); /* in: maximum length of a multibyte char */
/***************************************************************************
Returns the maximum size of a data type. Note: types in system tables may be
incomplete and return incorrect information. */
UNIV_INLINE
ulint
dtype_get_max_size_low(
/*===================*/
/* out: maximum size; ULINT_MAX
for DATA_BLOB */
ulint mtype, /* in: main type */
ulint len); /* in: length */
/***************************************************************************
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
For fixed length types it is the fixed length of the type, otherwise 0. */
UNIV_INLINE
ulint
dtype_get_sql_null_size(
/*====================*/
/* out: SQL null storage size
in ROW_FORMAT=REDUNDANT */
const dtype_t* type); /* in: type */
/**************************************************************************
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the < 4.1.x
storage format. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
dtype_t* type, /* out: type struct to fill in */
const byte* buf); /* in: buffer of DATA_ORDER_NULL_TYPE_BUF_SIZE
bytes holding the stored order info */
/**************************************************************************
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. This is the >= 4.1.x storage
format. */
UNIV_INLINE
void
dtype_new_store_for_order_and_null_size(
/*====================================*/
byte* buf, /* out: buffer for
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
bytes where we store the info */
const dtype_t* type, /* in: type struct */
ulint prefix_len);/* in: prefix length to
replace type->len, or 0 */
/**************************************************************************
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
storage format. */
UNIV_INLINE
void
dtype_new_read_for_order_and_null_size(
/*===================================*/
dtype_t* type, /* out: type struct to fill in */
const byte* buf); /* in: buffer of DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
bytes holding the stored type order info */
/*************************************************************************
Validates a data type structure. */
UNIV_INTERN
ibool
dtype_validate(
/*===========*/
/* out: TRUE if ok */
const dtype_t* type); /* in: type struct to validate */
/*************************************************************************
Prints a data type structure. */
UNIV_INTERN
void
dtype_print(
/*========*/
const dtype_t* type); /* in: type */
/* Structure for an SQL data type.
If you add fields to this structure, be sure to initialize them everywhere.
This structure is initialized in the following functions:
dtype_set()
dtype_read_for_order_and_null_size()
dtype_new_read_for_order_and_null_size()
sym_tab_add_null_lit()
All members are bit-fields, so each value is limited by its declared width. */
struct dtype_struct{
unsigned mtype:8; /* main data type */
unsigned prtype:24; /* precise type; MySQL data
type, charset code, flags to
indicate nullability,
signedness, whether this is a
binary string, whether this is
a true VARCHAR where MySQL
uses 2 bytes to store the length */
/* the remaining fields do not affect alphabetical ordering: */
unsigned len:16; /* length; for MySQL data this
is field->pack_length(),
except that for a >= 5.0.3
type true VARCHAR this is the
maximum byte length of the
string data (in addition to
the string, MySQL uses 1 or 2
bytes to store the string length);
16 bits, so at most 65535 */
unsigned mbminlen:2; /* minimum length of a
character, in bytes; 2 bits,
so the field can hold 0..3 */
unsigned mbmaxlen:3; /* maximum length of a
character, in bytes; 3 bits,
so the field can hold 0..7 */
};
#ifndef UNIV_NONINL
#include "data0type.ic"
#endif
#endif

571
include/data0type.ic Normal file
View File

@@ -0,0 +1,571 @@
/******************************************************
Data types
(c) 1996 Innobase Oy
Created 1/16/1996 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
#include "ha_prototypes.h"
/*************************************************************************
Extracts the MySQL charset-collation code from a precise type. The code
is held in bits 16..23 of the precise type. */
UNIV_INLINE
ulint
dtype_get_charset_coll(
/*===================*/
			/* out: charset-collation code, 0..255 */
	ulint	prtype)	/* in: precise data type */
{
	ulint	upper_bits;

	/* Drop the MySQL type code and the flag byte below the code. */
	upper_bits = prtype >> 16;

	return(upper_bits & 0xFFUL);
}
/*************************************************************************
Tells whether the charset-collation of a MySQL string type is known to be
a subset of UTF-8. Only a fixed list of collation codes is recognized, so
the answer may be a false negative for collations that MySQL introduces
later. */
UNIV_INLINE
ibool
dtype_is_utf8(
/*==========*/
			/* out: TRUE if a subset of UTF-8 */
	ulint	prtype)	/* in: precise data type */
{
	ulint	coll;
	ibool	is_subset;

	coll = dtype_get_charset_coll(prtype);

	/* The codes below have been copied from strings/ctype-extra.c
	and strings/ctype-utf8.c. */
	switch (coll) {
	case 11:	/* ascii_general_ci */
	case 65:	/* ascii_bin */
	case 33:	/* utf8_general_ci */
	case 83:	/* utf8_bin */
	case 254:	/* utf8_general_cs */
		is_subset = TRUE;
		break;
	default:
		is_subset = FALSE;
	}

	return(is_subset);
}
/*************************************************************************
Extracts the MySQL type code, i.e. the least significant byte of the
precise type, from a dtype. */
UNIV_INLINE
ulint
dtype_get_mysql_type(
/*=================*/
				/* out: MySQL type code; this is NOT
				an InnoDB type code! */
	const dtype_t*	type)	/* in: type struct */
{
	ulint	precise;

	precise = type->prtype;

	return(precise & 0xFFUL);
}
/*************************************************************************
Computes the minimum and maximum character length, in bytes, for a main
type and precise type. For non-string types both outputs are 0. */
UNIV_INLINE
void
dtype_get_mblen(
/*============*/
	ulint	mtype,		/* in: main type */
	ulint	prtype,		/* in: precise type (and collation) */
	ulint*	mbminlen,	/* out: minimum length of a
				multi-byte character */
	ulint*	mbmaxlen)	/* out: maximum length of a
				multi-byte character */
{
	if (!dtype_is_string_type(mtype)) {
		/* Not a character type: no character widths. */
		*mbmaxlen = 0;
		*mbminlen = 0;

		return;
	}

#ifndef UNIV_HOTBACKUP
	/* Ask MySQL for the character widths of the collation. */
	innobase_get_cset_width(dtype_get_charset_coll(prtype),
				mbminlen, mbmaxlen);
	ut_ad(*mbminlen <= *mbmaxlen);
	/* NOTE(review): the 2-bit mbminlen field in dtype_t can hold 0..3,
	but this assertion only admits 0..2 -- confirm which is intended. */
	ut_ad(*mbminlen <= 2); /* mbminlen in dtype_t is 0..3 */
	ut_ad(*mbmaxlen < 1 << 3); /* mbmaxlen in dtype_t is 0..7 */
#else /* !UNIV_HOTBACKUP */
	/* Without MySQL, only the single-byte main types are accepted. */
	ut_a(mtype <= DATA_BINARY);
	*mbmaxlen = 1;
	*mbminlen = 1;
#endif /* !UNIV_HOTBACKUP */
}
/*************************************************************************
Recomputes the mbminlen and mbmaxlen members of a data type structure
from its mtype and prtype members. */
UNIV_INLINE
void
dtype_set_mblen(
/*============*/
	dtype_t*	type)	/* in/out: type; mtype and prtype are
				read, mbminlen and mbmaxlen are written */
{
	ulint	min_len;
	ulint	max_len;

	dtype_get_mblen(type->mtype, type->prtype, &min_len, &max_len);

	/* The results go through locals because the struct members are
	bit-fields, whose address cannot be taken. */
	type->mbminlen = min_len;
	type->mbmaxlen = max_len;

	ut_ad(dtype_validate(type));
}
/*************************************************************************
Initializes a data type structure from a main type, a precise type and a
length, and derives the character width members from them. */
UNIV_INLINE
void
dtype_set(
/*======*/
	dtype_t*	type,	/* out: type struct to init */
	ulint		mtype,	/* in: main data type; must be
				<= DATA_MTYPE_MAX */
	ulint		prtype,	/* in: precise type */
	ulint		len)	/* in: precision of type */
{
	ut_ad(type);
	ut_ad(mtype <= DATA_MTYPE_MAX);

	type->len = len;
	type->prtype = prtype;
	type->mtype = mtype;

	/* mbminlen and mbmaxlen depend on mtype and prtype set above. */
	dtype_set_mblen(type);
}
/*************************************************************************
Copies all members of one data type structure to another one. */
UNIV_INLINE
void
dtype_copy(
/*=======*/
	dtype_t*	type1,	/* out: type struct to copy to */
	const dtype_t*	type2)	/* in: type struct to copy from */
{
	/* Plain struct assignment copies every member. */
	*type1 = *type2;

	/* In debug builds, check that the copy is a valid type. */
	ut_ad(dtype_validate(type1));
}
/*************************************************************************
Reads the SQL main data type of a type struct. */
UNIV_INLINE
ulint
dtype_get_mtype(
/*============*/
				/* out: main data type code */
	const dtype_t*	type)	/* in: type struct; must not be NULL */
{
	ulint	main_type;

	ut_ad(type);

	main_type = type->mtype;

	return(main_type);
}
/*************************************************************************
Reads the precise data type of a type struct. */
UNIV_INLINE
ulint
dtype_get_prtype(
/*=============*/
				/* out: precise type */
	const dtype_t*	type)	/* in: type struct; must not be NULL */
{
	ulint	precise_type;

	ut_ad(type);

	precise_type = type->prtype;

	return(precise_type);
}
/*************************************************************************
Reads the length member of a type struct. */
UNIV_INLINE
ulint
dtype_get_len(
/*==========*/
				/* out: type length */
	const dtype_t*	type)	/* in: type struct; must not be NULL */
{
	ulint	type_len;

	ut_ad(type);

	type_len = type->len;

	return(type_len);
}
/*************************************************************************
Reads the minimum length of a character, in bytes, from a type struct. */
UNIV_INLINE
ulint
dtype_get_mbminlen(
/*===============*/
				/* out: minimum length of a char, in
				bytes, or 0 if this is not a
				character type */
	const dtype_t*	type)	/* in: type; must not be NULL */
{
	ulint	min_len;

	ut_ad(type);

	min_len = type->mbminlen;

	return(min_len);
}
/*************************************************************************
Reads the maximum length of a character, in bytes, from a type struct. */
UNIV_INLINE
ulint
dtype_get_mbmaxlen(
/*===============*/
				/* out: maximum length of a char, in
				bytes, or 0 if this is not a
				character type */
	const dtype_t*	type)	/* in: type; must not be NULL */
{
	ulint	max_len;

	ut_ad(type);

	max_len = type->mbmaxlen;

	return(max_len);
}
/*************************************************************************
Determines the padding character code for a main type and precise type.
The result is either the space character (0x20) or ULINT_UNDEFINED. */
UNIV_INLINE
ulint
dtype_get_pad_char(
/*===============*/
				/* out: padding character code, or
				ULINT_UNDEFINED if no padding specified */
	ulint	mtype,		/* in: main type */
	ulint	prtype)		/* in: precise type */
{
	switch (mtype) {
	case DATA_CHAR:
	case DATA_VARCHAR:
	case DATA_MYSQL:
	case DATA_VARMYSQL:
		/* Space is the padding character for all char
		strings. */
		return(0x20);

	case DATA_FIXBINARY:
	case DATA_BINARY:
		if (UNIV_UNLIKELY(dtype_get_charset_coll(prtype)
				  == DATA_MYSQL_BINARY_CHARSET_COLL)) {
			/* Starting from 5.0.18, do not pad
			VARBINARY or BINARY columns. */
			return(ULINT_UNDEFINED);
		}

		/* Binary strings in any other collation are padded
		with spaces like char strings. */
		return(0x20);

	case DATA_BLOB:
		if (prtype & DATA_BINARY_TYPE) {
			/* A true BLOB: no padding specified */
			return(ULINT_UNDEFINED);
		}

		/* Starting from 5.0.3, TEXT strings are padded with
		spaces, too. */
		return(0x20);

	default:
		/* No padding specified */
		return(ULINT_UNDEFINED);
	}
}
/**************************************************************************
Serializes the parts of a type that determine its alphabetical ordering
and the storage size of an SQL NULL value. This is the >= 4.1.x storage
format. Layout of the 6 bytes written:
  buf[0]     main type, with bit 128 set if DATA_BINARY_TYPE is set
  buf[1]     least significant byte of the precise type
  buf[2..3]  length (or the prefix length), low 16 bits
  buf[4..5]  charset-collation code, with bit 128 of buf[4] set if
             DATA_NOT_NULL is set */
UNIV_INLINE
void
dtype_new_store_for_order_and_null_size(
/*====================================*/
	byte*		buf,	/* out: buffer for
				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
				bytes where we store the info */
	const dtype_t*	type,	/* in: type struct */
	ulint		prefix_len)/* in: prefix length to
				replace type->len, or 0 */
{
#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
#endif
	ulint	stored_len;
	ulint	coll;
	byte	type_byte;

	type_byte = (byte)(type->mtype & 0xFFUL);

	if (type->prtype & DATA_BINARY_TYPE) {
		type_byte = type_byte | 128;
	}

	/* Versions < 4.1.2 additionally set bit 64 of this byte when
	type->prtype contained DATA_NONLATIN1. */

	buf[0] = type_byte;
	buf[1] = (byte)(type->prtype & 0xFFUL);

	/* A nonzero prefix length overrides the full type length. */
	stored_len = type->len;

	if (prefix_len) {
		stored_len = prefix_len;
	}

	mach_write_to_2(buf + 2, stored_len & 0xFFFFUL);

	coll = dtype_get_charset_coll(type->prtype);
	ut_ad(coll < 256);
	mach_write_to_2(buf + 4, coll);

	/* The collation code is below 256, so the top bit of buf[4] is
	free to carry the NOT NULL flag. */
	if (type->prtype & DATA_NOT_NULL) {
		buf[4] |= 128;
	}
}
/**************************************************************************
Deserializes into a type the stored information that determines its
alphabetical ordering and the storage size of an SQL NULL value. This is
the < 4.1.x storage format, which has no charset-collation code: the
default charset-collation of this MySQL installation is used. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
	dtype_t*	type,	/* out: type struct */
	const byte*	buf)	/* in: buffer for stored type order info */
{
#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
#endif
	ulint	old_prtype;

	/* Byte 1 holds the low byte of the precise type; bit 128 of
	byte 0 carries the binary flag. */
	old_prtype = buf[1];

	if (buf[0] & 128) {
		old_prtype = old_prtype | DATA_BINARY_TYPE;
	}

	type->mtype = buf[0] & 63;
	type->len = mach_read_from_2(buf + 2);
	type->prtype = dtype_form_prtype(old_prtype,
					 data_mysql_default_charset_coll);

	dtype_set_mblen(type);
}
/**************************************************************************
Deserializes into a type the stored information that determines its
alphabetical ordering and the storage size of an SQL NULL value. This is
the >= 4.1.x storage format, the counterpart of
dtype_new_store_for_order_and_null_size(). */
UNIV_INLINE
void
dtype_new_read_for_order_and_null_size(
/*===================================*/
	dtype_t*	type,	/* out: type struct */
	const byte*	buf)	/* in: buffer for stored type order info */
{
	ulint	prtype;
	ulint	charset_coll;
#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
#endif
	/* Rebuild the precise type: low byte from buf[1], the binary
	flag from bit 128 of buf[0], the NOT NULL flag from bit 128 of
	buf[4]. */
	prtype = buf[1];

	if (buf[0] & 128) {
		prtype |= DATA_BINARY_TYPE;
	}

	if (buf[4] & 128) {
		prtype |= DATA_NOT_NULL;
	}

	type->mtype = buf[0] & 63;
	type->prtype = prtype;
	type->len = mach_read_from_2(buf + 2);

	/* Mask off the NOT NULL flag bit to get the collation code. */
	charset_coll = mach_read_from_2(buf + 4) & 0x7fff;

	if (dtype_is_string_type(type->mtype)) {
		ut_a(charset_coll < 256);

		if (charset_coll == 0) {
			/* This insert buffer record was inserted with MySQL
			version < 4.1.2, and the charset-collation code was not
			explicitly stored to dtype->prtype at that time. It
			must be the default charset-collation of this MySQL
			installation. */

			charset_coll = data_mysql_default_charset_coll;
		}

		type->prtype = dtype_form_prtype(prtype, charset_coll);
	}

	dtype_set_mblen(type);
}
/***************************************************************************
Returns the size of a fixed size data type, 0 if not a fixed size type.
DATA_MYSQL is fixed size only when it is a binary type or when
mbminlen == mbmaxlen. */
UNIV_INLINE
ulint
dtype_get_fixed_size_low(
/*=====================*/
/* out: fixed size, or 0 */
ulint mtype, /* in: main type */
ulint prtype, /* in: precise type */
ulint len, /* in: length */
ulint mbminlen, /* in: minimum length of a multibyte char */
ulint mbmaxlen) /* in: maximum length of a multibyte char */
{
switch (mtype) {
case DATA_SYS:
#ifdef UNIV_DEBUG
/* Debug builds check that a system column has the length that
belongs to its kind. */
switch (prtype & DATA_MYSQL_TYPE_MASK) {
case DATA_ROW_ID:
ut_ad(len == DATA_ROW_ID_LEN);
break;
case DATA_TRX_ID:
ut_ad(len == DATA_TRX_ID_LEN);
break;
case DATA_ROLL_PTR:
ut_ad(len == DATA_ROLL_PTR_LEN);
break;
default:
ut_ad(0);
return(0);
}
#endif /* UNIV_DEBUG */
/* fall through: system columns are fixed size like the types below */
case DATA_CHAR:
case DATA_FIXBINARY:
case DATA_INT:
case DATA_FLOAT:
case DATA_DOUBLE:
return(len);
case DATA_MYSQL:
if (prtype & DATA_BINARY_TYPE) {
return(len);
} else {
#ifdef UNIV_HOTBACKUP
if (mbminlen == mbmaxlen) {
return(len);
}
#else /* UNIV_HOTBACKUP */
/* We play it safe here and ask MySQL for
mbminlen and mbmaxlen. Although
mbminlen and mbmaxlen are
initialized if and only if prtype
is (in one of the 3 functions in this file),
it could be that none of these functions
has been called. */
ulint i_mbminlen, i_mbmaxlen;
innobase_get_cset_width(
dtype_get_charset_coll(prtype),
&i_mbminlen, &i_mbmaxlen);
/* A mismatch is only reported to stderr; the decision below
still uses the mbminlen and mbmaxlen passed by the caller. */
if (UNIV_UNLIKELY(mbminlen != i_mbminlen)
|| UNIV_UNLIKELY(mbmaxlen != i_mbmaxlen)) {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: "
"mbminlen=%lu, "
"mbmaxlen=%lu, "
"type->mbminlen=%lu, "
"type->mbmaxlen=%lu\n",
(ulong) i_mbminlen,
(ulong) i_mbmaxlen,
(ulong) mbminlen,
(ulong) mbmaxlen);
}
if (mbminlen == mbmaxlen) {
return(len);
}
#endif /* !UNIV_HOTBACKUP */
}
/* fall through for variable-length charsets */
case DATA_VARCHAR:
case DATA_BINARY:
case DATA_DECIMAL:
case DATA_VARMYSQL:
case DATA_BLOB:
return(0);
default:
ut_error;
}
return(0);
}
/***************************************************************************
Computes the minimum number of bytes a value of a data type can occupy.
Variable-length types have a minimum of 0. */
UNIV_INLINE
ulint
dtype_get_min_size_low(
/*===================*/
				/* out: minimum size */
	ulint	mtype,		/* in: main type */
	ulint	prtype,		/* in: precise type */
	ulint	len,		/* in: length */
	ulint	mbminlen,	/* in: minimum length of a multibyte char */
	ulint	mbmaxlen)	/* in: maximum length of a multibyte char */
{
	switch (mtype) {
	case DATA_SYS:
#ifdef UNIV_DEBUG
		/* Debug builds check that a system column has the
		length that belongs to its kind. */
		switch (prtype & DATA_MYSQL_TYPE_MASK) {
		case DATA_ROW_ID:
			ut_ad(len == DATA_ROW_ID_LEN);
			break;
		case DATA_TRX_ID:
			ut_ad(len == DATA_TRX_ID_LEN);
			break;
		case DATA_ROLL_PTR:
			ut_ad(len == DATA_ROLL_PTR_LEN);
			break;
		default:
			ut_ad(0);
			return(0);
		}
#endif /* UNIV_DEBUG */
		return(len);

	case DATA_CHAR:
	case DATA_FIXBINARY:
	case DATA_INT:
	case DATA_FLOAT:
	case DATA_DOUBLE:
		/* Fixed-size types: the minimum is the length itself. */
		return(len);

	case DATA_MYSQL:
		if (!(prtype & DATA_BINARY_TYPE) && mbminlen != mbmaxlen) {
			/* This is a variable-length character set: len
			counts mbmaxlen bytes per character, the minimum
			needs only mbminlen bytes per character. */
			ut_a(mbminlen > 0);
			ut_a(mbmaxlen > mbminlen);
			ut_a(len % mbmaxlen == 0);

			return(len * mbminlen / mbmaxlen);
		}

		return(len);

	case DATA_VARCHAR:
	case DATA_BINARY:
	case DATA_DECIMAL:
	case DATA_VARMYSQL:
	case DATA_BLOB:
		return(0);

	default:
		ut_error;
	}

	return(0);
}
/***************************************************************************
Computes the maximum number of bytes a value of a data type can occupy.
Note: types in system tables may be incomplete and return incorrect
information. */
UNIV_INLINE
ulint
dtype_get_max_size_low(
/*===================*/
			/* out: maximum size; ULINT_MAX
			for DATA_BLOB */
	ulint	mtype,	/* in: main type */
	ulint	len)	/* in: length */
{
	switch (mtype) {
	case DATA_BLOB:
		/* No bound is derived from len for a BLOB. */
		return(ULINT_MAX);

	case DATA_SYS:
	case DATA_CHAR:
	case DATA_FIXBINARY:
	case DATA_INT:
	case DATA_FLOAT:
	case DATA_DOUBLE:
	case DATA_MYSQL:
	case DATA_VARCHAR:
	case DATA_BINARY:
	case DATA_DECIMAL:
	case DATA_VARMYSQL:
		/* For every other known type the length is the bound. */
		return(len);

	default:
		ut_error;
	}

	return(ULINT_MAX);
}
/***************************************************************************
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
For fixed length types it is the fixed length of the type, otherwise 0. */
UNIV_INLINE
ulint
dtype_get_sql_null_size(
/*====================*/
				/* out: SQL null storage size
				in ROW_FORMAT=REDUNDANT, in bytes */
	const dtype_t*	type)	/* in: type */
{
	/* Return the fixed size itself. Comparing it with 0 here would
	collapse the size of every fixed-length type to 1, which
	contradicts the contract above. */
	return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
					type->mbminlen, type->mbmaxlen));
}

19
include/data0types.h Normal file
View File

@@ -0,0 +1,19 @@
/************************************************************************
Some type definitions
(c) 1994-2000 Innobase Oy
Created 9/21/2000 Heikki Tuuri
*************************************************************************/
#ifndef data0types_h
#define data0types_h
/* SQL data field struct; only the name is declared here, the struct
itself is defined elsewhere */
typedef struct dfield_struct dfield_t;
/* SQL data tuple struct; only the name is declared here, the struct
itself is defined elsewhere */
typedef struct dtuple_struct dtuple_t;
#endif

88
include/db0err.h Normal file
View File

@@ -0,0 +1,88 @@
/******************************************************
Global error codes for the database
(c) 1996 Innobase Oy
Created 5/24/1996 Heikki Tuuri
*******************************************************/
#ifndef db0err_h
#define db0err_h
/* Global error codes. Note that DB_SUCCESS is 10, not 0. Enumerators
without an explicit value continue from the previous one. */
enum db_err {
DB_SUCCESS = 10,
/* The following are error codes */
DB_ERROR,
DB_OUT_OF_MEMORY,
DB_OUT_OF_FILE_SPACE,
DB_LOCK_WAIT,
DB_DEADLOCK,
DB_ROLLBACK,
DB_DUPLICATE_KEY,
DB_QUE_THR_SUSPENDED,
DB_MISSING_HISTORY, /* required history data has been
deleted due to lack of space in
rollback segment */
DB_CLUSTER_NOT_FOUND = 30,
DB_TABLE_NOT_FOUND,
DB_MUST_GET_MORE_FILE_SPACE, /* the database has to be stopped
and restarted with more file space */
DB_TABLE_IS_BEING_USED,
DB_TOO_BIG_RECORD, /* a record in an index would not fit
on a compressed page, or it would
become bigger than 1/2 free space in
an uncompressed page frame */
DB_LOCK_WAIT_TIMEOUT, /* lock wait lasted too long */
DB_NO_REFERENCED_ROW, /* referenced key value not found
for a foreign key in an insert or
update of a row */
DB_ROW_IS_REFERENCED, /* cannot delete or update a row
because it contains a key value
which is referenced */
DB_CANNOT_ADD_CONSTRAINT, /* adding a foreign key constraint
to a table failed */
DB_CORRUPTION, /* data structure corruption noticed */
DB_COL_APPEARS_TWICE_IN_INDEX, /* InnoDB cannot handle an index
where same column appears twice */
DB_CANNOT_DROP_CONSTRAINT, /* dropping a foreign key constraint
from a table failed */
DB_NO_SAVEPOINT, /* no savepoint exists with the given
name */
DB_TABLESPACE_ALREADY_EXISTS, /* we cannot create a new single-table
tablespace because a file of the same
name already exists */
DB_TABLESPACE_DELETED, /* tablespace does not exist or is
being dropped right now */
DB_LOCK_TABLE_FULL, /* lock structs have exhausted the
buffer pool (for big transactions,
InnoDB stores the lock structs in the
buffer pool) */
DB_FOREIGN_DUPLICATE_KEY, /* foreign key constraints
activated by the operation would
lead to a duplicate key in some
table */
DB_TOO_MANY_CONCURRENT_TRXS, /* when InnoDB runs out of the
preconfigured undo slots, this can
only happen when there are too many
concurrent transactions */
DB_UNSUPPORTED, /* when InnoDB sees any artefact or
a feature that it can't recognize or
work with e.g., FT indexes created by
a later version of the engine. */
DB_PRIMARY_KEY_IS_NULL, /* a column in the PRIMARY KEY
was found to be NULL */
/* The following are partial failure codes */
DB_FAIL = 1000,
DB_OVERFLOW,
DB_UNDERFLOW,
DB_STRONG_FAIL,
DB_ZIP_OVERFLOW,
DB_RECORD_NOT_FOUND = 1500,
DB_END_OF_INDEX
};
#endif

134
include/dict0boot.h Normal file
View File

@@ -0,0 +1,134 @@
/******************************************************
Data dictionary creation and booting
(c) 1996 Innobase Oy
Created 4/18/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0boot_h
#define dict0boot_h
#include "univ.i"
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "ut0byte.h"
#include "buf0buf.h"
#include "fsp0fsp.h"
#include "dict0dict.h"
/* The dictionary header type is an alias of byte; the DICT_HDR_... offsets
defined later in this file are presumably applied to a pointer of this
type -- confirm against the callers */
typedef byte dict_hdr_t;
/**************************************************************************
Gets a pointer to the dictionary header and x-latches its page. */
UNIV_INTERN
dict_hdr_t*
dict_hdr_get(
/*=========*/
/* out: pointer to the dictionary header,
page x-latched */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Returns a new row, table, index, or tree id. */
UNIV_INTERN
dulint
dict_hdr_get_new_id(
/*================*/
/* out: the new id */
ulint type); /* in: DICT_HDR_ROW_ID, ...; presumably one of
the DICT_HDR_..._ID header offsets defined in
this file -- confirm against the definition */
/**************************************************************************
Returns a new row id. The definition is in dict0boot.ic, which this header
includes unless UNIV_NONINL is defined. */
UNIV_INLINE
dulint
dict_sys_get_new_row_id(void);
/*=========================*/
/* out: the new id */
/**************************************************************************
Reads a row id from a record or other 6-byte stored form. The definition
is in dict0boot.ic. */
UNIV_INLINE
dulint
dict_sys_read_row_id(
/*=================*/
/* out: row id */
byte* field); /* in: record field */
/**************************************************************************
Writes a row id to a record or other 6-byte stored form. The definition
is in dict0boot.ic. */
UNIV_INLINE
void
dict_sys_write_row_id(
/*==================*/
byte* field, /* in: record field */
dulint row_id);/* in: row id */
/*********************************************************************
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created. */
UNIV_INTERN
void
dict_boot(void);
/*===========*/
/*********************************************************************
Creates and initializes the data dictionary at the database creation. */
UNIV_INTERN
void
dict_create(void);
/*=============*/
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
/* The ids for the basic system tables and their indexes. Each macro expands
to a ut_dulint_create() call, so it is an expression rather than a
compile-time constant. */
#define DICT_TABLES_ID ut_dulint_create(0, 1)
#define DICT_COLUMNS_ID ut_dulint_create(0, 2)
#define DICT_INDEXES_ID ut_dulint_create(0, 3)
#define DICT_FIELDS_ID ut_dulint_create(0, 4)
/* The following is a secondary index on SYS_TABLES */
#define DICT_TABLE_IDS_ID ut_dulint_create(0, 5)
#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start
from this number, except for basic
system tables and their above defined
indexes; ibuf tables and indexes are
assigned as the id the number
DICT_IBUF_ID_MIN plus the space id */
#define DICT_IBUF_ID_MIN ut_dulint_create(0xFFFFFFFFUL, 0)
/* The offset of the dictionary header on the page; it is defined as
FSEG_PAGE_DATA */
#define DICT_HDR FSEG_PAGE_DATA
/*-------------------------------------------------------------*/
/* Dictionary header offsets. The four id fields are spaced 8 apart
(0, 8, 16, 24) and the five root fields 4 apart (32 ... 48); the segment
header starts at 56. */
#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
#define DICT_HDR_MIX_ID 24 /* Obsolete, always 0. */
#define DICT_HDR_TABLES 32 /* Root of the table index tree */
#define DICT_HDR_TABLE_IDS 36 /* Presumably the root of the secondary
index on SYS_TABLES (see
DICT_TABLE_IDS_ID); the comment here
used to repeat the DICT_HDR_TABLES
text -- confirm */
#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */
#define DICT_HDR_INDEXES 44 /* Root of the index index tree */
#define DICT_HDR_FIELDS 48 /* Root of the index field
index tree */
#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace
segment into which the dictionary
header is created */
/*-------------------------------------------------------------*/
/* The field numbers of the page number, space number and type fields in
the sys_indexes table clustered index */
#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
#define DICT_SYS_INDEXES_TYPE_FIELD 6
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
updated. The modulo test is made in dict_sys_get_new_row_id() in
dict0boot.ic. */
#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
#ifndef UNIV_NONINL
#include "dict0boot.ic"
#endif
#endif

76
include/dict0boot.ic Normal file
View File

@@ -0,0 +1,76 @@
/******************************************************
Data dictionary creation and booting
(c) 1996 Innobase Oy
Created 4/18/1996 Heikki Tuuri
*******************************************************/
/**************************************************************************
Writes the current value of the row id counter to the dictionary header file
page. Only the prototype appears in this file; it is needed because
dict_sys_get_new_row_id() below calls the function. */
UNIV_INTERN
void
dict_hdr_flush_row_id(void);
/*=======================*/
/**************************************************************************
Hands out the current value of the counter dict_sys->row_id and then
advances the counter by one. Both steps are done while holding
dict_sys->mutex. If the value about to be handed out is zero modulo
DICT_HDR_ROW_ID_WRITE_MARGIN, dict_hdr_flush_row_id() is called first. */
UNIV_INLINE
dulint
dict_sys_get_new_row_id(void)
/*=========================*/
			/* out: the new id */
{
	dulint	new_id;

	mutex_enter(&(dict_sys->mutex));

	new_id = dict_sys->row_id;

	/* The margin test uses the value returned by
	ut_dulint_get_low() only */
	if ((ut_dulint_get_low(new_id) % DICT_HDR_ROW_ID_WRITE_MARGIN) == 0) {

		dict_hdr_flush_row_id();
	}

	UT_DULINT_INC(dict_sys->row_id);

	mutex_exit(&(dict_sys->mutex));

	return(new_id);
}
/**************************************************************************
Decodes a row id from its 6-byte stored form, for example a record field,
using mach_read_from_6(). */
UNIV_INLINE
dulint
dict_sys_read_row_id(
/*=================*/
			/* out: row id */
	byte*	field)	/* in: record field holding the stored row id */
{
	dulint	row_id;

	/* The 6-byte reader below is only correct for 6-byte row ids */
#if DATA_ROW_ID_LEN != 6
# error "DATA_ROW_ID_LEN != 6"
#endif
	row_id = mach_read_from_6(field);

	return(row_id);
}
/**************************************************************************
Stores a row id in its 6-byte form, for example into a record field, using
mach_write_to_6(). Compilation fails unless DATA_ROW_ID_LEN is 6. */
#if DATA_ROW_ID_LEN != 6
# error "DATA_ROW_ID_LEN != 6"
#endif
UNIV_INLINE
void
dict_sys_write_row_id(
/*==================*/
	byte*	field,	/* in: record field to write to */
	dulint	row_id)	/* in: row id to store */
{
	mach_write_to_6(field, row_id);
}

183
include/dict0crea.h Normal file
View File

@@ -0,0 +1,183 @@
/******************************************************
Database object creation
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0crea_h
#define dict0crea_h
#include "univ.i"
#include "dict0types.h"
#include "dict0dict.h"
#include "que0types.h"
#include "row0types.h"
#include "mtr0mtr.h"
/*************************************************************************
Creates a table create graph. The returned node type is tab_node_t; see
struct tab_node_struct later in this file for its members. */
UNIV_INTERN
tab_node_t*
tab_create_graph_create(
/*====================*/
/* out, own: table create node */
dict_table_t* table, /* in: table to create, built as a memory data
structure */
mem_heap_t* heap); /* in: heap where created */
/*************************************************************************
Creates an index create graph. The returned node type is ind_node_t; see
struct ind_node_struct later in this file for its members. */
UNIV_INTERN
ind_node_t*
ind_create_graph_create(
/*====================*/
/* out, own: index create node */
dict_index_t* index, /* in: index to create, built as a memory data
structure */
mem_heap_t* heap); /* in: heap where created */
/***************************************************************
Creates a table. This is a high-level function used in SQL execution graphs. */
UNIV_INTERN
que_thr_t*
dict_create_table_step(
/*===================*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/***************************************************************
Creates an index. This is a high-level function used in SQL execution
graphs. */
UNIV_INTERN
que_thr_t*
dict_create_index_step(
/*===================*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/***********************************************************************
Truncates the index tree associated with a row in SYS_INDEXES table. What
is done depends on the space parameter, see below. */
UNIV_INTERN
ulint
dict_truncate_index_tree(
/*=====================*/
/* out: new root page number, or
FIL_NULL on failure */
dict_table_t* table, /* in: the table the index belongs to */
ulint space, /* in: 0=truncate,
nonzero=create the index tree in the
given tablespace */
btr_pcur_t* pcur, /* in/out: persistent cursor pointing to
record in the clustered index of
SYS_INDEXES table. The cursor may be
repositioned in this call. */
mtr_t* mtr); /* in: mtr having the latch
on the record page. The mtr may be
committed and restarted in this call. */
/***********************************************************************
Drops the index tree associated with a row in SYS_INDEXES table. */
UNIV_INTERN
void
dict_drop_index_tree(
/*=================*/
rec_t* rec, /* in/out: record in the clustered index
of SYS_INDEXES table */
mtr_t* mtr); /* in: mtr having the latch on the record page */
/* The two foreign key functions below are declared only when
UNIV_HOTBACKUP is not defined */
#ifndef UNIV_HOTBACKUP
/********************************************************************
Creates the foreign key constraints system tables inside InnoDB
at database creation or database start if they are not found or are
not of the right form. */
UNIV_INTERN
ulint
dict_create_or_check_foreign_constraint_tables(void);
/*================================================*/
/* out: DB_SUCCESS or error code */
/************************************************************************
Adds foreign key definitions to data dictionary tables in the database. We
look at table->foreign_list, and also generate names for constraints that
were not named by the user. A generated constraint has a name of the format
databasename/tablename_ibfk_<number>, where the numbers start from 1, and are
given locally for this table, that is, the number is not global, as it used
to be in the old format constraints < 4.0.18. */
UNIV_INTERN
ulint
dict_create_add_foreigns_to_dictionary(
/*===================================*/
/* out: error code or DB_SUCCESS */
ulint start_id,/* in: if we are actually doing ALTER TABLE
ADD CONSTRAINT, we want to generate constraint
numbers which are bigger than in the table so
far; we number the constraints from
start_id + 1 up; start_id should be set to 0 if
we are creating a new table, or if the table
so far has no constraints for which the name
was generated here */
dict_table_t* table, /* in: table */
trx_t* trx); /* in: transaction */
#endif /* !UNIV_HOTBACKUP */
/* Table create node structure. The members above the dashed line are the
table being created and the child nodes; the members below it are working
storage of the node itself. */
struct tab_node_struct{
que_common_t common; /* node type: QUE_NODE_TABLE_CREATE */
dict_table_t* table; /* table to create, built as a memory data
structure with dict_mem_... functions */
ins_node_t* tab_def; /* child node which does the insert of
the table definition; the row to be inserted
is built by the parent node */
ins_node_t* col_def; /* child node which does the inserts of
the column definitions; the row to be inserted
is built by the parent node */
commit_node_t* commit_node;
/* child node which performs a commit after
a successful table creation */
/*----------------------*/
/* Local storage for this graph node */
ulint state; /* node execution state; presumably one of
the TABLE_... values defined right after this
struct -- confirm against the step function */
ulint col_no; /* next column definition to insert */
mem_heap_t* heap; /* memory heap used as auxiliary storage */
};
/* Table create node states */
#define TABLE_BUILD_TABLE_DEF 1
#define TABLE_BUILD_COL_DEF 2
#define TABLE_COMMIT_WORK 3
#define TABLE_ADD_TO_CACHE 4
#define TABLE_COMPLETED 5
/* Index create node struct. The members above the dashed line are the
index being created and the child nodes; the members below it are working
storage of the node itself. */
struct ind_node_struct{
que_common_t common; /* node type: QUE_NODE_INDEX_CREATE */
dict_index_t* index; /* index to create, built as a memory data
structure with dict_mem_... functions */
ins_node_t* ind_def; /* child node which does the insert of
the index definition; the row to be inserted
is built by the parent node */
ins_node_t* field_def; /* child node which does the inserts of
the field definitions; the row to be inserted
is built by the parent node */
commit_node_t* commit_node;
/* child node which performs a commit after
a successful index creation */
/*----------------------*/
/* Local storage for this graph node */
ulint state; /* node execution state; presumably one of
the INDEX_... values defined right after this
struct -- confirm against the step function */
ulint page_no;/* root page number of the index */
dict_table_t* table; /* table which owns the index */
dtuple_t* ind_row;/* index definition row built */
ulint field_no;/* next field definition to insert */
mem_heap_t* heap; /* memory heap used as auxiliary storage */
};
/* Index create node states. Unlike the table create states there is no
..._COMPLETED value in this list. */
#define INDEX_BUILD_INDEX_DEF 1
#define INDEX_BUILD_FIELD_DEF 2
#define INDEX_CREATE_INDEX_TREE 3
#define INDEX_COMMIT_WORK 4
#define INDEX_ADD_TO_CACHE 5
#ifndef UNIV_NONINL
#include "dict0crea.ic"
#endif
#endif

8
include/dict0crea.ic Normal file
View File

@@ -0,0 +1,8 @@
/******************************************************
Database object creation
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
*******************************************************/

1159
include/dict0dict.h Normal file

File diff suppressed because it is too large Load Diff

778
include/dict0dict.ic Normal file
View File

@@ -0,0 +1,778 @@
/**********************************************************************
Data dictionary system
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
#include "dict0load.h"
#include "rem0types.h"
#include "data0type.h"
/*************************************************************************
Fills a dtype_t object from a column: the members mtype, prtype, len,
mbminlen and mbmaxlen are copied one by one. */
UNIV_INLINE
void
dict_col_copy_type(
/*===============*/
	const dict_col_t*	col,	/* in: column to copy from */
	dtype_t*		type)	/* out: data type, filled in */
{
	ut_ad(col && type);

	/* The assignments are independent of each other */
	type->mbmaxlen = col->mbmaxlen;
	type->mbminlen = col->mbminlen;
	type->len = col->len;
	type->prtype = col->prtype;
	type->mtype = col->mtype;
}
#ifdef UNIV_DEBUG
/*************************************************************************
Assert that a column and a data type match. The members compared are the
same five that dict_col_copy_type() copies: mtype, prtype, len, mbminlen
and mbmaxlen. The function exists only when UNIV_DEBUG is defined. */
UNIV_INLINE
ibool
dict_col_type_assert_equal(
/*=======================*/
/* out: TRUE; the return statement is
reached only if no assertion fired */
const dict_col_t* col, /* in: column */
const dtype_t* type) /* in: data type */
{
/* Both pointers are checked before either is dereferenced */
ut_ad(col);
ut_ad(type);
ut_ad(col->mtype == type->mtype);
ut_ad(col->prtype == type->prtype);
ut_ad(col->len == type->len);
ut_ad(col->mbminlen == type->mbminlen);
ut_ad(col->mbmaxlen == type->mbmaxlen);
return(TRUE);
}
#endif /* UNIV_DEBUG */
/***************************************************************************
Computes the minimum size of a column by passing the column's mtype,
prtype, len, mbminlen and mbmaxlen to dtype_get_min_size_low(). */
UNIV_INLINE
ulint
dict_col_get_min_size(
/*==================*/
					/* out: minimum size */
	const dict_col_t*	col)	/* in: column */
{
	ulint	min_size;

	min_size = dtype_get_min_size_low(col->mtype, col->prtype, col->len,
					  col->mbminlen, col->mbmaxlen);

	return(min_size);
}
/***************************************************************************
Computes the maximum size of a column by passing the column's mtype and
len to dtype_get_max_size_low(). */
UNIV_INLINE
ulint
dict_col_get_max_size(
/*==================*/
					/* out: maximum size */
	const dict_col_t*	col)	/* in: column */
{
	ulint	max_size;

	max_size = dtype_get_max_size_low(col->mtype, col->len);

	return(max_size);
}
/***************************************************************************
Computes the size of a fixed size column by passing the column's mtype,
prtype, len, mbminlen and mbmaxlen to dtype_get_fixed_size_low(). */
UNIV_INLINE
ulint
dict_col_get_fixed_size(
/*====================*/
					/* out: fixed size, or 0 if the
					column is not of fixed size */
	const dict_col_t*	col)	/* in: column */
{
	ulint	fixed_size;

	fixed_size = dtype_get_fixed_size_low(col->mtype, col->prtype,
					      col->len,
					      col->mbminlen, col->mbmaxlen);

	return(fixed_size);
}
/***************************************************************************
Gives the size that an SQL NULL value of this column occupies when stored
in ROW_FORMAT=REDUNDANT. The value is whatever dict_col_get_fixed_size()
returns for the column: the fixed length for fixed length types, else 0. */
UNIV_INLINE
ulint
dict_col_get_sql_null_size(
/*=======================*/
					/* out: SQL null storage size
					in ROW_FORMAT=REDUNDANT */
	const dict_col_t*	col)	/* in: column */
{
	ulint	null_size;

	null_size = dict_col_get_fixed_size(col);

	return(null_size);
}
/*************************************************************************
Gets the column number, which is stored in the member col->ind. */
UNIV_INLINE
ulint
dict_col_get_no(
/*============*/
					/* out: column number */
	const dict_col_t*	col)	/* in: column */
{
	ulint	col_no;

	ut_ad(col);

	col_no = col->ind;

	return(col_no);
}
/*************************************************************************
Finds the position of a column in the clustered index. The first n_def
fields of the index are scanned in order; a field matches if it points to
the given column and has a zero prefix_len. */
UNIV_INLINE
ulint
dict_col_get_clust_pos(
/*===================*/
						/* out: position of the first
						matching field, or
						ULINT_UNDEFINED if there is
						none */
	const dict_col_t*	col,		/* in: table column */
	const dict_index_t*	clust_index)	/* in: clustered index */
{
	ulint	pos;

	ut_ad(col);
	ut_ad(clust_index);
	ut_ad(dict_index_is_clust(clust_index));

	for (pos = 0; pos < clust_index->n_def; pos++) {
		const dict_field_t*	field = clust_index->fields + pos;

		/* Skip fields on other columns and prefix fields */
		if (field->col != col || field->prefix_len) {

			continue;
		}

		return(pos);
	}

	return(ULINT_UNDEFINED);
}
#ifdef UNIV_DEBUG
/************************************************************************
Returns the head of the index list of a table, that is, the clustered
index. This function is compiled only when UNIV_DEBUG is defined. */
UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
/*=======================*/
					/* out: index, NULL if none exists */
	const dict_table_t*	table)	/* in: table */
{
	dict_table_t*	nonconst_table;

	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	/* The list macro is applied to a non-const view of the table */
	nonconst_table = (dict_table_t*) table;

	return(UT_LIST_GET_FIRST(nonconst_table->indexes));
}
/************************************************************************
Returns the index that follows the given one in the index list of its
table. This function is compiled only when UNIV_DEBUG is defined. */
UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
/*======================*/
					/* out: index, NULL if none left */
	const dict_index_t*	index)	/* in: index */
{
	dict_index_t*	nonconst_index;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	/* The list macro is applied to a non-const view of the index */
	nonconst_index = (dict_index_t*) index;

	return(UT_LIST_GET_NEXT(indexes, nonconst_index));
}
#endif /* UNIV_DEBUG */
/************************************************************************
Tests the DICT_CLUSTERED bit of index->type. */
UNIV_INLINE
ulint
dict_index_is_clust(
/*================*/
					/* out: nonzero for clustered index,
					zero for other indexes */
	const dict_index_t*	index)	/* in: index */
{
	ulint	is_clust;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	is_clust = UNIV_UNLIKELY(index->type & DICT_CLUSTERED);

	return(is_clust);
}
/************************************************************************
Tests the DICT_UNIQUE bit of index->type. */
UNIV_INLINE
ulint
dict_index_is_unique(
/*=================*/
					/* out: nonzero for unique index,
					zero for other indexes */
	const dict_index_t*	index)	/* in: index */
{
	ulint	is_unique;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	is_unique = UNIV_UNLIKELY(index->type & DICT_UNIQUE);

	return(is_unique);
}
/************************************************************************
Tests the DICT_IBUF bit of index->type, which marks the insert buffer
tree. */
UNIV_INLINE
ulint
dict_index_is_ibuf(
/*===============*/
					/* out: nonzero for insert buffer,
					zero for other indexes */
	const dict_index_t*	index)	/* in: index */
{
	ulint	is_ibuf;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	is_ibuf = UNIV_UNLIKELY(index->type & DICT_IBUF);

	return(is_ibuf);
}
/************************************************************************
Gets the number of user-defined columns of a table in the dictionary
cache, computed as table->n_cols minus DATA_N_SYS_COLS. */
UNIV_INLINE
ulint
dict_table_get_n_user_cols(
/*=======================*/
					/* out: number of user-defined
					(e.g., not ROW_ID)
					columns of a table */
	const dict_table_t*	table)	/* in: table */
{
	ulint	n_all_cols;

	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	n_all_cols = table->n_cols;

	return(n_all_cols - DATA_N_SYS_COLS);
}
/************************************************************************
Gets the number of system columns of a table in the dictionary cache. The
result is always DATA_N_SYS_COLS; the table argument is used in the debug
assertions only. */
UNIV_INLINE
ulint
dict_table_get_n_sys_cols(
/*======================*/
					/* out: number of system (e.g.,
					ROW_ID) columns of a table */
	const dict_table_t*	table __attribute__((unused))) /* in: table */
{
	ulint	n_sys_cols = DATA_N_SYS_COLS;

	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
	ut_ad(table->cached);

	return(n_sys_cols);
}
/************************************************************************
Gets the total number of columns, system columns included, of a table in
the dictionary cache. The value is read from table->n_cols. */
UNIV_INLINE
ulint
dict_table_get_n_cols(
/*==================*/
					/* out: number of columns of a table */
	const dict_table_t*	table)	/* in: table */
{
	ulint	n_cols;

	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	n_cols = table->n_cols;

	return(n_cols);
}
#ifdef UNIV_DEBUG
/************************************************************************
Returns a pointer to the column at the given position in table->cols. This
function is compiled only when UNIV_DEBUG is defined. */
UNIV_INLINE
dict_col_t*
dict_table_get_nth_col(
/*===================*/
					/* out: pointer to column object */
	const dict_table_t*	table,	/* in: table */
	ulint			pos)	/* in: position of column; must be
					less than table->n_def */
{
	dict_col_t*	cols;

	ut_ad(table);
	ut_ad(pos < table->n_def);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	cols = (dict_col_t*) (table->cols);

	return(cols + pos);
}
/************************************************************************
Returns a pointer to one of the system columns of a table. The system
columns are the last DATA_N_SYS_COLS of the table->n_cols columns. This
function is compiled only when UNIV_DEBUG is defined. */
UNIV_INLINE
dict_col_t*
dict_table_get_sys_col(
/*===================*/
					/* out: pointer to column object */
	const dict_table_t*	table,	/* in: table */
	ulint			sys)	/* in: DATA_ROW_ID, ...; must be
					less than DATA_N_SYS_COLS */
{
	ulint		pos;
	dict_col_t*	sys_col;

	ut_ad(table);
	ut_ad(sys < DATA_N_SYS_COLS);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	pos = table->n_cols - DATA_N_SYS_COLS + sys;

	sys_col = dict_table_get_nth_col(table, pos);

	/* Check that the column found really is the requested one */
	ut_ad(sys_col->mtype == DATA_SYS);
	ut_ad(sys_col->prtype == (sys | DATA_NOT_NULL));

	return(sys_col);
}
#endif /* UNIV_DEBUG */
/************************************************************************
Gets the column number of one of the system columns of a table. The
system columns are numbered from table->n_cols - DATA_N_SYS_COLS on. */
UNIV_INLINE
ulint
dict_table_get_sys_col_no(
/*======================*/
					/* out: column number */
	const dict_table_t*	table,	/* in: table */
	ulint			sys)	/* in: DATA_ROW_ID, ...; must be
					less than DATA_N_SYS_COLS */
{
	ulint	first_sys_col_no;

	ut_ad(table);
	ut_ad(sys < DATA_N_SYS_COLS);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	first_sys_col_no = table->n_cols - DATA_N_SYS_COLS;

	return(first_sys_col_no + sys);
}
/************************************************************************
Tests the DICT_TF_COMPACT bit of table->flags to tell whether the table
uses the compact page format. */
UNIV_INLINE
ibool
dict_table_is_comp(
/*===============*/
					/* out: TRUE if table uses the
					compact page format */
	const dict_table_t*	table)	/* in: table */
{
	ibool	is_comp;

	/* The masked flag is returned as such, which requires the flag
	bit to have the value TRUE */
#if DICT_TF_COMPACT != TRUE
#error
#endif
	ut_ad(table);

	is_comp = UNIV_LIKELY(table->flags & DICT_TF_COMPACT);

	return(is_comp);
}
/************************************************************************
Extracts the file format version from table->flags: the bits selected by
DICT_TF_FORMAT_MASK, shifted right by DICT_TF_FORMAT_SHIFT. */
UNIV_INLINE
ulint
dict_table_get_format(
/*==================*/
					/* out: file format version */
	const dict_table_t*	table)	/* in: table */
{
	ulint	format_bits;

	ut_ad(table);

	format_bits = table->flags & DICT_TF_FORMAT_MASK;

	return(format_bits >> DICT_TF_FORMAT_SHIFT);
}
/************************************************************************
Sets the file format of a table. The bits of table->flags selected by
DICT_TF_FORMAT_MASK are cleared and the new version, shifted left by
DICT_TF_FORMAT_SHIFT, is ORed in. The shifted value is not masked. */
UNIV_INLINE
void
dict_table_set_format(
/*==================*/
	dict_table_t*	table,	/* in/out: table */
	ulint		format)	/* in: file format version */
{
	ulint	other_flags;

	ut_ad(table);

	other_flags = table->flags & ~DICT_TF_FORMAT_MASK;

	table->flags = other_flags | (format << DICT_TF_FORMAT_SHIFT);
}
/************************************************************************
Extracts the compressed page size from table flags. If the bits selected
by DICT_TF_ZSSIZE_MASK are all zero, the result is 0. Otherwise the result
is (PAGE_ZIP_MIN_SIZE >> 1) shifted left by the value of those bits. */
UNIV_INLINE
ulint
dict_table_flags_to_zip_size(
/*=========================*/
			/* out: compressed page size,
			or 0 if not compressed */
	ulint	flags)	/* in: flags */
{
	ulint	ssize = flags & DICT_TF_ZSSIZE_MASK;
	ulint	zip_size;

	if (!UNIV_UNLIKELY(ssize)) {
		/* Not compressed */

		return(ssize);
	}

	ssize >>= DICT_TF_ZSSIZE_SHIFT;

	zip_size = ((PAGE_ZIP_MIN_SIZE >> 1) << ssize);

	ut_ad(zip_size <= UNIV_PAGE_SIZE);

	return(zip_size);
}
/************************************************************************
Returns the compressed page size of a table, obtained by passing
table->flags to dict_table_flags_to_zip_size(). */
UNIV_INLINE
ulint
dict_table_zip_size(
/*================*/
					/* out: compressed page size,
					or 0 if not compressed */
	const dict_table_t*	table)	/* in: table */
{
	ulint	zip_size;

	ut_ad(table);

	zip_size = dict_table_flags_to_zip_size(table->flags);

	return(zip_size);
}
/************************************************************************
Gets the number of fields in the internal representation of an index,
fields added by the dictionary system included. The value is read from
index->n_fields. */
UNIV_INLINE
ulint
dict_index_get_n_fields(
/*====================*/
					/* out: number of fields */
	const dict_index_t*	index)	/* in: an internal
					representation of index (in
					the dictionary cache) */
{
	ulint	n_fields;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	n_fields = index->n_fields;

	return(n_fields);
}
/************************************************************************
Gets the number of fields in the internal representation of an index that
uniquely determine the position of an index entry in the index, when
multiversioning is not taken into account. For the B-tree use the value
returned by dict_index_get_n_unique_in_tree instead. The value is read
from index->n_uniq. */
UNIV_INLINE
ulint
dict_index_get_n_unique(
/*====================*/
					/* out: number of fields */
	const dict_index_t*	index)	/* in: an internal representation
					of index (in the dictionary cache) */
{
	ulint	n_uniq;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
	ut_ad(index->cached);

	n_uniq = index->n_uniq;

	return(n_uniq);
}
/************************************************************************
Gets the number of fields in the internal representation of an index which
uniquely determine the position of an index entry in the index, when
multiversioning is taken into account as well. For a clustered index this
is dict_index_get_n_unique(); for any other index it is
dict_index_get_n_fields(). */
UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
/*============================*/
					/* out: number of fields */
	const dict_index_t*	index)	/* in: an internal representation
					of index (in the dictionary cache) */
{
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
	ut_ad(index->cached);

	return(dict_index_is_clust(index)
	       ? dict_index_get_n_unique(index)
	       : dict_index_get_n_fields(index));
}
/************************************************************************
Gets the number of ordering fields that the user defined for the index.
In the internal representation of clustered indexes the row id is added to
the ordering fields to make the index unique; that addition is not counted
here. The value is read from index->n_user_defined_cols. */
UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
/*======================================*/
					/* out: number of fields */
	const dict_index_t*	index)	/* in: an internal representation
					of index (in the dictionary cache) */
{
	ulint	n_user_cols;

	n_user_cols = index->n_user_defined_cols;

	return(n_user_cols);
}
#ifdef UNIV_DEBUG
/************************************************************************
Returns a pointer to the field at the given position in index->fields.
This function is compiled only when UNIV_DEBUG is defined. */
UNIV_INLINE
dict_field_t*
dict_index_get_nth_field(
/*=====================*/
					/* out: pointer to field object */
	const dict_index_t*	index,	/* in: index */
	ulint			pos)	/* in: position of field; must be
					less than index->n_def */
{
	dict_field_t*	fields;

	ut_ad(index);
	ut_ad(pos < index->n_def);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	fields = (dict_field_t*) (index->fields);

	return(fields + pos);
}
#endif /* UNIV_DEBUG */
/************************************************************************
Returns the position of a system column in an index. A clustered index is
searched with dict_col_get_clust_pos(), any other index with
dict_index_get_nth_col_pos(). */
UNIV_INLINE
ulint
dict_index_get_sys_col_pos(
/*=======================*/
					/* out: position,
					ULINT_UNDEFINED if not contained */
	const dict_index_t*	index,	/* in: index; must not have the
					DICT_UNIVERSAL type bit set */
	ulint			type)	/* in: DATA_ROW_ID, ... */
{
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
	ut_ad(!(index->type & DICT_UNIVERSAL));

	if (!dict_index_is_clust(index)) {
		/* Look the system column up by its column number */

		return(dict_index_get_nth_col_pos(
			       index,
			       dict_table_get_sys_col_no(index->table,
							 type)));
	}

	/* Look the system column up by its column object */

	return(dict_col_get_clust_pos(
		       dict_table_get_sys_col(index->table, type),
		       index));
}
/*************************************************************************
Gets the column of an index field, stored in the member field->col. */
UNIV_INLINE
const dict_col_t*
dict_field_get_col(
/*===============*/
					/* out: column */
	const dict_field_t*	field)	/* in: index field */
{
	const dict_col_t*	col;

	ut_ad(field);

	col = field->col;

	return(col);
}
/************************************************************************
Gets a pointer to the column of the nth field of an index. */
UNIV_INLINE
const dict_col_t*
dict_index_get_nth_col(
/*===================*/
					/* out: column */
	const dict_index_t*	index,	/* in: index */
	ulint			pos)	/* in: position of the field */
{
	const dict_field_t*	field;

	field = dict_index_get_nth_field(index, pos);

	return(dict_field_get_col(field));
}
/************************************************************************
Gets the column number of the column of the nth field of an index. */
UNIV_INLINE
ulint
dict_index_get_nth_col_no(
/*======================*/
					/* out: column number */
	const dict_index_t*	index,	/* in: index */
	ulint			pos)	/* in: position of the field */
{
	const dict_col_t*	col;

	col = dict_index_get_nth_col(index, pos);

	return(dict_col_get_no(col));
}
/************************************************************************
Returns the minimum data size of an index record: the sum of
dict_col_get_min_size() over the columns of all fields of the index. */
UNIV_INLINE
ulint
dict_index_get_min_size(
/*====================*/
					/* out: minimum data size in bytes */
	const dict_index_t*	index)	/* in: index */
{
	ulint	total = 0;
	ulint	i;

	/* Walk the fields from the last one down to field 0 */
	for (i = dict_index_get_n_fields(index); i > 0; ) {
		i--;

		total += dict_col_get_min_size(
			dict_index_get_nth_col(index, i));
	}

	return(total);
}
/*************************************************************************
Gets the space id of the root of the index tree, stored in index->space. */
UNIV_INLINE
ulint
dict_index_get_space(
/*=================*/
					/* out: space id */
	const dict_index_t*	index)	/* in: index */
{
	ulint	space_id;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	space_id = index->space;

	return(space_id);
}
/*************************************************************************
Stores the space id of the root of the index tree in index->space. Only
the in-memory index object is changed by this function. */
UNIV_INLINE
void
dict_index_set_space(
/*=================*/
	dict_index_t*	index,	/* in/out: index */
	ulint		space)	/* in: space id */
{
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	index->space = space;
}
/*************************************************************************
Gets the page number of the root of the index tree, stored in
index->page. */
UNIV_INLINE
ulint
dict_index_get_page(
/*================*/
					/* out: page number */
	const dict_index_t*	index)	/* in: index */
{
	ulint	page_no;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	page_no = index->page;

	return(page_no);
}
/*************************************************************************
Stores the page number of the root of the index tree in index->page. Only
the in-memory index object is changed by this function. */
UNIV_INLINE
void
dict_index_set_page(
/*================*/
	dict_index_t*	index,	/* in/out: index */
	ulint		page)	/* in: page number */
{
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	index->page = page;
}
/*************************************************************************
Gets the type of the index tree, stored in index->type. The individual
type bits (DICT_CLUSTERED, DICT_UNIQUE, DICT_IBUF, ...) can be tested in
the returned value. */
UNIV_INLINE
ulint
dict_index_get_type(
/*================*/
					/* out: type */
	const dict_index_t*	index)	/* in: index */
{
	ulint	type;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	type = index->type;

	return(type);
}
/*************************************************************************
Gets the read-write lock of the index tree, that is, the address of the
member index->lock. */
UNIV_INLINE
rw_lock_t*
dict_index_get_lock(
/*================*/
				/* out: read-write lock */
	dict_index_t*	index)	/* in: index */
{
	rw_lock_t*	lock;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	lock = &(index->lock);

	return(lock);
}
/************************************************************************
Returns the free space reserved on a page for future updates of records:
one sixteenth of UNIV_PAGE_SIZE. This is relevant only in the case of many
consecutive inserts, as updates which make the records bigger might
fragment the index. */
UNIV_INLINE
ulint
dict_index_get_space_reserve(void)
/*==============================*/
				/* out: number of free bytes on page,
				reserved for updates */
{
	ulint	reserve;

	reserve = UNIV_PAGE_SIZE / 16;

	return(reserve);
}
/**************************************************************************
Looks a table up by name in the hash table dict_sys->table_hash. Nothing
is loaded if the table is not found there. The caller must own
dict_sys->mutex. */
UNIV_INLINE
dict_table_t*
dict_table_check_if_in_cache_low(
/*=============================*/
					/* out: table, NULL if not found */
	const char*	table_name)	/* in: table name */
{
	ulint		fold;
	dict_table_t*	table;

	ut_ad(table_name);
	ut_ad(mutex_own(&(dict_sys->mutex)));

	/* The hash table is keyed on the fold of the table name */
	fold = ut_fold_string(table_name);

	HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
		    dict_table_t*, table,
		    strcmp(table->name, table_name) == 0);

	return(table);
}
/**************************************************************************
Low-level table lookup by name: returns the table from the dictionary cache
if it is there, and otherwise asks dict_load_table() to load it. */
UNIV_INLINE
dict_table_t*
dict_table_get_low(
/*===============*/
					/* out: table, NULL if not found */
	const char*	table_name)	/* in: table name */
{
	dict_table_t*	cached_table;

	ut_ad(table_name);
	ut_ad(mutex_own(&(dict_sys->mutex)));

	cached_table = dict_table_check_if_in_cache_low(table_name);

	if (cached_table != NULL) {

		return(cached_table);
	}

	/* Not in the cache: the result of the load is returned as is. */
	return(dict_load_table(table_name));
}
/**************************************************************************
Returns a table object based on table id: the table is looked up in the
dictionary cache and, if it is not there, dict_load_table_on_id() is asked
to load it. */
UNIV_INLINE
dict_table_t*
dict_table_get_on_id_low(
/*=====================*/
				/* out: table, NULL if does not exist */
	dulint	table_id)	/* in: table id */
{
	dict_table_t*	table;
	ulint		id_fold;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	/* Look for the table id in the id hash table */
	id_fold = ut_fold_dulint(table_id);

	HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold,
		    dict_table_t*, table,
		    ut_dulint_cmp(table->id, table_id) == 0);

	/* TODO: should get the type information from MySQL */

	if (table != NULL) {

		return(table);
	}

	return(dict_load_table_on_id(table_id));
}

103
include/dict0load.h Normal file
View File

@@ -0,0 +1,103 @@
/******************************************************
Loads to the memory cache database object definitions
from dictionary tables
(c) 1996 Innobase Oy
Created 4/24/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0load_h
#define dict0load_h
#include "univ.i"
#include "dict0types.h"
#include "ut0byte.h"
#include "mem0mem.h"
/************************************************************************
In a crash recovery we already have all the tablespace objects created.
This function compares the space id information in the InnoDB data dictionary
to what we already read with fil_load_single_table_tablespaces().
In a normal startup, we create the tablespace objects for every table in
InnoDB's data dictionary, if the corresponding .ibd file exists.
We also look for the biggest space id, and store it to fil_system. */
UNIV_INTERN
void
dict_check_tablespaces_and_store_max_id(
/*====================================*/
ibool in_crash_recovery); /* in: TRUE if we are doing a crash
recovery */
/************************************************************************
Finds the first table name in the given database. */
UNIV_INTERN
char*
dict_get_first_table_name_in_db(
/*============================*/
/* out, own: table name, NULL if
does not exist; the caller must free
the memory in the string! */
const char* name); /* in: database name which ends in '/' */
/************************************************************************
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
a foreign key references columns in this table.
dict_table_get_low() calls this function when the table is not found in
the dictionary cache; that caller asserts that dict_sys->mutex is owned. */
UNIV_INTERN
dict_table_t*
dict_load_table(
/*============*/
/* out: table, NULL if does not exist;
if the table is stored in an .ibd file,
but the file does not exist,
then we set the ibd_file_missing flag TRUE
in the table object we return */
const char* name); /* in: table name in the
databasename/tablename format */
/***************************************************************************
Loads a table object based on the table id.
dict_table_get_on_id_low() calls this function when the id is not found in
the dictionary cache; that caller asserts that dict_sys->mutex is owned. */
UNIV_INTERN
dict_table_t*
dict_load_table_on_id(
/*==================*/
/* out: table; NULL if table does not exist */
dulint table_id); /* in: table id */
/************************************************************************
This function is called when the database is booted.
Loads the index definitions of a system table, except for the clustered
index, which is added to the dictionary cache at booting before this
function is called. */
UNIV_INTERN
void
dict_load_sys_table(
/*================*/
dict_table_t* table); /* in: system table */
#ifndef UNIV_HOTBACKUP
/***************************************************************************
Loads foreign key constraints where the table is either the foreign key
holder or where the table is referenced by a foreign key. Adds these
constraints to the data dictionary. Note that we know that the dictionary
cache already contains all constraints where the other relevant table is
already in the dictionary cache.
This declaration is compiled only when UNIV_HOTBACKUP is not defined. */
UNIV_INTERN
ulint
dict_load_foreigns(
/*===============*/
/* out: DB_SUCCESS or error code */
const char* table_name, /* in: table name */
ibool check_charsets);/* in: TRUE=check charset
compatibility */
#endif /* !UNIV_HOTBACKUP */
/************************************************************************
Prints to the standard output information on all tables found in the data
dictionary system table. Takes no arguments and returns nothing. */
UNIV_INTERN
void
dict_print(void);
/*============*/
#ifndef UNIV_NONINL
#include "dict0load.ic"
#endif
#endif

9
include/dict0load.ic Normal file
View File

@@ -0,0 +1,9 @@
/******************************************************
Loads to the memory cache database object definitions
from dictionary tables
(c) 1996 Innobase Oy
Created 4/24/1996 Heikki Tuuri
*******************************************************/

475
include/dict0mem.h Normal file
View File

@@ -0,0 +1,475 @@
/******************************************************
Data dictionary memory object creation
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0mem_h
#define dict0mem_h
#include "univ.i"
#include "dict0types.h"
#include "data0type.h"
#include "data0data.h"
#include "mem0mem.h"
#include "rem0types.h"
#include "btr0types.h"
#include "ut0mem.h"
#include "ut0lst.h"
#include "ut0rnd.h"
#include "ut0byte.h"
#include "sync0rw.h"
#include "lock0types.h"
#include "hash0hash.h"
#include "que0types.h"
/* Type flags of an index: OR'ing of the flags is allowed to define a
combination of types. Each flag is a distinct bit; the value is kept in
the 4-bit field dict_index_struct::type. */
#define DICT_CLUSTERED 1 /* clustered index */
#define DICT_UNIQUE 2 /* unique index */
#define DICT_UNIVERSAL 4 /* index which can contain records from any
other index */
#define DICT_IBUF 8 /* insert buffer tree */
/* Types for a table object */
#define DICT_TABLE_ORDINARY 1
#if 0 /* not implemented; the cluster table types below are compiled out */
#define DICT_TABLE_CLUSTER_MEMBER 2
#define DICT_TABLE_CLUSTER 3 /* this means that the table is
really a cluster definition */
#endif
/* Table flags. All unused bits must be 0. The flags are kept in the
bit-field dict_table_struct::flags, which is DICT_TF_BITS wide. */
#define DICT_TF_COMPACT 1 /* Compact page format.
This must be set for
new file formats
(later than
DICT_TF_FORMAT_51). */
/* compressed page size (0=uncompressed, up to 15 compressed sizes);
stored in the 4 bits above DICT_TF_COMPACT */
#define DICT_TF_ZSSIZE_SHIFT 1
#define DICT_TF_ZSSIZE_MASK (15 << DICT_TF_ZSSIZE_SHIFT)
#define DICT_TF_ZSSIZE_MAX (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 1)
/* NOTE(review): DICT_TF_FORMAT_MASK below covers 7 bits starting at bit
DICT_TF_FORMAT_SHIFT, while DICT_TF_BITS leaves only
DICT_TF_BITS - DICT_TF_FORMAT_SHIFT = 1 bit for the file format in
dict_table_struct::flags. Confirm that the wider mask is intentional. */
#define DICT_TF_FORMAT_SHIFT 5 /* file format */
#define DICT_TF_FORMAT_MASK (127 << DICT_TF_FORMAT_SHIFT)
#define DICT_TF_FORMAT_51 0 /* InnoDB/MySQL up to 5.1 */
#define DICT_TF_FORMAT_ZIP 1 /* InnoDB plugin for 5.1:
compressed tables,
new BLOB treatment */
#define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP
#define DICT_TF_BITS 6 /* number of flag bits */
/* Compile-time check: the bits available for the file format must be able
to represent DICT_TF_FORMAT_MAX. */
#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
# error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX"
#endif
/**************************************************************************
Creates a table memory object. */
UNIV_INTERN
dict_table_t*
dict_mem_table_create(
/*==================*/
/* out, own: table object */
const char* name, /* in: table name */
ulint space, /* in: space where the clustered index
of the table is placed; this parameter
is ignored if the table is made
a member of a cluster */
ulint n_cols, /* in: number of columns */
ulint flags); /* in: table flags (DICT_TF_COMPACT, ...) */
/********************************************************************
Frees a table memory object. */
UNIV_INTERN
void
dict_mem_table_free(
/*================*/
dict_table_t* table); /* in: table object to free */
/**************************************************************************
Adds a column definition to a table. */
UNIV_INTERN
void
dict_mem_table_add_col(
/*===================*/
dict_table_t* table, /* in: table */
mem_heap_t* heap, /* in: temporary memory heap, or NULL;
presumably used for table->col_names until
all columns are defined (see the col_names
comment in dict_table_struct) -- confirm */
const char* name, /* in: column name, or NULL */
ulint mtype, /* in: main datatype */
ulint prtype, /* in: precise type */
ulint len); /* in: precision */
/**************************************************************************
Creates an index memory object. */
UNIV_INTERN
dict_index_t*
dict_mem_index_create(
/*==================*/
/* out, own: index object */
const char* table_name, /* in: table name */
const char* index_name, /* in: index name */
ulint space, /* in: space where the index tree is
placed, ignored if the index is of
the clustered type */
ulint type, /* in: index type flags: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed together */
ulint n_fields); /* in: number of fields */
/**************************************************************************
Adds a field definition to an index. NOTE: does not take a copy
of the column name if the field is a column. The memory occupied
by the column name may be released only after publishing the index. */
UNIV_INTERN
void
dict_mem_index_add_field(
/*=====================*/
dict_index_t* index, /* in: index */
const char* name, /* in: column name; the pointer itself is
stored, see the NOTE above */
ulint prefix_len); /* in: 0 or the column prefix length
in a MySQL index like
INDEX (textcol(25)) */
/**************************************************************************
Frees an index memory object. */
UNIV_INTERN
void
dict_mem_index_free(
/*================*/
dict_index_t* index); /* in: index object to free */
/**************************************************************************
Creates and initializes a foreign constraint memory object. According to
the comment on dict_foreign_struct, most fields are initialized to 0, NULL
or FALSE. */
UNIV_INTERN
dict_foreign_t*
dict_mem_foreign_create(void);
/*=========================*/
/* out, own: foreign constraint struct */
/* Data structure for a column in a table. All members are bit-fields:
mtype and prtype together take 32 bits, and the remaining members
(16 + 2 + 3 + 10 + 1) take another 32 bits. */
struct dict_col_struct{
/*----------------------*/
/* The following are copied from dtype_t,
so that all bit-fields can be packed tightly. */
unsigned mtype:8; /* main data type */
unsigned prtype:24; /* precise type; MySQL data
type, charset code, flags to
indicate nullability,
signedness, whether this is a
binary string, whether this is
a true VARCHAR where MySQL
uses 2 bytes to store the length */
/* the remaining fields do not affect alphabetical ordering: */
unsigned len:16; /* length; for MySQL data this
is field->pack_length(),
except that for a >= 5.0.3
type true VARCHAR this is the
maximum byte length of the
string data (in addition to
the string, MySQL uses 1 or 2
bytes to store the string length) */
unsigned mbminlen:2; /* minimum length of a
character, in bytes */
unsigned mbmaxlen:3; /* maximum length of a
character, in bytes */
/*----------------------*/
/* End of definitions copied from dtype_t */
unsigned ind:10; /* table column position
(starting from 0); 10 bits, so
positions 0..1023 can be stored */
unsigned ord_part:1; /* nonzero if this column
appears in the ordering fields
of an index */
};
/* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
indexed column length (or indexed prefix length). It is set to 3*256,
so that one can create a column prefix index on 256 characters of a
TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
a character may take at most 3 bytes.
This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk!
The value itself comes from REC_MAX_INDEX_COL_LEN, which is defined
elsewhere. */
#define DICT_MAX_INDEX_COL_LEN REC_MAX_INDEX_COL_LEN
/* Data structure for a field in an index */
struct dict_field_struct{
dict_col_t* col; /* pointer to the table column */
const char* name; /* name of the column */
unsigned prefix_len:10; /* 0 or the length of the column
prefix in bytes in a MySQL index of
type, e.g., INDEX (textcol(25));
must be smaller than
DICT_MAX_INDEX_COL_LEN; NOTE that
in the UTF-8 charset, MySQL sets this
to 3 * the prefix len in UTF-8 chars */
unsigned fixed_len:10; /* 0 or the fixed length of the
column if smaller than
DICT_MAX_INDEX_COL_LEN */
};
/* Data structure for an index. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_index_create(). */
struct dict_index_struct{
dulint id; /* id of the index */
mem_heap_t* heap; /* memory heap */
const char* name; /* index name */
const char* table_name; /* table name */
dict_table_t* table; /* back pointer to table */
unsigned space:32;
/* space where the index tree is placed */
unsigned page:32;/* index tree root page number */
unsigned type:4; /* index type: an OR of the flags DICT_CLUSTERED,
DICT_UNIQUE, DICT_UNIVERSAL, DICT_IBUF */
unsigned trx_id_offset:10;/* position of the trx id column
in a clustered index record, if the fields
before it are known to be of a fixed size,
0 otherwise */
unsigned n_user_defined_cols:10;
/* number of columns the user defined to
be in the index: in the internal
representation we add more columns */
unsigned n_uniq:10;/* number of fields from the beginning
which are enough to determine an index
entry uniquely */
unsigned n_def:10;/* number of fields defined so far */
unsigned n_fields:10;/* number of fields in the index */
unsigned n_nullable:10;/* number of nullable fields */
unsigned cached:1;/* TRUE if the index object is in the
dictionary cache */
unsigned to_be_dropped:1;
/* TRUE if this index is marked to be
dropped in ha_innobase::prepare_drop_index(),
otherwise FALSE */
dict_field_t* fields; /* array of field descriptions */
UT_LIST_NODE_T(dict_index_t)
indexes;/* list of indexes of the table */
btr_search_t* search_info; /* info used in optimistic searches */
/*----------------------*/
ib_int64_t* stat_n_diff_key_vals;
/* approximate number of different key values
for this index, for each n-column prefix
where n <= dict_get_n_unique(index); we
periodically calculate new estimates */
ulint stat_index_size;
/* approximate index size in database pages */
ulint stat_n_leaf_pages;
/* approximate number of leaf pages in the
index tree */
rw_lock_t lock; /* read-write lock protecting the upper levels
of the index tree; dict_index_get_lock()
returns its address */
#ifdef ROW_MERGE_IS_INDEX_USABLE
dulint trx_id; /* id of the transaction that created this
index, or ut_dulint_zero if the index existed
when InnoDB was started up */
#endif /* ROW_MERGE_IS_INDEX_USABLE */
#ifdef UNIV_DEBUG
ulint magic_n;/* magic number; the dict_index_get_...()
accessors assert that it equals
DICT_INDEX_MAGIC_N */
# define DICT_INDEX_MAGIC_N 76789786
#endif
};
/* Data structure for a foreign key constraint; an example:
FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be
initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
struct dict_foreign_struct{
mem_heap_t* heap; /* this object is allocated from
this memory heap */
char* id; /* id of the constraint as a
null-terminated string */
unsigned n_fields:10; /* number of indexes' first fields
for which the foreign key
constraint is defined: we allow the
indexes to contain more fields than
mentioned in the constraint, as long
as the first fields are as mentioned */
unsigned type:6; /* 0 or an OR of the
DICT_FOREIGN_ON_... flags defined
after this struct, e.g.
DICT_FOREIGN_ON_DELETE_CASCADE or
DICT_FOREIGN_ON_DELETE_SET_NULL */
char* foreign_table_name;/* foreign table name */
dict_table_t* foreign_table; /* table where the foreign key is */
const char** foreign_col_names;/* names of the columns in the
foreign key */
char* referenced_table_name;/* referenced table name */
dict_table_t* referenced_table;/* table where the referenced key
is */
const char** referenced_col_names;/* names of the referenced
columns in the referenced table */
dict_index_t* foreign_index; /* foreign index; we require that
both tables contain explicitly defined
indexes for the constraint: InnoDB
does not generate new indexes
implicitly */
dict_index_t* referenced_index;/* referenced index */
UT_LIST_NODE_T(dict_foreign_t)
foreign_list; /* list node for foreign keys of the
table */
UT_LIST_NODE_T(dict_foreign_t)
referenced_list;/* list node for referenced keys of the
table */
};
/* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
a foreign key constraint is enforced, therefore RESTRICT just means no flag.
The six flags are distinct bits and fit in the 6-bit field
dict_foreign_struct::type. */
#define DICT_FOREIGN_ON_DELETE_CASCADE 1
#define DICT_FOREIGN_ON_DELETE_SET_NULL 2
#define DICT_FOREIGN_ON_UPDATE_CASCADE 4
#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8
#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16
#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32
/* Data structure for a database table. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_table_create(). */
struct dict_table_struct{
dulint id; /* id of the table */
mem_heap_t* heap; /* memory heap */
const char* name; /* table name */
const char* dir_path_of_temp_table;/* NULL or the directory path
where a TEMPORARY table that was explicitly
created by a user should be placed if
innodb_file_per_table is defined in my.cnf;
in Unix this is usually /tmp/..., in Windows
\temp\... */
unsigned space:32;
/* space where the clustered index of the
table is placed */
unsigned flags:DICT_TF_BITS;/* DICT_TF_COMPACT, ... */
unsigned ibd_file_missing:1;
/* TRUE if this is in a single-table
tablespace and the .ibd file is missing; then
we must return in ha_innodb.cc an error if the
user tries to query such an orphaned table */
unsigned tablespace_discarded:1;
/* this flag is set TRUE when the user
calls DISCARD TABLESPACE on this
table, and reset to FALSE in IMPORT
TABLESPACE */
unsigned cached:1;/* TRUE if the table object has been added
to the dictionary cache */
unsigned n_def:10;/* number of columns defined so far */
unsigned n_cols:10;/* number of columns */
dict_col_t* cols; /* array of column descriptions */
const char* col_names;
/* Column names packed in a character string
"name1\0name2\0...nameN\0". Until
the string contains n_cols, it will be
allocated from a temporary heap. The final
string will be allocated from table->heap. */
hash_node_t name_hash; /* hash chain node; used by the name lookup
in dict_table_check_if_in_cache_low() */
hash_node_t id_hash; /* hash chain node; used by the id lookup
in dict_table_get_on_id_low() */
UT_LIST_BASE_NODE_T(dict_index_t)
indexes; /* list of indexes of the table */
UT_LIST_BASE_NODE_T(dict_foreign_t)
foreign_list;/* list of foreign key constraints
in the table; these refer to columns
in other tables */
UT_LIST_BASE_NODE_T(dict_foreign_t)
referenced_list;/* list of foreign key constraints
which refer to this table */
UT_LIST_NODE_T(dict_table_t)
table_LRU; /* node of the LRU list of tables */
ulint n_mysql_handles_opened;
/* count of how many handles MySQL has opened
to this table; dropping of the table is
NOT allowed until this count gets to zero;
MySQL does NOT itself check the number of
open handles at drop */
ulint n_foreign_key_checks_running;
/* count of how many foreign key check
operations are currently being performed
on the table: we cannot drop the table while
there are foreign key checks running on
it! */
lock_t* auto_inc_lock;/* a buffer for an auto-inc lock
for this table: we allocate the memory here
so that individual transactions can get it
and release it without a need to allocate
space from the lock heap of the trx:
otherwise the lock heap would grow rapidly
if we do a large insert from a select */
dulint query_cache_inv_trx_id;
/* transactions whose trx id is smaller than
this number are not allowed to store to the
MySQL query cache or retrieve from it; when a
trx with undo logs commits, it sets this to
the value of the trx id counter for the tables
it had an IX lock on */
UT_LIST_BASE_NODE_T(lock_t)
locks; /* list of locks on the table */
#ifdef UNIV_DEBUG
/*----------------------*/
ibool does_not_fit_in_memory;
/* this field is used to specify in simulations
tables which are so big that disk should be
accessed: disk access is simulated by
putting the thread to sleep for a while;
NOTE that this flag is not stored to the data
dictionary on disk, and the database will
forget about value TRUE if it has to reload
the table definition from disk */
#endif /* UNIV_DEBUG */
/*----------------------*/
unsigned big_rows:1;
/* flag: TRUE if the maximum length of
a single row exceeds BIG_ROW_SIZE;
initialized in dict_table_add_to_cache() */
unsigned stat_initialized:1; /* TRUE if statistics have
been calculated the first time
after database startup or table creation */
ib_int64_t stat_n_rows;
/* approximate number of rows in the table;
we periodically calculate new estimates */
ulint stat_clustered_index_size;
/* approximate clustered index size in
database pages */
ulint stat_sum_of_other_index_sizes;
/* other indexes in database pages */
ulint stat_modified_counter;
/* when a row is inserted, updated, or deleted,
we add 1 to this number; we calculate new
estimates for the stat_... values for the
table and the indexes at an interval of 2 GB
or when about 1 / 16 of table has been
modified; also when the estimate operation is
called for MySQL SHOW TABLE STATUS; the
counter is reset to zero at statistics
calculation; this counter is not protected by
any latch, because this is only used for
heuristics */
/*----------------------*/
mutex_t autoinc_mutex;
/* mutex protecting the autoincrement
counter */
ibool autoinc_inited;
/* TRUE if the autoinc counter has been
inited; MySQL gets the init value by executing
SELECT MAX(auto inc column) */
ib_uint64_t autoinc;/* autoinc counter value to give to the
next inserted row */
ib_int64_t autoinc_increment;
/* The increment step of the auto increment
column. Value must be greater than or equal
to 1 */
/*----------------------*/
ulong n_waiting_or_granted_auto_inc_locks;
/* This counter is used to track the number
of granted and pending autoinc locks on this
table. This value is set after acquiring the
kernel mutex but we peek the contents to
determine whether other transactions have
acquired the AUTOINC lock or not. Of course
only one transaction can be granted the
lock but there can be multiple waiters. */
#ifdef UNIV_DEBUG
ulint magic_n;/* magic number; expected to equal
DICT_TABLE_MAGIC_N */
# define DICT_TABLE_MAGIC_N 76333786
#endif /* UNIV_DEBUG */
};
#ifndef UNIV_NONINL
#include "dict0mem.ic"
#endif
#endif

9
include/dict0mem.ic Normal file
View File

@@ -0,0 +1,9 @@
/**********************************************************************
Data dictionary memory object creation
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
***********************************************************************/

29
include/dict0types.h Normal file
View File

@@ -0,0 +1,29 @@
/******************************************************
Data dictionary global types
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0types_h
#define dict0types_h
#include "ut0list.h"
/* Short type names for the data dictionary structs. */
typedef struct dict_sys_struct dict_sys_t;
typedef struct dict_col_struct dict_col_t;
typedef struct dict_field_struct dict_field_t;
typedef struct dict_index_struct dict_index_t;
typedef struct dict_table_struct dict_table_t;
typedef struct dict_foreign_struct dict_foreign_t;
/* A cluster object is a table object with the type field set to
DICT_CLUSTERED */
/* NOTE(review): dict0mem.h defines DICT_CLUSTERED as an index type flag,
and the cluster table types there are disabled with #if 0; confirm whether
the comment above is still accurate. */
typedef dict_table_t dict_cluster_t;
typedef struct ind_node_struct ind_node_t;
typedef struct tab_node_struct tab_node_t;
#endif

166
include/dyn0dyn.h Normal file
View File

@@ -0,0 +1,166 @@
/******************************************************
The dynamically allocated array
(c) 1996 Innobase Oy
Created 2/5/1996 Heikki Tuuri
*******************************************************/
#ifndef dyn0dyn_h
#define dyn0dyn_h
#include "univ.i"
#include "ut0lst.h"
#include "mem0mem.h"
/* A dyn array is represented by its first block, so both names refer to
the same struct type. */
typedef struct dyn_block_struct dyn_block_t;
typedef dyn_block_t dyn_array_t;
/* This is the initial 'payload' size of a dynamic array;
this must be > MLOG_BUF_MARGIN + 30! It is also the size of the data
buffer in every dyn_block_struct. */
#define DYN_ARRAY_DATA_SIZE 512
/*************************************************************************
Initializes a dynamic array. The implementation in dyn0dyn.ic sets the heap
pointer to NULL and the used byte count to 0. */
UNIV_INLINE
dyn_array_t*
dyn_array_create(
/*=============*/
/* out: initialized dyn array; the same
pointer as arr */
dyn_array_t* arr); /* in: pointer to a memory buffer of
size sizeof(dyn_array_t) */
/****************************************************************
Frees a dynamic array. The implementation in dyn0dyn.ic frees arr->heap
if it is not NULL; it does not free the dyn_array_t buffer itself. */
UNIV_INLINE
void
dyn_array_free(
/*===========*/
dyn_array_t* arr); /* in: dyn array */
/*************************************************************************
Makes room on top of a dyn array and returns a pointer to a buffer in it.
After copying the elements, the caller must close the buffer using
dyn_array_close. */
UNIV_INLINE
byte*
dyn_array_open(
/*===========*/
/* out: pointer to the buffer */
dyn_array_t* arr, /* in: dynamic array */
ulint size); /* in: size in bytes of the buffer; MUST be
smaller than DYN_ARRAY_DATA_SIZE!
NOTE(review): the implementation asserts
size <= DYN_ARRAY_DATA_SIZE and size != 0 */
/*************************************************************************
Closes the buffer returned by dyn_array_open. */
UNIV_INLINE
void
dyn_array_close(
/*============*/
dyn_array_t* arr, /* in: dynamic array */
byte* ptr); /* in: end of the used part of the buffer:
buffer space from ptr up was not used */
/*************************************************************************
Makes room on top of a dyn array and returns a pointer to
the added element. The caller must copy the element to
the pointer returned. */
UNIV_INLINE
void*
dyn_array_push(
/*===========*/
/* out: pointer to the element */
dyn_array_t* arr, /* in: dynamic array */
ulint size); /* in: size in bytes of the element; the
implementation asserts that this is nonzero
and at most DYN_ARRAY_DATA_SIZE */
/****************************************************************
Returns pointer to an element in dyn array. */
UNIV_INLINE
void*
dyn_array_get_element(
/*==================*/
/* out: pointer to element */
dyn_array_t* arr, /* in: dyn array */
ulint pos); /* in: position of element as a byte
offset from array start */
/****************************************************************
Returns the size of the data stored in a dyn array. */
UNIV_INLINE
ulint
dyn_array_get_data_size(
/*====================*/
/* out: data size in bytes */
dyn_array_t* arr); /* in: dyn array */
/****************************************************************
Gets the first block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_first_block(
/*======================*/
/* out: first block; the implementation
returns arr itself */
dyn_array_t* arr); /* in: dyn array */
/****************************************************************
Gets the last block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_last_block(
/*=====================*/
/* out: last block; arr itself when
arr->heap is NULL */
dyn_array_t* arr); /* in: dyn array */
/************************************************************************
Gets the next block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_next_block(
/*=====================*/
/* out: pointer to next, NULL if end of list */
dyn_array_t* arr, /* in: dyn array */
dyn_block_t* block); /* in: dyn array block whose successor
is wanted */
/************************************************************************
Gets the number of used bytes in a dyn array block. */
UNIV_INLINE
ulint
dyn_block_get_used(
/*===============*/
/* out: number of bytes used; the
implementation masks DYN_BLOCK_FULL_FLAG
out of block->used */
dyn_block_t* block); /* in: dyn array block */
/************************************************************************
Gets pointer to the start of data in a dyn array block. */
UNIV_INLINE
byte*
dyn_block_get_data(
/*===============*/
/* out: pointer to data, i.e. block->data */
dyn_block_t* block); /* in: dyn array block */
/************************************************************
Pushes len bytes to a dyn array. */
UNIV_INLINE
void
dyn_push_string(
/*============*/
dyn_array_t* arr, /* in: dyn array */
const byte* str, /* in: string to write */
ulint len); /* in: string length in bytes */
/*#################################################################*/
/* NOTE! Do not use the fields of the struct directly: the definition
appears here only for the compiler to know its size! */
struct dyn_block_struct{
mem_heap_t* heap; /* in the first block this is != NULL
if dynamic allocation has been needed */
ulint used; /* number of data bytes used in this block;
dyn_block_get_used() masks DYN_BLOCK_FULL_FLAG
out of this value */
byte data[DYN_ARRAY_DATA_SIZE];
/* storage for array elements */
UT_LIST_BASE_NODE_T(dyn_block_t) base;
/* linear list of dyn blocks: this node is
used only in the first block */
UT_LIST_NODE_T(dyn_block_t) list;
/* linear list node: used in all blocks */
#ifdef UNIV_DEBUG
ulint buf_end;/* only in the debug version: if dyn array is
opened, this is the buffer end offset, else
this is 0 */
ulint magic_n;/* only in the debug version: set to
DYN_BLOCK_MAGIC_N by dyn_array_create() and
to 0 by dyn_array_free() */
#endif
};
#ifndef UNIV_NONINL
#include "dyn0dyn.ic"
#endif
#endif

346
include/dyn0dyn.ic Normal file
View File

@@ -0,0 +1,346 @@
/******************************************************
The dynamically allocated array
(c) 1996 Innobase Oy
Created 2/5/1996 Heikki Tuuri
*******************************************************/
/* Value stored in dyn_block_t::magic_n by dyn_array_create() in debug
builds */
#define DYN_BLOCK_MAGIC_N 375767
/* Flag bit kept in dyn_block_t::used; dyn_block_get_used() masks it off.
It must be greater than DYN_ARRAY_DATA_SIZE, which dyn_array_create()
checks at compile time. */
#define DYN_BLOCK_FULL_FLAG 0x1000000UL
/****************************************************************
Adds a new block to a dyn array. dyn_array_push() and dyn_array_open() call
this when the requested size does not fit in the last block. */
UNIV_INTERN
dyn_block_t*
dyn_array_add_block(
/*================*/
/* out: created block */
dyn_array_t* arr); /* in: dyn array */
/****************************************************************
Returns the first block of a dyn array, which is the array object itself. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_first_block(
/*======================*/
				/* out: first block */
	dyn_array_t*	arr)	/* in: dyn array */
{
	dyn_block_t*	first_block = arr;

	return(first_block);
}
/****************************************************************
Returns the last block of a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_last_block(
/*=====================*/
				/* out: last block */
	dyn_array_t*	arr)	/* in: dyn array */
{
	if (arr->heap != NULL) {
		/* A heap has been allocated: take the last node of the
		block list based in the first block. */
		return(UT_LIST_GET_LAST(arr->base));
	}

	/* No heap: the array itself is returned as the last block. */
	return(arr);
}
/************************************************************************
Returns the block that follows the given block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_next_block(
/*=====================*/
				/* out: pointer to next, NULL if end
				of list */
	dyn_array_t*	arr,	/* in: dyn array */
	dyn_block_t*	block)	/* in: dyn array block */
{
	ut_ad(arr && block);

	if (arr->heap != NULL) {
		/* Follow the list node of the block. */
		return(UT_LIST_GET_NEXT(list, block));
	}

	/* Without a heap the only valid block is the array itself,
	and it has no successor. */
	ut_ad(arr == block);

	return(NULL);
}
/************************************************************************
Returns the number of bytes used in a dyn array block, with
DYN_BLOCK_FULL_FLAG masked out of the stored value. */
UNIV_INLINE
ulint
dyn_block_get_used(
/*===============*/
				/* out: number of bytes used */
	dyn_block_t*	block)	/* in: dyn array block */
{
	ulint	used_bytes;

	ut_ad(block);

	used_bytes = block->used & ~DYN_BLOCK_FULL_FLAG;

	return(used_bytes);
}
/************************************************************************
Returns a pointer to the first byte of the data buffer of a dyn array
block. */
UNIV_INLINE
byte*
dyn_block_get_data(
/*===============*/
				/* out: pointer to data */
	dyn_block_t*	block)	/* in: dyn array block */
{
	byte*	data_start;

	ut_ad(block);

	data_start = block->data;

	return(data_start);
}
/*************************************************************************
Initializes a dynamic array in a caller-supplied buffer: no heap, no bytes
used. */
UNIV_INLINE
dyn_array_t*
dyn_array_create(
/*=============*/
				/* out: initialized dyn array */
	dyn_array_t*	arr)	/* in: pointer to a memory buffer of
				size sizeof(dyn_array_t) */
{
	ut_ad(arr);

	/* The full flag is kept in the same word as the used byte count,
	so it must lie above every possible count. */
#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG
# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG"
#endif

	arr->used = 0;
	arr->heap = NULL;

#ifdef UNIV_DEBUG
	arr->magic_n = DYN_BLOCK_MAGIC_N;
	arr->buf_end = 0;
#endif

	return(arr);
}
/****************************************************************
Frees the heap of a dynamic array, if one was allocated. The dyn_array_t
buffer itself is not freed here. */
UNIV_INLINE
void
dyn_array_free(
/*===========*/
	dyn_array_t*	arr)	/* in: dyn array */
{
	mem_heap_t*	heap = arr->heap;

	if (heap != NULL) {
		mem_heap_free(heap);
	}

#ifdef UNIV_DEBUG
	/* Clear the magic number of the freed array. */
	arr->magic_n = 0;
#endif
}
/*************************************************************************
Reserves size bytes at the top of a dyn array and returns a pointer to the
reserved space. The caller is responsible for copying the element there. */
UNIV_INLINE
void*
dyn_array_push(
/*===========*/
				/* out: pointer to the reserved space */
	dyn_array_t*	arr,	/* in: dynamic array */
	ulint		size)	/* in: size in bytes of the element;
				must be nonzero and at most
				DYN_ARRAY_DATA_SIZE */
{
	dyn_block_t*	target;
	ulint		offset;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
	ut_ad(size);

	/* First candidate: the array object itself, used as a block. */
	target = arr;
	offset = target->used;

	if (offset + size > DYN_ARRAY_DATA_SIZE) {
		/* Second candidate: the last block of the array. */
		target = dyn_array_get_last_block(arr);
		offset = target->used;
	}

	if (offset + size > DYN_ARRAY_DATA_SIZE) {
		/* Still no room: append a new block. When an earlier
		candidate fits, this test repeats a comparison that is
		already known to be false. */
		target = dyn_array_add_block(arr);
		offset = target->used;
	}

	target->used = offset + size;
	ut_ad(target->used <= DYN_ARRAY_DATA_SIZE);

	return(target->data + offset);
}
/*************************************************************************
Finds room for a buffer of size bytes at the top of a dyn array and returns
a pointer to it. This function does not advance the used count of the block;
after writing, the caller must call dyn_array_close to close the buffer. */
UNIV_INLINE
byte*
dyn_array_open(
/*===========*/
				/* out: pointer to the buffer */
	dyn_array_t*	arr,	/* in: dynamic array */
	ulint		size)	/* in: size in bytes of the buffer; must be
				nonzero and must not exceed
				DYN_ARRAY_DATA_SIZE */
{
	dyn_block_t*	target;
	ulint		offset;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
	ut_ad(size);

	/* First candidate: the array object itself, used as a block. */
	target = arr;
	offset = target->used;

	if (offset + size > DYN_ARRAY_DATA_SIZE) {
		/* Second candidate: the last block of the array. */
		target = dyn_array_get_last_block(arr);
		offset = target->used;
	}

	if (offset + size > DYN_ARRAY_DATA_SIZE) {
		/* Still no room: append a new block. */
		target = dyn_array_add_block(arr);
		offset = target->used;

		/* Checked in all builds, unlike the ut_ad above. */
		ut_a(size <= DYN_ARRAY_DATA_SIZE);
	}

	ut_ad(target->used <= DYN_ARRAY_DATA_SIZE);

#ifdef UNIV_DEBUG
	/* A nonzero buf_end means a buffer is still open. */
	ut_ad(arr->buf_end == 0);

	arr->buf_end = offset + size;
#endif /* UNIV_DEBUG */

	return(target->data + offset);
}
/*************************************************************************
Closes a buffer obtained from dyn_array_open by recording how much of it
was actually written. */
UNIV_INLINE
void
dyn_array_close(
/*============*/
	dyn_array_t*	arr,	/* in: dynamic array */
	byte*		ptr)	/* in: end of the written data; the buffer
				space from ptr onwards was not used */
{
	dyn_block_t*	last;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);

	last = dyn_array_get_last_block(arr);

	/* ptr must not lie beyond the end of the opened buffer. */
	ut_ad(ptr <= last->data + arr->buf_end);

	/* The used count of the last block becomes the distance from the
	start of its data to ptr. */
	last->used = ptr - last->data;

	ut_ad(last->used <= DYN_ARRAY_DATA_SIZE);

#ifdef UNIV_DEBUG
	arr->buf_end = 0;
#endif /* UNIV_DEBUG */
}
/****************************************************************
Translates a byte position in a dyn array into a pointer to the element
stored there. */
UNIV_INLINE
void*
dyn_array_get_element(
/*==================*/
				/* out: pointer to element */
	dyn_array_t*	arr,	/* in: dyn array */
	ulint		pos)	/* in: position of element as bytes
				from array start */
{
	dyn_block_t*	cur;
	ulint		n_used;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);

	cur = dyn_array_get_first_block(arr);

	if (arr->heap != NULL) {
		/* Walk the block list, subtracting the used size of each
		block passed, until pos falls inside the current block. */
		for (n_used = dyn_block_get_used(cur);
		     pos >= n_used;
		     n_used = dyn_block_get_used(cur)) {

			pos -= n_used;
			cur = UT_LIST_GET_NEXT(list, cur);
			ut_ad(cur);
		}
	}

	ut_ad(cur);
	ut_ad(dyn_block_get_used(cur) >= pos);

	return(cur->data + pos);
}
/****************************************************************
Computes the total number of data bytes stored in a dyn array. */
UNIV_INLINE
ulint
dyn_array_get_data_size(
/*====================*/
				/* out: data size in bytes */
	dyn_array_t*	arr)	/* in: dyn array */
{
	dyn_block_t*	cur;
	ulint		total;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);

	if (arr->heap == NULL) {
		/* No heap: the used field of the array is the answer. */

		return(arr->used);
	}

	/* Otherwise add up the used size of every block. */
	total = 0;

	for (cur = dyn_array_get_first_block(arr);
	     cur != NULL;
	     cur = dyn_array_get_next_block(arr, cur)) {

		total += dyn_block_get_used(cur);
	}

	return(total);
}
/************************************************************
Appends len bytes from str to a dyn array, in pieces of at most
DYN_ARRAY_DATA_SIZE bytes each. */
UNIV_INLINE
void
dyn_push_string(
/*============*/
	dyn_array_t*	arr,	/* in: dyn array */
	const byte*	str,	/* in: string to write */
	ulint		len)	/* in: string length */
{
	ulint	chunk;

	while (len > 0) {
		/* A single push may not exceed DYN_ARRAY_DATA_SIZE. */
		chunk = (len > DYN_ARRAY_DATA_SIZE)
			? DYN_ARRAY_DATA_SIZE
			: len;

		memcpy(dyn_array_push(arr, chunk), str, chunk);

		str += chunk;
		len -= chunk;
	}
}

97
include/eval0eval.h Normal file
View File

@@ -0,0 +1,97 @@
/******************************************************
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
(c) 1997 Innobase Oy
Created 12/29/1997 Heikki Tuuri
*******************************************************/
#ifndef eval0eval_h
#define eval0eval_h
#include "univ.i"
#include "que0types.h"
#include "pars0sym.h"
#include "pars0pars.h"
/*********************************************************************
Free the buffer from global dynamic memory for a value of a que_node,
if it has been allocated by eval_node_alloc_val_buf. The freeing for pushed
column values is done in sel_col_prefetch_buf_free. */
UNIV_INTERN
void
eval_node_free_val_buf(
/*===================*/
que_node_t* node); /* in: query graph node */
/*********************************************************************
Evaluates a symbol table symbol. */
UNIV_INLINE
void
eval_sym(
/*=====*/
sym_node_t* sym_node); /* in: symbol table node */
/*********************************************************************
Evaluates an expression. */
UNIV_INLINE
void
eval_exp(
/*=====*/
que_node_t* exp_node); /* in: expression */
/*********************************************************************
Sets an integer value as the value of an expression node. */
UNIV_INLINE
void
eval_node_set_int_val(
/*==================*/
que_node_t* node, /* in: expression node */
lint val); /* in: value to set */
/*********************************************************************
Gets an integer value from an expression node. */
UNIV_INLINE
lint
eval_node_get_int_val(
/*==================*/
/* out: integer value */
que_node_t* node); /* in: expression node */
/*********************************************************************
Copies a binary string value as the value of a query graph node. Allocates a
new buffer if necessary. */
UNIV_INLINE
void
eval_node_copy_and_alloc_val(
/*=========================*/
que_node_t* node, /* in: query graph node */
const byte* str, /* in: binary string */
ulint len); /* in: string length or UNIV_SQL_NULL */
/*********************************************************************
Copies a query node value to another node. */
UNIV_INLINE
void
eval_node_copy_val(
/*===============*/
que_node_t* node1, /* in: node to copy to */
que_node_t* node2); /* in: node to copy from */
/*********************************************************************
Gets an ibool value from a query node. */
UNIV_INLINE
ibool
eval_node_get_ibool_val(
/*====================*/
/* out: iboolean value */
que_node_t* node); /* in: query graph node */
/*********************************************************************
Evaluates a comparison node. */
UNIV_INTERN
ibool
eval_cmp(
/*=====*/
/* out: the result of the comparison */
func_node_t* cmp_node); /* in: comparison node */
#ifndef UNIV_NONINL
#include "eval0eval.ic"
#endif
#endif

234
include/eval0eval.ic Normal file
View File

@@ -0,0 +1,234 @@
/******************************************************
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
(c) 1997 Innobase Oy
Created 12/29/1997 Heikki Tuuri
*******************************************************/
#include "que0que.h"
#include "rem0cmp.h"
#include "pars0grm.h"
/*********************************************************************
Evaluates a function node. */
UNIV_INTERN
void
eval_func(
/*======*/
func_node_t* func_node); /* in: function node */
/*********************************************************************
Allocate a buffer from global dynamic memory for a value of a que_node.
NOTE that this memory must be explicitly freed when the query graph is
freed. If the node already has allocated buffer, that buffer is freed
here. NOTE that this is the only function where dynamic memory should be
allocated for a query node val field. */
UNIV_INTERN
byte*
eval_node_alloc_val_buf(
/*====================*/
/* out: pointer to allocated buffer */
que_node_t* node, /* in: query graph node; sets the val field
data field to point to the new buffer, and
len field equal to size */
ulint size); /* in: buffer size */
/*********************************************************************
Makes sure the value buffer of a node can hold size bytes, allocating a new
buffer only when there is none or the current one is too small. The len field
of the value is set to size in every case. */
UNIV_INLINE
byte*
eval_node_ensure_val_buf(
/*=====================*/
				/* out: pointer to buffer */
	que_node_t*	node,	/* in: query graph node; sets the val field
				data field to point to the new buffer, and
				len field equal to size */
	ulint		size)	/* in: buffer size */
{
	dfield_t*	val;
	byte*		buf;

	val = que_node_get_val(node);
	dfield_set_len(val, size);

	buf = dfield_get_data(val);

	if (buf && que_node_get_val_buf_size(node) >= size) {
		/* The existing buffer is large enough: keep it. */

		return(buf);
	}

	return(eval_node_alloc_val_buf(node, size));
}
/*********************************************************************
Evaluates a symbol table symbol. Only a symbol with an indirection needs
any work: its value is refreshed from the node it refers to. */
UNIV_INLINE
void
eval_sym(
/*=====*/
	sym_node_t*	sym_node)	/* in: symbol table node */
{
	ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);

	if (!sym_node->indirection) {
		/* No indirection: nothing to do. */

		return;
	}

	/* The node is an alias for a variable or a column: copy the data
	of the aliased node's value into this node's value. */
	dfield_copy_data(que_node_get_val(sym_node),
			 que_node_get_val(sym_node->indirection));
}
/*********************************************************************
Evaluates an expression node: symbol nodes go to eval_sym, every other
node type is passed to eval_func. */
UNIV_INLINE
void
eval_exp(
/*=====*/
	que_node_t*	exp_node)	/* in: expression */
{
	if (que_node_get_type(exp_node) != QUE_NODE_SYMBOL) {

		eval_func(exp_node);
	} else {
		eval_sym((sym_node_t*) exp_node);
	}
}
/*********************************************************************
Stores an integer as the value of an expression node, in 4 bytes. */
UNIV_INLINE
void
eval_node_set_int_val(
/*==================*/
	que_node_t*	node,	/* in: expression node */
	lint		val)	/* in: value to set */
{
	dfield_t*	val_field;
	byte*		buf;

	val_field = que_node_get_val(node);
	buf = dfield_get_data(val_field);

	if (!buf) {
		/* The node has no data buffer yet: allocate 4 bytes. */
		buf = eval_node_alloc_val_buf(node, 4);
	}

	ut_ad(dfield_get_len(val_field) == 4);

	mach_write_to_4(buf, (ulint) val);
}
/*********************************************************************
Reads the 4-byte integer value of an expression node. The value must not
be SQL NULL. */
UNIV_INLINE
lint
eval_node_get_int_val(
/*==================*/
				/* out: integer value */
	que_node_t*	node)	/* in: expression node */
{
	dfield_t*	val_field = que_node_get_val(node);

	ut_ad(dfield_get_len(val_field) == 4);

	/* The 4 bytes read are converted to int before being returned
	as lint. */
	return((int) mach_read_from_4(dfield_get_data(val_field)));
}
/*********************************************************************
Reads an ibool value, stored in one byte, from a query node. */
UNIV_INLINE
ibool
eval_node_get_ibool_val(
/*====================*/
				/* out: ibool value */
	que_node_t*	node)	/* in: query graph node */
{
	byte*	buf;

	buf = dfield_get_data(que_node_get_val(node));

	/* The node must already have a data buffer. */
	ut_ad(buf != NULL);

	return(mach_read_from_1(buf));
}
/*********************************************************************
Stores an ibool as the value of a function node, in one byte. */
UNIV_INLINE
void
eval_node_set_ibool_val(
/*====================*/
	func_node_t*	func_node,	/* in: function node */
	ibool		val)		/* in: value to set */
{
	dfield_t*	val_field;
	byte*		buf;

	val_field = que_node_get_val(func_node);
	buf = dfield_get_data(val_field);

	if (!buf) {
		/* The node has no data buffer yet: allocate the single
		byte that holds the value. */
		buf = eval_node_alloc_val_buf(func_node, 1);
	}

	ut_ad(dfield_get_len(val_field) == 1);

	mach_write_to_1(buf, val);
}
/*********************************************************************
Copies a binary string into the value of a query graph node, obtaining a
large enough buffer through eval_node_ensure_val_buf. For UNIV_SQL_NULL only
the length of the value is set and no data is copied. */
UNIV_INLINE
void
eval_node_copy_and_alloc_val(
/*=========================*/
	que_node_t*	node,	/* in: query graph node */
	const byte*	str,	/* in: binary string */
	ulint		len)	/* in: string length or UNIV_SQL_NULL */
{
	byte*	dest;

	if (len != UNIV_SQL_NULL) {
		dest = eval_node_ensure_val_buf(node, len);

		ut_memcpy(dest, str, len);

		return;
	}

	/* SQL NULL: record it in the length field only. */
	dfield_set_len(que_node_get_val(node), len);
}
/*********************************************************************
Copies the value of one query node to another node, using
eval_node_copy_and_alloc_val to do the work. */
UNIV_INLINE
void
eval_node_copy_val(
/*===============*/
	que_node_t*	node1,	/* in: node to copy to */
	que_node_t*	node2)	/* in: node to copy from */
{
	dfield_t*	src = que_node_get_val(node2);

	eval_node_copy_and_alloc_val(node1,
				     dfield_get_data(src),
				     dfield_get_len(src));
}

87
include/eval0proc.h Normal file
View File

@@ -0,0 +1,87 @@
/******************************************************
Executes SQL stored procedures and their control structures
(c) 1998 Innobase Oy
Created 1/20/1998 Heikki Tuuri
*******************************************************/
#ifndef eval0proc_h
#define eval0proc_h
#include "univ.i"
#include "que0types.h"
#include "pars0sym.h"
#include "pars0pars.h"
/**************************************************************************
Performs an execution step of a procedure node. */
UNIV_INLINE
que_thr_t*
proc_step(
/*======*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of an if-statement node. */
UNIV_INTERN
que_thr_t*
if_step(
/*====*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of a while-statement node. */
UNIV_INTERN
que_thr_t*
while_step(
/*=======*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of a for-loop node. */
UNIV_INTERN
que_thr_t*
for_step(
/*=====*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of an assignment statement node. */
UNIV_INTERN
que_thr_t*
assign_step(
/*========*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of a procedure call node. */
UNIV_INLINE
que_thr_t*
proc_eval_step(
/*===========*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of an exit statement node. */
UNIV_INTERN
que_thr_t*
exit_step(
/*======*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of a return-statement node. */
UNIV_INTERN
que_thr_t*
return_step(
/*========*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
#ifndef UNIV_NONINL
#include "eval0proc.ic"
#endif
#endif

71
include/eval0proc.ic Normal file
View File

@@ -0,0 +1,71 @@
/******************************************************
Executes SQL stored procedures and their control structures
(c) 1998 Innobase Oy
Created 1/20/1998 Heikki Tuuri
*******************************************************/
#include "pars0pars.h"
#include "que0que.h"
#include "eval0eval.h"
/**************************************************************************
Performs an execution step of a procedure node: selects the node the query
thread runs next. */
UNIV_INLINE
que_thr_t*
proc_step(
/*======*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	proc_node_t*	proc;

	ut_ad(thr);

	proc = thr->run_node;
	ut_ad(que_node_get_type(proc) == QUE_NODE_PROC);

	if (thr->prev_node == que_node_get_parent(proc)) {
		/* Control arrived from the parent: start from the first
		statement in the statement list, if there is one. */
		thr->run_node = proc->stat_list;

		if (thr->run_node != NULL) {

			return(thr);
		}
	} else {
		/* Control arrived from elsewhere: the previous node must
		have no successor. */
		ut_ad(que_node_get_next(thr->prev_node) == NULL);
	}

	/* Nothing (more) to run here: continue in the parent. */
	thr->run_node = que_node_get_parent(proc);

	return(thr);
}
/**************************************************************************
Performs an execution step of a procedure call node: evaluates the node and
passes control to its parent. */
UNIV_INLINE
que_thr_t*
proc_eval_step(
/*===========*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	func_node_t*	call;

	ut_ad(thr);

	call = thr->run_node;
	ut_ad(que_node_get_type(call) == QUE_NODE_FUNC);

	/* Run the call itself. */
	eval_exp(call);

	/* Continue in the parent node. */
	thr->run_node = que_node_get_parent(call);

	return(thr);
}

702
include/fil0fil.h Normal file
View File

@@ -0,0 +1,702 @@
/******************************************************
The low-level file system
(c) 1995 Innobase Oy
Created 10/25/1995 Heikki Tuuri
*******************************************************/
#ifndef fil0fil_h
#define fil0fil_h
#include "univ.i"
#include "sync0rw.h"
#include "dict0types.h"
#include "ibuf0types.h"
#include "ut0byte.h"
#include "os0file.h"
/* When mysqld is run, the default directory "." is the mysqld datadir, but in
ibbackup we must set it explicitly; the path must NOT contain the trailing
'/' or '\' */
extern const char* fil_path_to_mysql_datadir;
/* Initial size of a single-table tablespace in pages */
#define FIL_IBD_FILE_INITIAL_SIZE 4
/* 'null' (undefined) page offset in the context of file spaces */
#define FIL_NULL ULINT32_UNDEFINED
/* Space address data type; this is intended to be used when
addresses accurate to a byte are stored in file pages. If the page part
of the address is FIL_NULL, the address is considered undefined. */
typedef byte fil_faddr_t; /* 'type' definition in C: an address
stored in a file page is a string of bytes */
#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
/* A struct for storing a space address FIL_ADDR, when it is used
in C program data structures. */
/* In-memory form of a space address: a page number plus a byte offset
within that page. The byte layout used when an address is stored in a file
page is given by the FIL_ADDR_PAGE, FIL_ADDR_BYTE and FIL_ADDR_SIZE defines.
NOTE(review): presumably page == FIL_NULL marks an undefined address here
too, as documented for fil_faddr_t -- confirm against callers. */
typedef struct fil_addr_struct fil_addr_t;
struct fil_addr_struct{
ulint page; /* page number within a space */
ulint boffset; /* byte offset within the page */
};
/* Null file address */
extern fil_addr_t fil_addr_null;
/* The byte offsets on a file page for various variables */
#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the
page belongs to (== 0) but in later
versions the 'new' checksum of the
page */
#define FIL_PAGE_OFFSET 4 /* page offset inside space */
#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor
of the page, its offset.
Otherwise FIL_NULL.
This field is not set on BLOB pages,
which are stored as a singly-linked
list. See also FIL_PAGE_NEXT. */
#define FIL_PAGE_NEXT 12 /* if there is a 'natural' successor
of the page, its offset.
Otherwise FIL_NULL.
B-tree index pages
(FIL_PAGE_TYPE contains FIL_PAGE_INDEX)
on the same PAGE_LEVEL are maintained
as a doubly linked list via
FIL_PAGE_PREV and FIL_PAGE_NEXT
in the collation order of the
smallest user record on each page. */
#define FIL_PAGE_LSN 16 /* lsn of the end of the newest
modification log record to the page */
#define FIL_PAGE_TYPE 24 /* file page type: FIL_PAGE_INDEX,...,
2 bytes.
The contents of this field can only
be trusted in the following case:
if the page is an uncompressed
B-tree index page, then it is
guaranteed that the value is
FIL_PAGE_INDEX.
The opposite does not hold.
In tablespaces created by
MySQL/InnoDB 5.1.7 or later, the
contents of this field is valid
for all uncompressed pages. */
#define FIL_PAGE_FILE_FLUSH_LSN 26 /* this is only defined for the
first page in a data file: the file
has been flushed to disk at least up
to this lsn */
#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /* starting from 4.1.x this
contains the space id of the page */
#define FIL_PAGE_DATA 38 /* start of the data on the page */
/* File page trailer */
#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used
to store the page checksum, the
last 4 bytes should be identical
to the last 4 bytes of FIL_PAGE_LSN */
#define FIL_PAGE_DATA_END 8
/* File page types (values of FIL_PAGE_TYPE) */
#define FIL_PAGE_INDEX 17855 /* B-tree node */
#define FIL_PAGE_UNDO_LOG 2 /* Undo log page */
#define FIL_PAGE_INODE 3 /* Index node */
#define FIL_PAGE_IBUF_FREE_LIST 4 /* Insert buffer free list */
/* File page types introduced in MySQL/InnoDB 5.1.7 */
#define FIL_PAGE_TYPE_ALLOCATED 0 /* Freshly allocated page */
#define FIL_PAGE_IBUF_BITMAP 5 /* Insert buffer bitmap */
#define FIL_PAGE_TYPE_SYS 6 /* System page */
#define FIL_PAGE_TYPE_TRX_SYS 7 /* Transaction system data */
#define FIL_PAGE_TYPE_FSP_HDR 8 /* File space header */
#define FIL_PAGE_TYPE_XDES 9 /* Extent descriptor page */
#define FIL_PAGE_TYPE_BLOB 10 /* Uncompressed BLOB page */
#define FIL_PAGE_TYPE_ZBLOB 11 /* First compressed BLOB page */
#define FIL_PAGE_TYPE_ZBLOB2 12 /* Subsequent compressed BLOB page */
/* Space types */
#define FIL_TABLESPACE 501
#define FIL_LOG 502
extern ulint fil_n_log_flushes;
extern ulint fil_n_pending_log_flushes;
extern ulint fil_n_pending_tablespace_flushes;
/***********************************************************************
Returns the version number of a tablespace, -1 if not found. */
UNIV_INTERN
ib_int64_t
fil_space_get_version(
/*==================*/
/* out: version number, -1 if the tablespace does not
exist in the memory cache */
ulint id); /* in: space id */
/***********************************************************************
Returns the latch of a file space. */
UNIV_INTERN
rw_lock_t*
fil_space_get_latch(
/*================*/
/* out: latch protecting storage allocation */
ulint id, /* in: space id */
ulint* zip_size);/* out: compressed page size, or
0 for uncompressed tablespaces */
/***********************************************************************
Returns the type of a file space. */
UNIV_INTERN
ulint
fil_space_get_type(
/*===============*/
/* out: FIL_TABLESPACE or FIL_LOG */
ulint id); /* in: space id */
/***********************************************************************
Returns the ibuf data of a file space. */
UNIV_INTERN
ibuf_data_t*
fil_space_get_ibuf_data(
/*====================*/
/* out: ibuf data for this space */
ulint id); /* in: space id */
/***********************************************************************
Appends a new file to the chain of files of a space. File must be closed. */
UNIV_INTERN
void
fil_node_create(
/*============*/
const char* name, /* in: file name (file must be closed) */
ulint size, /* in: file size in database blocks, rounded
downwards to an integer */
ulint id, /* in: space id where to append */
ibool is_raw);/* in: TRUE if a raw device or
a raw disk partition */
#ifdef UNIV_LOG_ARCHIVE
/********************************************************************
Drops files from the start of a file space, so that its size is cut by
the amount given. */
UNIV_INTERN
void
fil_space_truncate_start(
/*=====================*/
ulint id, /* in: space id */
ulint trunc_len); /* in: truncate by this much; it is an error
if this does not equal to the combined size of
some initial files in the space */
#endif /* UNIV_LOG_ARCHIVE */
/***********************************************************************
Creates a space memory object and puts it to the 'fil system' hash table. If
there is an error, prints an error message to the .err log. */
UNIV_INTERN
ibool
fil_space_create(
/*=============*/
/* out: TRUE if success */
const char* name, /* in: space name */
ulint id, /* in: space id */
ulint zip_size,/* in: compressed page size, or
0 for uncompressed tablespaces */
ulint purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
/***********************************************************************
Frees a space object from the tablespace memory cache. Closes the files in
the chain but does not delete them. */
UNIV_INTERN
ibool
fil_space_free(
/*===========*/
/* out: TRUE if success */
ulint id); /* in: space id */
/***********************************************************************
Returns the size of the space in pages. The tablespace must be cached in the
memory cache. */
UNIV_INTERN
ulint
fil_space_get_size(
/*===============*/
/* out: space size, 0 if space not found */
ulint id); /* in: space id */
/***********************************************************************
Returns the flags of the space. The tablespace must be cached
in the memory cache. */
UNIV_INTERN
ulint
fil_space_get_flags(
/*================*/
/* out: flags, ULINT_UNDEFINED if space not found */
ulint id); /* in: space id */
/***********************************************************************
Returns the compressed page size of the space, or 0 if the space
is not compressed. The tablespace must be cached in the memory cache. */
UNIV_INTERN
ulint
fil_space_get_zip_size(
/*===================*/
/* out: compressed page size, ULINT_UNDEFINED
if space not found */
ulint id); /* in: space id */
/***********************************************************************
Checks if the pair space, page_no refers to an existing page in a tablespace
file space. The tablespace must be cached in the memory cache. */
UNIV_INTERN
ibool
fil_check_adress_in_tablespace(
/*===========================*/
/* out: TRUE if the address is meaningful */
ulint id, /* in: space id */
ulint page_no);/* in: page number */
/********************************************************************
Initializes the tablespace memory cache. */
UNIV_INTERN
void
fil_init(
/*=====*/
ulint max_n_open); /* in: max number of open files */
/***********************************************************************
Opens all log files and system tablespace data files. They stay open until the
database server shutdown. This should be called at a server startup after the
space objects for the log and the system tablespace have been created. The
purpose of this operation is to make sure we never run out of file descriptors
if we need to read from the insert buffer or to write to the log. */
UNIV_INTERN
void
fil_open_log_and_system_tablespace_files(void);
/*==========================================*/
/***********************************************************************
Closes all open files. There must not be any pending i/o's or not flushed
modifications in the files. */
UNIV_INTERN
void
fil_close_all_files(void);
/*=====================*/
/***********************************************************************
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
UNIV_INTERN
void
fil_set_max_space_id_if_bigger(
/*===========================*/
ulint max_id);/* in: maximum known id */
/********************************************************************
Initializes the ibuf data structure for space 0 == the system tablespace.
This can be called after the file space headers have been created and the
dictionary system has been initialized. */
UNIV_INTERN
void
fil_ibuf_init_at_db_start(void);
/*===========================*/
/********************************************************************
Writes the flushed lsn and the latest archived log number to the page
header of the first page of each data file in the system tablespace. */
UNIV_INTERN
ulint
fil_write_flushed_lsn_to_data_files(
/*================================*/
/* out: DB_SUCCESS or error number */
ib_uint64_t lsn, /* in: lsn to write */
ulint arch_log_no); /* in: latest archived log
file number */
/***********************************************************************
Reads the flushed lsn and arch no fields from a data file at database
startup. */
UNIV_INTERN
void
fil_read_flushed_lsn_and_arch_log_no(
/*=================================*/
os_file_t data_file, /* in: open data file */
ibool one_read_already, /* in: TRUE if min and max
parameters below already
contain sensible data */
#ifdef UNIV_LOG_ARCHIVE
ulint* min_arch_log_no, /* in/out: */
ulint* max_arch_log_no, /* in/out: */
#endif /* UNIV_LOG_ARCHIVE */
ib_uint64_t* min_flushed_lsn, /* in/out: */
ib_uint64_t* max_flushed_lsn); /* in/out: */
/***********************************************************************
Increments the count of pending insert buffer page merges, if space is not
being deleted. */
UNIV_INTERN
ibool
fil_inc_pending_ibuf_merges(
/*========================*/
/* out: TRUE if being deleted, and ibuf merges should
be skipped */
ulint id); /* in: space id */
/***********************************************************************
Decrements the count of pending insert buffer page merges. */
UNIV_INTERN
void
fil_decr_pending_ibuf_merges(
/*=========================*/
ulint id); /* in: space id */
/***********************************************************************
Parses the body of a log record written about an .ibd file operation. That is,
the log record part after the standard (type, space id, page no) header of the
log record.
If desired, also replays the delete or rename operation if the .ibd file
exists and the space id in it matches. Replays the create operation if a file
at that path does not exist yet. If the database directory for the file to be
created does not exist, then we create the directory, too.
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
datadir that we should use in replaying the file operations. */
UNIV_INTERN
byte*
fil_op_log_parse_or_replay(
/*=======================*/
/* out: end of log record, or NULL if the
record was not completely contained between
ptr and end_ptr */
byte* ptr, /* in: buffer containing the log record body,
or an initial segment of it, if the record does
not fit completely between ptr and end_ptr */
byte* end_ptr, /* in: buffer end */
ulint type, /* in: the type of this log record */
ulint space_id); /* in: the space id of the tablespace in
question, or 0 if the log record should
only be parsed but not replayed */
/***********************************************************************
Deletes a single-table tablespace. The tablespace must be cached in the
memory cache. */
UNIV_INTERN
ibool
fil_delete_tablespace(
/*==================*/
/* out: TRUE if success */
ulint id); /* in: space id */
/***********************************************************************
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
1) we do not drop the table from the data dictionary;
2) we remove all insert buffer entries for the tablespace immediately; in DROP
TABLE they are only removed gradually in the background;
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
as it originally had. */
UNIV_INTERN
ibool
fil_discard_tablespace(
/*===================*/
/* out: TRUE if success */
ulint id); /* in: space id */
/***********************************************************************
Renames a single-table tablespace. The tablespace must be cached in the
tablespace memory cache. */
UNIV_INTERN
ibool
fil_rename_tablespace(
/*==================*/
/* out: TRUE if success */
const char* old_name, /* in: old table name in the standard
databasename/tablename format of
InnoDB, or NULL if we do the rename
based on the space id only */
ulint id, /* in: space id */
const char* new_name); /* in: new table name in the standard
databasename/tablename format
of InnoDB */
/***********************************************************************
Creates a new single-table tablespace to a database directory of MySQL.
Database directories are under the 'datadir' of MySQL. The datadir is the
directory of a running mysqld program. We can refer to it by simply the
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
dir of the mysqld server. */
UNIV_INTERN
ulint
fil_create_new_single_table_tablespace(
/*===================================*/
/* out: DB_SUCCESS or error code */
ulint* space_id, /* in/out: space id; if this is != 0,
then this is an input parameter,
otherwise output */
const char* tablename, /* in: the table name in the usual
databasename/tablename format
of InnoDB, or a dir path to a temp
table */
ibool is_temp, /* in: TRUE if a table created with
CREATE TEMPORARY TABLE */
ulint flags, /* in: tablespace flags */
ulint size); /* in: the initial size of the
tablespace file in pages,
must be >= FIL_IBD_FILE_INITIAL_SIZE */
/************************************************************************
Tries to open a single-table tablespace and optionally checks the space id is
right in it. If does not succeed, prints an error message to the .err log. This
function is used to open a tablespace when we start up mysqld, and also in
IMPORT TABLESPACE.
NOTE that we assume this operation is used either at the database startup
or under the protection of the dictionary mutex, so that two users cannot
race here. This operation does not leave the file associated with the
tablespace open, but closes it after we have looked at the space id in it. */
UNIV_INTERN
ibool
fil_open_single_table_tablespace(
/*=============================*/
/* out: TRUE if success */
ibool check_space_id, /* in: should we check that the space
id in the file is right; we assume
that this function runs much faster
if no check is made, since accessing
the file inode probably is much
faster (the OS caches them) than
accessing the first page of the file */
ulint id, /* in: space id */
ulint flags, /* in: tablespace flags */
const char* name); /* in: table name in the
databasename/tablename format */
/************************************************************************
It is possible, though very improbable, that the lsn's in the tablespace to be
imported have risen above the current system lsn, if a lengthy purge, ibuf
merge, or rollback was performed on a backup taken with ibbackup. If that is
the case, reset page lsn's in the file. We assume that mysqld was shut down
after it performed these cleanup operations on the .ibd file, so that it at
the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
first page of the .ibd file, and we can determine whether we need to reset the
lsn's just by looking at that flush lsn. */
UNIV_INTERN
ibool
fil_reset_too_high_lsns(
/*====================*/
/* out: TRUE if success */
const char* name, /* in: table name in the
databasename/tablename format */
ib_uint64_t current_lsn); /* in: reset lsn's if the lsn stamped
to FIL_PAGE_FILE_FLUSH_LSN in the
first page is too high */
/************************************************************************
At the server startup, if we need crash recovery, scans the database
directories under the MySQL datadir, looking for .ibd files. Those files are
single-table tablespaces. We need to know the space id in each of them so that
we know into which file we should look to check the contents of a page stored
in the doublewrite buffer, also to know where to apply log records where the
space id is != 0. */
UNIV_INTERN
ulint
fil_load_single_table_tablespaces(void);
/*===================================*/
/* out: DB_SUCCESS or error number */
/************************************************************************
If we need crash recovery, and we have called
fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
we can call this function to print an error message of orphaned .ibd files
for which there is not a data dictionary entry with a matching table name
and space id. */
UNIV_INTERN
void
fil_print_orphaned_tablespaces(void);
/*================================*/
/***********************************************************************
Returns TRUE if a single-table tablespace does not exist in the memory cache,
or is being deleted there. */
UNIV_INTERN
ibool
fil_tablespace_deleted_or_being_deleted_in_mem(
/*===========================================*/
/* out: TRUE if does not exist or is being
deleted */
ulint id, /* in: space id */
ib_int64_t version);/* in: tablespace_version should be this; if
you pass -1 as the value of this, then this
parameter is ignored */
/***********************************************************************
Returns TRUE if a single-table tablespace exists in the memory cache. */
UNIV_INTERN
ibool
fil_tablespace_exists_in_mem(
/*=========================*/
/* out: TRUE if exists */
ulint id); /* in: space id */
/***********************************************************************
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
cache. Note that if we have not done a crash recovery at the database startup,
there may be many tablespaces which are not yet in the memory cache. */
UNIV_INTERN
ibool
fil_space_for_table_exists_in_mem(
/*==============================*/
/* out: TRUE if a matching tablespace
exists in the memory cache */
ulint id, /* in: space id */
const char* name, /* in: table name in the standard
'databasename/tablename' format or
the dir path to a temp table */
ibool is_temp, /* in: TRUE if created with CREATE
TEMPORARY TABLE */
ibool mark_space, /* in: in crash recovery, at database
startup we mark all spaces which have
an associated table in the InnoDB
data dictionary, so that
we can print a warning about orphaned
tablespaces */
ibool print_error_if_does_not_exist);
/* in: print detailed error
information to the .err log if a
matching tablespace is not found from
memory */
/**************************************************************************
Tries to extend a data file so that it would accommodate the number of pages
given. The tablespace must be cached in the memory cache. If the space is big
enough already, does nothing. */
UNIV_INTERN
ibool
fil_extend_space_to_desired_size(
/*=============================*/
/* out: TRUE if success */
ulint* actual_size, /* out: size of the space after extension;
if we ran out of disk space this may be lower
than the desired size */
ulint space_id, /* in: space id */
ulint size_after_extend);/* in: desired size in pages after the
extension; if the current space size is bigger
than this already, the function does nothing */
#ifdef UNIV_HOTBACKUP
/************************************************************************
Extends all tablespaces to the size stored in the space header. During the
ibbackup --apply-log phase we extended the spaces on-demand so that log records
could be applied, but that may have left spaces still too small compared to
the size stored in the space header. */
UNIV_INTERN
void
fil_extend_tablespaces_to_stored_len(void);
/*======================================*/
#endif
/***********************************************************************
Tries to reserve free extents in a file space. */
UNIV_INTERN
ibool
fil_space_reserve_free_extents(
/*===========================*/
/* out: TRUE if succeed */
ulint id, /* in: space id */
ulint n_free_now, /* in: number of free extents now */
ulint n_to_reserve); /* in: how many one wants to reserve */
/***********************************************************************
Releases free extents in a file space. */
UNIV_INTERN
void
fil_space_release_free_extents(
/*===========================*/
ulint id, /* in: space id */
ulint n_reserved); /* in: how many one reserved */
/***********************************************************************
Gets the number of reserved extents. If the database is silent, this number
should be zero. */
UNIV_INTERN
ulint
fil_space_get_n_reserved_extents(
/*=============================*/
ulint id); /* in: space id */
/************************************************************************
Reads or writes data. This operation is asynchronous (aio). */
UNIV_INTERN
ulint
fil_io(
/*===*/
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
if we are trying to do i/o on a tablespace
which does not exist */
ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
ORed to OS_FILE_LOG, if a log i/o
and ORed to OS_AIO_SIMULATED_WAKE_LATER
if simulated aio and we want to post a
batch of i/os; NOTE that a simulated batch
may introduce hidden chances of deadlocks,
because i/os are not actually handled until
all have been posted: use with great
caution! */
ibool sync, /* in: TRUE if synchronous aio is desired */
ulint space_id, /* in: space id */
ulint zip_size, /* in: compressed page size in bytes;
0 for uncompressed pages */
ulint block_offset, /* in: offset in number of blocks */
ulint byte_offset, /* in: remainder of offset in bytes; in
aio this must be divisible by the OS block
size */
ulint len, /* in: how many bytes to read or write; this
must not cross a file boundary; in aio this
must be a block size multiple */
void* buf, /* in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
void* message); /* in: message for aio handler if non-sync
aio used, else ignored */
/**************************************************************************
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
into segments (see os0file.c for more info). The thread specifies which
segment it wants to wait for. */
UNIV_INTERN
void
fil_aio_wait(
/*=========*/
ulint segment); /* in: the number of the segment in the aio
array to wait for */
/**************************************************************************
Flushes to disk possible writes cached by the OS. If the space does not exist
or is being dropped, does not do anything. */
UNIV_INTERN
void
fil_flush(
/*======*/
ulint space_id); /* in: file space id (this can be a group of
log files or a tablespace of the database) */
/**************************************************************************
Flushes to disk writes in file spaces of the given type possibly cached by
the OS. */
UNIV_INTERN
void
fil_flush_file_spaces(
/*==================*/
ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */
/**********************************************************************
Checks the consistency of the tablespace cache. */
UNIV_INTERN
ibool
fil_validate(void);
/*==============*/
/* out: TRUE if ok */
/************************************************************************
Returns TRUE if file address is undefined. */
UNIV_INTERN
ibool
fil_addr_is_null(
/*=============*/
/* out: TRUE if undefined */
fil_addr_t addr); /* in: address */
/************************************************************************
Accessor functions for a file page */
UNIV_INTERN
ulint
fil_page_get_prev(const byte* page);
ulint
fil_page_get_next(const byte* page);
/*************************************************************************
Sets the file page type. */
UNIV_INTERN
void
fil_page_set_type(
/*==============*/
byte* page, /* in: file page */
ulint type); /* in: type */
/*************************************************************************
Gets the file page type. */
UNIV_INTERN
ulint
fil_page_get_type(
/*==============*/
/* out: type; NOTE that if the type
has not been written to page, the
return value is not defined */
const byte* page); /* in: file page */
typedef struct fil_space_struct fil_space_t;
#endif

417
include/fsp0fsp.h Normal file
View File

@@ -0,0 +1,417 @@
/******************************************************
File space management
(c) 1995 Innobase Oy
Created 12/18/1995 Heikki Tuuri
*******************************************************/
#ifndef fsp0fsp_h
#define fsp0fsp_h
#include "univ.i"
#include "mtr0mtr.h"
#include "fut0lst.h"
#include "ut0byte.h"
#include "page0types.h"
/* If records are inserted in order, there are the following
flags to tell this (their type is made byte for the compiler
to warn if direction and hint parameters are switched in
fseg_alloc_free_page): */
#define FSP_UP ((byte)111) /* alphabetically upwards */
#define FSP_DOWN ((byte)112) /* alphabetically downwards */
#define FSP_NO_DIR ((byte)113) /* no order */
/* File space extent size (one megabyte) in pages */
#define FSP_EXTENT_SIZE (1 << (20 - UNIV_PAGE_SIZE_SHIFT))
/* On a page of any file segment, data may be put starting from this offset: */
#define FSEG_PAGE_DATA FIL_PAGE_DATA
/* File segment header which points to the inode describing the file segment */
typedef byte fseg_header_t;
#define FSEG_HDR_SPACE 0 /* space id of the inode */
#define FSEG_HDR_PAGE_NO 4 /* page number of the inode */
#define FSEG_HDR_OFFSET 8 /* byte offset of the inode */
#define FSEG_HEADER_SIZE 10
/**************************************************************************
Initializes the file space system. */
UNIV_INTERN
void
fsp_init(void);
/*==========*/
/**************************************************************************
Gets the current free limit of the system tablespace. The free limit
means the place of the first page which has never been put to the
free list for allocation. The space above that address is initialized
to zero. Sets also the global variable log_fsp_current_free_limit. */
UNIV_INTERN
ulint
fsp_header_get_free_limit(void);
/*===========================*/
/* out: free limit in megabytes */
/**************************************************************************
Gets the size of the system tablespace from the tablespace header. If
we do not have an auto-extending data file, this should be equal to
the size of the data files. If there is an auto-extending data file,
this can be smaller. */
UNIV_INTERN
ulint
fsp_header_get_tablespace_size(void);
/*================================*/
/* out: size in pages */
/**************************************************************************
Reads the file space size stored in the header page. */
UNIV_INTERN
ulint
fsp_get_size_low(
/*=============*/
/* out: tablespace size stored in the space header */
page_t* page); /* in: header page (page 0 in the tablespace) */
/**************************************************************************
Reads the space id from the first page of a tablespace. */
UNIV_INTERN
ulint
fsp_header_get_space_id(
/*====================*/
/* out: space id, ULINT_UNDEFINED if error */
const page_t* page); /* in: first page of a tablespace */
/**************************************************************************
Reads the space flags from the first page of a tablespace. */
UNIV_INTERN
ulint
fsp_header_get_flags(
/*=================*/
/* out: flags */
const page_t* page); /* in: first page of a tablespace */
/**************************************************************************
Reads the compressed page size from the first page of a tablespace. */
UNIV_INTERN
ulint
fsp_header_get_zip_size(
/*====================*/
/* out: compressed page size in bytes,
or 0 if uncompressed */
const page_t* page); /* in: first page of a tablespace */
/**************************************************************************
Writes the space id and compressed page size to a tablespace header.
This function is used past the buffer pool when we in fil0fil.c create
a new single-table tablespace. */
UNIV_INTERN
void
fsp_header_init_fields(
/*===================*/
page_t* page, /* in/out: first page in the space */
ulint space_id, /* in: space id */
ulint flags); /* in: tablespace flags (FSP_SPACE_FLAGS):
0, or table->flags if newer than COMPACT */
/**************************************************************************
Initializes the space header of a newly created space and also creates the
insert buffer tree root if space == 0. */
UNIV_INTERN
void
fsp_header_init(
/*============*/
ulint space, /* in: space id */
ulint size, /* in: current size in blocks */
mtr_t* mtr); /* in: mini-transaction handle */
/**************************************************************************
Increases the space size field of a space. */
UNIV_INTERN
void
fsp_header_inc_size(
/*================*/
ulint space, /* in: space id */
ulint size_inc,/* in: size increment in pages */
mtr_t* mtr); /* in: mini-transaction handle */
/**************************************************************************
Creates a new segment. */
UNIV_INTERN
buf_block_t*
fseg_create(
/*========*/
/* out: the block where the segment header is placed,
x-latched, NULL if could not create segment
because of lack of space */
ulint space, /* in: space id */
ulint page, /* in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
ulint byte_offset, /* in: byte offset of the created segment header
on the page */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Creates a new segment. */
UNIV_INTERN
buf_block_t*
fseg_create_general(
/*================*/
/* out: the block where the segment header is placed,
x-latched, NULL if could not create segment
because of lack of space */
ulint space, /* in: space id */
ulint page, /* in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
ulint byte_offset, /* in: byte offset of the created segment header
on the page */
ibool has_done_reservation, /* in: TRUE if the caller has already
done the reservation for the pages with
fsp_reserve_free_extents (at least 2 extents: one for
the inode and the other for the segment) then there is
no need to do the check for this individual
operation */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Calculates the number of pages reserved by a segment, and how many pages are
currently used. */
UNIV_INTERN
ulint
fseg_n_reserved_pages(
/*==================*/
/* out: number of reserved pages */
fseg_header_t* header, /* in: segment header */
ulint* used, /* out: number of pages used (<= reserved) */
mtr_t* mtr); /* in: mtr handle */
/**************************************************************************
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize
file space fragmentation. */
UNIV_INTERN
ulint
fseg_alloc_free_page(
/*=================*/
/* out: the allocated page offset
FIL_NULL if no page could be allocated */
fseg_header_t* seg_header, /* in: segment header */
ulint hint, /* in: hint of which page would be desirable */
byte direction, /* in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
mtr_t* mtr); /* in: mtr handle */
/**************************************************************************
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
fragmentation. */
UNIV_INTERN
ulint
fseg_alloc_free_page_general(
/*=========================*/
/* out: allocated page offset, FIL_NULL if no
page could be allocated */
fseg_header_t* seg_header,/* in: segment header */
ulint hint, /* in: hint of which page would be desirable */
byte direction,/* in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
ibool has_done_reservation, /* in: TRUE if the caller has
already done the reservation for the page
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
mtr_t* mtr); /* in: mtr handle */
/**************************************************************************
Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
to do their operation, like a B-tree page split, fully. Reservations
must be released with function fil_space_release_free_extents!
The alloc_type below has the following meaning: FSP_NORMAL means an
operation which will probably result in more space usage, like an
insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
deleting rows, then this allocation will in the long run result in
less space usage (after a purge); FSP_CLEANING means allocation done
in a physical record delete (like in a purge) or other cleaning operation
which will result in less space usage in the long run. We prefer the latter
two types of allocation: when space is scarce, FSP_NORMAL allocations
will not succeed, but the latter two allocations will succeed, if possible.
The purpose is to avoid dead end where the database is full but the
user cannot free any space because these freeing operations temporarily
reserve some space.
Single-table tablespaces whose size is < 32 pages are a special case. In this
function we would liberally reserve several 64 page extents for every page
split or merge in a B-tree. But we do not want to waste disk space if the table
only occupies < 32 pages. That is why we apply different rules in that special
case, just ensuring that there are 3 free pages available. */
UNIV_INTERN
ibool
fsp_reserve_free_extents(
/*=====================*/
/* out: TRUE if we were able to make the reservation */
ulint* n_reserved,/* out: number of extents actually reserved; if we
return TRUE and the tablespace size is < 64 pages,
then this can be 0, otherwise it is n_ext */
ulint space, /* in: space id */
ulint n_ext, /* in: number of extents to reserve */
ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
This function should be used to get information on how much we still
will be able to insert new data to the database without running out of space
in the tablespace. Only free extents are taken into account and we also subtract
the safety margin required by the above function fsp_reserve_free_extents. */
UNIV_INTERN
ullint
fsp_get_available_space_in_free_extents(
/*====================================*/
/* out: available space in kB */
ulint space); /* in: space id */
/**************************************************************************
Frees a single page of a segment. */
UNIV_INTERN
void
fseg_free_page(
/*===========*/
fseg_header_t* seg_header, /* in: segment header */
ulint space, /* in: space id */
ulint page, /* in: page offset */
mtr_t* mtr); /* in: mtr handle */
/***********************************************************************
Frees a segment. The freeing is performed in several mini-transactions,
so that there is no danger of bufferfixing too many buffer pages. */
UNIV_INTERN
void
fseg_free(
/*======*/
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no,/* in: page number where the segment header is
placed */
ulint offset);/* in: byte offset of the segment header on that
page */
/**************************************************************************
Frees part of a segment. This function can be used to free a segment
by repeatedly calling this function in different mini-transactions.
Doing the freeing in a single mini-transaction might result in
too big a mini-transaction. */
UNIV_INTERN
ibool
fseg_free_step(
/*===========*/
/* out: TRUE if freeing completed */
fseg_header_t* header, /* in, own: segment header; NOTE: if the header
resides on the first page of the frag list
of the segment, this pointer becomes obsolete
after the last freeing step */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Frees part of a segment. Differs from fseg_free_step because this function
leaves the header page unfreed. */
UNIV_INTERN
ibool
fseg_free_step_not_header(
/*======================*/
/* out: TRUE if freeing completed, except the
header page */
fseg_header_t* header, /* in: segment header which must reside on
the first fragment page of the segment */
mtr_t* mtr); /* in: mtr */
/***************************************************************************
Checks if a page address is an extent descriptor page address. */
UNIV_INLINE
ibool
fsp_descr_page(
/*===========*/
/* out: TRUE if a descriptor page */
ulint zip_size,/* in: compressed page size in bytes;
0 for uncompressed pages */
ulint page_no);/* in: page number */
/***************************************************************
Parses a redo log record of a file page init. */
UNIV_INTERN
byte*
fsp_parse_init_file_page(
/*=====================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr, /* in: buffer end */
buf_block_t* block); /* in: block or NULL */
/***********************************************************************
Validates the file space system and its segments. */
UNIV_INTERN
ibool
fsp_validate(
/*=========*/
/* out: TRUE if ok */
ulint space); /* in: space id */
/***********************************************************************
Prints info of a file space. */
UNIV_INTERN
void
fsp_print(
/*======*/
ulint space); /* in: space id */
/***********************************************************************
Validates a segment. */
UNIV_INTERN
ibool
fseg_validate(
/*==========*/
/* out: TRUE if ok */
fseg_header_t* header, /* in: segment header */
mtr_t* mtr2); /* in: mtr */
#ifdef UNIV_BTR_PRINT
/***********************************************************************
Writes info of a segment. */
UNIV_INTERN
void
fseg_print(
/*=======*/
fseg_header_t* header, /* in: segment header */
mtr_t* mtr); /* in: mtr */
#endif /* UNIV_BTR_PRINT */
/* Flags for fsp_reserve_free_extents */
#define FSP_NORMAL 1000000
#define FSP_UNDO 2000000
#define FSP_CLEANING 3000000
/* Number of pages described in a single descriptor page: currently each page
description takes less than 1 byte; a descriptor page is repeated every
this many file pages */
/* #define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE */
/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
/* The space low address page map */
/*--------------------------------------*/
/* The following two pages are repeated
every XDES_DESCRIBED_PER_PAGE pages in
every tablespace. */
#define FSP_XDES_OFFSET 0 /* extent descriptor */
#define FSP_IBUF_BITMAP_OFFSET 1 /* insert buffer bitmap */
/* The ibuf bitmap pages are the ones whose
page number is the number above plus a
multiple of XDES_DESCRIBED_PER_PAGE */
#define FSP_FIRST_INODE_PAGE_NO 2 /* in every tablespace */
/* The following pages exist
in the system tablespace (space 0). */
#define FSP_IBUF_HEADER_PAGE_NO 3 /* in tablespace 0 */
#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /* in tablespace 0 */
/* The ibuf tree root page number in
tablespace 0; its fseg inode is on the page
number FSP_FIRST_INODE_PAGE_NO */
#define FSP_TRX_SYS_PAGE_NO 5 /* in tablespace 0 */
#define FSP_FIRST_RSEG_PAGE_NO 6 /* in tablespace 0 */
#define FSP_DICT_HDR_PAGE_NO 7 /* in tablespace 0 */
/*--------------------------------------*/
#ifndef UNIV_NONINL
#include "fsp0fsp.ic"
#endif
#endif

28
include/fsp0fsp.ic Normal file
View File

@@ -0,0 +1,28 @@
/******************************************************
File space management
(c) 1995 Innobase Oy
Created 12/18/1995 Heikki Tuuri
*******************************************************/
/***************************************************************************
Tells whether a page number denotes an extent descriptor page. The page
number is masked with (page size - 1), where the page size is zip_size for
a compressed tablespace and UNIV_PAGE_SIZE otherwise, and the remainder is
compared against FSP_XDES_OFFSET. */
UNIV_INLINE
ibool
fsp_descr_page(
/*===========*/
			/* out: TRUE if a descriptor page */
	ulint	zip_size,/* in: compressed page size in bytes;
			0 for uncompressed pages */
	ulint	page_no)/* in: page number */
{
	ulint	offset_mask;

	/* The masking below only works for power-of-two page sizes. */
	ut_ad(ut_is_2pow(zip_size));

	/* zip_size == 0 means uncompressed: fall back to the full page size */
	offset_mask = (zip_size ? zip_size : UNIV_PAGE_SIZE) - 1;

	return(UNIV_UNLIKELY((page_no & offset_mask) == FSP_XDES_OFFSET));
}

38
include/fut0fut.h Normal file
View File

@@ -0,0 +1,38 @@
/**********************************************************************
File-based utilities
(c) 1995 Innobase Oy
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
#ifndef fut0fut_h
#define fut0fut_h
#include "univ.i"
#include "fil0fil.h"
#include "mtr0mtr.h"
/************************************************************************
Gets a pointer to a file address and latches the page. */
UNIV_INLINE
byte*
fut_get_ptr(
/*========*/
/* out: pointer to a byte in a frame; the file
page in the frame is bufferfixed and latched */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size in bytes
or 0 for uncompressed pages */
fil_addr_t addr, /* in: file address */
ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
mtr_t* mtr); /* in: mtr handle */
#ifndef UNIV_NONINL
#include "fut0fut.ic"
#endif
#endif

41
include/fut0fut.ic Normal file
View File

@@ -0,0 +1,41 @@
/**********************************************************************
File-based utilities
(c) 1995 Innobase Oy
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
#include "sync0rw.h"
#include "buf0buf.h"
/************************************************************************
Resolves a file address to a pointer inside the buffer frame of its page.
The page is fetched with buf_page_get() using the requested latch mode. */
UNIV_INLINE
byte*
fut_get_ptr(
/*========*/
				/* out: pointer to a byte in a frame; the file
				page in the frame is bufferfixed and latched */
	ulint		space,	/* in: space id */
	ulint		zip_size,/* in: compressed page size in bytes
				or 0 for uncompressed pages */
	fil_addr_t	addr,	/* in: file address */
	ulint		rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
	mtr_t*		mtr)	/* in: mtr handle */
{
	buf_block_t*	latched_block;
	byte*		target;

	/* The byte offset must lie within a page, and only the two
	latch modes named above are accepted. */
	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
	ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);

	latched_block = buf_page_get(space, zip_size, addr.page,
				     rw_latch, mtr);

	/* Frame start plus the byte offset given in the file address */
	target = buf_block_get_frame(latched_block) + addr.boffset;

#ifdef UNIV_SYNC_DEBUG
	buf_block_dbg_add_level(latched_block, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */

	return(target);
}

198
include/fut0lst.h Normal file
View File

@@ -0,0 +1,198 @@
/**********************************************************************
File-based list utilities
(c) 1995 Innobase Oy
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#ifndef fut0lst_h
#define fut0lst_h
#include "univ.i"
#include "fil0fil.h"
#include "mtr0mtr.h"
/* The C 'types' of base node and list node: these should be used to
write self-documenting code. Both are plain aliases of byte, so the
sizeof operator cannot be applied to these types to obtain the physical
size of a node; use FLST_BASE_NODE_SIZE and FLST_NODE_SIZE instead. */
typedef byte flst_base_node_t;
typedef byte flst_node_t;
/* The physical size of a list base node in bytes: a 4-byte list length
field followed by the file addresses of the first and the last node */
#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE)
/* The physical size of a list node in bytes: the file addresses of the
previous and the next node */
#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE)
/************************************************************************
Initializes a list base node. */
UNIV_INLINE
void
flst_init(
/*======*/
flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Adds a node as the last node in a list. */
UNIV_INTERN
void
flst_add_last(
/*==========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node, /* in: node to add */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Adds a node as the first node in a list. */
UNIV_INTERN
void
flst_add_first(
/*===========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node, /* in: node to add */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Inserts a node after another in a list. */
UNIV_INTERN
void
flst_insert_after(
/*==============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node1, /* in: node to insert after */
flst_node_t* node2, /* in: node to add */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Inserts a node before another in a list. */
UNIV_INTERN
void
flst_insert_before(
/*===============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: node to insert */
flst_node_t* node3, /* in: node to insert before */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Removes a node. */
UNIV_INTERN
void
flst_remove(
/*========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: node to remove */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Cuts off the tail of the list, including the node given. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
UNIV_INTERN
void
flst_cut_end(
/*=========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: first node to remove */
ulint n_nodes,/* in: number of nodes to remove,
must be >= 1 */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Cuts off the tail of the list, not including the given node. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
UNIV_INTERN
void
flst_truncate_end(
/*==============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: first node not to remove */
ulint n_nodes,/* in: number of nodes to remove */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Gets list length. */
UNIV_INLINE
ulint
flst_get_len(
/*=========*/
/* out: length */
const flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Gets list first node address. */
UNIV_INLINE
fil_addr_t
flst_get_first(
/*===========*/
/* out: file address */
const flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Gets list last node address. */
UNIV_INLINE
fil_addr_t
flst_get_last(
/*==========*/
/* out: file address */
const flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Gets list next node address. */
UNIV_INLINE
fil_addr_t
flst_get_next_addr(
/*===============*/
/* out: file address */
const flst_node_t* node, /* in: pointer to node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Gets list prev node address. */
UNIV_INLINE
fil_addr_t
flst_get_prev_addr(
/*===============*/
/* out: file address */
const flst_node_t* node, /* in: pointer to node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Writes a file address into a file address field. */
UNIV_INLINE
void
flst_write_addr(
/*============*/
fil_faddr_t* faddr, /* in: pointer to the file address field
that is overwritten */
fil_addr_t addr, /* in: file address to store */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Reads a file address from a file address field. */
UNIV_INLINE
fil_addr_t
flst_read_addr(
/*===========*/
/* out: file address */
const fil_faddr_t* faddr, /* in: pointer to the file address field
to read */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Validates a file-based list. */
UNIV_INTERN
ibool
flst_validate(
/*==========*/
/* out: TRUE if ok */
const flst_base_node_t* base, /* in: pointer to base node of list */
mtr_t* mtr1); /* in: mtr */
/************************************************************************
Prints info of a file-based list. */
UNIV_INTERN
void
flst_print(
/*=======*/
const flst_base_node_t* base, /* in: pointer to base node of list */
mtr_t* mtr); /* in: mtr */
#ifndef UNIV_NONINL
#include "fut0lst.ic"
#endif
#endif

146
include/fut0lst.ic Normal file
View File

@@ -0,0 +1,146 @@
/**********************************************************************
File-based list utilities
(c) 1995 Innobase Oy
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#include "fut0fut.h"
#include "mtr0log.h"
#include "buf0buf.h"
/* We define the field offsets of a node for the list. A node consists
of two consecutive file addresses: previous first, then next. */
#define FLST_PREV 0 /* 6-byte address of the previous list element;
the page part of address is FIL_NULL, if no
previous element */
#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next
list element; the page part of address
is FIL_NULL, if no next element */
/* We define the field offsets of a base node for the list. A base node
consists of the list length followed by two consecutive file addresses:
first element, then last element. */
#define FLST_LEN 0 /* 32-bit list length field */
#define FLST_FIRST 4 /* 6-byte address of the first element
of the list; undefined if empty list */
#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the
last element of the list; undefined
if empty list */
/************************************************************************
Stores a file address into a file address field. The caller must hold
an x-fix on the page containing the field in the mini-transaction. */
UNIV_INLINE
void
flst_write_addr(
/*============*/
	fil_faddr_t*	faddr,	/* in: pointer to the file address field
				that is overwritten */
	fil_addr_t	addr,	/* in: file address to store */
	mtr_t*		mtr)	/* in: mini-transaction handle */
{
	fil_faddr_t*	page_no_field;
	fil_faddr_t*	offset_field;

	ut_ad(faddr && mtr);
	ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX));

	page_no_field = faddr + FIL_ADDR_PAGE;
	offset_field = faddr + FIL_ADDR_BYTE;

	/* The 4-byte page number is written first, then the 2-byte
	byte offset within the page */
	mlog_write_ulint(page_no_field, addr.page, MLOG_4BYTES, mtr);
	mlog_write_ulint(offset_field, addr.boffset, MLOG_2BYTES, mtr);
}
/************************************************************************
Loads a file address from a file address field. */
UNIV_INLINE
fil_addr_t
flst_read_addr(
/*===========*/
					/* out: file address */
	const fil_faddr_t*	faddr,	/* in: pointer to the file address
					field to read */
	mtr_t*			mtr)	/* in: mini-transaction handle */
{
	const fil_faddr_t*	page_no_field;
	const fil_faddr_t*	offset_field;
	fil_addr_t		result;

	ut_ad(faddr && mtr);

	page_no_field = faddr + FIL_ADDR_PAGE;
	offset_field = faddr + FIL_ADDR_BYTE;

	/* 4-byte page number followed by the 2-byte byte offset */
	result.page = mtr_read_ulint(page_no_field, MLOG_4BYTES, mtr);
	result.boffset = mtr_read_ulint(offset_field, MLOG_2BYTES, mtr);

	return(result);
}
/************************************************************************
Initializes a list base node to describe an empty list. The caller must
hold an x-fix on the page containing the base node. */
UNIV_INLINE
void
flst_init(
/*======*/
	flst_base_node_t*	base,	/* in: pointer to base node */
	mtr_t*			mtr)	/* in: mini-transaction handle */
{
	flst_base_node_t*	len_field;
	flst_base_node_t*	first_field;
	flst_base_node_t*	last_field;

	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));

	len_field = base + FLST_LEN;
	first_field = base + FLST_FIRST;
	last_field = base + FLST_LAST;

	/* Zero length, and null addresses for both ends of the list */
	mlog_write_ulint(len_field, 0, MLOG_4BYTES, mtr);
	flst_write_addr(first_field, fil_addr_null, mtr);
	flst_write_addr(last_field, fil_addr_null, mtr);
}
/************************************************************************
Reads the length field of a list base node. */
UNIV_INLINE
ulint
flst_get_len(
/*=========*/
					/* out: length */
	const flst_base_node_t*	base,	/* in: pointer to base node */
	mtr_t*			mtr)	/* in: mini-transaction handle */
{
	const flst_base_node_t*	len_field = base + FLST_LEN;

	return(mtr_read_ulint(len_field, MLOG_4BYTES, mtr));
}
/************************************************************************
Reads the address of the first node from a list base node. */
UNIV_INLINE
fil_addr_t
flst_get_first(
/*===========*/
					/* out: file address */
	const flst_base_node_t*	base,	/* in: pointer to base node */
	mtr_t*			mtr)	/* in: mini-transaction handle */
{
	fil_addr_t	first;

	first = flst_read_addr(base + FLST_FIRST, mtr);

	return(first);
}
/************************************************************************
Reads the address of the last node from a list base node. */
UNIV_INLINE
fil_addr_t
flst_get_last(
/*==========*/
					/* out: file address */
	const flst_base_node_t*	base,	/* in: pointer to base node */
	mtr_t*			mtr)	/* in: mini-transaction handle */
{
	fil_addr_t	last;

	last = flst_read_addr(base + FLST_LAST, mtr);

	return(last);
}
/************************************************************************
Reads the address of the next node from a list node. */
UNIV_INLINE
fil_addr_t
flst_get_next_addr(
/*===============*/
				/* out: file address */
	const flst_node_t*	node,	/* in: pointer to node */
	mtr_t*			mtr)	/* in: mini-transaction handle */
{
	fil_addr_t	next;

	next = flst_read_addr(node + FLST_NEXT, mtr);

	return(next);
}
/************************************************************************
Reads the address of the previous node from a list node. */
UNIV_INLINE
fil_addr_t
flst_get_prev_addr(
/*===============*/
				/* out: file address */
	const flst_node_t*	node,	/* in: pointer to node */
	mtr_t*			mtr)	/* in: mini-transaction handle */
{
	fil_addr_t	prev;

	prev = flst_read_addr(node + FLST_PREV, mtr);

	return(prev);
}

172
include/ha0ha.h Normal file
View File

@@ -0,0 +1,172 @@
/******************************************************
The hash table with external chains
(c) 1994-1997 Innobase Oy
Created 8/18/1994 Heikki Tuuri
*******************************************************/
#ifndef ha0ha_h
#define ha0ha_h
#include "univ.i"
#include "hash0hash.h"
#include "page0types.h"
#include "buf0types.h"
/*****************************************************************
Looks for an element in a hash table. */
UNIV_INLINE
void*
ha_search_and_get_data(
/*===================*/
/* out: pointer to the data of the first hash
table node in chain having the fold number,
NULL if not found */
hash_table_t* table, /* in: hash table */
ulint fold); /* in: folded value of the searched data */
/*************************************************************
Looks for an element when we know the pointer to the data and updates
the pointer to data if found. Call this through the wrapper macro
ha_search_and_update_if_found(), because the parameter list of the
function itself depends on UNIV_DEBUG. */
UNIV_INTERN
void
ha_search_and_update_if_found_func(
/*===============================*/
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of the searched data */
void* data, /* in: pointer to the data */
#ifdef UNIV_DEBUG
buf_block_t* new_block,/* in: block containing new_data; this
parameter exists only in UNIV_DEBUG builds */
#endif
void* new_data);/* in: new pointer to the data */
/* The wrapper always takes five arguments. When UNIV_DEBUG is not
defined, the new_block argument is dropped by the preprocessor and is
therefore never evaluated. */
#ifdef UNIV_DEBUG
# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
ha_search_and_update_if_found_func(table,fold,data,new_block,new_data)
#else
# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
ha_search_and_update_if_found_func(table,fold,data,new_data)
#endif
/*****************************************************************
Creates a hash table with >= n array cells. The actual number of cells is
chosen to be a prime number slightly bigger than n. Call this through
the wrapper macro ha_create(), because the parameter list of the
function itself depends on UNIV_SYNC_DEBUG. */
UNIV_INTERN
hash_table_t*
ha_create_func(
/*===========*/
/* out, own: created table */
ulint n, /* in: number of array cells */
#ifdef UNIV_SYNC_DEBUG
ulint mutex_level, /* in: level of the mutexes in the latching
order: this is used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
ulint n_mutexes); /* in: number of mutexes to protect the
hash table: must be a power of 2 */
/* Note the argument order: the wrapper takes (cells, mutexes, level),
while the function takes the level before the number of mutexes. When
UNIV_SYNC_DEBUG is not defined, the level argument is dropped by the
preprocessor and is therefore never evaluated. */
#ifdef UNIV_SYNC_DEBUG
# define ha_create(n_c,n_m,level) ha_create_func(n_c,level,n_m)
#else /* UNIV_SYNC_DEBUG */
# define ha_create(n_c,n_m,level) ha_create_func(n_c,n_m)
#endif /* UNIV_SYNC_DEBUG */
/*****************************************************************
Empties a hash table and frees the memory heaps. */
UNIV_INTERN
void
ha_clear(
/*=====*/
hash_table_t* table); /* in, own: hash table */
/*****************************************************************
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted. Call this through the wrapper macro ha_insert_for_fold(),
because the parameter list of the function itself depends on UNIV_DEBUG. */
UNIV_INTERN
ibool
ha_insert_for_fold_func(
/*====================*/
/* out: TRUE if succeeded, FALSE if no more
memory could be allocated */
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of data; if a node with
the same fold value already exists, it is
updated to point to the data passed in, and
no new node is created! */
#ifdef UNIV_DEBUG
buf_block_t* block, /* in: buffer block containing the data;
this parameter exists only in UNIV_DEBUG
builds */
#endif /* UNIV_DEBUG */
void* data); /* in: data, must not be NULL */
/* The wrapper always takes four arguments (table, fold, block, data).
When UNIV_DEBUG is not defined, the block argument is dropped by the
preprocessor and is therefore never evaluated. */
#ifdef UNIV_DEBUG
# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,b,d)
#else
# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,d)
#endif
/*****************************************************************
Deletes an entry from a hash table. */
UNIV_INTERN
void
ha_delete(
/*======*/
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of data */
void* data); /* in: data, must not be NULL and must exist
in the hash table */
/*************************************************************
Looks for an element when we know the pointer to the data and deletes
it from the hash table if found. */
UNIV_INLINE
ibool
ha_search_and_delete_if_found(
/*==========================*/
/* out: TRUE if found */
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of the searched data */
void* data); /* in: pointer to the data */
/*********************************************************************
Removes from the chain determined by fold all nodes whose data pointer
points to the page given. */
UNIV_INTERN
void
ha_remove_all_nodes_to_page(
/*========================*/
hash_table_t* table, /* in: hash table */
ulint fold, /* in: fold value */
const page_t* page); /* in: buffer page */
/*****************************************************************
Validates a given range of the cells in hash table. */
UNIV_INTERN
ibool
ha_validate(
/*========*/
/* out: TRUE if ok */
hash_table_t* table, /* in: hash table */
ulint start_index, /* in: start index */
ulint end_index); /* in: end index */
/*****************************************************************
Prints info of a hash table. */
UNIV_INTERN
void
ha_print_info(
/*==========*/
FILE* file, /* in: file where to print */
hash_table_t* table); /* in: hash table */
/* The hash table external chain node. The nodes of one chain are linked
through the 'next' field. Note that the 'block' field exists only when
UNIV_DEBUG is defined, so the size and layout of the struct differ
between debug and non-debug builds. */
typedef struct ha_node_struct ha_node_t;
struct ha_node_struct {
ha_node_t* next; /* next chain node or NULL if none */
#ifdef UNIV_DEBUG
buf_block_t* block; /* buffer block containing the data, or NULL */
#endif /* UNIV_DEBUG */
void* data; /* pointer to the data */
ulint fold; /* fold value for the data */
};
#ifndef UNIV_NONINL
#include "ha0ha.ic"
#endif
#endif

198
include/ha0ha.ic Normal file
View File

@@ -0,0 +1,198 @@
/************************************************************************
The hash table with external chains
(c) 1994-1997 Innobase Oy
Created 8/18/1994 Heikki Tuuri
*************************************************************************/
#include "ut0rnd.h"
#include "mem0mem.h"
/***************************************************************
Deletes a hash node. Only the prototype is given in this file; it is
needed here because the inline function ha_search_and_delete_if_found()
in this file calls it. */
UNIV_INTERN
void
ha_delete_hash_node(
/*================*/
hash_table_t* table, /* in: hash table */
ha_node_t* del_node); /* in: node to be deleted */
/**********************************************************************
Returns the data pointer stored in a hash chain node. */
UNIV_INLINE
void*
ha_node_get_data(
/*=============*/
				/* out: pointer to the data */
	ha_node_t*	node)	/* in: hash chain node */
{
	void*	stored;

	stored = node->data;

	return(stored);
}
/**********************************************************************
Stores a data pointer in a hash chain node. In UNIV_DEBUG builds the
buffer block containing the data is recorded in the node as well. Use
the wrapper macro ha_node_set_data(), which always takes three
arguments and drops the block argument in non-debug builds. */
UNIV_INLINE
void
ha_node_set_data_func(
/*==================*/
	ha_node_t*	node,	/* in: hash chain node */
#ifdef UNIV_DEBUG
	buf_block_t*	block,	/* in: buffer block containing the data */
#endif /* UNIV_DEBUG */
	void*		data)	/* in: pointer to the data */
{
	node->data = data;

#ifdef UNIV_DEBUG
	/* The block is tracked in debug builds only */
	node->block = block;
#endif /* UNIV_DEBUG */
}

#ifdef UNIV_DEBUG
# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d)
#else /* UNIV_DEBUG */
# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d)
#endif /* UNIV_DEBUG */
/**********************************************************************
Returns the successor of a node in its hash chain. */
UNIV_INLINE
ha_node_t*
ha_chain_get_next(
/*==============*/
				/* out: next node, NULL if none */
	ha_node_t*	node)	/* in: hash chain node */
{
	ha_node_t*	successor;

	successor = node->next;

	return(successor);
}
/**********************************************************************
Returns the head of the hash chain that a fold value maps to. */
UNIV_INLINE
ha_node_t*
ha_chain_get_first(
/*===============*/
				/* out: first node, NULL if none */
	hash_table_t*	table,	/* in: hash table */
	ulint		fold)	/* in: fold value determining the chain */
{
	ulint	cell_no;

	/* The fold value selects the cell; the cell holds the chain head */
	cell_no = hash_calc_hash(fold, table);

	return((ha_node_t*) hash_get_nth_cell(table, cell_no)->node);
}
/*****************************************************************
Walks the hash chain selected by a fold value and returns the first
node whose fold value matches. If the table has mutexes, the caller
must own the mutex protecting the fold value. */
UNIV_INLINE
ha_node_t*
ha_search(
/*======*/
				/* out: pointer to the first hash table node
				in chain having the fold number, NULL if not
				found */
	hash_table_t*	table,	/* in: hash table */
	ulint		fold)	/* in: folded value of the searched data */
{
	ha_node_t*	cur;

	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));

	for (cur = ha_chain_get_first(table, fold);
	     cur;
	     cur = ha_chain_get_next(cur)) {

		if (cur->fold == fold) {

			return(cur);
		}
	}

	return(NULL);
}
/*****************************************************************
Walks the hash chain selected by a fold value and returns the data
pointer of the first node whose fold value matches. If the table has
mutexes, the caller must own the mutex protecting the fold value. */
UNIV_INLINE
void*
ha_search_and_get_data(
/*===================*/
				/* out: pointer to the data of the first hash
				table node in chain having the fold number,
				NULL if not found */
	hash_table_t*	table,	/* in: hash table */
	ulint		fold)	/* in: folded value of the searched data */
{
	ha_node_t*	cur;

	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));

	for (cur = ha_chain_get_first(table, fold);
	     cur;
	     cur = ha_chain_get_next(cur)) {

		if (cur->fold != fold) {
			/* A different fold value shares this chain */
			continue;
		}

		return(cur->data);
	}

	return(NULL);
}
/*************************************************************
Walks the hash chain selected by a fold value and returns the first
node that holds the given data pointer. Nodes are matched on the data
pointer alone. If the table has mutexes, the caller must own the mutex
protecting the fold value. */
UNIV_INLINE
ha_node_t*
ha_search_with_data(
/*================*/
				/* out: pointer to the hash table node, NULL
				if not found in the table */
	hash_table_t*	table,	/* in: hash table */
	ulint		fold,	/* in: folded value of the searched data */
	void*		data)	/* in: pointer to the data */
{
	ha_node_t*	cur;

	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));

	cur = ha_chain_get_first(table, fold);

	/* Stop at the first match or at the end of the chain */
	while (cur && cur->data != data) {

		cur = ha_chain_get_next(cur);
	}

	return(cur);
}
/*************************************************************
Searches the hash chain selected by a fold value for the node holding
the given data pointer and, if there is one, deletes that node from the
hash table. If the table has mutexes, the caller must own the mutex
protecting the fold value. */
UNIV_INLINE
ibool
ha_search_and_delete_if_found(
/*==========================*/
				/* out: TRUE if found */
	hash_table_t*	table,	/* in: hash table */
	ulint		fold,	/* in: folded value of the searched data */
	void*		data)	/* in: pointer to the data */
{
	ha_node_t*	victim;

	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));

	victim = ha_search_with_data(table, fold, data);

	if (!victim) {

		return(FALSE);
	}

	ha_delete_hash_node(table, victim);

	return(TRUE);
}

113
include/ha0storage.h Normal file
View File

@@ -0,0 +1,113 @@
/******************************************************
Hash storage.
Provides a data structure that stores chunks of data in
its own storage, avoiding duplicates.
(c) 2007 Innobase Oy
Created September 22, 2007 Vasil Dimov
*******************************************************/
#ifndef ha0storage_h
#define ha0storage_h
#include "univ.i"
/* This value is used by default by ha_storage_create(). More memory
is allocated later when/if it is needed. */
#define HA_STORAGE_DEFAULT_HEAP_BYTES 1024
/* This value is used by default by ha_storage_create(). It is a
constant per ha_storage's lifetime. */
#define HA_STORAGE_DEFAULT_HASH_CELLS 4096
typedef struct ha_storage_struct ha_storage_t;
/***********************************************************************
Creates a hash storage. If any of the parameters is 0, then a default
value is used. */
UNIV_INLINE
ha_storage_t*
ha_storage_create(
/*==============*/
/* out, own: hash storage */
ulint initial_heap_bytes, /* in: initial heap's size */
ulint initial_hash_cells); /* in: initial number of cells
in the hash table */
/***********************************************************************
Copies data into the storage and returns a pointer to the copy. If the
same data chunk is already present, then pointer to it is returned.
Data chunks are considered to be equal if len1 == len2 and
memcmp(data1, data2, len1) == 0. If "data" is not present (and thus
data_len bytes need to be allocated) and the size of storage is going to
become more than "memlim" then "data" is not added and NULL is returned.
To disable this behavior "memlim" can be set to 0, which stands for
"no limit". */
const void*
ha_storage_put_memlim(
/*==================*/
/* out: pointer to the copy, or NULL if
adding the data would exceed memlim */
ha_storage_t* storage, /* in/out: hash storage */
const void* data, /* in: data to store */
ulint data_len, /* in: data length */
ulint memlim); /* in: memory limit to obey, or 0 for
no limit */
/***********************************************************************
Same as ha_storage_put_memlim() but without memory limit. */
#define ha_storage_put(storage, data, data_len) \
ha_storage_put_memlim((storage), (data), (data_len), 0)
/***********************************************************************
Copies string into the storage and returns a pointer to the copy. If the
same string is already present, then pointer to it is returned.
Strings are considered to be equal if strcmp(str1, str2) == 0.
The terminating '\0' is stored along with the string. Note that the
"str" argument is expanded twice, so it must not have side effects. */
#define ha_storage_put_str(storage, str) \
((const char*) ha_storage_put((storage), (str), strlen(str) + 1))
/***********************************************************************
Copies string into the storage and returns a pointer to the copy obeying
a memory limit. The terminating '\0' is stored along with the string.
Note that the "str" argument is expanded twice, so it must not have
side effects. */
#define ha_storage_put_str_memlim(storage, str, memlim) \
((const char*) ha_storage_put_memlim((storage), (str), \
strlen(str) + 1, (memlim)))
/***********************************************************************
Empties a hash storage, freeing memory occupied by data chunks.
This invalidates any pointers previously returned by ha_storage_put().
The hash storage is not invalidated itself and can be used again. */
UNIV_INLINE
void
ha_storage_empty(
/*=============*/
ha_storage_t** storage); /* in/out: hash storage */
/***********************************************************************
Frees a hash storage and everything it contains, it cannot be used after
this call.
This invalidates any pointers previously returned by ha_storage_put().
*/
UNIV_INLINE
void
ha_storage_free(
/*============*/
ha_storage_t* storage); /* in, own: hash storage; the pointer
is invalid after this call */
/***********************************************************************
Gets the size of the memory used by a storage. */
UNIV_INLINE
ulint
ha_storage_get_size(
/*================*/
/* out: bytes used */
const ha_storage_t* storage); /* in: hash storage */
#ifndef UNIV_NONINL
#include "ha0storage.ic"
#endif
#endif /* ha0storage_h */

130
include/ha0storage.ic Normal file
View File

@@ -0,0 +1,130 @@
/******************************************************
Hash storage.
Provides a data structure that stores chunks of data in
its own storage, avoiding duplicates.
(c) 2007 Innobase Oy
Created September 24, 2007 Vasil Dimov
*******************************************************/
#include "univ.i"
#include "ha0storage.h"
#include "hash0hash.h"
#include "mem0mem.h"
/* The hash storage object. ha_storage_create() allocates this struct
from its own 'heap', so the struct lives inside the heap it points to. */
struct ha_storage_struct {
mem_heap_t* heap; /* storage from which memory is
allocated */
hash_table_t* hash; /* hash table used to avoid
duplicates */
};
/* Objects of this type are put in the hash: one node describes one
stored data chunk */
typedef struct ha_storage_node_struct ha_storage_node_t;
struct ha_storage_node_struct {
ulint data_len;/* length of the data */
const void* data; /* pointer to data */
ha_storage_node_t* next; /* next node in hash chain */
};
/***********************************************************************
Creates a hash storage. A parameter given as 0 is replaced by its
default value, HA_STORAGE_DEFAULT_HEAP_BYTES or
HA_STORAGE_DEFAULT_HASH_CELLS respectively. */
UNIV_INLINE
ha_storage_t*
ha_storage_create(
/*==============*/
					/* out, own: hash storage */
	ulint	initial_heap_bytes,	/* in: initial heap's size */
	ulint	initial_hash_cells)	/* in: initial number of cells
					in the hash table */
{
	mem_heap_t*	new_heap;
	ha_storage_t*	new_storage;
	ulint		heap_bytes;
	ulint		hash_cells;

	heap_bytes = initial_heap_bytes
		? initial_heap_bytes
		: HA_STORAGE_DEFAULT_HEAP_BYTES;

	hash_cells = initial_hash_cells
		? initial_hash_cells
		: HA_STORAGE_DEFAULT_HASH_CELLS;

	/* The storage object itself is carved out of the heap it owns,
	so the heap is sized to hold it in addition to the requested
	bytes */
	new_heap = mem_heap_create(sizeof(ha_storage_t) + heap_bytes);

	new_storage = (ha_storage_t*) mem_heap_alloc(new_heap,
						     sizeof(ha_storage_t));

	new_storage->heap = new_heap;
	new_storage->hash = hash_create(hash_cells);

	return(new_storage);
}
/***********************************************************************
Empties a hash storage, freeing memory occupied by data chunks.
This invalidates any pointers previously returned by ha_storage_put().
The hash storage stays usable, but the storage object is allocated anew
and *storage is updated to point to it. */
UNIV_INLINE
void
ha_storage_empty(
/*=============*/
	ha_storage_t**	storage)	/* in/out: hash storage */
{
	ha_storage_t	saved;

	/* Keep a copy of both members: the heap is about to be
	emptied and the storage object is re-allocated from it below */
	saved = **storage;

	hash_table_clear(saved.hash);
	mem_heap_empty(saved.heap);

	*storage = (ha_storage_t*) mem_heap_alloc(saved.heap,
						  sizeof(ha_storage_t));

	/* Restore the heap and hash pointers in the new object */
	**storage = saved;
}
/***********************************************************************
Releases a hash storage together with everything stored in it. The
storage must not be used after this call, and any pointers previously
returned by ha_storage_put() become invalid.
*/
UNIV_INLINE
void
ha_storage_free(
/*============*/
	ha_storage_t*	storage)	/* in, own: hash storage */
{
	/* The hash table must go first: the pointer to it,
	storage->hash, is read from memory that belongs to the heap,
	so it has to be used before the heap is freed */
	hash_table_free(storage->hash);

	mem_heap_free(storage->heap);
}
/***********************************************************************
Computes the amount of memory used by a storage: the size of its heap
plus the hash table object and the hash table's cell array. */
UNIV_INLINE
ulint
ha_storage_get_size(
/*================*/
						/* out: bytes used */
	const ha_storage_t*	storage)	/* in: hash storage */
{
	ulint	heap_bytes;
	ulint	cell_bytes;

	heap_bytes = mem_heap_get_size(storage->heap);

	/* this assumes hash->heap and hash->heaps are NULL */
	cell_bytes = sizeof(hash_cell_t) * hash_get_n_cells(storage->hash);

	return(heap_bytes + sizeof(hash_table_t) + cell_bytes);
}

162
include/ha_prototypes.h Normal file
View File

@@ -0,0 +1,162 @@
#ifndef HA_INNODB_PROTOTYPES_H
#define HA_INNODB_PROTOTYPES_H
#ifndef UNIV_HOTBACKUP
#include "univ.i" /* ulint, uint */
#include "m_ctype.h" /* CHARSET_INFO */
/* Prototypes for global functions in ha_innodb.cc that are called by
InnoDB's C-code. */
/*************************************************************************
Wrapper around MySQL's copy_and_convert function, see it for
documentation. */
UNIV_INTERN
ulint
innobase_convert_string(
/*====================*/
void* to,
ulint to_length,
CHARSET_INFO* to_cs,
const void* from,
ulint from_length,
CHARSET_INFO* from_cs,
uint* errors);
/***********************************************************************
Formats the raw data in "data" (in InnoDB on-disk format) that is of
type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
the result to "buf". The result is converted to "system_charset_info".
Not more than "buf_size" bytes are written to "buf".
The result is always '\0'-terminated (provided buf_size > 0) and the
number of bytes that were written to "buf" is returned (including the
terminating '\0'). */
UNIV_INTERN
ulint
innobase_raw_format(
/*================*/
/* out: number of bytes
that were written */
const char* data, /* in: raw data */
ulint data_len, /* in: raw data length
in bytes */
ulint charset_coll, /* in: charset collation */
char* buf, /* out: output buffer */
ulint buf_size); /* in: output buffer size
in bytes */
/*********************************************************************
Convert a table or index name to the MySQL system_charset_info (UTF-8)
and quote it if needed. */
UNIV_INTERN
char*
innobase_convert_name(
/*==================*/
/* out: pointer to the end of buf */
char* buf, /* out: buffer for converted identifier */
ulint buflen, /* in: length of buf, in bytes */
const char* id, /* in: identifier to convert */
ulint idlen, /* in: length of id, in bytes */
void* thd, /* in: MySQL connection thread, or NULL */
ibool table_id);/* in: TRUE=id is a table or database name;
FALSE=id is an index name */
/**********************************************************************
Returns true if the thread is the replication thread on the slave
server. Used in srv_conc_enter_innodb() to determine if the thread
should be allowed to enter InnoDB - the replication thread is treated
differently than other threads. Also used in
srv_conc_force_exit_innodb(). */
UNIV_INTERN
ibool
thd_is_replication_slave_thread(
/*============================*/
/* out: true if thd is the replication thread */
void* thd); /* in: thread handle (THD*) */
/**********************************************************************
Returns true if the transaction this thread is processing has edited
non-transactional tables. Used by the deadlock detector when deciding
which transaction to rollback in case of a deadlock - we try to avoid
rolling back transactions that have edited non-transactional tables. */
UNIV_INTERN
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
/* out: true if non-transactional tables have
been edited */
void* thd); /* in: thread handle (THD*) */
/*****************************************************************
Prints info of a THD object (== user session thread) to the given file. */
UNIV_INTERN
void
innobase_mysql_print_thd(
/*=====================*/
FILE* f, /* in: output stream */
void* thd, /* in: pointer to a MySQL THD object */
uint max_query_len); /* in: max query length to print, or 0 to
use the default max length */
/******************************************************************
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. */
UNIV_INTERN
ulint
get_innobase_type_from_mysql_type(
/*==============================*/
/* out: DATA_BINARY,
DATA_VARCHAR, ... */
ulint* unsigned_flag, /* out: DATA_UNSIGNED if an
'unsigned type';
at least ENUM and SET,
and unsigned integer
types are 'unsigned types' */
const void* field) /* in: MySQL Field */
__attribute__((nonnull));
/*****************************************************************
If you want to print a thd that is not associated with the current thread,
you must call this function before reserving the InnoDB kernel_mutex, to
protect MySQL from setting thd->query NULL. If you print a thd of the current
thread, we know that MySQL cannot modify thd->query, and it is not necessary
to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
the kernel_mutex. */
UNIV_INTERN
void
innobase_mysql_prepare_print_arbitrary_thd(void);
/*============================================*/
/*****************************************************************
Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
In the InnoDB latching order, the mutex sits right above the
kernel_mutex. In debug builds, we assert that the kernel_mutex is
released before this function is invoked. */
UNIV_INTERN
void
innobase_mysql_end_print_arbitrary_thd(void);
/*========================================*/
/**********************************************************************
Get the variable length bounds of the given character set. */
UNIV_INTERN
void
innobase_get_cset_width(
/*====================*/
ulint cset, /* in: MySQL charset-collation code */
ulint* mbminlen, /* out: minimum length of a char (in bytes) */
ulint* mbmaxlen); /* out: maximum length of a char (in bytes) */
/**********************************************************************
Compares NUL-terminated UTF-8 strings case insensitively. */
UNIV_INTERN
int
innobase_strcasecmp(
/*================*/
/* out: 0 if a=b, <0 if a<b, >0 if a>b */
const char* a, /* in: first string to compare */
const char* b); /* in: second string to compare */
#endif
#endif

25
include/handler0alter.h Normal file
View File

@@ -0,0 +1,25 @@
/******************************************************
Smart ALTER TABLE
(c) 2005-2007 Innobase Oy
*******************************************************/
/*****************************************************************
Copies an InnoDB record to table->record[0]. */
UNIV_INTERN
void
innobase_rec_to_mysql(
/*==================*/
TABLE* table, /* in/out: MySQL table */
const rec_t* rec, /* in: record */
const dict_index_t* index, /* in: index */
const ulint* offsets); /* in: rec_get_offsets(
rec, index, ...) */
/*****************************************************************
Resets table->record[0]. */
UNIV_INTERN
void
innobase_rec_reset(
/*===============*/
TABLE* table); /* in/out: MySQL table */

390
include/hash0hash.h Normal file
View File

@@ -0,0 +1,390 @@
/******************************************************
The simple hash table utility
(c) 1997 Innobase Oy
Created 5/20/1997 Heikki Tuuri
*******************************************************/
#ifndef hash0hash_h
#define hash0hash_h
#include "univ.i"
#include "mem0mem.h"
#include "sync0sync.h"
typedef struct hash_table_struct hash_table_t;
typedef struct hash_cell_struct hash_cell_t;
typedef void* hash_node_t;
/* Fix Bug #13859: symbol collision between imap/mysql */
#define hash_create hash0_create
/*****************************************************************
Creates a hash table with >= n array cells. The actual number
of cells is chosen to be a prime number slightly bigger than n. */
UNIV_INTERN
hash_table_t*
hash_create(
/*========*/
/* out, own: created table */
ulint n); /* in: number of array cells */
/*****************************************************************
Creates a mutex array to protect a hash table. */
UNIV_INTERN
void
hash_create_mutexes_func(
/*=====================*/
hash_table_t* table, /* in: hash table */
#ifdef UNIV_SYNC_DEBUG
ulint sync_level, /* in: latching order level of the
mutexes: used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
ulint n_mutexes); /* in: number of mutexes */
/* Wrapper for hash_create_mutexes_func(). Callers always pass
(table, n_mutexes, sync_level); the latching level is forwarded only when
UNIV_SYNC_DEBUG is defined and is dropped otherwise. Note the argument
order: the wrapper takes the level last, the function takes it second. */
#ifdef UNIV_SYNC_DEBUG
# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,level,n)
#else /* UNIV_SYNC_DEBUG */
# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,n)
#endif /* UNIV_SYNC_DEBUG */
/*****************************************************************
Frees a hash table. */
UNIV_INTERN
void
hash_table_free(
/*============*/
hash_table_t* table); /* in, own: hash table */
/******************************************************************
Calculates the hash value from a folded value. */
UNIV_INLINE
ulint
hash_calc_hash(
/*===========*/
/* out: hashed value */
ulint fold, /* in: folded value */
hash_table_t* table); /* in: hash table */
/************************************************************************
Assert that the mutex for the table in a hash operation is owned.
The check passes trivially for a table without a mutex array
((TABLE)->mutexes == NULL), and the macro expands to nothing unless
UNIV_SYNC_DEBUG is defined. The debug expansion already ends in a
semicolon, which is why the macro is invoked without a trailing ';'. */
#ifdef UNIV_SYNC_DEBUG
# define HASH_ASSERT_OWNED(TABLE, FOLD) \
ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));
#else
# define HASH_ASSERT_OWNED(TABLE, FOLD)
#endif
/***********************************************************************
Inserts a struct to a hash table. The struct is appended to the end of the
chain of the cell selected by hash_calc_hash(FOLD, TABLE); no check for
duplicates is made.
	TYPE	type of the structs stored in the chain
	NAME	name of the 'next in chain' pointer field in TYPE
	TABLE	hash table (hash_table_t*)
	FOLD	folded value of the key of DATA
	DATA	pointer to the struct to insert; (DATA)->NAME is reset to
		NULL before the struct is linked in */
#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
do {\
hash_cell_t* cell3333;\
TYPE* struct3333;\
\
HASH_ASSERT_OWNED(TABLE, FOLD)\
\
(DATA)->NAME = NULL;\
\
cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
\
if (cell3333->node == NULL) {\
cell3333->node = DATA;\
} else {\
struct3333 = cell3333->node;\
\
while (struct3333->NAME != NULL) {\
\
struct3333 = struct3333->NAME;\
}\
\
struct3333->NAME = DATA;\
}\
} while (0)
/* Debugging aids for catching the use of a node after it has been removed
from a hash chain. When UNIV_HASH_DEBUG is defined, HASH_INVALIDATE
overwrites the chain pointer NAME of DATA with (void*) -1 and
HASH_ASSERT_VALID fails on a pointer with that value; otherwise both macros
are no-ops. DATA is parenthesized in the expansion so that any pointer
expression can be passed. */
#ifdef UNIV_HASH_DEBUG
# define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1)
# define HASH_INVALIDATE(DATA, NAME) (DATA)->NAME = (void*) -1
#else
# define HASH_ASSERT_VALID(DATA) do {} while (0)
# define HASH_INVALIDATE(DATA, NAME) do {} while (0)
#endif
/***********************************************************************
Deletes a struct from a hash table.
	TYPE	type of the structs stored in the chain
	NAME	name of the 'next in chain' pointer field in TYPE
	TABLE	hash table (hash_table_t*)
	FOLD	folded value of the key of DATA
	DATA	pointer to the struct to remove
DATA must be present in the chain of the cell selected by
hash_calc_hash(FOLD, TABLE): the search for its predecessor has no
end-of-chain test of its own, it only trips ut_a() when it runs off the
end of a non-empty chain. Every use of DATA is parenthesized so that an
arbitrary pointer expression can be passed. */
#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
do {\
	hash_cell_t*	cell3333;\
	TYPE*		struct3333;\
\
	HASH_ASSERT_OWNED(TABLE, FOLD)\
\
	cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
\
	if (cell3333->node == (DATA)) {\
		/* DATA is the first node of the chain */\
		HASH_ASSERT_VALID((DATA)->NAME);\
		cell3333->node = (DATA)->NAME;\
	} else {\
		struct3333 = cell3333->node;\
\
		/* Find the predecessor of DATA and unlink DATA */\
		while (struct3333->NAME != (DATA)) {\
\
			struct3333 = struct3333->NAME;\
			ut_a(struct3333);\
		}\
\
		struct3333->NAME = (DATA)->NAME;\
	}\
	HASH_INVALIDATE((DATA), NAME);\
} while (0)
/***********************************************************************
Gets the first struct in a hash chain, NULL if none. HASH_VAL is the index
of a cell (for example a value returned by hash_calc_hash()), not a fold
value. The result is the 'node' field of the cell and has the type void*. */
#define HASH_GET_FIRST(TABLE, HASH_VAL)\
(hash_get_nth_cell(TABLE, HASH_VAL)->node)
/***********************************************************************
Gets the next struct in a hash chain, NULL if none. NAME is the name of
the 'next in chain' pointer field and DATA is a pointer to the current
struct in the chain. */
#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME)
/************************************************************************
Looks for a struct in a hash table.
	NAME	name of the 'next in chain' pointer field
	TABLE	hash table (hash_table_t*)
	FOLD	folded value of the searched key
	TYPE	pointer type of the structs in the chain; it is used as a
		cast as such, so it must include the '*'
	DATA	lvalue of type TYPE that is used as the chain cursor
	TEST	expression that is evaluated for each struct in the chain
		while DATA points to it; the search stops when it is true
After the macro DATA points to the first struct for which TEST holds, or
is NULL if there is none. Note that the expansion is a plain { } block
and not a do { } while (0) statement. */
#define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, TEST)\
{\
\
HASH_ASSERT_OWNED(TABLE, FOLD)\
\
(DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
HASH_ASSERT_VALID(DATA);\
\
while ((DATA) != NULL) {\
if (TEST) {\
break;\
} else {\
HASH_ASSERT_VALID(HASH_GET_NEXT(NAME, DATA));\
(DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);\
}\
}\
}
/****************************************************************
Gets the nth cell in a hash table. */
UNIV_INLINE
hash_cell_t*
hash_get_nth_cell(
/*==============*/
/* out: pointer to cell */
hash_table_t* table, /* in: hash table */
ulint n); /* in: cell index */
/*****************************************************************
Clears a hash table so that all the cells become empty. */
UNIV_INLINE
void
hash_table_clear(
/*=============*/
hash_table_t* table); /* in/out: hash table */
/*****************************************************************
Returns the number of cells in a hash table. */
UNIV_INLINE
ulint
hash_get_n_cells(
/*=============*/
/* out: number of cells */
hash_table_t* table); /* in: table */
/***********************************************************************
Deletes a struct which is stored in the heap of the hash table, and compacts
the heap. The fold value must be stored in the struct NODE in a field named
'fold'.
	TYPE	type of the structs stored in the chain
	NAME	name of the 'next in chain' pointer field in TYPE
	TABLE	hash table (hash_table_t*)
	NODE	pointer to the struct to delete
The struct on the top of the heap returned by hash_get_heap() for the fold
of NODE is copied over NODE, the chain pointer that referred to the top
struct is redirected to NODE, and sizeof(TYPE) bytes are then freed from
the top of that heap. Consequently a pointer to the struct that used to be
on the top of the heap is no longer valid after this macro. Every use of
NODE is parenthesized so that an arbitrary pointer expression can be
passed. */
#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
do {\
	TYPE*		node111;\
	TYPE*		top_node111;\
	hash_cell_t*	cell111;\
	ulint		fold111;\
\
	fold111 = (NODE)->fold;\
\
	HASH_DELETE(TYPE, NAME, TABLE, fold111, (NODE));\
\
	top_node111 = (TYPE*)mem_heap_get_top(\
				hash_get_heap(TABLE, fold111),\
							sizeof(TYPE));\
\
	/* If the node to remove is not the top node in the heap, compact the\
	heap of nodes by moving the top node in the place of NODE. */\
\
	if ((NODE) != top_node111) {\
\
		/* Copy the top node in place of NODE */\
\
		*(NODE) = *top_node111;\
\
		cell111 = hash_get_nth_cell(TABLE,\
				hash_calc_hash(top_node111->fold, TABLE));\
\
		/* Look for the pointer to the top node, to update it */\
\
		if (cell111->node == top_node111) {\
			/* The top node is the first in the chain */\
\
			cell111->node = (NODE);\
		} else {\
			/* We have to look for the predecessor of the top\
			node */\
			node111 = cell111->node;\
\
			while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
\
				node111 = HASH_GET_NEXT(NAME, node111);\
			}\
\
			/* Now we have the predecessor node */\
\
			node111->NAME = (NODE);\
		}\
	}\
\
	/* Free the space occupied by the top node */\
\
	mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
} while (0)
/********************************************************************
Move all hash table entries from OLD_TABLE to NEW_TABLE.
	OLD_TABLE	hash table whose chains are walked, cell by cell
	NEW_TABLE	hash table into which the nodes are inserted
	NODE_TYPE	type of the structs stored in the chains
	PTR_NAME	name of the 'next in chain' pointer field
	FOLD_FUNC	function or macro that is given a NODE_TYPE* and
			yields the fold value used for NEW_TABLE
The next pointer of each node is saved before the node is handed to
HASH_INSERT, because HASH_INSERT resets that pointer. The cells of
OLD_TABLE are not cleared by this macro. */
#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
do {\
ulint i2222;\
ulint cell_count2222;\
\
cell_count2222 = hash_get_n_cells(OLD_TABLE);\
\
for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
NODE_TYPE* node2222 = HASH_GET_FIRST((OLD_TABLE), i2222);\
\
while (node2222) {\
NODE_TYPE* next2222 = node2222->PTR_NAME;\
ulint fold2222 = FOLD_FUNC(node2222);\
\
HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
fold2222, node2222);\
\
node2222 = next2222;\
}\
}\
} while (0)
/****************************************************************
Gets the mutex index for a fold value in a hash table. */
UNIV_INLINE
ulint
hash_get_mutex_no(
/*==============*/
/* out: mutex number */
hash_table_t* table, /* in: hash table */
ulint fold); /* in: fold */
/****************************************************************
Gets the nth heap in a hash table. */
UNIV_INLINE
mem_heap_t*
hash_get_nth_heap(
/*==============*/
/* out: mem heap */
hash_table_t* table, /* in: hash table */
ulint i); /* in: index of the heap */
/****************************************************************
Gets the heap for a fold value in a hash table. */
UNIV_INLINE
mem_heap_t*
hash_get_heap(
/*==========*/
/* out: mem heap */
hash_table_t* table, /* in: hash table */
ulint fold); /* in: fold */
/****************************************************************
Gets the nth mutex in a hash table. */
UNIV_INLINE
mutex_t*
hash_get_nth_mutex(
/*===============*/
/* out: mutex */
hash_table_t* table, /* in: hash table */
ulint i); /* in: index of the mutex */
/****************************************************************
Gets the mutex for a fold value in a hash table. */
UNIV_INLINE
mutex_t*
hash_get_mutex(
/*===========*/
/* out: mutex */
hash_table_t* table, /* in: hash table */
ulint fold); /* in: fold */
/****************************************************************
Reserves the mutex for a fold value in a hash table. */
UNIV_INTERN
void
hash_mutex_enter(
/*=============*/
hash_table_t* table, /* in: hash table */
ulint fold); /* in: fold */
/****************************************************************
Releases the mutex for a fold value in a hash table. */
UNIV_INTERN
void
hash_mutex_exit(
/*============*/
hash_table_t* table, /* in: hash table */
ulint fold); /* in: fold */
/****************************************************************
Reserves all the mutexes of a hash table, in an ascending order. */
UNIV_INTERN
void
hash_mutex_enter_all(
/*=================*/
hash_table_t* table); /* in: hash table */
/****************************************************************
Releases all the mutexes of a hash table. */
UNIV_INTERN
void
hash_mutex_exit_all(
/*================*/
hash_table_t* table); /* in: hash table */
/* One cell of the hash table array: the head of a hash chain. The node
pointer is untyped; the HASH_* macros are told the node type by the caller. */
struct hash_cell_struct{
void* node; /* hash chain node, NULL if none */
};
/* The hash table structure */
struct hash_table_struct {
#ifdef UNIV_DEBUG
ibool adaptive;/* TRUE if this is the hash table of the
adaptive hash index */
#endif /* UNIV_DEBUG */
ulint n_cells;/* number of cells in the hash table */
hash_cell_t* array; /* pointer to cell array */
ulint n_mutexes;/* if mutexes != NULL, then the number of
mutexes, must be a power of 2 */
mutex_t* mutexes;/* NULL, or an array of mutexes used to
protect segments of the hash table */
mem_heap_t** heaps; /* if this is non-NULL, hash chain nodes for
external chaining can be allocated from these
memory heaps; there are then n_mutexes many of
these heaps */
mem_heap_t* heap; /* if this is non-NULL, hash_get_heap() returns
this heap for every fold value instead of
picking one from heaps[] */
ulint magic_n; /* NOTE(review): presumably holds
HASH_TABLE_MAGIC_N in a valid table; nothing
in this header reads or writes it -- confirm
in hash0hash.c */
};
#define HASH_TABLE_MAGIC_N 76561114
#ifndef UNIV_NONINL
#include "hash0hash.ic"
#endif
#endif

143
include/hash0hash.ic Normal file
View File

@@ -0,0 +1,143 @@
/******************************************************
The simple hash table utility
(c) 1997 Innobase Oy
Created 5/20/1997 Heikki Tuuri
*******************************************************/
#include "ut0rnd.h"
/****************************************************************
Returns the address of the cell with the given index in the cell array
of a hash table. */
UNIV_INLINE
hash_cell_t*
hash_get_nth_cell(
/*==============*/
				/* out: pointer to the n'th cell */
	hash_table_t*	table,	/* in: hash table */
	ulint		n)	/* in: index of the cell; in debug builds
				asserted to be below table->n_cells */
{
	ut_ad(n < table->n_cells);

	return(&table->array[n]);
}
/*****************************************************************
Empties a hash table by zero-filling its whole cell array. The nodes that
were linked to the cells are not touched. */
UNIV_INLINE
void
hash_table_clear(
/*=============*/
	hash_table_t*	table)	/* in/out: hash table whose cells are
				reset */
{
	memset(table->array, 0,
	       sizeof(table->array[0]) * table->n_cells);
}
/*****************************************************************
Tells how many cells the cell array of a hash table has. */
UNIV_INLINE
ulint
hash_get_n_cells(
/*=============*/
				/* out: number of cells */
	hash_table_t*	table)	/* in: hash table */
{
	ulint	n_cells = table->n_cells;

	return(n_cells);
}
/******************************************************************
Maps a folded value to a hash value by passing it, together with the
number of cells of the table, to ut_hash_ulint(). */
UNIV_INLINE
ulint
hash_calc_hash(
/*===========*/
				/* out: hashed value */
	ulint		fold,	/* in: folded value */
	hash_table_t*	table)	/* in: hash table */
{
	ulint	n_cells = table->n_cells;

	return(ut_hash_ulint(fold, n_cells));
}
/****************************************************************
Computes which mutex of a hash table covers a fold value: the remainder
of the fold value when divided by the number of mutexes. */
UNIV_INLINE
ulint
hash_get_mutex_no(
/*==============*/
				/* out: mutex number */
	hash_table_t*	table,	/* in: hash table; in debug builds
				n_mutexes is asserted to be a power of 2 */
	ulint		fold)	/* in: fold */
{
	ulint	n_mutexes = table->n_mutexes;

	ut_ad(ut_is_2pow(n_mutexes));

	return(ut_2pow_remainder(fold, n_mutexes));
}
/****************************************************************
Returns the memory heap with the given index from the heap array of a
hash table. */
UNIV_INLINE
mem_heap_t*
hash_get_nth_heap(
/*==============*/
				/* out: mem heap */
	hash_table_t*	table,	/* in: hash table */
	ulint		i)	/* in: index of the heap; in debug builds
				asserted to be below table->n_mutexes */
{
	ut_ad(i < table->n_mutexes);

	return(*(table->heaps + i));
}
/****************************************************************
Returns the memory heap that belongs to a fold value. If the table has a
single heap (table->heap is non-NULL), that heap is returned whatever the
fold is; otherwise the heap with the same index as the mutex of the fold
value is returned. */
UNIV_INLINE
mem_heap_t*
hash_get_heap(
/*==========*/
				/* out: mem heap */
	hash_table_t*	table,	/* in: hash table */
	ulint		fold)	/* in: fold */
{
	if (table->heap != NULL) {

		return(table->heap);
	}

	return(hash_get_nth_heap(table, hash_get_mutex_no(table, fold)));
}
/****************************************************************
Returns the address of the mutex with the given index in the mutex array
of a hash table. */
UNIV_INLINE
mutex_t*
hash_get_nth_mutex(
/*===============*/
				/* out: mutex */
	hash_table_t*	table,	/* in: hash table */
	ulint		i)	/* in: index of the mutex; in debug builds
				asserted to be below table->n_mutexes */
{
	ut_ad(i < table->n_mutexes);

	return(&table->mutexes[i]);
}
/****************************************************************
Returns the mutex that covers a fold value: the mutex whose index is
given by hash_get_mutex_no(). */
UNIV_INLINE
mutex_t*
hash_get_mutex(
/*===========*/
				/* out: mutex */
	hash_table_t*	table,	/* in: hash table */
	ulint		fold)	/* in: fold */
{
	return(hash_get_nth_mutex(table, hash_get_mutex_no(table, fold)));
}

316
include/ibuf0ibuf.h Normal file
View File

@@ -0,0 +1,316 @@
/******************************************************
Insert buffer
(c) 1997 Innobase Oy
Created 7/19/1997 Heikki Tuuri
*******************************************************/
#ifndef ibuf0ibuf_h
#define ibuf0ibuf_h
#include "univ.i"
#include "dict0mem.h"
#include "dict0dict.h"
#include "mtr0mtr.h"
#include "que0types.h"
#include "ibuf0types.h"
#include "fsp0fsp.h"
extern ibuf_t* ibuf;
/**********************************************************************
Creates the insert buffer data struct for a single tablespace. Reads the
root page of the insert buffer tree in the tablespace. This function can
be called only after the dictionary system has been initialized, as this
creates also the insert buffer table and index for this tablespace. */
UNIV_INTERN
ibuf_data_t*
ibuf_data_init_for_space(
/*=====================*/
/* out, own: ibuf data struct, linked to the list
in ibuf control structure. */
ulint space); /* in: space id */
/**********************************************************************
Creates the insert buffer data structure at a database startup and
initializes the data structures for the insert buffer of each tablespace. */
UNIV_INTERN
void
ibuf_init_at_db_start(void);
/*=======================*/
/*************************************************************************
Reads the biggest tablespace id from the high end of the insert buffer
tree and updates the counter in fil_system. */
UNIV_INTERN
void
ibuf_update_max_tablespace_id(void);
/*===============================*/
/*************************************************************************
Initializes an ibuf bitmap page. */
UNIV_INTERN
void
ibuf_bitmap_page_init(
/*==================*/
buf_block_t* block, /* in: bitmap page */
mtr_t* mtr); /* in: mtr */
/****************************************************************************
Resets the free bits of the page in the ibuf bitmap. This is done in a
separate mini-transaction, hence this operation does not restrict further
work to only ibuf bitmap operations, which would result if the latch to the
bitmap page were kept. */
UNIV_INTERN
void
ibuf_reset_free_bits(
/*=================*/
buf_block_t* block); /* in: index page; free bits are set to 0
if the index is a non-clustered
non-unique, and page level is 0 */
/****************************************************************************
Updates the free bits of an uncompressed page in the ibuf bitmap if
there is not enough free space on the page any more. This is done in a
separate mini-transaction, hence this operation does not restrict
further work to only ibuf bitmap operations, which would result if the
latch to the bitmap page were kept. */
UNIV_INLINE
void
ibuf_update_free_bits_if_full(
/*==========================*/
buf_block_t* block, /* in: index page to which we have added new
records; the free bits are updated if the
index is non-clustered and non-unique and
the page level is 0, and the page becomes
fuller */
ulint max_ins_size,/* in: value of maximum insert size with
reorganize before the latest operation
performed to the page */
ulint increase);/* in: upper limit for the additional space
used in the latest operation, if known, or
ULINT_UNDEFINED */
/**************************************************************************
Updates the free bits for an uncompressed page to reflect the present state.
Does this in the mtr given, which means that the latching order rules virtually
prevent any further operations for this OS thread until mtr is committed. */
UNIV_INTERN
void
ibuf_update_free_bits_low(
/*======================*/
const buf_block_t* block, /* in: index page */
ulint max_ins_size, /* in: value of
maximum insert size
with reorganize before
the latest operation
performed to the page */
mtr_t* mtr); /* in/out: mtr */
/**************************************************************************
Updates the free bits for a compressed page to reflect the present state.
Does this in the mtr given, which means that the latching order rules virtually
prevent any further operations for this OS thread until mtr is committed. */
UNIV_INTERN
void
ibuf_update_free_bits_zip(
/*======================*/
buf_block_t* block, /* in/out: index page */
mtr_t* mtr); /* in/out: mtr */
/**************************************************************************
Updates the free bits for the two pages to reflect the present state. Does
this in the mtr given, which means that the latching order rules virtually
prevent any further operations until mtr is committed. */
UNIV_INTERN
void
ibuf_update_free_bits_for_two_pages_low(
/*====================================*/
ulint zip_size,/* in: compressed page size in bytes;
0 for uncompressed pages */
buf_block_t* block1, /* in: index page */
buf_block_t* block2, /* in: index page */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
A basic partial test if an insert to the insert buffer could be possible and
recommended. */
UNIV_INLINE
ibool
ibuf_should_try(
/*============*/
dict_index_t* index, /* in: index where to insert */
ulint ignore_sec_unique); /* in: if != 0, we should
ignore UNIQUE constraint on
a secondary index when we
decide */
/**********************************************************************
Returns TRUE if the current OS thread is performing an insert buffer
routine. */
UNIV_INTERN
ibool
ibuf_inside(void);
/*=============*/
/* out: TRUE if inside an insert buffer routine: for instance,
a read-ahead of non-ibuf pages is then forbidden */
/***************************************************************************
Checks if a page address is an ibuf bitmap page (level 3 page) address. */
UNIV_INLINE
ibool
ibuf_bitmap_page(
/*=============*/
/* out: TRUE if a bitmap page */
ulint zip_size,/* in: compressed page size in bytes;
0 for uncompressed pages */
ulint page_no);/* in: page number */
/***************************************************************************
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
UNIV_INTERN
ibool
ibuf_page(
/*======*/
/* out: TRUE if level 2 or level 3 page */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size in bytes, or 0 */
ulint page_no);/* in: page number */
/***************************************************************************
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
UNIV_INTERN
ibool
ibuf_page_low(
/*==========*/
/* out: TRUE if level 2 or level 3 page */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size in bytes, or 0 */
ulint page_no,/* in: page number */
mtr_t* mtr); /* in: mtr which will contain an x-latch to the
bitmap page if the page is not one of the fixed
address ibuf pages */
/***************************************************************************
Frees excess pages from the ibuf free list. This function is called when an OS
thread calls fsp services to allocate a new file segment, or a new page to a
file segment, and the thread did not own the fsp latch before this call. */
UNIV_INTERN
void
ibuf_free_excess_pages(
/*===================*/
ulint space); /* in: space id */
/*************************************************************************
Makes an index insert to the insert buffer, instead of directly to the disk
page, if this is possible. Does not do insert if the index is clustered
or unique. */
UNIV_INTERN
ibool
ibuf_insert(
/*========*/
/* out: TRUE if success */
const dtuple_t* entry, /* in: index entry to insert */
dict_index_t* index, /* in: index where to insert */
ulint space, /* in: space id where to insert */
ulint zip_size,/* in: compressed page size in bytes, or 0 */
ulint page_no,/* in: page number where to insert */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
When an index page is read from a disk to the buffer pool, this function
inserts to the page the possible index entries buffered in the insert buffer.
The entries are deleted from the insert buffer. If the page is not read, but
created in the buffer pool, this function deletes its buffered entries from
the insert buffer; there can exist entries for such a page if the page
belonged to an index which subsequently was dropped. */
UNIV_INTERN
void
ibuf_merge_or_delete_for_page(
/*==========================*/
buf_block_t* block, /* in: if page has been read from
disk, pointer to the page x-latched,
else NULL */
ulint space, /* in: space id of the index page */
ulint page_no,/* in: page number of the index page */
ulint zip_size,/* in: compressed page size in bytes,
or 0 */
ibool update_ibuf_bitmap);/* in: normally this is set
to TRUE, but if we have deleted or are
deleting the tablespace, then we
naturally do not want to update a
non-existent bitmap page */
/*************************************************************************
Deletes all entries in the insert buffer for a given space id. This is used
in DISCARD TABLESPACE and IMPORT TABLESPACE.
NOTE: this does not update the page free bitmaps in the space. The space will
become CORRUPT when you call this function! */
UNIV_INTERN
void
ibuf_delete_for_discarded_space(
/*============================*/
ulint space); /* in: space id */
/*************************************************************************
Contracts insert buffer trees by reading pages to the buffer pool. */
UNIV_INTERN
ulint
ibuf_contract(
/*==========*/
/* out: a lower limit for the combined size in bytes
of entries which will be merged from ibuf trees to the
pages read, 0 if ibuf is empty */
ibool sync); /* in: TRUE if the caller wants to wait for the
issued read with the highest tablespace address
to complete */
/*************************************************************************
Contracts insert buffer trees by reading pages to the buffer pool. */
UNIV_INTERN
ulint
ibuf_contract_for_n_pages(
/*======================*/
/* out: a lower limit for the combined size in bytes
of entries which will be merged from ibuf trees to the
pages read, 0 if ibuf is empty */
ibool sync, /* in: TRUE if the caller wants to wait for the
issued read with the highest tablespace address
to complete */
ulint n_pages);/* in: try to read at least this many pages to
the buffer pool and merge the ibuf contents to
them */
/*************************************************************************
Parses a redo log record of an ibuf bitmap page init. */
UNIV_INTERN
byte*
ibuf_parse_bitmap_init(
/*===================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
buf_block_t* block, /* in: block or NULL */
mtr_t* mtr); /* in: mtr or NULL */
#ifdef UNIV_IBUF_COUNT_DEBUG
/**********************************************************************
Gets the ibuf count for a given page. */
UNIV_INTERN
ulint
ibuf_count_get(
/*===========*/
/* out: number of entries in the insert buffer
currently buffered for this page */
ulint space, /* in: space id */
ulint page_no);/* in: page number */
#endif
/**********************************************************************
Looks if the insert buffer is empty. */
UNIV_INTERN
ibool
ibuf_is_empty(void);
/*===============*/
/* out: TRUE if empty */
/**********************************************************************
Prints info of ibuf. */
UNIV_INTERN
void
ibuf_print(
/*=======*/
FILE* file); /* in: file where to print */
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
/* The ibuf header page currently contains only the file segment header
for the file segment from which the pages for the ibuf tree are allocated */
#define IBUF_HEADER PAGE_DATA
#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
#ifndef UNIV_NONINL
#include "ibuf0ibuf.ic"
#endif
#endif

308
include/ibuf0ibuf.ic Normal file
View File

@@ -0,0 +1,308 @@
/******************************************************
Insert buffer
(c) 1997 Innobase Oy
Created 7/19/1997 Heikki Tuuri
*******************************************************/
#include "buf0lru.h"
#include "page0page.h"
#include "page0zip.h"
extern ulint ibuf_flush_count;
/* If this number is n, an index page must contain at least the page size
per n bytes of free space for ibuf to try to buffer inserts to this page.
If there is this much of free space, the corresponding bits are set in the
ibuf bitmap. */
#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
/* Insert buffer data struct for a single tablespace. The structs are
chained together through the data_list node. */
struct ibuf_data_struct{
ulint space; /* space id */
ulint seg_size;/* allocated pages of the file segment
containing ibuf header and tree */
ulint size; /* size of the insert buffer tree in pages */
ibool empty; /* after an insert to the ibuf tree is
performed, this is set to FALSE, and if a
contract operation finds the tree empty, this
is set to TRUE */
ulint free_list_len;
/* length of the free list */
ulint height; /* tree height */
dict_index_t* index; /* insert buffer index */
UT_LIST_NODE_T(ibuf_data_t) data_list;
/* list of ibuf data structs */
ulint n_inserts;/* number of inserts made to the insert
buffer */
ulint n_merges;/* number of pages merged */
ulint n_merged_recs;/* number of records merged */
};
/* Insert buffer control structure: the combined size of the ibuf index
trees, the recommended maximum for it, and the base node of the list of
the per-tablespace ibuf_data_t structs.
NOTE(review): presumably this is the struct behind the ibuf_t typedef
and the global 'ibuf' pointer -- confirm in ibuf0types.h */
struct ibuf_struct{
ulint size; /* current size of the ibuf index
trees in pages */
ulint max_size; /* recommended maximum size in pages
for the ibuf index tree */
UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
/* list of ibuf data structs for
each tablespace */
};
/****************************************************************************
Sets the free bit of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
ibuf bitmap operations, which would result if the latch to the bitmap page
were kept. */
UNIV_INTERN
void
ibuf_set_free_bits_func(
/*====================*/
buf_block_t* block, /* in: index page of a non-clustered index;
free bit is reset if page level is 0 */
#ifdef UNIV_IBUF_DEBUG
ulint max_val,/* in: ULINT_UNDEFINED or a maximum
value which the bits must have before
setting; this is for debugging */
#endif /* UNIV_IBUF_DEBUG */
ulint val); /* in: value to set: < 4 */
/* Wrapper for ibuf_set_free_bits_func(). Callers always pass
(block, val, max_val); max_val is forwarded only when UNIV_IBUF_DEBUG is
defined and is dropped otherwise. Note the argument order: the wrapper
takes max_val last, the function takes it second. */
#ifdef UNIV_IBUF_DEBUG
# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v)
#else /* UNIV_IBUF_DEBUG */
# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v)
#endif /* UNIV_IBUF_DEBUG */
/**************************************************************************
Quick, partial check of whether an insert into the given index could be
routed through the insert buffer. Clustered indexes never qualify; unique
secondary indexes qualify only when the caller asks to ignore uniqueness.
Side effect: every call that passes the check increments ibuf_flush_count,
and on every eighth such call buf_LRU_try_free_flushed_blocks() is invoked. */
UNIV_INLINE
ibool
ibuf_should_try(
/*============*/
					/* out: TRUE if the insert buffer
					may be tried for this index */
	dict_index_t*	index,		/* in: index where to insert */
	ulint		ignore_sec_unique)	/* in: if != 0, we should
					ignore UNIQUE constraint on
					a secondary index when we
					decide */
{
	if (dict_index_is_clust(index)) {
		/* Clustered indexes do not qualify */
		return(FALSE);
	}

	if (!ignore_sec_unique && dict_index_is_unique(index)) {
		/* Unique secondary index and the caller did not ask
		us to disregard the constraint */
		return(FALSE);
	}

	ibuf_flush_count++;

	if (ibuf_flush_count % 8 == 0) {
		buf_LRU_try_free_flushed_blocks();
	}

	return(TRUE);
}
/***************************************************************************
Tells whether a page number denotes an ibuf bitmap page. The page number is
masked with (page size - 1), where the page size is zip_size for compressed
tablespaces and UNIV_PAGE_SIZE otherwise, and the result is compared against
FSP_IBUF_BITMAP_OFFSET. */
UNIV_INLINE
ibool
ibuf_bitmap_page(
/*=============*/
			/* out: TRUE if a bitmap page */
	ulint	zip_size,/* in: compressed page size in bytes;
			0 for uncompressed pages */
	ulint	page_no)/* in: page number */
{
	ulint	page_size;

	ut_ad(ut_is_2pow(zip_size));

	/* Uncompressed pages (zip_size == 0) use the full page size */
	page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;

	return(UNIV_UNLIKELY((page_no & (page_size - 1))
			     == FSP_IBUF_BITMAP_OFFSET));
}
/*************************************************************************
Converts the free space on a page into the value stored in the ibuf bitmap.
Free space is counted in units of
(page size / IBUF_PAGE_SIZE_PER_FREE_SPACE) bytes and mapped as follows:
0, 1 and 2 units map to themselves, 3 units map to 2, and 4 or more
units map to 3. */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_bits(
/*===========================*/
				/* out: value for ibuf bitmap bits */
	ulint	zip_size,	/* in: compressed page size in bytes;
				0 for uncompressed pages */
	ulint	max_ins_size)	/* in: maximum insert size after reorganize
				for the page */
{
	ulint	page_size;
	ulint	units;

	ut_ad(ut_is_2pow(zip_size));
	ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
	ut_ad(zip_size <= UNIV_PAGE_SIZE);

	/* Uncompressed pages (zip_size == 0) use the full page size */
	page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;

	units = max_ins_size / (page_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);

	if (units > 3) {
		/* 3 is the largest value the bitmap bits can hold */
		return(3);
	}

	if (units == 3) {
		/* Exactly three units is reported as two */
		return(2);
	}

	return(units);
}
/*************************************************************************
Converts an ibuf bitmap free-bits value back into free space in bytes.
The values 0, 1 and 2 stand for that many units of
(page size / IBUF_PAGE_SIZE_PER_FREE_SPACE) bytes; the value 3 stands for
four units. */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_from_bits(
/*================================*/
			/* out: maximum insert size after reorganize for the
			page */
	ulint	zip_size,/* in: compressed page size in bytes;
			0 for uncompressed pages */
	ulint	bits)	/* in: value for ibuf bitmap bits */
{
	ulint	page_size;
	ulint	units;

	ut_ad(bits < 4);
	ut_ad(ut_is_2pow(zip_size));
	ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
	ut_ad(zip_size <= UNIV_PAGE_SIZE);

	/* Uncompressed pages (zip_size == 0) use the full page size */
	page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;

	/* The top bitmap value denotes four units rather than three */
	units = (bits == 3) ? 4 : bits;

	/* NOTE(review): multiplying before dividing gives the same result
	as dividing first only when page_size is a multiple of
	IBUF_PAGE_SIZE_PER_FREE_SPACE; the assertions above require this for
	zip_size, and UNIV_PAGE_SIZE is assumed to satisfy it too - confirm */
	return(units * page_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
}
/*************************************************************************
Converts the free space on a compressed page into the value stored in the
ibuf bitmap. The usable space is the smaller of the maximum insert size of
the uncompressed frame after reorganize and the maximum insert size reported
for the compressed page; a negative value for the latter yields 0. */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_zip(
/*==========================*/
					/* out: value for ibuf bitmap bits */
	ulint			zip_size,
					/* in: compressed page size in bytes */
	const buf_block_t*	block)	/* in: buffer block */
{
	const page_zip_des_t*	zip_desc;
	lint			zip_room;
	ulint			frame_room;

	ut_ad(zip_size == buf_block_get_zip_size(block));
	ut_ad(zip_size);

	frame_room = page_get_max_insert_size_after_reorganize(
		buf_block_get_frame(block), 1);

	zip_desc = buf_block_get_page_zip(block);
	zip_room = page_zip_max_ins_size(zip_desc,
					 FALSE/* not clustered */);

	if (UNIV_UNLIKELY(zip_room < 0)) {
		/* No room at all on the compressed page */
		return(0);
	}

	if (UNIV_LIKELY(frame_room > (ulint) zip_room)) {
		/* The compressed page is the tighter limit */
		frame_room = (ulint) zip_room;
	}

	return(ibuf_index_page_calc_free_bits(zip_size, frame_room));
}
/*************************************************************************
Converts the free space on a page into the value stored in the ibuf bitmap.
Compressed pages are delegated to ibuf_index_page_calc_free_zip(); for
uncompressed pages the maximum insert size after reorganize is translated
directly. */
UNIV_INLINE
ulint
ibuf_index_page_calc_free(
/*======================*/
					/* out: value for ibuf bitmap bits */
	ulint			zip_size,/* in: compressed page size in bytes;
					0 for uncompressed pages */
	const buf_block_t*	block)	/* in: buffer block */
{
	ut_ad(zip_size == buf_block_get_zip_size(block));

	if (zip_size) {
		/* Compressed page */
		return(ibuf_index_page_calc_free_zip(zip_size, block));
	}

	/* Uncompressed page */
	return(ibuf_index_page_calc_free_bits(
		       0,
		       page_get_max_insert_size_after_reorganize(
			       buf_block_get_frame(block), 1)));
}
/****************************************************************************
Lowers the free bits of an uncompressed page in the ibuf bitmap when the
latest operation has left less free space on the page than the bits
currently indicate. The bits are only ever decreased here. This is done in a
separate mini-transaction, hence this operation does not restrict further
work to only ibuf bitmap operations, which would result if the latch to the
bitmap page were kept. */
UNIV_INLINE
void
ibuf_update_free_bits_if_full(
/*==========================*/
	buf_block_t*	block,	/* in: index page to which we have added new
				records; the free bits are updated if the
				index is non-clustered and non-unique and
				the page level is 0, and the page becomes
				fuller */
	ulint		max_ins_size,/* in: value of maximum insert size with
				reorganize before the latest operation
				performed to the page */
	ulint		increase)/* in: upper limit for the additional space
				used in the latest operation, if known, or
				ULINT_UNDEFINED */
{
	ulint	old_bits;
	ulint	new_bits;

	/* Compile-time guard on the "undefined" marker versus the page size.
	NOTE(review): presumably this ensures that an unknown increase can
	never compare <= max_ins_size below - confirm against the definition
	of ULINT_UNDEFINED */
#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE
# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE"
#endif

	ut_ad(!buf_block_get_page_zip(block));

	old_bits = ibuf_index_page_calc_free_bits(0, max_ins_size);

	if (max_ins_size < increase) {
		/* The increase cannot be subtracted from the old size:
		compute the bits from the page itself */
		new_bits = ibuf_index_page_calc_free(0, block);
	} else {
		/* Estimate from the old size and the upper limit of the
		space consumed by the latest operation */
		new_bits = ibuf_index_page_calc_free_bits(
			0, max_ins_size - increase);
#ifdef UNIV_IBUF_DEBUG
		ut_a(new_bits <= ibuf_index_page_calc_free(0, block));
#endif
	}

	if (new_bits == 0) {
		/* We move the page to the front of the buffer pool LRU list:
		the purpose of this is to prevent those pages to which we
		cannot make inserts using the insert buffer from slipping
		out of the buffer pool */
		buf_page_make_young(&block->page);
	}

	if (new_bits < old_bits) {
		ibuf_set_free_bits(block, new_bits, old_bits);
	}
}

Some files were not shown because too many files have changed in this diff Show More