mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
buf0buf.c, buf0buf.ic, buf0buf.h:
Reduce memory usage of the buffer headers
Many files: Merge InnoDB-4.1 with AWE support
sql/mysqld.cc: Merge InnoDB-4.1 with AWE support
sql/set_var.cc: Merge InnoDB-4.1 with AWE support
sql/ha_innodb.h: Merge InnoDB-4.1 with AWE support
sql/ha_innodb.cc: Merge InnoDB-4.1 with AWE support
innobase/btr/btr0cur.c: Merge InnoDB-4.1 with AWE support
innobase/btr/btr0pcur.c: Merge InnoDB-4.1 with AWE support
innobase/buf/buf0flu.c: Merge InnoDB-4.1 with AWE support
innobase/buf/buf0lru.c: Merge InnoDB-4.1 with AWE support
innobase/buf/buf0rea.c: Merge InnoDB-4.1 with AWE support
innobase/include/btr0pcur.h: Merge InnoDB-4.1 with AWE support
innobase/include/buf0lru.h: Merge InnoDB-4.1 with AWE support
innobase/include/log0recv.h: Merge InnoDB-4.1 with AWE support
innobase/include/os0proc.h: Merge InnoDB-4.1 with AWE support
innobase/include/srv0srv.h: Merge InnoDB-4.1 with AWE support
innobase/log/log0log.c: Merge InnoDB-4.1 with AWE support
innobase/log/log0recv.c: Merge InnoDB-4.1 with AWE support
innobase/os/os0file.c: Merge InnoDB-4.1 with AWE support
innobase/os/os0proc.c: Merge InnoDB-4.1 with AWE support
innobase/srv/srv0srv.c: Merge InnoDB-4.1 with AWE support
innobase/srv/srv0start.c: Merge InnoDB-4.1 with AWE support
innobase/trx/trx0sys.c: Merge InnoDB-4.1 with AWE support
innobase/trx/trx0trx.c: Merge InnoDB-4.1 with AWE support
innobase/ut/ut0ut.c: Merge InnoDB-4.1 with AWE support
innobase/include/buf0buf.h: Reduce memory usage of the buffer headers
innobase/include/buf0buf.ic: Reduce memory usage of the buffer headers
innobase/buf/buf0buf.c: Reduce memory usage of the buffer headers
This commit is contained in:
@ -291,6 +291,7 @@ btr_cur_search_to_nth_level(
|
|||||||
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
|
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
|
||||||
&& !estimate
|
&& !estimate
|
||||||
&& mode != PAGE_CUR_LE_OR_EXTENDS
|
&& mode != PAGE_CUR_LE_OR_EXTENDS
|
||||||
|
&& srv_use_adaptive_hash_indexes
|
||||||
&& btr_search_guess_on_hash(index, info, tuple, mode,
|
&& btr_search_guess_on_hash(index, info, tuple, mode,
|
||||||
latch_mode, cursor,
|
latch_mode, cursor,
|
||||||
has_search_latch, mtr)) {
|
has_search_latch, mtr)) {
|
||||||
@ -495,9 +496,11 @@ retry_page_get:
|
|||||||
cursor->up_bytes = up_bytes;
|
cursor->up_bytes = up_bytes;
|
||||||
|
|
||||||
#ifdef BTR_CUR_ADAPT
|
#ifdef BTR_CUR_ADAPT
|
||||||
btr_search_info_update(index, cursor);
|
if (srv_use_adaptive_hash_indexes) {
|
||||||
#endif
|
|
||||||
|
|
||||||
|
btr_search_info_update(index, cursor);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
ut_ad(cursor->up_match != ULINT_UNDEFINED
|
ut_ad(cursor->up_match != ULINT_UNDEFINED
|
||||||
|| mode != PAGE_CUR_GE);
|
|| mode != PAGE_CUR_GE);
|
||||||
ut_ad(cursor->up_match != ULINT_UNDEFINED
|
ut_ad(cursor->up_match != ULINT_UNDEFINED
|
||||||
|
@ -95,7 +95,9 @@ btr_pcur_store_position(
|
|||||||
ut_a(cursor->latch_mode != BTR_NO_LATCHES);
|
ut_a(cursor->latch_mode != BTR_NO_LATCHES);
|
||||||
|
|
||||||
if (page_get_n_recs(page) == 0) {
|
if (page_get_n_recs(page) == 0) {
|
||||||
/* It must be an empty index tree */
|
/* It must be an empty index tree; NOTE that in this case
|
||||||
|
we do not store the modify_clock, but always do a search
|
||||||
|
if we restore the cursor position */
|
||||||
|
|
||||||
ut_a(btr_page_get_next(page, mtr) == FIL_NULL
|
ut_a(btr_page_get_next(page, mtr) == FIL_NULL
|
||||||
&& btr_page_get_prev(page, mtr) == FIL_NULL);
|
&& btr_page_get_prev(page, mtr) == FIL_NULL);
|
||||||
@ -128,12 +130,13 @@ btr_pcur_store_position(
|
|||||||
} else {
|
} else {
|
||||||
cursor->rel_pos = BTR_PCUR_ON;
|
cursor->rel_pos = BTR_PCUR_ON;
|
||||||
}
|
}
|
||||||
|
|
||||||
cursor->old_stored = BTR_PCUR_OLD_STORED;
|
cursor->old_stored = BTR_PCUR_OLD_STORED;
|
||||||
cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec,
|
cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec,
|
||||||
&(cursor->old_rec_buf),
|
&(cursor->old_rec_buf),
|
||||||
&(cursor->buf_size));
|
&(cursor->buf_size));
|
||||||
|
|
||||||
|
cursor->block_when_stored = buf_block_align(page);
|
||||||
cursor->modify_clock = buf_frame_get_modify_clock(page);
|
cursor->modify_clock = buf_frame_get_modify_clock(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -205,6 +208,9 @@ btr_pcur_restore_position(
|
|||||||
if (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
|
if (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
|
||||||
|| cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
|
|| cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
|
||||||
|
|
||||||
|
/* In these cases we do not try an optimistic restoration,
|
||||||
|
but always do a search */
|
||||||
|
|
||||||
if (cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
|
if (cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
|
||||||
from_left = TRUE;
|
from_left = TRUE;
|
||||||
} else {
|
} else {
|
||||||
@ -214,6 +220,10 @@ btr_pcur_restore_position(
|
|||||||
btr_cur_open_at_index_side(from_left,
|
btr_cur_open_at_index_side(from_left,
|
||||||
btr_pcur_get_btr_cur(cursor)->index, latch_mode,
|
btr_pcur_get_btr_cur(cursor)->index, latch_mode,
|
||||||
btr_pcur_get_btr_cur(cursor), mtr);
|
btr_pcur_get_btr_cur(cursor), mtr);
|
||||||
|
|
||||||
|
cursor->block_when_stored =
|
||||||
|
buf_block_align(btr_pcur_get_page(cursor));
|
||||||
|
|
||||||
return(FALSE);
|
return(FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -224,8 +234,9 @@ btr_pcur_restore_position(
|
|||||||
if (latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF) {
|
if (latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF) {
|
||||||
/* Try optimistic restoration */
|
/* Try optimistic restoration */
|
||||||
|
|
||||||
if (buf_page_optimistic_get(latch_mode, page,
|
if (buf_page_optimistic_get(latch_mode,
|
||||||
cursor->modify_clock, mtr)) {
|
cursor->block_when_stored, page,
|
||||||
|
cursor->modify_clock, mtr)) {
|
||||||
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
|
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
|
||||||
|
|
||||||
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
|
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
|
||||||
@ -270,8 +281,6 @@ btr_pcur_restore_position(
|
|||||||
|
|
||||||
btr_pcur_open_with_no_init(btr_pcur_get_btr_cur(cursor)->index, tuple,
|
btr_pcur_open_with_no_init(btr_pcur_get_btr_cur(cursor)->index, tuple,
|
||||||
mode, latch_mode, cursor, 0, mtr);
|
mode, latch_mode, cursor, 0, mtr);
|
||||||
|
|
||||||
cursor->old_stored = BTR_PCUR_OLD_STORED;
|
|
||||||
|
|
||||||
/* Restore the old search mode */
|
/* Restore the old search mode */
|
||||||
cursor->search_mode = old_mode;
|
cursor->search_mode = old_mode;
|
||||||
@ -280,11 +289,18 @@ btr_pcur_restore_position(
|
|||||||
&& btr_pcur_is_on_user_rec(cursor, mtr)
|
&& btr_pcur_is_on_user_rec(cursor, mtr)
|
||||||
&& 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) {
|
&& 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) {
|
||||||
|
|
||||||
/* We have to store the NEW value for the modify clock, since
|
/* We have to store the NEW value for the modify clock, since
|
||||||
the cursor can now be on a different page! */
|
the cursor can now be on a different page! But we can retain
|
||||||
|
the value of old_rec */
|
||||||
|
|
||||||
|
cursor->modify_clock =
|
||||||
|
buf_frame_get_modify_clock(btr_pcur_get_page(cursor));
|
||||||
|
|
||||||
|
cursor->block_when_stored =
|
||||||
|
buf_block_align(btr_pcur_get_page(cursor));
|
||||||
|
|
||||||
|
cursor->old_stored = BTR_PCUR_OLD_STORED;
|
||||||
|
|
||||||
cursor->modify_clock = buf_frame_get_modify_clock(
|
|
||||||
buf_frame_align(btr_pcur_get_rec(cursor)));
|
|
||||||
mem_heap_free(heap);
|
mem_heap_free(heap);
|
||||||
|
|
||||||
return(TRUE);
|
return(TRUE);
|
||||||
@ -292,6 +308,12 @@ btr_pcur_restore_position(
|
|||||||
|
|
||||||
mem_heap_free(heap);
|
mem_heap_free(heap);
|
||||||
|
|
||||||
|
/* We have to store new position information, modify_clock etc.,
|
||||||
|
to the cursor because it can now be on a different page, the record
|
||||||
|
under it may have been removed, etc. */
|
||||||
|
|
||||||
|
btr_pcur_store_position(cursor, mtr);
|
||||||
|
|
||||||
return(FALSE);
|
return(FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -196,7 +196,29 @@ If a new page is referenced in the buf_pool, and several pages
|
|||||||
of its random access area (for instance, 32 consecutive pages
|
of its random access area (for instance, 32 consecutive pages
|
||||||
in a tablespace) have recently been referenced, we may predict
|
in a tablespace) have recently been referenced, we may predict
|
||||||
that the whole area may be needed in the near future, and issue
|
that the whole area may be needed in the near future, and issue
|
||||||
the read requests for the whole area. */
|
the read requests for the whole area.
|
||||||
|
|
||||||
|
AWE implementation
|
||||||
|
------------------
|
||||||
|
|
||||||
|
By a 'block' we mean the buffer header of type buf_block_t. By a 'page'
|
||||||
|
we mean the physical 16 kB memory area allocated from RAM for that block.
|
||||||
|
By a 'frame' we mean a 16 kB area in the virtual address space of the
|
||||||
|
process, in the frame_mem of buf_pool.
|
||||||
|
|
||||||
|
We can map pages to the frames of the buffer pool.
|
||||||
|
|
||||||
|
1) A buffer block allocated to use as a non-data page, e.g., to the lock
|
||||||
|
table, is always mapped to a frame.
|
||||||
|
2) A bufferfixed or io-fixed data page is always mapped to a frame.
|
||||||
|
3) When we need to map a block to a frame, we look in the list
|
||||||
|
awe_LRU_free_mapped and try to unmap its last block, but note that
|
||||||
|
bufferfixed or io-fixed pages cannot be unmapped.
|
||||||
|
4) For every frame in the buffer pool there is always a block whose page is
|
||||||
|
mapped to it. When we create the buffer pool, we map the first elements
|
||||||
|
in the free list to the frames.
|
||||||
|
5) When we have AWE enabled, we disable adaptive hash indexes.
|
||||||
|
*/
|
||||||
|
|
||||||
buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database */
|
buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database */
|
||||||
|
|
||||||
@ -346,12 +368,15 @@ void
|
|||||||
buf_block_init(
|
buf_block_init(
|
||||||
/*===========*/
|
/*===========*/
|
||||||
buf_block_t* block, /* in: pointer to control block */
|
buf_block_t* block, /* in: pointer to control block */
|
||||||
byte* frame) /* in: pointer to buffer frame */
|
byte* frame) /* in: pointer to buffer frame, or NULL if in
|
||||||
|
the case of AWE there is no frame */
|
||||||
{
|
{
|
||||||
block->state = BUF_BLOCK_NOT_USED;
|
block->state = BUF_BLOCK_NOT_USED;
|
||||||
|
|
||||||
block->frame = frame;
|
block->frame = frame;
|
||||||
|
|
||||||
|
block->awe_info = NULL;
|
||||||
|
|
||||||
block->modify_clock = ut_dulint_zero;
|
block->modify_clock = ut_dulint_zero;
|
||||||
|
|
||||||
block->file_page_was_freed = FALSE;
|
block->file_page_was_freed = FALSE;
|
||||||
@ -364,29 +389,37 @@ buf_block_init(
|
|||||||
rw_lock_create(&(block->read_lock));
|
rw_lock_create(&(block->read_lock));
|
||||||
rw_lock_set_level(&(block->read_lock), SYNC_NO_ORDER_CHECK);
|
rw_lock_set_level(&(block->read_lock), SYNC_NO_ORDER_CHECK);
|
||||||
|
|
||||||
|
#ifdef UNIV_SYNC_DEBUG
|
||||||
rw_lock_create(&(block->debug_latch));
|
rw_lock_create(&(block->debug_latch));
|
||||||
rw_lock_set_level(&(block->debug_latch), SYNC_NO_ORDER_CHECK);
|
rw_lock_set_level(&(block->debug_latch), SYNC_NO_ORDER_CHECK);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
Creates a buffer buf_pool object. */
|
Creates the buffer pool. */
|
||||||
static
|
|
||||||
buf_pool_t*
|
buf_pool_t*
|
||||||
buf_pool_create(
|
buf_pool_init(
|
||||||
/*============*/
|
/*==========*/
|
||||||
/* out, own: buf_pool object, NULL if not
|
/* out, own: buf_pool object, NULL if not
|
||||||
enough memory */
|
enough memory or error */
|
||||||
ulint max_size, /* in: maximum size of the buf_pool in
|
ulint max_size, /* in: maximum size of the buf_pool in
|
||||||
blocks */
|
blocks */
|
||||||
ulint curr_size) /* in: current size to use, must be <=
|
ulint curr_size, /* in: current size to use, must be <=
|
||||||
max_size, currently must be equal to
|
max_size, currently must be equal to
|
||||||
max_size */
|
max_size */
|
||||||
|
ulint n_frames) /* in: number of frames; if AWE is used,
|
||||||
|
this is the size of the address space window
|
||||||
|
where physical memory pages are mapped; if
|
||||||
|
AWE is not used then this must be the same
|
||||||
|
as max_size */
|
||||||
{
|
{
|
||||||
byte* frame;
|
byte* frame;
|
||||||
ulint i;
|
ulint i;
|
||||||
buf_block_t* block;
|
buf_block_t* block;
|
||||||
|
|
||||||
ut_a(max_size == curr_size);
|
ut_a(max_size == curr_size);
|
||||||
|
ut_a(srv_use_awe || n_frames == max_size);
|
||||||
|
|
||||||
buf_pool = mem_alloc(sizeof(buf_pool_t));
|
buf_pool = mem_alloc(sizeof(buf_pool_t));
|
||||||
|
|
||||||
@ -396,8 +429,38 @@ buf_pool_create(
|
|||||||
mutex_set_level(&(buf_pool->mutex), SYNC_BUF_POOL);
|
mutex_set_level(&(buf_pool->mutex), SYNC_BUF_POOL);
|
||||||
|
|
||||||
mutex_enter(&(buf_pool->mutex));
|
mutex_enter(&(buf_pool->mutex));
|
||||||
|
|
||||||
buf_pool->frame_mem = ut_malloc(UNIV_PAGE_SIZE * (max_size + 1));
|
if (srv_use_awe) {
|
||||||
|
/*----------------------------------------*/
|
||||||
|
/* Allocate the virtual address space window, i.e., the
|
||||||
|
buffer pool frames */
|
||||||
|
|
||||||
|
buf_pool->frame_mem = os_awe_allocate_virtual_mem_window(
|
||||||
|
UNIV_PAGE_SIZE * (n_frames + 1));
|
||||||
|
|
||||||
|
/* Allocate the physical memory for AWE and the AWE info array
|
||||||
|
for buf_pool */
|
||||||
|
|
||||||
|
if ((curr_size % ((1024 * 1024) / UNIV_PAGE_SIZE)) != 0) {
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Error: physical memory must be allocated in full megabytes.\n"
|
||||||
|
"InnoDB: Trying to allocate %lu database pages.\n",
|
||||||
|
curr_size);
|
||||||
|
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!os_awe_allocate_physical_mem(&(buf_pool->awe_info),
|
||||||
|
curr_size / ((1024 * 1024) / UNIV_PAGE_SIZE))) {
|
||||||
|
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
/*----------------------------------------*/
|
||||||
|
} else {
|
||||||
|
buf_pool->frame_mem = ut_malloc(
|
||||||
|
UNIV_PAGE_SIZE * (n_frames + 1));
|
||||||
|
}
|
||||||
|
|
||||||
if (buf_pool->frame_mem == NULL) {
|
if (buf_pool->frame_mem == NULL) {
|
||||||
|
|
||||||
@ -414,21 +477,60 @@ buf_pool_create(
|
|||||||
buf_pool->max_size = max_size;
|
buf_pool->max_size = max_size;
|
||||||
buf_pool->curr_size = curr_size;
|
buf_pool->curr_size = curr_size;
|
||||||
|
|
||||||
|
buf_pool->n_frames = n_frames;
|
||||||
|
|
||||||
/* Align pointer to the first frame */
|
/* Align pointer to the first frame */
|
||||||
|
|
||||||
frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
|
frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
|
||||||
buf_pool->frame_zero = frame;
|
|
||||||
|
|
||||||
|
buf_pool->frame_zero = frame;
|
||||||
buf_pool->high_end = frame + UNIV_PAGE_SIZE * curr_size;
|
buf_pool->high_end = frame + UNIV_PAGE_SIZE * curr_size;
|
||||||
|
|
||||||
/* Init block structs and assign frames for them */
|
if (srv_use_awe) {
|
||||||
|
/*----------------------------------------*/
|
||||||
|
/* Map an initial part of the allocated physical memory to
|
||||||
|
the window */
|
||||||
|
|
||||||
|
os_awe_map_physical_mem_to_window(buf_pool->frame_zero,
|
||||||
|
n_frames *
|
||||||
|
(UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE),
|
||||||
|
buf_pool->awe_info);
|
||||||
|
/*----------------------------------------*/
|
||||||
|
}
|
||||||
|
|
||||||
|
buf_pool->blocks_of_frames = ut_malloc(sizeof(void*) * n_frames);
|
||||||
|
|
||||||
|
if (buf_pool->blocks_of_frames == NULL) {
|
||||||
|
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Init block structs and assign frames for them; in the case of
|
||||||
|
AWE there are fewer frames than blocks. Then we assign the frames
|
||||||
|
to the first blocks (we already mapped the memory above). We also
|
||||||
|
init the awe_info for every block. */
|
||||||
|
|
||||||
for (i = 0; i < max_size; i++) {
|
for (i = 0; i < max_size; i++) {
|
||||||
|
|
||||||
block = buf_pool_get_nth_block(buf_pool, i);
|
block = buf_pool_get_nth_block(buf_pool, i);
|
||||||
|
|
||||||
|
if (i < n_frames) {
|
||||||
|
frame = buf_pool->frame_zero + i * UNIV_PAGE_SIZE;
|
||||||
|
*(buf_pool->blocks_of_frames + i) = block;
|
||||||
|
} else {
|
||||||
|
frame = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
buf_block_init(block, frame);
|
buf_block_init(block, frame);
|
||||||
frame = frame + UNIV_PAGE_SIZE;
|
|
||||||
|
if (srv_use_awe) {
|
||||||
|
/*----------------------------------------*/
|
||||||
|
block->awe_info = buf_pool->awe_info
|
||||||
|
+ i * (UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE);
|
||||||
|
/*----------------------------------------*/
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
buf_pool->page_hash = hash_create(2 * max_size);
|
buf_pool->page_hash = hash_create(2 * max_size);
|
||||||
|
|
||||||
buf_pool->n_pend_reads = 0;
|
buf_pool->n_pend_reads = 0;
|
||||||
@ -438,12 +540,14 @@ buf_pool_create(
|
|||||||
buf_pool->n_pages_read = 0;
|
buf_pool->n_pages_read = 0;
|
||||||
buf_pool->n_pages_written = 0;
|
buf_pool->n_pages_written = 0;
|
||||||
buf_pool->n_pages_created = 0;
|
buf_pool->n_pages_created = 0;
|
||||||
|
buf_pool->n_pages_awe_remapped = 0;
|
||||||
|
|
||||||
buf_pool->n_page_gets = 0;
|
buf_pool->n_page_gets = 0;
|
||||||
buf_pool->n_page_gets_old = 0;
|
buf_pool->n_page_gets_old = 0;
|
||||||
buf_pool->n_pages_read_old = 0;
|
buf_pool->n_pages_read_old = 0;
|
||||||
buf_pool->n_pages_written_old = 0;
|
buf_pool->n_pages_written_old = 0;
|
||||||
buf_pool->n_pages_created_old = 0;
|
buf_pool->n_pages_created_old = 0;
|
||||||
|
buf_pool->n_pages_awe_remapped_old = 0;
|
||||||
|
|
||||||
/* 2. Initialize flushing fields
|
/* 2. Initialize flushing fields
|
||||||
---------------------------- */
|
---------------------------- */
|
||||||
@ -466,40 +570,120 @@ buf_pool_create(
|
|||||||
|
|
||||||
buf_pool->LRU_old = NULL;
|
buf_pool->LRU_old = NULL;
|
||||||
|
|
||||||
|
UT_LIST_INIT(buf_pool->awe_LRU_free_mapped);
|
||||||
|
|
||||||
/* Add control blocks to the free list */
|
/* Add control blocks to the free list */
|
||||||
UT_LIST_INIT(buf_pool->free);
|
UT_LIST_INIT(buf_pool->free);
|
||||||
|
|
||||||
for (i = 0; i < curr_size; i++) {
|
for (i = 0; i < curr_size; i++) {
|
||||||
|
|
||||||
block = buf_pool_get_nth_block(buf_pool, i);
|
block = buf_pool_get_nth_block(buf_pool, i);
|
||||||
|
|
||||||
/* Wipe contents of page to eliminate a Purify warning */
|
if (block->frame) {
|
||||||
memset(block->frame, '\0', UNIV_PAGE_SIZE);
|
/* Wipe contents of frame to eliminate a Purify
|
||||||
|
warning */
|
||||||
|
|
||||||
UT_LIST_ADD_FIRST(free, buf_pool->free, block);
|
memset(block->frame, '\0', UNIV_PAGE_SIZE);
|
||||||
|
|
||||||
|
if (srv_use_awe) {
|
||||||
|
/* Add to the list of blocks mapped to
|
||||||
|
frames */
|
||||||
|
|
||||||
|
UT_LIST_ADD_LAST(awe_LRU_free_mapped,
|
||||||
|
buf_pool->awe_LRU_free_mapped, block);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
UT_LIST_ADD_LAST(free, buf_pool->free, block);
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_exit(&(buf_pool->mutex));
|
mutex_exit(&(buf_pool->mutex));
|
||||||
|
|
||||||
btr_search_sys_create(curr_size * UNIV_PAGE_SIZE / sizeof(void*) / 64);
|
if (srv_use_adaptive_hash_indexes) {
|
||||||
|
btr_search_sys_create(
|
||||||
|
curr_size * UNIV_PAGE_SIZE / sizeof(void*) / 64);
|
||||||
|
} else {
|
||||||
|
/* Create only a small dummy system */
|
||||||
|
btr_search_sys_create(1000);
|
||||||
|
}
|
||||||
|
|
||||||
return(buf_pool);
|
return(buf_pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
Initializes the buffer buf_pool of the database. */
|
Maps the page of block to a frame, if not mapped yet. Unmaps some page
|
||||||
|
from the end of the awe_LRU_free_mapped. */
|
||||||
|
|
||||||
void
|
void
|
||||||
buf_pool_init(
|
buf_awe_map_page_to_frame(
|
||||||
/*==========*/
|
/*======================*/
|
||||||
ulint max_size, /* in: maximum size of the buf_pool in blocks */
|
buf_block_t* block, /* in: block whose page should be
|
||||||
ulint curr_size) /* in: current size to use, must be <=
|
mapped to a frame */
|
||||||
max_size */
|
ibool add_to_mapped_list) /* in: TRUE if, in the case that
|
||||||
|
we need to map the page, we should also
|
||||||
|
add the block to the
|
||||||
|
awe_LRU_free_mapped list */
|
||||||
{
|
{
|
||||||
ut_a(buf_pool == NULL);
|
buf_block_t* bck;
|
||||||
|
|
||||||
buf_pool_create(max_size, curr_size);
|
ut_ad(mutex_own(&(buf_pool->mutex)));
|
||||||
|
ut_ad(block);
|
||||||
|
|
||||||
ut_ad(buf_validate());
|
if (block->frame) {
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Scan awe_LRU_free_mapped from the end and try to find a block
|
||||||
|
which is not bufferfixed or io-fixed */
|
||||||
|
|
||||||
|
bck = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped);
|
||||||
|
|
||||||
|
while (bck) {
|
||||||
|
if (bck->state == BUF_BLOCK_FILE_PAGE
|
||||||
|
&& (bck->buf_fix_count != 0 || bck->io_fix != 0)) {
|
||||||
|
|
||||||
|
/* We have to skip this */
|
||||||
|
bck = UT_LIST_GET_PREV(awe_LRU_free_mapped, bck);
|
||||||
|
} else {
|
||||||
|
/* We can map block to the frame of bck */
|
||||||
|
|
||||||
|
os_awe_map_physical_mem_to_window(
|
||||||
|
bck->frame,
|
||||||
|
UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE,
|
||||||
|
block->awe_info);
|
||||||
|
|
||||||
|
block->frame = bck->frame;
|
||||||
|
|
||||||
|
*(buf_pool->blocks_of_frames
|
||||||
|
+ (((ulint)(block->frame
|
||||||
|
- buf_pool->frame_zero))
|
||||||
|
>> UNIV_PAGE_SIZE_SHIFT))
|
||||||
|
= block;
|
||||||
|
|
||||||
|
bck->frame = NULL;
|
||||||
|
UT_LIST_REMOVE(awe_LRU_free_mapped,
|
||||||
|
buf_pool->awe_LRU_free_mapped,
|
||||||
|
bck);
|
||||||
|
|
||||||
|
if (add_to_mapped_list) {
|
||||||
|
UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
|
||||||
|
buf_pool->awe_LRU_free_mapped,
|
||||||
|
block);
|
||||||
|
}
|
||||||
|
|
||||||
|
buf_pool->n_pages_awe_remapped++;
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Fatal error: cannot find a page to unmap\n"
|
||||||
|
"InnoDB: awe_LRU_free_mapped list length %lu\n",
|
||||||
|
UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
|
||||||
|
|
||||||
|
ut_a(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
@ -508,7 +692,9 @@ UNIV_INLINE
|
|||||||
buf_block_t*
|
buf_block_t*
|
||||||
buf_block_alloc(void)
|
buf_block_alloc(void)
|
||||||
/*=================*/
|
/*=================*/
|
||||||
/* out, own: the allocated block */
|
/* out, own: the allocated block; also if AWE
|
||||||
|
is used it is guaranteed that the page is
|
||||||
|
mapped to a frame */
|
||||||
{
|
{
|
||||||
buf_block_t* block;
|
buf_block_t* block;
|
||||||
|
|
||||||
@ -846,6 +1032,19 @@ loop:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If AWE is enabled and the page is not mapped to a frame, then
|
||||||
|
map it */
|
||||||
|
|
||||||
|
if (block->frame == NULL) {
|
||||||
|
ut_a(srv_use_awe);
|
||||||
|
|
||||||
|
/* We set second parameter TRUE because the block is in the
|
||||||
|
LRU list and we must put it to awe_LRU_free_mapped list once
|
||||||
|
mapped to a frame */
|
||||||
|
|
||||||
|
buf_awe_map_page_to_frame(block, TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef UNIV_SYNC_DEBUG
|
#ifdef UNIV_SYNC_DEBUG
|
||||||
buf_block_buf_fix_inc_debug(block, file, line);
|
buf_block_buf_fix_inc_debug(block, file, line);
|
||||||
#else
|
#else
|
||||||
@ -940,28 +1139,27 @@ buf_page_optimistic_get_func(
|
|||||||
/*=========================*/
|
/*=========================*/
|
||||||
/* out: TRUE if success */
|
/* out: TRUE if success */
|
||||||
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
|
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
|
||||||
buf_frame_t* guess, /* in: guessed frame */
|
buf_block_t* block, /* in: guessed buffer block */
|
||||||
|
buf_frame_t* guess, /* in: guessed frame; note that AWE may move
|
||||||
|
frames */
|
||||||
dulint modify_clock,/* in: modify clock value if mode is
|
dulint modify_clock,/* in: modify clock value if mode is
|
||||||
..._GUESS_ON_CLOCK */
|
..._GUESS_ON_CLOCK */
|
||||||
char* file, /* in: file name */
|
char* file, /* in: file name */
|
||||||
ulint line, /* in: line where called */
|
ulint line, /* in: line where called */
|
||||||
mtr_t* mtr) /* in: mini-transaction */
|
mtr_t* mtr) /* in: mini-transaction */
|
||||||
{
|
{
|
||||||
buf_block_t* block;
|
|
||||||
ibool accessed;
|
ibool accessed;
|
||||||
ibool success;
|
ibool success;
|
||||||
ulint fix_type;
|
ulint fix_type;
|
||||||
|
|
||||||
ut_ad(mtr && guess);
|
ut_ad(mtr && block);
|
||||||
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
|
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
|
||||||
|
|
||||||
buf_pool->n_page_gets++;
|
|
||||||
|
|
||||||
block = buf_block_align(guess);
|
|
||||||
|
|
||||||
mutex_enter(&(buf_pool->mutex));
|
mutex_enter(&(buf_pool->mutex));
|
||||||
|
|
||||||
if (block->state != BUF_BLOCK_FILE_PAGE) {
|
/* If AWE is used, block may have a different frame now, e.g., NULL */
|
||||||
|
|
||||||
|
if (block->state != BUF_BLOCK_FILE_PAGE || block->frame != guess) {
|
||||||
|
|
||||||
mutex_exit(&(buf_pool->mutex));
|
mutex_exit(&(buf_pool->mutex));
|
||||||
|
|
||||||
@ -1054,12 +1252,15 @@ buf_page_optimistic_get_func(
|
|||||||
#ifdef UNIV_IBUF_DEBUG
|
#ifdef UNIV_IBUF_DEBUG
|
||||||
ut_a(ibuf_count_get(block->space, block->offset) == 0);
|
ut_a(ibuf_count_get(block->space, block->offset) == 0);
|
||||||
#endif
|
#endif
|
||||||
|
buf_pool->n_page_gets++;
|
||||||
|
|
||||||
return(TRUE);
|
return(TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
This is used to get access to a known database page, when no waiting can be
|
This is used to get access to a known database page, when no waiting can be
|
||||||
done. */
|
done. For example, if a search in an adaptive hash index leads us to this
|
||||||
|
frame. */
|
||||||
|
|
||||||
ibool
|
ibool
|
||||||
buf_page_get_known_nowait(
|
buf_page_get_known_nowait(
|
||||||
@ -1078,13 +1279,11 @@ buf_page_get_known_nowait(
|
|||||||
|
|
||||||
ut_ad(mtr);
|
ut_ad(mtr);
|
||||||
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
|
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
|
||||||
|
|
||||||
buf_pool->n_page_gets++;
|
|
||||||
|
|
||||||
block = buf_block_align(guess);
|
|
||||||
|
|
||||||
mutex_enter(&(buf_pool->mutex));
|
mutex_enter(&(buf_pool->mutex));
|
||||||
|
|
||||||
|
block = buf_block_align(guess);
|
||||||
|
|
||||||
if (block->state == BUF_BLOCK_REMOVE_HASH) {
|
if (block->state == BUF_BLOCK_REMOVE_HASH) {
|
||||||
/* Another thread is just freeing the block from the LRU list
|
/* Another thread is just freeing the block from the LRU list
|
||||||
of the buffer pool: do not try to access this page; this
|
of the buffer pool: do not try to access this page; this
|
||||||
@ -1152,6 +1351,8 @@ buf_page_get_known_nowait(
|
|||||||
ut_a((mode == BUF_KEEP_OLD)
|
ut_a((mode == BUF_KEEP_OLD)
|
||||||
|| (ibuf_count_get(block->space, block->offset) == 0));
|
|| (ibuf_count_get(block->space, block->offset) == 0));
|
||||||
#endif
|
#endif
|
||||||
|
buf_pool->n_page_gets++;
|
||||||
|
|
||||||
return(TRUE);
|
return(TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1732,7 +1933,7 @@ buf_print(void)
|
|||||||
|
|
||||||
ut_ad(buf_pool);
|
ut_ad(buf_pool);
|
||||||
|
|
||||||
size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE;
|
size = buf_pool->curr_size;
|
||||||
|
|
||||||
index_ids = mem_alloc(sizeof(dulint) * size);
|
index_ids = mem_alloc(sizeof(dulint) * size);
|
||||||
counts = mem_alloc(sizeof(ulint) * size);
|
counts = mem_alloc(sizeof(ulint) * size);
|
||||||
@ -1847,7 +2048,7 @@ buf_print_io(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE;
|
size = buf_pool->curr_size;
|
||||||
|
|
||||||
mutex_enter(&(buf_pool->mutex));
|
mutex_enter(&(buf_pool->mutex));
|
||||||
|
|
||||||
@ -1866,6 +2067,15 @@ buf_print_io(
|
|||||||
buf += sprintf(buf,
|
buf += sprintf(buf,
|
||||||
"Modified db pages %lu\n",
|
"Modified db pages %lu\n",
|
||||||
UT_LIST_GET_LEN(buf_pool->flush_list));
|
UT_LIST_GET_LEN(buf_pool->flush_list));
|
||||||
|
if (srv_use_awe) {
|
||||||
|
buf += sprintf(buf,
|
||||||
|
"AWE: Buffer pool memory frames %lu\n",
|
||||||
|
buf_pool->n_frames);
|
||||||
|
|
||||||
|
buf += sprintf(buf,
|
||||||
|
"AWE: Database pages and free buffers mapped in frames %lu\n",
|
||||||
|
UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
|
||||||
|
}
|
||||||
|
|
||||||
buf += sprintf(buf, "Pending reads %lu \n", buf_pool->n_pend_reads);
|
buf += sprintf(buf, "Pending reads %lu \n", buf_pool->n_pend_reads);
|
||||||
|
|
||||||
@ -1891,6 +2101,13 @@ buf_print_io(
|
|||||||
(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
|
(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
|
||||||
/ time_elapsed);
|
/ time_elapsed);
|
||||||
|
|
||||||
|
if (srv_use_awe) {
|
||||||
|
buf += sprintf(buf, "AWE: %.2f page remaps/s\n",
|
||||||
|
(buf_pool->n_pages_awe_remapped
|
||||||
|
- buf_pool->n_pages_awe_remapped_old)
|
||||||
|
/ time_elapsed);
|
||||||
|
}
|
||||||
|
|
||||||
if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
|
if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
|
||||||
buf += sprintf(buf, "Buffer pool hit rate %lu / 1000\n",
|
buf += sprintf(buf, "Buffer pool hit rate %lu / 1000\n",
|
||||||
1000
|
1000
|
||||||
@ -1906,6 +2123,7 @@ buf_print_io(
|
|||||||
buf_pool->n_pages_read_old = buf_pool->n_pages_read;
|
buf_pool->n_pages_read_old = buf_pool->n_pages_read;
|
||||||
buf_pool->n_pages_created_old = buf_pool->n_pages_created;
|
buf_pool->n_pages_created_old = buf_pool->n_pages_created;
|
||||||
buf_pool->n_pages_written_old = buf_pool->n_pages_written;
|
buf_pool->n_pages_written_old = buf_pool->n_pages_written;
|
||||||
|
buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
|
||||||
|
|
||||||
mutex_exit(&(buf_pool->mutex));
|
mutex_exit(&(buf_pool->mutex));
|
||||||
}
|
}
|
||||||
@ -1922,6 +2140,7 @@ buf_refresh_io_stats(void)
|
|||||||
buf_pool->n_pages_read_old = buf_pool->n_pages_read;
|
buf_pool->n_pages_read_old = buf_pool->n_pages_read;
|
||||||
buf_pool->n_pages_created_old = buf_pool->n_pages_created;
|
buf_pool->n_pages_created_old = buf_pool->n_pages_created;
|
||||||
buf_pool->n_pages_written_old = buf_pool->n_pages_written;
|
buf_pool->n_pages_written_old = buf_pool->n_pages_written;
|
||||||
|
buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
|
@ -24,6 +24,7 @@ Created 11/11/1995 Heikki Tuuri
|
|||||||
#include "log0log.h"
|
#include "log0log.h"
|
||||||
#include "os0file.h"
|
#include "os0file.h"
|
||||||
#include "trx0sys.h"
|
#include "trx0sys.h"
|
||||||
|
#include "srv0srv.h"
|
||||||
|
|
||||||
/* When flushed, dirty blocks are searched in neighborhoods of this size, and
|
/* When flushed, dirty blocks are searched in neighborhoods of this size, and
|
||||||
flushed along with the original page. */
|
flushed along with the original page. */
|
||||||
@ -103,7 +104,7 @@ buf_flush_ready_for_replace(
|
|||||||
/*========================*/
|
/*========================*/
|
||||||
/* out: TRUE if can replace immediately */
|
/* out: TRUE if can replace immediately */
|
||||||
buf_block_t* block) /* in: buffer control block, must be in state
|
buf_block_t* block) /* in: buffer control block, must be in state
|
||||||
BUF_BLOCK_FILE_PAGE and in the LRU list*/
|
BUF_BLOCK_FILE_PAGE and in the LRU list */
|
||||||
{
|
{
|
||||||
ut_ad(mutex_own(&(buf_pool->mutex)));
|
ut_ad(mutex_own(&(buf_pool->mutex)));
|
||||||
ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
|
ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
|
||||||
@ -134,7 +135,6 @@ buf_flush_ready_for_flush(
|
|||||||
|
|
||||||
if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
|
if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
|
||||||
&& (block->io_fix == 0)) {
|
&& (block->io_fix == 0)) {
|
||||||
|
|
||||||
if (flush_type != BUF_FLUSH_LRU) {
|
if (flush_type != BUF_FLUSH_LRU) {
|
||||||
|
|
||||||
return(TRUE);
|
return(TRUE);
|
||||||
@ -436,6 +436,20 @@ buf_flush_try_page(
|
|||||||
&& block && buf_flush_ready_for_flush(block, flush_type)) {
|
&& block && buf_flush_ready_for_flush(block, flush_type)) {
|
||||||
|
|
||||||
block->io_fix = BUF_IO_WRITE;
|
block->io_fix = BUF_IO_WRITE;
|
||||||
|
|
||||||
|
/* If AWE is enabled and the page is not mapped to a frame,
|
||||||
|
then map it */
|
||||||
|
|
||||||
|
if (block->frame == NULL) {
|
||||||
|
ut_a(srv_use_awe);
|
||||||
|
|
||||||
|
/* We set second parameter TRUE because the block is
|
||||||
|
in the LRU list and we must put it to
|
||||||
|
awe_LRU_free_mapped list once mapped to a frame */
|
||||||
|
|
||||||
|
buf_awe_map_page_to_frame(block, TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
block->flush_type = flush_type;
|
block->flush_type = flush_type;
|
||||||
|
|
||||||
if (buf_pool->n_flush[flush_type] == 0) {
|
if (buf_pool->n_flush[flush_type] == 0) {
|
||||||
@ -486,6 +500,20 @@ buf_flush_try_page(
|
|||||||
..._ready_for_flush). */
|
..._ready_for_flush). */
|
||||||
|
|
||||||
block->io_fix = BUF_IO_WRITE;
|
block->io_fix = BUF_IO_WRITE;
|
||||||
|
|
||||||
|
/* If AWE is enabled and the page is not mapped to a frame,
|
||||||
|
then map it */
|
||||||
|
|
||||||
|
if (block->frame == NULL) {
|
||||||
|
ut_a(srv_use_awe);
|
||||||
|
|
||||||
|
/* We set second parameter TRUE because the block is
|
||||||
|
in the LRU list and we must put it to
|
||||||
|
awe_LRU_free_mapped list once mapped to a frame */
|
||||||
|
|
||||||
|
buf_awe_map_page_to_frame(block, TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
block->flush_type = flush_type;
|
block->flush_type = flush_type;
|
||||||
|
|
||||||
if (buf_pool->n_flush[flush_type] == 0) {
|
if (buf_pool->n_flush[flush_type] == 0) {
|
||||||
@ -511,6 +539,20 @@ buf_flush_try_page(
|
|||||||
&& buf_flush_ready_for_flush(block, flush_type)) {
|
&& buf_flush_ready_for_flush(block, flush_type)) {
|
||||||
|
|
||||||
block->io_fix = BUF_IO_WRITE;
|
block->io_fix = BUF_IO_WRITE;
|
||||||
|
|
||||||
|
/* If AWE is enabled and the page is not mapped to a frame,
|
||||||
|
then map it */
|
||||||
|
|
||||||
|
if (block->frame == NULL) {
|
||||||
|
ut_a(srv_use_awe);
|
||||||
|
|
||||||
|
/* We set second parameter TRUE because the block is
|
||||||
|
in the LRU list and we must put it to
|
||||||
|
awe_LRU_free_mapped list once mapped to a frame */
|
||||||
|
|
||||||
|
buf_awe_map_page_to_frame(block, TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
block->flush_type = flush_type;
|
block->flush_type = flush_type;
|
||||||
|
|
||||||
if (buf_pool->n_flush[block->flush_type] == 0) {
|
if (buf_pool->n_flush[block->flush_type] == 0) {
|
||||||
|
@ -132,7 +132,13 @@ buf_LRU_search_and_free_block(
|
|||||||
|
|
||||||
mutex_exit(&(buf_pool->mutex));
|
mutex_exit(&(buf_pool->mutex));
|
||||||
|
|
||||||
btr_search_drop_page_hash_index(block->frame);
|
/* Remove possible adaptive hash index built on the
|
||||||
|
page; in the case of AWE the block may not have a
|
||||||
|
frame at all */
|
||||||
|
|
||||||
|
if (block->frame) {
|
||||||
|
btr_search_drop_page_hash_index(block->frame);
|
||||||
|
}
|
||||||
|
|
||||||
mutex_enter(&(buf_pool->mutex));
|
mutex_enter(&(buf_pool->mutex));
|
||||||
|
|
||||||
@ -196,7 +202,9 @@ list. */
|
|||||||
buf_block_t*
|
buf_block_t*
|
||||||
buf_LRU_get_free_block(void)
|
buf_LRU_get_free_block(void)
|
||||||
/*========================*/
|
/*========================*/
|
||||||
/* out: the free control block */
|
/* out: the free control block; also if AWE is
|
||||||
|
used, it is guaranteed that the block has its
|
||||||
|
page mapped to a frame when we return */
|
||||||
{
|
{
|
||||||
buf_block_t* block = NULL;
|
buf_block_t* block = NULL;
|
||||||
ibool freed;
|
ibool freed;
|
||||||
@ -257,6 +265,22 @@ loop:
|
|||||||
|
|
||||||
block = UT_LIST_GET_FIRST(buf_pool->free);
|
block = UT_LIST_GET_FIRST(buf_pool->free);
|
||||||
UT_LIST_REMOVE(free, buf_pool->free, block);
|
UT_LIST_REMOVE(free, buf_pool->free, block);
|
||||||
|
|
||||||
|
if (srv_use_awe) {
|
||||||
|
if (block->frame) {
|
||||||
|
/* Remove from the list of mapped pages */
|
||||||
|
|
||||||
|
UT_LIST_REMOVE(awe_LRU_free_mapped,
|
||||||
|
buf_pool->awe_LRU_free_mapped, block);
|
||||||
|
} else {
|
||||||
|
/* We map the page to a frame; second param
|
||||||
|
FALSE below because we do not want it to be
|
||||||
|
added to the awe_LRU_free_mapped list */
|
||||||
|
|
||||||
|
buf_awe_map_page_to_frame(block, FALSE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
block->state = BUF_BLOCK_READY_FOR_USE;
|
block->state = BUF_BLOCK_READY_FOR_USE;
|
||||||
|
|
||||||
mutex_exit(&(buf_pool->mutex));
|
mutex_exit(&(buf_pool->mutex));
|
||||||
@ -429,6 +453,13 @@ buf_LRU_remove_block(
|
|||||||
/* Remove the block from the LRU list */
|
/* Remove the block from the LRU list */
|
||||||
UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
|
UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
|
||||||
|
|
||||||
|
if (srv_use_awe && block->frame) {
|
||||||
|
/* Remove from the list of mapped pages */
|
||||||
|
|
||||||
|
UT_LIST_REMOVE(awe_LRU_free_mapped,
|
||||||
|
buf_pool->awe_LRU_free_mapped, block);
|
||||||
|
}
|
||||||
|
|
||||||
/* If the LRU list is so short that LRU_old not defined, return */
|
/* If the LRU list is so short that LRU_old not defined, return */
|
||||||
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
|
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
|
||||||
|
|
||||||
@ -475,6 +506,13 @@ buf_LRU_add_block_to_end_low(
|
|||||||
|
|
||||||
UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
|
UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
|
||||||
|
|
||||||
|
if (srv_use_awe && block->frame) {
|
||||||
|
/* Add to the list of mapped pages */
|
||||||
|
|
||||||
|
UT_LIST_ADD_LAST(awe_LRU_free_mapped,
|
||||||
|
buf_pool->awe_LRU_free_mapped, block);
|
||||||
|
}
|
||||||
|
|
||||||
if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
|
if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
|
||||||
|
|
||||||
buf_pool->LRU_old_len++;
|
buf_pool->LRU_old_len++;
|
||||||
@ -518,6 +556,15 @@ buf_LRU_add_block_low(
|
|||||||
block->old = old;
|
block->old = old;
|
||||||
cl = buf_pool_clock_tic();
|
cl = buf_pool_clock_tic();
|
||||||
|
|
||||||
|
if (srv_use_awe && block->frame) {
|
||||||
|
/* Add to the list of mapped pages; for simplicity we always
|
||||||
|
add to the start, even if the user would have set 'old'
|
||||||
|
TRUE */
|
||||||
|
|
||||||
|
UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
|
||||||
|
buf_pool->awe_LRU_free_mapped, block);
|
||||||
|
}
|
||||||
|
|
||||||
if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
|
if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
|
||||||
|
|
||||||
UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
|
UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
|
||||||
@ -613,6 +660,13 @@ buf_LRU_block_free_non_file_page(
|
|||||||
memset(block->frame, '\0', UNIV_PAGE_SIZE);
|
memset(block->frame, '\0', UNIV_PAGE_SIZE);
|
||||||
#endif
|
#endif
|
||||||
UT_LIST_ADD_FIRST(free, buf_pool->free, block);
|
UT_LIST_ADD_FIRST(free, buf_pool->free, block);
|
||||||
|
|
||||||
|
if (srv_use_awe && block->frame) {
|
||||||
|
/* Add to the list of mapped pages */
|
||||||
|
|
||||||
|
UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
|
||||||
|
buf_pool->awe_LRU_free_mapped, block);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
@ -639,7 +693,9 @@ buf_LRU_block_remove_hashed_page(
|
|||||||
|
|
||||||
buf_pool->freed_page_clock += 1;
|
buf_pool->freed_page_clock += 1;
|
||||||
|
|
||||||
buf_frame_modify_clock_inc(block->frame);
|
/* Note that if AWE is enabled the block may not have a frame at all */
|
||||||
|
|
||||||
|
buf_block_modify_clock_inc(block);
|
||||||
|
|
||||||
HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
|
HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
|
||||||
buf_page_address_fold(block->space, block->offset),
|
buf_page_address_fold(block->space, block->offset),
|
||||||
|
@ -576,7 +576,7 @@ buf_read_recv_pages(
|
|||||||
|
|
||||||
os_aio_print_debug = FALSE;
|
os_aio_print_debug = FALSE;
|
||||||
|
|
||||||
while (buf_pool->n_pend_reads >= RECV_POOL_N_FREE_BLOCKS / 2) {
|
while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
|
||||||
|
|
||||||
os_aio_simulated_wake_handler_threads();
|
os_aio_simulated_wake_handler_threads();
|
||||||
os_thread_sleep(500000);
|
os_thread_sleep(500000);
|
||||||
|
@ -466,6 +466,9 @@ struct btr_pcur_struct{
|
|||||||
BTR_PCUR_AFTER, depending on whether
|
BTR_PCUR_AFTER, depending on whether
|
||||||
cursor was on, before, or after the
|
cursor was on, before, or after the
|
||||||
old_rec record */
|
old_rec record */
|
||||||
|
buf_block_t* block_when_stored;/* buffer block when the position was
|
||||||
|
stored; note that if AWE is on, frames
|
||||||
|
may move */
|
||||||
dulint modify_clock; /* the modify clock value of the
|
dulint modify_clock; /* the modify clock value of the
|
||||||
buffer block when the cursor position
|
buffer block when the cursor position
|
||||||
was stored */
|
was stored */
|
||||||
|
@ -30,6 +30,7 @@ Created 11/5/1995 Heikki Tuuri
|
|||||||
#include "sync0rw.h"
|
#include "sync0rw.h"
|
||||||
#include "hash0hash.h"
|
#include "hash0hash.h"
|
||||||
#include "ut0byte.h"
|
#include "ut0byte.h"
|
||||||
|
#include "os0proc.h"
|
||||||
|
|
||||||
/* Flags for flush types */
|
/* Flags for flush types */
|
||||||
#define BUF_FLUSH_LRU 1
|
#define BUF_FLUSH_LRU 1
|
||||||
@ -58,23 +59,34 @@ extern ibool buf_debug_prints;/* If this is set TRUE, the program
|
|||||||
occurs */
|
occurs */
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
Initializes the buffer pool of the database. */
|
Creates the buffer pool. */
|
||||||
|
|
||||||
void
|
buf_pool_t*
|
||||||
buf_pool_init(
|
buf_pool_init(
|
||||||
/*==========*/
|
/*==========*/
|
||||||
ulint max_size, /* in: maximum size of the pool in blocks */
|
/* out, own: buf_pool object, NULL if not
|
||||||
ulint curr_size); /* in: current size to use, must be <=
|
enough memory or error */
|
||||||
|
ulint max_size, /* in: maximum size of the buf_pool in
|
||||||
|
blocks */
|
||||||
|
ulint curr_size, /* in: current size to use, must be <=
|
||||||
|
max_size, currently must be equal to
|
||||||
max_size */
|
max_size */
|
||||||
|
ulint n_frames); /* in: number of frames; if AWE is used,
|
||||||
|
this is the size of the address space window
|
||||||
|
where physical memory pages are mapped; if
|
||||||
|
AWE is not used then this must be the same
|
||||||
|
as max_size */
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Gets the current size of buffer pool in bytes. */
|
Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
|
||||||
|
size of AWE window (= the frames). */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
ulint
|
ulint
|
||||||
buf_pool_get_curr_size(void);
|
buf_pool_get_curr_size(void);
|
||||||
/*========================*/
|
/*========================*/
|
||||||
/* out: size in bytes */
|
/* out: size in bytes */
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Gets the maximum size of buffer pool in bytes. */
|
Gets the maximum size of buffer pool in bytes. In the case of AWE, the
|
||||||
|
size of AWE window (= the frames). */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
ulint
|
ulint
|
||||||
buf_pool_get_max_size(void);
|
buf_pool_get_max_size(void);
|
||||||
@ -138,8 +150,8 @@ improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
|
|||||||
NOTE! The following macros should be used instead of
|
NOTE! The following macros should be used instead of
|
||||||
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
|
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
|
||||||
RW_X_LATCH are allowed as LA! */
|
RW_X_LATCH are allowed as LA! */
|
||||||
#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\
|
#define buf_page_optimistic_get(LA, BL, G, MC, MTR) buf_page_optimistic_get_func(\
|
||||||
LA, G, MC, IB__FILE__, __LINE__, MTR)
|
LA, BL, G, MC, IB__FILE__, __LINE__, MTR)
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
This is the general function used to get optimistic access to a database
|
This is the general function used to get optimistic access to a database
|
||||||
page. */
|
page. */
|
||||||
@ -149,7 +161,9 @@ buf_page_optimistic_get_func(
|
|||||||
/*=========================*/
|
/*=========================*/
|
||||||
/* out: TRUE if success */
|
/* out: TRUE if success */
|
||||||
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
|
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
|
||||||
buf_frame_t* guess, /* in: guessed frame */
|
buf_block_t* block, /* in: guessed block */
|
||||||
|
buf_frame_t* guess, /* in: guessed frame; note that AWE may move
|
||||||
|
frames */
|
||||||
dulint modify_clock,/* in: modify clock value if mode is
|
dulint modify_clock,/* in: modify clock value if mode is
|
||||||
..._GUESS_ON_CLOCK */
|
..._GUESS_ON_CLOCK */
|
||||||
char* file, /* in: file name */
|
char* file, /* in: file name */
|
||||||
@ -350,6 +364,16 @@ buf_frame_modify_clock_inc(
|
|||||||
/* out: new value */
|
/* out: new value */
|
||||||
buf_frame_t* frame); /* in: pointer to a frame */
|
buf_frame_t* frame); /* in: pointer to a frame */
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
|
Increments the modify clock of a frame by 1. The caller must (1) own the
|
||||||
|
buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
|
||||||
|
on the block. */
|
||||||
|
UNIV_INLINE
|
||||||
|
dulint
|
||||||
|
buf_block_modify_clock_inc(
|
||||||
|
/*=======================*/
|
||||||
|
/* out: new value */
|
||||||
|
buf_block_t* block); /* in: block */
|
||||||
|
/************************************************************************
|
||||||
Returns the value of the modify clock. The caller must have an s-lock
|
Returns the value of the modify clock. The caller must have an s-lock
|
||||||
or x-lock on the block. */
|
or x-lock on the block. */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
@ -428,7 +452,7 @@ UNIV_INLINE
|
|||||||
buf_frame_t*
|
buf_frame_t*
|
||||||
buf_frame_align(
|
buf_frame_align(
|
||||||
/*============*/
|
/*============*/
|
||||||
/* out: pointer to block */
|
/* out: pointer to frame */
|
||||||
byte* ptr); /* in: pointer to a frame */
|
byte* ptr); /* in: pointer to a frame */
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
Checks if a pointer points to the block array of the buffer pool (blocks, not
|
Checks if a pointer points to the block array of the buffer pool (blocks, not
|
||||||
@ -505,6 +529,19 @@ buf_pool_invalidate(void);
|
|||||||
--------------------------- LOWER LEVEL ROUTINES -------------------------
|
--------------------------- LOWER LEVEL ROUTINES -------------------------
|
||||||
=========================================================================*/
|
=========================================================================*/
|
||||||
|
|
||||||
|
/************************************************************************
|
||||||
|
Maps the page of block to a frame, if not mapped yet. Unmaps some page
|
||||||
|
from the end of the awe_LRU_free_mapped. */
|
||||||
|
|
||||||
|
void
|
||||||
|
buf_awe_map_page_to_frame(
|
||||||
|
/*======================*/
|
||||||
|
buf_block_t* block, /* in: block whose page should be
|
||||||
|
mapped to a frame */
|
||||||
|
ibool add_to_mapped_list);/* in: TRUE if, in the case
|
||||||
|
we need to map the page, we should also
|
||||||
|
add the block to the
|
||||||
|
awe_LRU_free_mapped list */
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Adds latch level info for the rw-lock protecting the buffer frame. This
|
Adds latch level info for the rw-lock protecting the buffer frame. This
|
||||||
should be called in the debug version after a successful latching of a
|
should be called in the debug version after a successful latching of a
|
||||||
@ -638,7 +675,16 @@ struct buf_block_struct{
|
|||||||
byte* frame; /* pointer to buffer frame which
|
byte* frame; /* pointer to buffer frame which
|
||||||
is of size UNIV_PAGE_SIZE, and
|
is of size UNIV_PAGE_SIZE, and
|
||||||
aligned to an address divisible by
|
aligned to an address divisible by
|
||||||
UNIV_PAGE_SIZE */
|
UNIV_PAGE_SIZE; if AWE is used, this
|
||||||
|
will be NULL for the pages which are
|
||||||
|
currently not mapped into the virtual
|
||||||
|
address space window of the buffer
|
||||||
|
pool */
|
||||||
|
os_awe_t* awe_info; /* if AWE is used, then an array of
|
||||||
|
awe page infos for
|
||||||
|
UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
|
||||||
|
(normally = 4) physical memory
|
||||||
|
pages; otherwise NULL */
|
||||||
ulint space; /* space id of the page */
|
ulint space; /* space id of the page */
|
||||||
ulint offset; /* page number within the space */
|
ulint offset; /* page number within the space */
|
||||||
ulint lock_hash_val; /* hashed value of the page address
|
ulint lock_hash_val; /* hashed value of the page address
|
||||||
@ -691,6 +737,10 @@ struct buf_block_struct{
|
|||||||
/* node of the free block list */
|
/* node of the free block list */
|
||||||
UT_LIST_NODE_T(buf_block_t) LRU;
|
UT_LIST_NODE_T(buf_block_t) LRU;
|
||||||
/* node of the LRU list */
|
/* node of the LRU list */
|
||||||
|
UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped;
|
||||||
|
/* in the AWE version node in the
|
||||||
|
list of free and LRU blocks which are
|
||||||
|
mapped to a frame */
|
||||||
ulint LRU_position; /* value which monotonically
|
ulint LRU_position; /* value which monotonically
|
||||||
decreases (or may stay constant if
|
decreases (or may stay constant if
|
||||||
the block is in the old blocks) toward
|
the block is in the old blocks) toward
|
||||||
@ -758,11 +808,12 @@ struct buf_block_struct{
|
|||||||
BTR_SEARCH_RIGHT_SIDE in hash
|
BTR_SEARCH_RIGHT_SIDE in hash
|
||||||
indexing */
|
indexing */
|
||||||
/* 6. Debug fields */
|
/* 6. Debug fields */
|
||||||
|
#ifdef UNIV_SYNC_DEBUG
|
||||||
rw_lock_t debug_latch; /* in the debug version, each thread
|
rw_lock_t debug_latch; /* in the debug version, each thread
|
||||||
which bufferfixes the block acquires
|
which bufferfixes the block acquires
|
||||||
an s-latch here; so we can use the
|
an s-latch here; so we can use the
|
||||||
debug utilities in sync0rw */
|
debug utilities in sync0rw */
|
||||||
|
#endif
|
||||||
ibool file_page_was_freed;
|
ibool file_page_was_freed;
|
||||||
/* this is set to TRUE when fsp
|
/* this is set to TRUE when fsp
|
||||||
frees a page in buffer pool */
|
frees a page in buffer pool */
|
||||||
@ -781,16 +832,36 @@ struct buf_pool_struct{
|
|||||||
struct and control blocks, except the
|
struct and control blocks, except the
|
||||||
read-write lock in them */
|
read-write lock in them */
|
||||||
byte* frame_mem; /* pointer to the memory area which
|
byte* frame_mem; /* pointer to the memory area which
|
||||||
was allocated for the frames */
|
was allocated for the frames; in AWE
|
||||||
|
this is the virtual address space
|
||||||
|
window where we map pages stored
|
||||||
|
in physical memory */
|
||||||
byte* frame_zero; /* pointer to the first buffer frame:
|
byte* frame_zero; /* pointer to the first buffer frame:
|
||||||
this may differ from frame_mem, because
|
this may differ from frame_mem, because
|
||||||
this is aligned by the frame size */
|
this is aligned by the frame size */
|
||||||
byte* high_end; /* pointer to the end of the
|
byte* high_end; /* pointer to the end of the buffer
|
||||||
buffer pool */
|
frames */
|
||||||
|
ulint n_frames; /* number of frames */
|
||||||
buf_block_t* blocks; /* array of buffer control blocks */
|
buf_block_t* blocks; /* array of buffer control blocks */
|
||||||
|
buf_block_t** blocks_of_frames;/* inverse mapping which can be used
|
||||||
|
to retrieve the buffer control block
|
||||||
|
of a frame; this is an array which
|
||||||
|
lists the blocks of frames in the
|
||||||
|
order frame_zero,
|
||||||
|
frame_zero + UNIV_PAGE_SIZE, ...
|
||||||
|
a control block is always assigned
|
||||||
|
for each frame, even if the frame does
|
||||||
|
not contain any data; note that in AWE
|
||||||
|
there are more control blocks than
|
||||||
|
buffer frames */
|
||||||
|
os_awe_t* awe_info; /* if AWE is used, AWE info for the
|
||||||
|
physical 4 kB memory pages associated
|
||||||
|
with buffer frames */
|
||||||
ulint max_size; /* number of control blocks ==
|
ulint max_size; /* number of control blocks ==
|
||||||
maximum pool size in pages */
|
maximum pool size in pages */
|
||||||
ulint curr_size; /* current pool size in pages */
|
ulint curr_size; /* current pool size in pages;
|
||||||
|
currently always the same as
|
||||||
|
max_size */
|
||||||
hash_table_t* page_hash; /* hash table of the file pages */
|
hash_table_t* page_hash; /* hash table of the file pages */
|
||||||
|
|
||||||
ulint n_pend_reads; /* number of pending read operations */
|
ulint n_pend_reads; /* number of pending read operations */
|
||||||
@ -802,11 +873,14 @@ struct buf_pool_struct{
|
|||||||
ulint n_pages_created;/* number of pages created in the pool
|
ulint n_pages_created;/* number of pages created in the pool
|
||||||
with no read */
|
with no read */
|
||||||
ulint n_page_gets; /* number of page gets performed;
|
ulint n_page_gets; /* number of page gets performed;
|
||||||
also successful seraches through
|
also successful searches through
|
||||||
the adaptive hash index are
|
the adaptive hash index are
|
||||||
counted as page gets; this field
|
counted as page gets; this field
|
||||||
is NOT protected by the buffer
|
is NOT protected by the buffer
|
||||||
pool mutex */
|
pool mutex */
|
||||||
|
ulint n_pages_awe_remapped; /* if AWE is enabled, the
|
||||||
|
number of remaps of blocks to
|
||||||
|
buffer frames */
|
||||||
ulint n_page_gets_old;/* n_page_gets when buf_print was
|
ulint n_page_gets_old;/* n_page_gets when buf_print was
|
||||||
last time called: used to calculate
|
last time called: used to calculate
|
||||||
hit rate */
|
hit rate */
|
||||||
@ -815,6 +889,7 @@ struct buf_pool_struct{
|
|||||||
ulint n_pages_written_old;/* number write operations */
|
ulint n_pages_written_old;/* number write operations */
|
||||||
ulint n_pages_created_old;/* number of pages created in
|
ulint n_pages_created_old;/* number of pages created in
|
||||||
the pool with no read */
|
the pool with no read */
|
||||||
|
ulint n_pages_awe_remapped_old;
|
||||||
/* 2. Page flushing algorithm fields */
|
/* 2. Page flushing algorithm fields */
|
||||||
|
|
||||||
UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
|
UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
|
||||||
@ -847,7 +922,10 @@ struct buf_pool_struct{
|
|||||||
/* 3. LRU replacement algorithm fields */
|
/* 3. LRU replacement algorithm fields */
|
||||||
|
|
||||||
UT_LIST_BASE_NODE_T(buf_block_t) free;
|
UT_LIST_BASE_NODE_T(buf_block_t) free;
|
||||||
/* base node of the free block list */
|
/* base node of the free block list;
|
||||||
|
in the case of AWE, at the start are
|
||||||
|
always free blocks for which the
|
||||||
|
physical memory is mapped to a frame */
|
||||||
UT_LIST_BASE_NODE_T(buf_block_t) LRU;
|
UT_LIST_BASE_NODE_T(buf_block_t) LRU;
|
||||||
/* base node of the LRU list */
|
/* base node of the LRU list */
|
||||||
buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
|
buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
|
||||||
@ -859,6 +937,12 @@ struct buf_pool_struct{
|
|||||||
see buf0lru.c for the restrictions
|
see buf0lru.c for the restrictions
|
||||||
on this value; not defined if
|
on this value; not defined if
|
||||||
LRU_old == NULL */
|
LRU_old == NULL */
|
||||||
|
UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped;
|
||||||
|
/* list of those blocks which are
|
||||||
|
in the LRU list or the free list, and
|
||||||
|
where the page is mapped to a frame;
|
||||||
|
thus, frames allocated, e.g., to the
|
||||||
|
lock table, are not in this list */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* States of a control block */
|
/* States of a control block */
|
||||||
|
@ -36,25 +36,27 @@ buf_block_peek_if_too_old(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Gets the current size of buffer buf_pool in bytes. */
|
Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
|
||||||
|
size of AWE window (= the frames). */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
ulint
|
ulint
|
||||||
buf_pool_get_curr_size(void)
|
buf_pool_get_curr_size(void)
|
||||||
/*========================*/
|
/*========================*/
|
||||||
/* out: size in bytes */
|
/* out: size in bytes */
|
||||||
{
|
{
|
||||||
return((buf_pool->curr_size) * UNIV_PAGE_SIZE);
|
return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Gets the maximum size of buffer buf_pool in bytes. */
|
Gets the maximum size of buffer buf_pool in bytes. In the case of AWE, the
|
||||||
|
size of AWE window (= the frames). */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
ulint
|
ulint
|
||||||
buf_pool_get_max_size(void)
|
buf_pool_get_max_size(void)
|
||||||
/*=======================*/
|
/*=======================*/
|
||||||
/* out: size in bytes */
|
/* out: size in bytes */
|
||||||
{
|
{
|
||||||
return((buf_pool->max_size) * UNIV_PAGE_SIZE);
|
return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
@ -207,54 +209,24 @@ buf_block_align(
|
|||||||
|
|
||||||
frame_zero = buf_pool->frame_zero;
|
frame_zero = buf_pool->frame_zero;
|
||||||
|
|
||||||
ut_ad((ulint)ptr >= (ulint)frame_zero);
|
if ((ulint)ptr < (ulint)frame_zero
|
||||||
|
|| (ulint)ptr > (ulint)(buf_pool->high_end)) {
|
||||||
block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
|
|
||||||
>> UNIV_PAGE_SIZE_SHIFT);
|
|
||||||
if (block < buf_pool->blocks
|
|
||||||
|| block >= buf_pool->blocks + buf_pool->max_size) {
|
|
||||||
|
|
||||||
|
ut_print_timestamp(stderr);
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: Error: trying to access a stray pointer %lx\n"
|
" InnoDB: Error: trying to access a stray pointer %lx\n"
|
||||||
"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
|
"InnoDB: buf pool start is at %lx, end at %lx\n"
|
||||||
(ulint)frame_zero, buf_pool->max_size);
|
"InnoDB: Probable reason is database corruption or memory\n"
|
||||||
|
"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
|
||||||
|
"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
|
||||||
|
"InnoDB: how to force recovery.\n",
|
||||||
|
(ulint)ptr, (ulint)frame_zero,
|
||||||
|
(ulint)(buf_pool->high_end));
|
||||||
ut_a(0);
|
ut_a(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return(block);
|
block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero))
|
||||||
}
|
>> UNIV_PAGE_SIZE_SHIFT));
|
||||||
|
|
||||||
/***********************************************************************
|
|
||||||
Gets the block to whose frame the pointer is pointing to. Does not
|
|
||||||
require a file page to be bufferfixed. */
|
|
||||||
UNIV_INLINE
|
|
||||||
buf_block_t*
|
|
||||||
buf_block_align_low(
|
|
||||||
/*================*/
|
|
||||||
/* out: pointer to block */
|
|
||||||
byte* ptr) /* in: pointer to a frame */
|
|
||||||
{
|
|
||||||
buf_block_t* block;
|
|
||||||
buf_frame_t* frame_zero;
|
|
||||||
|
|
||||||
ut_ad(ptr);
|
|
||||||
|
|
||||||
frame_zero = buf_pool->frame_zero;
|
|
||||||
|
|
||||||
ut_ad((ulint)ptr >= (ulint)frame_zero);
|
|
||||||
|
|
||||||
block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
|
|
||||||
>> UNIV_PAGE_SIZE_SHIFT);
|
|
||||||
if (block < buf_pool->blocks
|
|
||||||
|| block >= buf_pool->blocks + buf_pool->max_size) {
|
|
||||||
|
|
||||||
fprintf(stderr,
|
|
||||||
"InnoDB: Error: trying to access a stray pointer %lx\n"
|
|
||||||
"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
|
|
||||||
(ulint)frame_zero, buf_pool->max_size);
|
|
||||||
ut_a(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return(block);
|
return(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -264,7 +236,7 @@ UNIV_INLINE
|
|||||||
buf_frame_t*
|
buf_frame_t*
|
||||||
buf_frame_align(
|
buf_frame_align(
|
||||||
/*============*/
|
/*============*/
|
||||||
/* out: pointer to block */
|
/* out: pointer to frame */
|
||||||
byte* ptr) /* in: pointer to a frame */
|
byte* ptr) /* in: pointer to a frame */
|
||||||
{
|
{
|
||||||
buf_frame_t* frame;
|
buf_frame_t* frame;
|
||||||
@ -273,14 +245,19 @@ buf_frame_align(
|
|||||||
|
|
||||||
frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
|
frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
|
||||||
|
|
||||||
if (((ulint)frame
|
if (((ulint)frame < (ulint)(buf_pool->frame_zero))
|
||||||
< (ulint)(buf_pool->frame_zero))
|
|| (ulint)frame >= (ulint)(buf_pool->high_end)) {
|
||||||
|| ((ulint)frame > (ulint)(buf_pool_get_nth_block(buf_pool,
|
|
||||||
buf_pool->max_size - 1)->frame))) {
|
ut_print_timestamp(stderr);
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: Error: trying to access a stray pointer %lx\n"
|
" InnoDB: Error: trying to access a stray pointer %lx\n"
|
||||||
"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
|
"InnoDB: buf pool start is at %lx, end at %lx\n"
|
||||||
(ulint)(buf_pool->frame_zero), buf_pool->max_size);
|
"InnoDB: Probable reason is database corruption or memory\n"
|
||||||
|
"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
|
||||||
|
"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
|
||||||
|
"InnoDB: how to force recovery.\n",
|
||||||
|
(ulint)ptr, (ulint)(buf_pool->frame_zero),
|
||||||
|
(ulint)(buf_pool->high_end));
|
||||||
ut_a(0);
|
ut_a(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -469,7 +446,7 @@ buf_frame_modify_clock_inc(
|
|||||||
|
|
||||||
ut_ad(frame);
|
ut_ad(frame);
|
||||||
|
|
||||||
block = buf_block_align_low(frame);
|
block = buf_block_align(frame);
|
||||||
|
|
||||||
ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
|
ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
|
||||||
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
|
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
|
||||||
@ -479,6 +456,25 @@ buf_frame_modify_clock_inc(
|
|||||||
return(block->modify_clock);
|
return(block->modify_clock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/************************************************************************
|
||||||
|
Increments the modify clock of a block by 1. The caller must (1) own the
|
||||||
|
buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
|
||||||
|
on the block. */
|
||||||
|
UNIV_INLINE
|
||||||
|
dulint
|
||||||
|
buf_block_modify_clock_inc(
|
||||||
|
/*=======================*/
|
||||||
|
/* out: new value */
|
||||||
|
buf_block_t* block) /* in: block */
|
||||||
|
{
|
||||||
|
ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
|
||||||
|
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
|
||||||
|
|
||||||
|
UT_DULINT_INC(block->modify_clock);
|
||||||
|
|
||||||
|
return(block->modify_clock);
|
||||||
|
}
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
Returns the value of the modify clock. The caller must have an s-lock
|
Returns the value of the modify clock. The caller must have an s-lock
|
||||||
or x-lock on the block. */
|
or x-lock on the block. */
|
||||||
@ -508,15 +504,16 @@ void
|
|||||||
buf_block_buf_fix_inc_debug(
|
buf_block_buf_fix_inc_debug(
|
||||||
/*========================*/
|
/*========================*/
|
||||||
buf_block_t* block, /* in: block to bufferfix */
|
buf_block_t* block, /* in: block to bufferfix */
|
||||||
char* file, /* in: file name */
|
char* file __attribute__ ((unused)), /* in: file name */
|
||||||
ulint line) /* in: line */
|
ulint line __attribute__ ((unused))) /* in: line */
|
||||||
{
|
{
|
||||||
|
#ifdef UNIV_SYNC_DEBUG
|
||||||
ibool ret;
|
ibool ret;
|
||||||
|
|
||||||
ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
|
ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
|
||||||
|
|
||||||
ut_ad(ret == TRUE);
|
ut_ad(ret == TRUE);
|
||||||
|
#endif
|
||||||
block->buf_fix_count++;
|
block->buf_fix_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,7 +53,9 @@ LRU list to the free list. */
|
|||||||
buf_block_t*
|
buf_block_t*
|
||||||
buf_LRU_get_free_block(void);
|
buf_LRU_get_free_block(void);
|
||||||
/*=========================*/
|
/*=========================*/
|
||||||
/* out: the free control block */
|
/* out: the free control block; also if AWE is
|
||||||
|
used, it is guaranteed that the block has its
|
||||||
|
page mapped to a frame when we return */
|
||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
Puts a block back to the free list. */
|
Puts a block back to the free list. */
|
||||||
|
|
||||||
|
@ -355,12 +355,7 @@ in the debug version: spaces with an odd number as the id are replicate
|
|||||||
spaces */
|
spaces */
|
||||||
#define RECV_REPLICA_SPACE_ADD 1
|
#define RECV_REPLICA_SPACE_ADD 1
|
||||||
|
|
||||||
/* This many blocks must be left free in the buffer pool when we scan
|
extern ulint recv_n_pool_free_frames;
|
||||||
the log and store the scanned log records in the buffer pool: we will
|
|
||||||
use these free blocks to read in pages when we start applying the
|
|
||||||
log records to the database. */
|
|
||||||
|
|
||||||
#define RECV_POOL_N_FREE_BLOCKS (ut_min(256, buf_pool_get_curr_size() / 8))
|
|
||||||
|
|
||||||
#ifndef UNIV_NONINL
|
#ifndef UNIV_NONINL
|
||||||
#include "log0recv.ic"
|
#include "log0recv.ic"
|
||||||
|
@ -15,6 +15,76 @@ Created 9/30/1995 Heikki Tuuri
|
|||||||
typedef void* os_process_t;
|
typedef void* os_process_t;
|
||||||
typedef unsigned long int os_process_id_t;
|
typedef unsigned long int os_process_id_t;
|
||||||
|
|
||||||
|
/* The cell type in os_awe_allocate_mem page info */
|
||||||
|
#ifdef __NT__
|
||||||
|
typedef ULONG_PTR os_awe_t;
|
||||||
|
#else
|
||||||
|
typedef ulint os_awe_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Physical page size when Windows AWE is used. This is the normal
|
||||||
|
page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
|
||||||
|
pages. */
|
||||||
|
#define OS_AWE_X86_PAGE_SIZE 4096
|
||||||
|
|
||||||
|
/********************************************************************
|
||||||
|
Windows AWE support. Tries to enable the "lock pages in memory" privilege for
|
||||||
|
the current process so that the current process can allocate memory-locked
|
||||||
|
virtual address space to act as the window where AWE maps physical memory. */
|
||||||
|
|
||||||
|
ibool
|
||||||
|
os_awe_enable_lock_pages_in_mem(void);
|
||||||
|
/*=================================*/
|
||||||
|
/* out: TRUE if success, FALSE if error;
|
||||||
|
prints error info to stderr if no success */
|
||||||
|
/********************************************************************
|
||||||
|
Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
|
||||||
|
processor. */
|
||||||
|
|
||||||
|
ibool
|
||||||
|
os_awe_allocate_physical_mem(
|
||||||
|
/*=========================*/
|
||||||
|
/* out: TRUE if success */
|
||||||
|
os_awe_t** page_info, /* out, own: array of opaque data containing
|
||||||
|
the info for allocated physical memory pages;
|
||||||
|
each allocated 4 kB physical memory page has
|
||||||
|
one slot of type os_awe_t in the array */
|
||||||
|
ulint n_megabytes); /* in: number of megabytes to allocate */
|
||||||
|
/********************************************************************
|
||||||
|
Allocates a window in the virtual address space where we can map the
|
||||||
|
pages of physical memory. */
|
||||||
|
|
||||||
|
byte*
|
||||||
|
os_awe_allocate_virtual_mem_window(
|
||||||
|
/*===============================*/
|
||||||
|
/* out, own: allocated memory, or NULL if did not
|
||||||
|
succeed */
|
||||||
|
ulint size); /* in: virtual memory allocation size in bytes, must
|
||||||
|
be < 2 GB */
|
||||||
|
/********************************************************************
|
||||||
|
With this function you can map parts of physical memory allocated with
|
||||||
|
the ..._allocate_physical_mem to the virtual address space allocated with
|
||||||
|
the previous function. Intel implements this so that the process page
|
||||||
|
tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
|
||||||
|
showed that this takes < 1 microsecond, much better than the estimated 80 us
|
||||||
|
for copying a 16 kB page memory to memory. But, the operation will at least
|
||||||
|
partially invalidate the translation lookaside buffer (TLB) of all
|
||||||
|
processors. Under a real-world load the performance hit may be bigger. */
|
||||||
|
|
||||||
|
ibool
|
||||||
|
os_awe_map_physical_mem_to_window(
|
||||||
|
/*==============================*/
|
||||||
|
/* out: TRUE if success; the function
|
||||||
|
calls exit(1) in case of an error */
|
||||||
|
byte* ptr, /* in: a page-aligned pointer to
|
||||||
|
somewhere in the virtual address
|
||||||
|
space window; we map the physical mem
|
||||||
|
pages here */
|
||||||
|
ulint n_mem_pages, /* in: number of 4 kB mem pages to
|
||||||
|
map */
|
||||||
|
os_awe_t* page_info); /* in: array of page infos for those
|
||||||
|
pages; each page has one slot in the
|
||||||
|
array */
|
||||||
/********************************************************************
|
/********************************************************************
|
||||||
Converts the current process id to a number. It is not guaranteed that the
|
Converts the current process id to a number. It is not guaranteed that the
|
||||||
number is unique. In Linux returns the 'process number' of the current
|
number is unique. In Linux returns the 'process number' of the current
|
||||||
|
@ -61,6 +61,7 @@ extern ulint srv_flush_log_at_trx_commit;
|
|||||||
extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
|
extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
|
||||||
character set */
|
character set */
|
||||||
extern ulint srv_pool_size;
|
extern ulint srv_pool_size;
|
||||||
|
extern ulint srv_awe_window_size;
|
||||||
extern ulint srv_mem_pool_size;
|
extern ulint srv_mem_pool_size;
|
||||||
extern ulint srv_lock_table_size;
|
extern ulint srv_lock_table_size;
|
||||||
|
|
||||||
@ -86,6 +87,8 @@ extern ibool srv_use_doublewrite_buf;
|
|||||||
extern ibool srv_set_thread_priorities;
|
extern ibool srv_set_thread_priorities;
|
||||||
extern int srv_query_thread_priority;
|
extern int srv_query_thread_priority;
|
||||||
|
|
||||||
|
extern ibool srv_use_awe;
|
||||||
|
extern ibool srv_use_adaptive_hash_indexes;
|
||||||
/*-------------------------------------------*/
|
/*-------------------------------------------*/
|
||||||
|
|
||||||
extern ulint srv_n_rows_inserted;
|
extern ulint srv_n_rows_inserted;
|
||||||
|
@ -437,25 +437,29 @@ log_group_calc_lsn_offset(
|
|||||||
dulint lsn, /* in: lsn, must be within 4 GB of group->lsn */
|
dulint lsn, /* in: lsn, must be within 4 GB of group->lsn */
|
||||||
log_group_t* group) /* in: log group */
|
log_group_t* group) /* in: log group */
|
||||||
{
|
{
|
||||||
dulint gr_lsn;
|
dulint gr_lsn;
|
||||||
ulint gr_lsn_size_offset;
|
ib_longlong gr_lsn_size_offset;
|
||||||
ulint difference;
|
ib_longlong difference;
|
||||||
ulint group_size;
|
ib_longlong group_size;
|
||||||
ulint offset;
|
ib_longlong offset;
|
||||||
|
|
||||||
ut_ad(mutex_own(&(log_sys->mutex)));
|
ut_ad(mutex_own(&(log_sys->mutex)));
|
||||||
|
|
||||||
|
/* If total log file size is > 2 GB we can easily get overflows
|
||||||
|
with 32-bit integers. Use 64-bit integers instead. */
|
||||||
|
|
||||||
gr_lsn = group->lsn;
|
gr_lsn = group->lsn;
|
||||||
|
|
||||||
gr_lsn_size_offset = log_group_calc_size_offset(group->lsn_offset,
|
gr_lsn_size_offset = (ib_longlong)
|
||||||
group);
|
log_group_calc_size_offset(group->lsn_offset, group);
|
||||||
group_size = log_group_get_capacity(group);
|
|
||||||
|
group_size = (ib_longlong) log_group_get_capacity(group);
|
||||||
|
|
||||||
if (ut_dulint_cmp(lsn, gr_lsn) >= 0) {
|
if (ut_dulint_cmp(lsn, gr_lsn) >= 0) {
|
||||||
|
|
||||||
difference = ut_dulint_minus(lsn, gr_lsn);
|
difference = (ib_longlong) ut_dulint_minus(lsn, gr_lsn);
|
||||||
} else {
|
} else {
|
||||||
difference = ut_dulint_minus(gr_lsn, lsn);
|
difference = (ib_longlong) ut_dulint_minus(gr_lsn, lsn);
|
||||||
|
|
||||||
difference = difference % group_size;
|
difference = difference % group_size;
|
||||||
|
|
||||||
@ -464,7 +468,13 @@ log_group_calc_lsn_offset(
|
|||||||
|
|
||||||
offset = (gr_lsn_size_offset + difference) % group_size;
|
offset = (gr_lsn_size_offset + difference) % group_size;
|
||||||
|
|
||||||
return(log_group_calc_real_offset(offset, group));
|
ut_a(offset <= 0xFFFFFFFF);
|
||||||
|
|
||||||
|
/* printf("Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
|
||||||
|
(ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference);
|
||||||
|
*/
|
||||||
|
|
||||||
|
return(log_group_calc_real_offset((ulint)offset, group));
|
||||||
}
|
}
|
||||||
|
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
@ -3054,8 +3064,8 @@ log_check_log_recs(
|
|||||||
ut_memcpy(scan_buf, start, end - start);
|
ut_memcpy(scan_buf, start, end - start);
|
||||||
|
|
||||||
recv_scan_log_recs(TRUE,
|
recv_scan_log_recs(TRUE,
|
||||||
buf_pool_get_curr_size() -
|
(buf_pool->n_frames -
|
||||||
RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,
|
recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
|
||||||
FALSE, scan_buf, end - start,
|
FALSE, scan_buf, end - start,
|
||||||
ut_dulint_align_down(buf_start_lsn,
|
ut_dulint_align_down(buf_start_lsn,
|
||||||
OS_FILE_LOG_BLOCK_SIZE),
|
OS_FILE_LOG_BLOCK_SIZE),
|
||||||
|
@ -71,6 +71,14 @@ ulint recv_previous_parsed_rec_is_multi = 0;
|
|||||||
|
|
||||||
ulint recv_max_parsed_page_no = 0;
|
ulint recv_max_parsed_page_no = 0;
|
||||||
|
|
||||||
|
/* This many frames must be left free in the buffer pool when we scan
|
||||||
|
the log and store the scanned log records in the buffer pool: we will
|
||||||
|
use these free frames to read in pages when we start applying the
|
||||||
|
log records to the database. */
|
||||||
|
|
||||||
|
ulint recv_n_pool_free_frames = 256;
|
||||||
|
|
||||||
|
|
||||||
/************************************************************
|
/************************************************************
|
||||||
Creates the recovery system. */
|
Creates the recovery system. */
|
||||||
|
|
||||||
@ -1018,10 +1026,10 @@ recv_recover_page(
|
|||||||
block = buf_block_align(page);
|
block = buf_block_align(page);
|
||||||
|
|
||||||
if (just_read_in) {
|
if (just_read_in) {
|
||||||
/* Move the ownership of the x-latch on the page to this OS
|
/* Move the ownership of the x-latch on the page to
|
||||||
thread, so that we can acquire a second x-latch on it. This
|
this OS thread, so that we can acquire a second
|
||||||
is needed for the operations to the page to pass the debug
|
x-latch on it. This is needed for the operations to
|
||||||
checks. */
|
the page to pass the debug checks. */
|
||||||
|
|
||||||
rw_lock_x_lock_move_ownership(&(block->lock));
|
rw_lock_x_lock_move_ownership(&(block->lock));
|
||||||
}
|
}
|
||||||
@ -2362,8 +2370,8 @@ recv_group_scan_log_recs(
|
|||||||
group, start_lsn, end_lsn);
|
group, start_lsn, end_lsn);
|
||||||
|
|
||||||
finished = recv_scan_log_recs(TRUE,
|
finished = recv_scan_log_recs(TRUE,
|
||||||
buf_pool_get_curr_size()
|
(buf_pool->n_frames
|
||||||
- RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,
|
- recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
|
||||||
TRUE, log_sys->buf,
|
TRUE, log_sys->buf,
|
||||||
RECV_SCAN_SIZE, start_lsn,
|
RECV_SCAN_SIZE, start_lsn,
|
||||||
contiguous_lsn, group_scanned_lsn);
|
contiguous_lsn, group_scanned_lsn);
|
||||||
@ -3001,8 +3009,8 @@ ask_again:
|
|||||||
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
|
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
|
||||||
|
|
||||||
ret = recv_scan_log_recs(TRUE,
|
ret = recv_scan_log_recs(TRUE,
|
||||||
buf_pool_get_curr_size() -
|
(buf_pool->n_frames -
|
||||||
RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,
|
recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
|
||||||
TRUE, buf, len, start_lsn,
|
TRUE, buf, len, start_lsn,
|
||||||
&dummy_lsn, &scanned_lsn);
|
&dummy_lsn, &scanned_lsn);
|
||||||
|
|
||||||
|
@ -2127,7 +2127,7 @@ os_aio_simulated_handle(
|
|||||||
ulint offs;
|
ulint offs;
|
||||||
ulint lowest_offset;
|
ulint lowest_offset;
|
||||||
byte* combined_buf;
|
byte* combined_buf;
|
||||||
byte* combined_buf2= 0; /* Remove warning */
|
byte* combined_buf2;
|
||||||
ibool ret;
|
ibool ret;
|
||||||
ulint n;
|
ulint n;
|
||||||
ulint i;
|
ulint i;
|
||||||
|
@ -12,11 +12,469 @@ Created 9/30/1995 Heikki Tuuri
|
|||||||
#include "os0proc.ic"
|
#include "os0proc.ic"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __WIN__
|
#include "ut0mem.h"
|
||||||
#include <windows.h>
|
#include "ut0byte.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
How to get AWE to compile on Windows?
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
the Visual C++ has to be relatively recent and _WIN32_WINNT has to be
|
||||||
|
defined to a value >= 0x0500 when windows.h is included. An easy way
|
||||||
|
to accomplish that is to put
|
||||||
|
|
||||||
|
#define _WIN32_WINNT 0x0500
|
||||||
|
|
||||||
|
to the start of file \mysql\include\config-win.h
|
||||||
|
|
||||||
|
Where does AWE work?
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
See the error message in os_awe_allocate_physical_mem().
|
||||||
|
|
||||||
|
How to assign privileges for mysqld to use AWE?
|
||||||
|
-----------------------------------------------
|
||||||
|
|
||||||
|
See the error message in os_awe_enable_lock_pages_in_mem().
|
||||||
|
|
||||||
|
Use Windows AWE functions in this order
|
||||||
|
---------------------------------------
|
||||||
|
|
||||||
|
(1) os_awe_enable_lock_pages_in_mem();
|
||||||
|
(2) os_awe_allocate_physical_mem();
|
||||||
|
(3) os_awe_allocate_virtual_mem_window();
|
||||||
|
(4) os_awe_map_physical_mem_to_window().
|
||||||
|
|
||||||
|
To test 'AWE' in a computer which does not have the AWE API,
|
||||||
|
you can compile with UNIV_SIMULATE_AWE defined in this file.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef UNIV_SIMULATE_AWE
|
||||||
|
/* If we simulate AWE, we allocate the 'physical memory' here */
|
||||||
|
byte* os_awe_simulate_mem;
|
||||||
|
ulint os_awe_simulate_mem_size;
|
||||||
|
os_awe_t* os_awe_simulate_page_info;
|
||||||
|
byte* os_awe_simulate_window;
|
||||||
|
ulint os_awe_simulate_window_size;
|
||||||
|
/* In simulated AWE the following contains a NULL pointer or a pointer
|
||||||
|
to a mapped 'physical page' for each 4 kB page in the AWE window */
|
||||||
|
byte** os_awe_simulate_map;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "ut0mem.h"
|
#ifdef __NT__
|
||||||
|
os_awe_t* os_awe_page_info;
|
||||||
|
ulint os_awe_n_pages;
|
||||||
|
byte* os_awe_window;
|
||||||
|
ulint os_awe_window_size;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/********************************************************************
|
||||||
|
Windows AWE support. Tries to enable the "lock pages in memory" privilege for
|
||||||
|
the current process so that the current process can allocate memory-locked
|
||||||
|
virtual address space to act as the window where AWE maps physical memory. */
|
||||||
|
|
||||||
|
ibool
|
||||||
|
os_awe_enable_lock_pages_in_mem(void)
|
||||||
|
/*=================================*/
|
||||||
|
/* out: TRUE if success, FALSE if error;
|
||||||
|
prints error info to stderr if no success */
|
||||||
|
{
|
||||||
|
#ifdef UNIV_SIMULATE_AWE
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
|
||||||
|
#elif defined(__NT__)
|
||||||
|
struct {
|
||||||
|
DWORD Count;
|
||||||
|
LUID_AND_ATTRIBUTES Privilege[1];
|
||||||
|
} Info;
|
||||||
|
HANDLE hProcess;
|
||||||
|
HANDLE Token;
|
||||||
|
BOOL Result;
|
||||||
|
|
||||||
|
hProcess = GetCurrentProcess();
|
||||||
|
|
||||||
|
/* Open the token of the current process */
|
||||||
|
|
||||||
|
Result = OpenProcessToken(hProcess,
|
||||||
|
TOKEN_ADJUST_PRIVILEGES,
|
||||||
|
&Token);
|
||||||
|
if (Result != TRUE) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Cannot open process token, error %lu\n",
|
||||||
|
(ulint)GetLastError());
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
Info.Count = 1;
|
||||||
|
|
||||||
|
Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED;
|
||||||
|
|
||||||
|
/* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY
|
||||||
|
privilege */
|
||||||
|
|
||||||
|
Result = LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME,
|
||||||
|
&(Info.Privilege[0].Luid));
|
||||||
|
if (Result != TRUE) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Cannot get local privilege value for %s, error %lu.\n",
|
||||||
|
SE_LOCK_MEMORY_NAME, (ulint)GetLastError());
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to adjust the privilege */
|
||||||
|
|
||||||
|
Result = AdjustTokenPrivileges(Token, FALSE,
|
||||||
|
(PTOKEN_PRIVILEGES)&Info,
|
||||||
|
0, NULL, NULL);
|
||||||
|
/* Check the result */
|
||||||
|
|
||||||
|
if (Result != TRUE) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Cannot adjust process token privileges, error %u.\n",
|
||||||
|
GetLastError());
|
||||||
|
return(FALSE);
|
||||||
|
} else if (GetLastError() != ERROR_SUCCESS) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege, error %lu.\n"
|
||||||
|
"InnoDB: In Windows XP Home you cannot use AWE. In Windows 2000 and XP\n"
|
||||||
|
"InnoDB: Professional you must go to the Control Panel, to\n"
|
||||||
|
"InnoDB: Security Settings, to Local Policies, and enable\n"
|
||||||
|
"InnoDB: the 'lock pages in memory' privilege for the user who runs\n"
|
||||||
|
"InnoDB: the MySQL server.\n", GetLastError());
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
CloseHandle(Token);
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
#else
|
||||||
|
#ifdef __WIN__
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Error: to use AWE you must use a ...-nt MySQL executable.\n");
|
||||||
|
#endif
|
||||||
|
return(FALSE);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/********************************************************************
|
||||||
|
Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
|
||||||
|
processor. */
|
||||||
|
|
||||||
|
ibool
|
||||||
|
os_awe_allocate_physical_mem(
|
||||||
|
/*=========================*/
|
||||||
|
/* out: TRUE if success */
|
||||||
|
os_awe_t** page_info, /* out, own: array of opaque data containing
|
||||||
|
the info for allocated physical memory pages;
|
||||||
|
each allocated 4 kB physical memory page has
|
||||||
|
one slot of type os_awe_t in the array */
|
||||||
|
ulint n_megabytes) /* in: number of megabytes to allocate */
|
||||||
|
{
|
||||||
|
#ifdef UNIV_SIMULATE_AWE
|
||||||
|
os_awe_simulate_page_info = ut_malloc(sizeof(os_awe_t) *
|
||||||
|
n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE));
|
||||||
|
|
||||||
|
os_awe_simulate_mem = ut_align(ut_malloc(
|
||||||
|
4096 + 1024 * 1024 * n_megabytes),
|
||||||
|
4096);
|
||||||
|
os_awe_simulate_mem_size = n_megabytes * 1024 * 1024;
|
||||||
|
|
||||||
|
*page_info = os_awe_simulate_page_info;
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
|
||||||
|
#elif defined(__NT__)
|
||||||
|
BOOL bResult;
|
||||||
|
ULONG_PTR NumberOfPages; /* Question: why does Windows
|
||||||
|
use the name ULONG_PTR for
|
||||||
|
a scalar integer type? Maybe
|
||||||
|
because we may also refer to
|
||||||
|
&NumberOfPages? */
|
||||||
|
ULONG_PTR NumberOfPagesInitial;
|
||||||
|
SYSTEM_INFO sSysInfo;
|
||||||
|
int PFNArraySize;
|
||||||
|
|
||||||
|
if (n_megabytes > 64 * 1024) {
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Error: tried to allocate %lu MB.\n"
|
||||||
|
"InnoDB: AWE cannot allocate more than 64 GB in any computer.\n", n_megabytes);
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
GetSystemInfo(&sSysInfo); /* fill the system information structure */
|
||||||
|
|
||||||
|
if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Error: this computer has a page size of %lu.\n"
|
||||||
|
"InnoDB: Should be 4096 bytes for InnoDB AWE support to work.\n",
|
||||||
|
(ulint)sSysInfo.dwPageSize);
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Calculate the number of pages of memory to request */
|
||||||
|
|
||||||
|
NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE);
|
||||||
|
|
||||||
|
/* Calculate the size of page_info for allocated physical pages */
|
||||||
|
|
||||||
|
PFNArraySize = NumberOfPages * sizeof(ULONG_PTR);
|
||||||
|
|
||||||
|
*page_info = (ULONG_PTR*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize);
|
||||||
|
|
||||||
|
if (*page_info == NULL) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Failed to allocate page info array from process heap, error %lu\n",
|
||||||
|
(ulint)GetLastError());
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
ut_total_allocated_memory += PFNArraySize;
|
||||||
|
|
||||||
|
/* Enable this process' privilege to lock pages to physical memory */
|
||||||
|
|
||||||
|
if (!os_awe_enable_lock_pages_in_mem()) {
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate the physical memory */
|
||||||
|
|
||||||
|
NumberOfPagesInitial = NumberOfPages;
|
||||||
|
|
||||||
|
os_awe_page_info = *page_info;
|
||||||
|
os_awe_n_pages = (ulint)NumberOfPages;
|
||||||
|
|
||||||
|
/* Compilation note: if the compiler complains the function is not
|
||||||
|
defined, see the note at the start of this file */
|
||||||
|
|
||||||
|
bResult = AllocateUserPhysicalPages(GetCurrentProcess(),
|
||||||
|
&NumberOfPages,
|
||||||
|
*page_info);
|
||||||
|
if (bResult != TRUE) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Cannot allocate physical pages, error %lu.\n",
|
||||||
|
(ulint)GetLastError());
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NumberOfPagesInitial != NumberOfPages) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Error: allocated only %lu pages of %lu requested.\n"
|
||||||
|
"InnoDB: Check that you have enough free RAM.\n"
|
||||||
|
"InnoDB: In Windows XP Professional and 2000 Professional\n"
|
||||||
|
"InnoDB: Windows PAE size is max 4 GB. In 2000 and .NET"
|
||||||
|
"InnoDB: Advanced Servers and 2000 Datacenter Server it is 32 GB,\n"
|
||||||
|
"InnoDB: and in .NET Datacenter Server it is 64 GB.\n"
|
||||||
|
"InnoDB: A Microsoft web page said that the processor must be an Intel\n"
|
||||||
|
"InnoDB: processor.",
|
||||||
|
(ulint)NumberOfPages,
|
||||||
|
(ulint)NumberOfPagesInitial);
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Using Address Windowing Extensions (AWE); allocated %lu MB\n",
|
||||||
|
n_megabytes);
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
#else
|
||||||
|
return(FALSE);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/********************************************************************
|
||||||
|
Allocates a window in the virtual address space where we can map then
|
||||||
|
pages of physical memory. */
|
||||||
|
|
||||||
|
byte*
|
||||||
|
os_awe_allocate_virtual_mem_window(
|
||||||
|
/*===============================*/
|
||||||
|
/* out, own: allocated memory, or NULL if did not
|
||||||
|
succeed */
|
||||||
|
ulint size) /* in: virtual memory allocation size in bytes, must
|
||||||
|
be < 2 GB */
|
||||||
|
{
|
||||||
|
#ifdef UNIV_SIMULATE_AWE
|
||||||
|
ulint i;
|
||||||
|
|
||||||
|
os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096);
|
||||||
|
os_awe_simulate_window_size = size;
|
||||||
|
|
||||||
|
os_awe_simulate_map = ut_malloc(sizeof(byte*) * (size / 4096));
|
||||||
|
|
||||||
|
for (i = 0; i < (size / 4096); i++) {
|
||||||
|
*(os_awe_simulate_map + i) = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return(os_awe_simulate_window);
|
||||||
|
|
||||||
|
#elif defined(__NT__)
|
||||||
|
byte* ptr;
|
||||||
|
|
||||||
|
if (size > 0x7FFFFFFFFF) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Cannot allocate %lu bytes of virtual memory\n", size);
|
||||||
|
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = VirtualAlloc(NULL, (SIZE_T)size, MEM_RESERVE | MEM_PHYSICAL,
|
||||||
|
PAGE_READWRITE);
|
||||||
|
if (ptr == NULL) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Cannot allocate %lu bytes of virtual memory, error %lu\n",
|
||||||
|
size, (ulint)GetLastError());
|
||||||
|
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
os_awe_window = ptr;
|
||||||
|
os_awe_window_size = size;
|
||||||
|
|
||||||
|
ut_total_allocated_memory += size;
|
||||||
|
|
||||||
|
return(ptr);
|
||||||
|
#else
|
||||||
|
return(NULL);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/********************************************************************
|
||||||
|
With this function you can map parts of physical memory allocated with
|
||||||
|
the ..._allocate_physical_mem to the virtual address space allocated with
|
||||||
|
the previous function. Intel implements this so that the process page
|
||||||
|
tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
|
||||||
|
showed that this takes < 1 microsecond, much better than the estimated 80 us
|
||||||
|
for copying a 16 kB page memory to memory. But, the operation will at least
|
||||||
|
partially invalidate the translation lookaside buffer (TLB) of all
|
||||||
|
processors. Under a real-world load the performance hit may be bigger. */
|
||||||
|
|
||||||
|
ibool
|
||||||
|
os_awe_map_physical_mem_to_window(
|
||||||
|
/*==============================*/
|
||||||
|
/* out: TRUE if success; the function
|
||||||
|
calls exit(1) in case of an error */
|
||||||
|
byte* ptr, /* in: a page-aligned pointer to
|
||||||
|
somewhere in the virtual address
|
||||||
|
space window; we map the physical mem
|
||||||
|
pages here */
|
||||||
|
ulint n_mem_pages, /* in: number of 4 kB mem pages to
|
||||||
|
map */
|
||||||
|
os_awe_t* page_info) /* in: array of page infos for those
|
||||||
|
pages; each page has one slot in the
|
||||||
|
array */
|
||||||
|
{
|
||||||
|
#ifdef UNIV_SIMULATE_AWE
|
||||||
|
ulint i;
|
||||||
|
byte** map;
|
||||||
|
byte* page;
|
||||||
|
byte* phys_page;
|
||||||
|
|
||||||
|
ut_a(ptr >= os_awe_simulate_window);
|
||||||
|
ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size);
|
||||||
|
ut_a(page_info >= os_awe_simulate_page_info);
|
||||||
|
ut_a(page_info < os_awe_simulate_page_info +
|
||||||
|
(os_awe_simulate_mem_size / 4096));
|
||||||
|
|
||||||
|
/* First look if some other 'physical pages' are mapped at ptr,
|
||||||
|
and copy them back to where they were if yes */
|
||||||
|
|
||||||
|
map = os_awe_simulate_map
|
||||||
|
+ ((ulint)(ptr - os_awe_simulate_window)) / 4096;
|
||||||
|
page = ptr;
|
||||||
|
|
||||||
|
for (i = 0; i < n_mem_pages; i++) {
|
||||||
|
if (*map != NULL) {
|
||||||
|
ut_memcpy(*map, page, 4096);
|
||||||
|
}
|
||||||
|
map++;
|
||||||
|
page += 4096;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Then copy to ptr the 'physical pages' determined by page_info; we
|
||||||
|
assume page_info is a segment of the array we created at the start */
|
||||||
|
|
||||||
|
phys_page = os_awe_simulate_mem
|
||||||
|
+ (ulint)(page_info - os_awe_simulate_page_info)
|
||||||
|
* 4096;
|
||||||
|
|
||||||
|
ut_memcpy(ptr, phys_page, n_mem_pages * 4096);
|
||||||
|
|
||||||
|
/* Update the map */
|
||||||
|
|
||||||
|
map = os_awe_simulate_map
|
||||||
|
+ ((ulint)(ptr - os_awe_simulate_window)) / 4096;
|
||||||
|
|
||||||
|
for (i = 0; i < n_mem_pages; i++) {
|
||||||
|
*map = phys_page;
|
||||||
|
|
||||||
|
map++;
|
||||||
|
phys_page += 4096;
|
||||||
|
}
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
|
||||||
|
#elif defined(__NT__)
|
||||||
|
BOOL bResult;
|
||||||
|
ULONG_PTR n_pages;
|
||||||
|
|
||||||
|
n_pages = (ULONG_PTR)n_mem_pages;
|
||||||
|
|
||||||
|
if (!(ptr >= os_awe_window)) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Error: trying to map to address %lx but AWE window start %lx\n",
|
||||||
|
(ulint)ptr, (ulint)os_awe_window);
|
||||||
|
ut_a(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(ptr <= os_awe_window + os_awe_window_size - UNIV_PAGE_SIZE)) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Error: trying to map to address %lx but AWE window end %lx\n",
|
||||||
|
(ulint)ptr, (ulint)os_awe_window + os_awe_window_size);
|
||||||
|
ut_a(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(page_info >= os_awe_page_info)) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Error: trying to map page info at %lx but array start %lx\n",
|
||||||
|
(ulint)page_info, (ulint)os_awe_page_info);
|
||||||
|
ut_a(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(page_info <= os_awe_page_info + (os_awe_n_pages - 4))) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: AWE: Error: trying to map page info at %lx but array end %lx\n",
|
||||||
|
(ulint)page_info, (ulint)(os_awe_page_info + os_awe_n_pages));
|
||||||
|
ut_a(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info);
|
||||||
|
|
||||||
|
if (bResult != TRUE) {
|
||||||
|
ut_print_timestamp(stderr);
|
||||||
|
fprintf(stderr,
|
||||||
|
" InnoDB: AWE: Mapping of %lu physical pages to address %lx failed,\n"
|
||||||
|
"InnoDB: error %lu.\n"
|
||||||
|
"InnoDB: Cannot continue operation.\n",
|
||||||
|
n_mem_pages, (ulint)ptr, (ulint)GetLastError());
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
#else
|
||||||
|
return(FALSE);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/********************************************************************
|
/********************************************************************
|
||||||
Converts the current process id to a number. It is not guaranteed that the
|
Converts the current process id to a number. It is not guaranteed that the
|
||||||
|
@ -140,9 +140,14 @@ byte srv_latin1_ordering[256] /* The sort order table of the latin1
|
|||||||
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
|
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
|
||||||
};
|
};
|
||||||
|
|
||||||
ulint srv_pool_size = ULINT_MAX; /* size in database pages;
|
ulint srv_pool_size = ULINT_MAX; /* size in pages; MySQL inits
|
||||||
MySQL originally sets this
|
this to size in kilobytes but
|
||||||
value in megabytes */
|
we normalize this to pages in
|
||||||
|
srv_boot() */
|
||||||
|
ulint srv_awe_window_size = 0; /* size in pages; MySQL inits
|
||||||
|
this to bytes, but we
|
||||||
|
normalize it to pages in
|
||||||
|
srv_boot() */
|
||||||
ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */
|
ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */
|
||||||
ulint srv_lock_table_size = ULINT_MAX;
|
ulint srv_lock_table_size = ULINT_MAX;
|
||||||
|
|
||||||
@ -218,6 +223,13 @@ ibool srv_use_doublewrite_buf = TRUE;
|
|||||||
|
|
||||||
ibool srv_set_thread_priorities = TRUE;
|
ibool srv_set_thread_priorities = TRUE;
|
||||||
int srv_query_thread_priority = 0;
|
int srv_query_thread_priority = 0;
|
||||||
|
|
||||||
|
/* TRUE if the Address Windowing Extensions of Windows are used; then we must
|
||||||
|
disable adaptive hash indexes */
|
||||||
|
ibool srv_use_awe = FALSE;
|
||||||
|
ibool srv_use_adaptive_hash_indexes = TRUE;
|
||||||
|
|
||||||
|
|
||||||
/*-------------------------------------------*/
|
/*-------------------------------------------*/
|
||||||
ulint srv_n_spin_wait_rounds = 20;
|
ulint srv_n_spin_wait_rounds = 20;
|
||||||
ulint srv_spin_wait_delay = 5;
|
ulint srv_spin_wait_delay = 5;
|
||||||
@ -1956,9 +1968,19 @@ srv_normalize_init_values(void)
|
|||||||
|
|
||||||
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
|
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
|
||||||
|
|
||||||
srv_pool_size = srv_pool_size / UNIV_PAGE_SIZE;
|
srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024);
|
||||||
|
|
||||||
|
srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE;
|
||||||
|
|
||||||
srv_lock_table_size = 20 * srv_pool_size;
|
if (srv_use_awe) {
|
||||||
|
/* If we are using AWE we must save memory in the 32-bit
|
||||||
|
address space of the process, and cannot bind the lock
|
||||||
|
table size to the real buffer pool size. */
|
||||||
|
|
||||||
|
srv_lock_table_size = 20 * srv_awe_window_size;
|
||||||
|
} else {
|
||||||
|
srv_lock_table_size = 20 * srv_pool_size;
|
||||||
|
}
|
||||||
|
|
||||||
return(DB_SUCCESS);
|
return(DB_SUCCESS);
|
||||||
}
|
}
|
||||||
@ -2323,6 +2345,12 @@ srv_sprintf_innodb_monitor(
|
|||||||
"Total memory allocated %lu; in additional pool allocated %lu\n",
|
"Total memory allocated %lu; in additional pool allocated %lu\n",
|
||||||
ut_total_allocated_memory,
|
ut_total_allocated_memory,
|
||||||
mem_pool_get_reserved(mem_comm_pool));
|
mem_pool_get_reserved(mem_comm_pool));
|
||||||
|
if (srv_use_awe) {
|
||||||
|
buf += sprintf(buf,
|
||||||
|
"In addition to that %lu MB of AWE memory allocated\n",
|
||||||
|
srv_pool_size / ((1024 * 1024) / UNIV_PAGE_SIZE));
|
||||||
|
}
|
||||||
|
|
||||||
buf_print_io(buf, buf_end);
|
buf_print_io(buf, buf_end);
|
||||||
buf = buf + strlen(buf);
|
buf = buf + strlen(buf);
|
||||||
ut_a(buf < buf_end + 1500);
|
ut_a(buf < buf_end + 1500);
|
||||||
|
@ -935,6 +935,7 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
/*====================================*/
|
/*====================================*/
|
||||||
/* out: DB_SUCCESS or error code */
|
/* out: DB_SUCCESS or error code */
|
||||||
{
|
{
|
||||||
|
buf_pool_t* ret;
|
||||||
ibool create_new_db;
|
ibool create_new_db;
|
||||||
ibool log_file_created;
|
ibool log_file_created;
|
||||||
ibool log_created = FALSE;
|
ibool log_created = FALSE;
|
||||||
@ -970,6 +971,11 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
#ifdef UNIV_MEM_DEBUG
|
#ifdef UNIV_MEM_DEBUG
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: !!!!!!!!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!!!!!!!\n");
|
"InnoDB: !!!!!!!!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!!!!!!!\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef UNIV_SIMULATE_AWE
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: !!!!!!!!!!!!!! UNIV_SIMULATE_AWE switched on !!!!!!!!!!!!!!!!!\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) {
|
if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) {
|
||||||
@ -1002,6 +1008,17 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
srv_startup_is_before_trx_rollback_phase = TRUE;
|
srv_startup_is_before_trx_rollback_phase = TRUE;
|
||||||
os_aio_use_native_aio = FALSE;
|
os_aio_use_native_aio = FALSE;
|
||||||
|
|
||||||
|
#if !defined(__NT__) && !defined(UNIV_SIMULATE_AWE)
|
||||||
|
if (srv_use_awe) {
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Error: You have specified innodb_buffer_pool_awe_mem_mb\n"
|
||||||
|
"InnoDB: in my.cnf, but AWE can only be used in Windows 2000 and later.\n");
|
||||||
|
|
||||||
|
return(DB_ERROR);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __WIN__
|
#ifdef __WIN__
|
||||||
if (os_get_os_version() == OS_WIN95
|
if (os_get_os_version() == OS_WIN95
|
||||||
|| os_get_os_version() == OS_WIN31
|
|| os_get_os_version() == OS_WIN31
|
||||||
@ -1057,6 +1074,9 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
return(DB_ERROR);
|
return(DB_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Note that the call srv_boot() also changes the values of
|
||||||
|
srv_pool_size etc. to the units used by InnoDB internally */
|
||||||
|
|
||||||
err = srv_boot();
|
err = srv_boot();
|
||||||
|
|
||||||
if (err != DB_SUCCESS) {
|
if (err != DB_SUCCESS) {
|
||||||
@ -1088,7 +1108,26 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
|
|
||||||
fil_init(SRV_MAX_N_OPEN_FILES);
|
fil_init(SRV_MAX_N_OPEN_FILES);
|
||||||
|
|
||||||
buf_pool_init(srv_pool_size, srv_pool_size);
|
if (srv_use_awe) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Using AWE: Memory window is %lu MB and AWE memory is %lu MB\n",
|
||||||
|
srv_awe_window_size / ((1024 * 1024) / UNIV_PAGE_SIZE),
|
||||||
|
srv_pool_size / ((1024 * 1024) / UNIV_PAGE_SIZE));
|
||||||
|
|
||||||
|
/* We must disable adaptive hash indexes because they do not
|
||||||
|
tolerate remapping of pages in AWE */
|
||||||
|
|
||||||
|
srv_use_adaptive_hash_indexes = FALSE;
|
||||||
|
ret = buf_pool_init(srv_pool_size, srv_pool_size,
|
||||||
|
srv_awe_window_size);
|
||||||
|
} else {
|
||||||
|
ret = buf_pool_init(srv_pool_size, srv_pool_size,
|
||||||
|
srv_pool_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret == NULL) {
|
||||||
|
return(DB_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
fsp_init();
|
fsp_init();
|
||||||
log_init();
|
log_init();
|
||||||
|
@ -472,9 +472,9 @@ trx_sys_update_mysql_binlog_offset(
|
|||||||
if (0 != ut_memcmp(sys_header + field + TRX_SYS_MYSQL_LOG_NAME,
|
if (0 != ut_memcmp(sys_header + field + TRX_SYS_MYSQL_LOG_NAME,
|
||||||
file_name, 1 + ut_strlen(file_name))) {
|
file_name, 1 + ut_strlen(file_name))) {
|
||||||
|
|
||||||
mlog_write_string((byte*) (sys_header + field
|
mlog_write_string(sys_header + field
|
||||||
+ TRX_SYS_MYSQL_LOG_NAME),
|
+ TRX_SYS_MYSQL_LOG_NAME,
|
||||||
(byte*) file_name, 1 + ut_strlen(file_name), mtr);
|
file_name, 1 + ut_strlen(file_name), mtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mach_read_from_4(sys_header + field
|
if (mach_read_from_4(sys_header + field
|
||||||
|
@ -99,7 +99,7 @@ trx_create(
|
|||||||
|
|
||||||
trx->mysql_log_file_name = NULL;
|
trx->mysql_log_file_name = NULL;
|
||||||
trx->mysql_log_offset = 0;
|
trx->mysql_log_offset = 0;
|
||||||
trx->mysql_master_log_file_name = (char*) "";
|
trx->mysql_master_log_file_name = "";
|
||||||
trx->mysql_master_log_pos = 0;
|
trx->mysql_master_log_pos = 0;
|
||||||
|
|
||||||
trx->ignore_duplicates_in_insert = FALSE;
|
trx->ignore_duplicates_in_insert = FALSE;
|
||||||
|
@ -197,6 +197,7 @@ ut_get_year_month_day(
|
|||||||
*month = (ulint)cal_tm.wMonth;
|
*month = (ulint)cal_tm.wMonth;
|
||||||
*day = (ulint)cal_tm.wDay;
|
*day = (ulint)cal_tm.wDay;
|
||||||
#else
|
#else
|
||||||
|
struct tm cal_tm;
|
||||||
struct tm* cal_tm_ptr;
|
struct tm* cal_tm_ptr;
|
||||||
time_t tm;
|
time_t tm;
|
||||||
|
|
||||||
|
@ -82,7 +82,8 @@ are declared in mysqld.cc: */
|
|||||||
|
|
||||||
long innobase_mirrored_log_groups, innobase_log_files_in_group,
|
long innobase_mirrored_log_groups, innobase_log_files_in_group,
|
||||||
innobase_log_file_size, innobase_log_buffer_size,
|
innobase_log_file_size, innobase_log_buffer_size,
|
||||||
innobase_buffer_pool_size, innobase_additional_mem_pool_size,
|
innobase_buffer_pool_size, innobase_buffer_pool_awe_mem_mb,
|
||||||
|
innobase_additional_mem_pool_size,
|
||||||
innobase_file_io_threads, innobase_lock_wait_timeout,
|
innobase_file_io_threads, innobase_lock_wait_timeout,
|
||||||
innobase_thread_concurrency, innobase_force_recovery;
|
innobase_thread_concurrency, innobase_force_recovery;
|
||||||
|
|
||||||
@ -753,7 +754,25 @@ innobase_init(void)
|
|||||||
srv_log_buffer_size = (ulint) innobase_log_buffer_size;
|
srv_log_buffer_size = (ulint) innobase_log_buffer_size;
|
||||||
srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
|
srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
|
||||||
|
|
||||||
srv_pool_size = (ulint) innobase_buffer_pool_size;
|
/* We set srv_pool_size here in units of 1 kB. InnoDB internally
|
||||||
|
changes the value so that it becomes the number of database pages. */
|
||||||
|
|
||||||
|
if (innobase_buffer_pool_awe_mem_mb == 0) {
|
||||||
|
/* Careful here: we first convert the signed long int to ulint
|
||||||
|
and only after that divide */
|
||||||
|
|
||||||
|
srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
|
||||||
|
} else {
|
||||||
|
srv_use_awe = TRUE;
|
||||||
|
srv_pool_size = (ulint)
|
||||||
|
(1024 * innobase_buffer_pool_awe_mem_mb);
|
||||||
|
srv_awe_window_size = (ulint) innobase_buffer_pool_size;
|
||||||
|
|
||||||
|
/* Note that what the user specified as
|
||||||
|
innodb_buffer_pool_size is actually the AWE memory window
|
||||||
|
size in this case, and the real buffer pool size is
|
||||||
|
determined by .._awe_mem_mb. */
|
||||||
|
}
|
||||||
|
|
||||||
srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
|
srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
|
||||||
|
|
||||||
|
@ -178,7 +178,8 @@ extern char *innobase_home, *innobase_tmpdir, *innobase_logdir;
|
|||||||
extern long innobase_lock_scan_time;
|
extern long innobase_lock_scan_time;
|
||||||
extern long innobase_mirrored_log_groups, innobase_log_files_in_group;
|
extern long innobase_mirrored_log_groups, innobase_log_files_in_group;
|
||||||
extern long innobase_log_file_size, innobase_log_buffer_size;
|
extern long innobase_log_file_size, innobase_log_buffer_size;
|
||||||
extern long innobase_buffer_pool_size, innobase_additional_mem_pool_size;
|
extern long innobase_buffer_pool_size, innobase_buffer_pool_awe_mem_mb,
|
||||||
|
innobase_additional_mem_pool_size;
|
||||||
extern long innobase_file_io_threads, innobase_lock_wait_timeout;
|
extern long innobase_file_io_threads, innobase_lock_wait_timeout;
|
||||||
extern long innobase_force_recovery, innobase_thread_concurrency;
|
extern long innobase_force_recovery, innobase_thread_concurrency;
|
||||||
extern char *innobase_data_home_dir, *innobase_data_file_path;
|
extern char *innobase_data_home_dir, *innobase_data_file_path;
|
||||||
|
@ -3194,6 +3194,7 @@ enum options {
|
|||||||
OPT_INNODB_LOG_FILE_SIZE,
|
OPT_INNODB_LOG_FILE_SIZE,
|
||||||
OPT_INNODB_LOG_BUFFER_SIZE,
|
OPT_INNODB_LOG_BUFFER_SIZE,
|
||||||
OPT_INNODB_BUFFER_POOL_SIZE,
|
OPT_INNODB_BUFFER_POOL_SIZE,
|
||||||
|
OPT_INNODB_BUFFER_POOL_AWE_MEM_MB,
|
||||||
OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE,
|
OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE,
|
||||||
OPT_INNODB_FILE_IO_THREADS,
|
OPT_INNODB_FILE_IO_THREADS,
|
||||||
OPT_INNODB_LOCK_WAIT_TIMEOUT,
|
OPT_INNODB_LOCK_WAIT_TIMEOUT,
|
||||||
@ -3753,6 +3754,10 @@ struct my_option my_long_options[] =
|
|||||||
"The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
|
"The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
|
||||||
(gptr*) &innobase_buffer_pool_size, (gptr*) &innobase_buffer_pool_size, 0,
|
(gptr*) &innobase_buffer_pool_size, (gptr*) &innobase_buffer_pool_size, 0,
|
||||||
GET_LONG, REQUIRED_ARG, 8*1024*1024L, 1024*1024L, ~0L, 0, 1024*1024L, 0},
|
GET_LONG, REQUIRED_ARG, 8*1024*1024L, 1024*1024L, ~0L, 0, 1024*1024L, 0},
|
||||||
|
{"innodb_buffer_pool_awe_mem_mb", OPT_INNODB_BUFFER_POOL_AWE_MEM_MB,
|
||||||
|
"If Windows AWE is used, the size of InnoDB buffer pool allocated from the AWE memory.",
|
||||||
|
(gptr*) &innobase_buffer_pool_awe_mem_mb, (gptr*) &innobase_buffer_pool_awe_mem_mb, 0,
|
||||||
|
GET_LONG, REQUIRED_ARG, 0, 0, 63000, 0, 1, 0},
|
||||||
{"innodb_additional_mem_pool_size", OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE,
|
{"innodb_additional_mem_pool_size", OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE,
|
||||||
"Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
|
"Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
|
||||||
(gptr*) &innobase_additional_mem_pool_size,
|
(gptr*) &innobase_additional_mem_pool_size,
|
||||||
|
@ -449,6 +449,7 @@ struct show_var_st init_vars[]= {
|
|||||||
#ifdef HAVE_INNOBASE_DB
|
#ifdef HAVE_INNOBASE_DB
|
||||||
{"innodb_additional_mem_pool_size", (char*) &innobase_additional_mem_pool_size, SHOW_LONG },
|
{"innodb_additional_mem_pool_size", (char*) &innobase_additional_mem_pool_size, SHOW_LONG },
|
||||||
{"innodb_buffer_pool_size", (char*) &innobase_buffer_pool_size, SHOW_LONG },
|
{"innodb_buffer_pool_size", (char*) &innobase_buffer_pool_size, SHOW_LONG },
|
||||||
|
{"innodb_buffer_pool_awe_mem_mb", (char*) &innobase_buffer_pool_awe_mem_mb, SHOW_LONG },
|
||||||
{"innodb_data_file_path", (char*) &innobase_data_file_path, SHOW_CHAR_PTR},
|
{"innodb_data_file_path", (char*) &innobase_data_file_path, SHOW_CHAR_PTR},
|
||||||
{"innodb_data_home_dir", (char*) &innobase_data_home_dir, SHOW_CHAR_PTR},
|
{"innodb_data_home_dir", (char*) &innobase_data_home_dir, SHOW_CHAR_PTR},
|
||||||
{"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG },
|
{"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG },
|
||||||
|
Reference in New Issue
Block a user