From 233fd79232b580b6f25f02d447b8de32c0daca71 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sat, 4 Jun 2011 20:06:01 +0200 Subject: [PATCH 01/14] improve Innodb locking primitives on Windows (MySQL Bug#52102, and fix OS_FILE_LIMIT - on Windows it is about 16 millions --- include/config-win.h | 2 +- storage/innodb_plugin/CMakeLists.txt | 4 +- storage/innodb_plugin/include/os0file.h | 4 + storage/innodb_plugin/include/os0sync.h | 54 +- storage/innodb_plugin/include/os0sync.ic | 9 +- storage/innodb_plugin/include/srv0srv.h | 4 +- storage/innodb_plugin/include/sync0sync.h | 2 +- storage/innodb_plugin/os/os0file.c | 195 ++------ storage/innodb_plugin/os/os0sync.c | 497 +++++++++++------- storage/innodb_plugin/srv/srv0srv.c | 14 + storage/innodb_plugin/srv/srv0start.c | 26 +- storage/xtradb/CMakeLists.txt | 15 +- storage/xtradb/include/os0file.h | 4 + storage/xtradb/include/os0sync.h | 71 ++- storage/xtradb/include/os0sync.ic | 9 +- storage/xtradb/include/srv0srv.h | 4 +- storage/xtradb/include/sync0sync.h | 2 +- storage/xtradb/os/os0file.c | 196 ++------ storage/xtradb/os/os0sync.c | 583 +++++++++++++++------- storage/xtradb/srv/srv0srv.c | 14 + storage/xtradb/srv/srv0start.c | 45 +- 21 files changed, 955 insertions(+), 799 deletions(-) diff --git a/include/config-win.h b/include/config-win.h index 6d12bb0e33f..e538cf66268 100644 --- a/include/config-win.h +++ b/include/config-win.h @@ -374,7 +374,7 @@ inline ulonglong double2ulonglong(double d) #define FN_DEVCHAR ':' #define FN_NETWORK_DRIVES /* Uses \\ to indicate network drives */ #define FN_NO_CASE_SENCE /* Files are not case-sensitive */ -#define OS_FILE_LIMIT 2048 +#define OS_FILE_LIMIT 16*1024*1024 #define DO_NOT_REMOVE_THREAD_WRAPPERS #define thread_safe_increment(V,L) InterlockedIncrement((long*) &(V)) diff --git a/storage/innodb_plugin/CMakeLists.txt b/storage/innodb_plugin/CMakeLists.txt index 6dfac4e52cb..36e4a0ebb36 100644 --- a/storage/innodb_plugin/CMakeLists.txt +++ 
b/storage/innodb_plugin/CMakeLists.txt @@ -84,9 +84,7 @@ SET(INNODB_PLUGIN_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c) -# Windows atomics do not perform well. Disable Windows atomics by default. -# See bug#52102 for details. -#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION) + ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION) MYSQL_STORAGE_ENGINE(INNODB_PLUGIN) diff --git a/storage/innodb_plugin/include/os0file.h b/storage/innodb_plugin/include/os0file.h index d645cae38bb..297d59f2b98 100644 --- a/storage/innodb_plugin/include/os0file.h +++ b/storage/innodb_plugin/include/os0file.h @@ -182,6 +182,10 @@ log. */ #define OS_WIN95 2 /*!< Microsoft Windows 95 */ #define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */ #define OS_WIN2000 4 /*!< Microsoft Windows 2000 */ +#define OS_WINXP 5 /*!< Microsoft Windows XP */ +#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */ +#define OS_WIN7 7 /*!< Microsoft Windows 7 */ + extern ulint os_n_file_reads; extern ulint os_n_file_writes; diff --git a/storage/innodb_plugin/include/os0sync.h b/storage/innodb_plugin/include/os0sync.h index f32e7ab710a..d29c231c2f0 100644 --- a/storage/innodb_plugin/include/os0sync.h +++ b/storage/innodb_plugin/include/os0sync.h @@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri #include "univ.i" #include "ut0lst.h" -#ifdef __WIN__ - +#ifdef _WIN32 +/** Native event (slow)*/ +typedef HANDLE os_native_event_t; /** Native mutex */ -#define os_fast_mutex_t CRITICAL_SECTION - -/** Native event */ -typedef HANDLE os_native_event_t; - -/** Operating system event */ -typedef struct os_event_struct os_event_struct_t; -/** Operating system event handle */ -typedef os_event_struct_t* os_event_t; - -/** An asynchronous signal sent between threads */ -struct os_event_struct { - os_native_event_t handle; - /*!< Windows event */ - 
UT_LIST_NODE_T(os_event_struct_t) os_event_list; - /*!< list of all created events */ -}; +typedef CRITICAL_SECTION os_fast_mutex_t; +/** Native condition variable */ +typedef CONDITION_VARIABLE os_cond_t; #else /** Native mutex */ typedef pthread_mutex_t os_fast_mutex_t; +/** Native condition variable */ +typedef pthread_cond_t os_cond_t; +#endif /** Operating system event */ typedef struct os_event_struct os_event_struct_t; @@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t; /** An asynchronous signal sent between threads */ struct os_event_struct { +#ifdef _WIN32 + HANDLE handle; /*!< kernel event object, slow, used on older Windows */ +#endif os_fast_mutex_t os_mutex; /*!< this mutex protects the next fields */ ibool is_set; /*!< this is TRUE when the event is @@ -76,12 +69,14 @@ struct os_event_struct { this event */ ib_int64_t signal_count; /*!< this is incremented each time the event becomes signaled */ - pthread_cond_t cond_var; /*!< condition variable is used in + os_cond_t cond_var; /*!< condition variable is used in waiting for the event */ UT_LIST_NODE_T(os_event_struct_t) os_event_list; /*!< list of all created events */ }; -#endif + + + /** Operating system mutex */ typedef struct os_mutex_struct os_mutex_str_t; @@ -198,21 +193,6 @@ os_event_wait_time( os_event_t event, /*!< in: event to wait */ ulint time); /*!< in: timeout in microseconds, or OS_SYNC_INFINITE_TIME */ -#ifdef __WIN__ -/**********************************************************//** -Waits for any event in an OS native event array. Returns if even a single -one is signaled or becomes signaled. -@return index of the event which was signaled */ -UNIV_INTERN -ulint -os_event_wait_multiple( -/*===================*/ - ulint n, /*!< in: number of events in the - array */ - os_native_event_t* native_event_array); - /*!< in: pointer to an array of event - handles */ -#endif /*********************************************************//** Creates an operating system mutex semaphore. 
Because these are slow, the mutex semaphore of InnoDB itself (mutex_t) should be used where possible. @@ -385,7 +365,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ # define os_atomic_test_and_set_byte(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) -#elif defined(HAVE_WINDOWS_ATOMICS) +#elif defined(_WIN32) #define HAVE_ATOMIC_BUILTINS diff --git a/storage/innodb_plugin/include/os0sync.ic b/storage/innodb_plugin/include/os0sync.ic index 1f3ce38fa65..2c6c1dbe629 100644 --- a/storage/innodb_plugin/include/os0sync.ic +++ b/storage/innodb_plugin/include/os0sync.ic @@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri #endif /**********************************************************//** -Acquires ownership of a fast mutex. Currently in Windows this is the same -as os_fast_mutex_lock! +Acquires ownership of a fast mutex. @return 0 if success, != 0 if was reserved by another thread */ UNIV_INLINE ulint @@ -38,9 +37,9 @@ os_fast_mutex_trylock( os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ { #ifdef __WIN__ - EnterCriticalSection(fast_mutex); - - return(0); + if (TryEnterCriticalSection(fast_mutex)) + return 0; + return(1); #else /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock so that it returns 0 on success. 
In the operating system diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h index 91ae895040c..f8330988b77 100644 --- a/storage/innodb_plugin/include/srv0srv.h +++ b/storage/innodb_plugin/include/srv0srv.h @@ -106,7 +106,9 @@ extern ulint srv_check_file_format_at_startup; on duplicate key checking and foreign key checking */ extern ibool srv_locks_unsafe_for_binlog; #endif /* !UNIV_HOTBACKUP */ - +#ifdef __WIN__ +extern ibool srv_use_native_conditions; +#endif extern ulint srv_n_data_files; extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; diff --git a/storage/innodb_plugin/include/sync0sync.h b/storage/innodb_plugin/include/sync0sync.h index 71c9920a10b..e246e256586 100644 --- a/storage/innodb_plugin/include/sync0sync.h +++ b/storage/innodb_plugin/include/sync0sync.h @@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri extern my_bool timed_mutexes; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ -#ifdef HAVE_WINDOWS_ATOMICS +#ifdef _WIN32 typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates on LONG variable */ #else diff --git a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0file.c index ff80f7ed1b4..59885f20566 100644 --- a/storage/innodb_plugin/os/os0file.c +++ b/storage/innodb_plugin/os/os0file.c @@ -121,7 +121,7 @@ struct os_aio_slot_struct{ which pending aio operation was completed */ #ifdef WIN_ASYNC_IO - os_event_t event; /*!< event object we need in the + HANDLE handle; /*!< handle object we need in the OVERLAPPED struct */ OVERLAPPED control; /*!< Windows control block for the aio request */ @@ -155,7 +155,7 @@ struct os_aio_array_struct{ aio array outside the ibuf segment */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ #ifdef __WIN__ - os_native_event_t* native_events; + HANDLE* handles; /*!< Pointer to an array of OS native event handles where we copied the handles from slots, in the same @@ -229,10 +229,16 @@ os_get_os_version(void) } else if 
(os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { return(OS_WIN95); } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { - if (os_info.dwMajorVersion <= 4) { - return(OS_WINNT); - } else { - return(OS_WIN2000); + switch(os_info.dwMajorVersion){ + case 3: + case 4: + return OS_WINNT; + case 5: + return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP; + case 6: + return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7; + default: + return OS_WIN7; } } else { ut_error; @@ -2272,13 +2278,12 @@ os_file_read( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2293,40 +2298,11 @@ try_again: ut_ad(buf); ut_ad(n > 0); - low = (DWORD) offset; - high = (DWORD) offset_high; - os_mutex_enter(os_file_count_mutex); os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; - } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ + ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped); os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2355,9 +2331,6 @@ try_again: 
(ulong)n, (ulong)offset_high, (ulong)offset, (long)ret); #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif retry = os_file_handle_error(NULL, "read"); if (retry) { @@ -2399,13 +2372,13 @@ os_file_read_no_error_handling( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; + /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2420,40 +2393,12 @@ try_again: ut_ad(buf); ut_ad(n > 0); - low = (DWORD) offset; - high = (DWORD) offset_high; os_mutex_enter(os_file_count_mutex); os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; - } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ + ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped); os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2476,9 +2421,6 @@ try_again: return(TRUE); } #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif retry = os_file_handle_error_no_exit(NULL, "read"); if (retry) { @@ -2531,14 +2473,14 @@ os_file_write( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ulint n_retries = 0; ulint err; -#ifndef 
UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; + /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2551,50 +2493,12 @@ os_file_write( ut_ad(buf); ut_ad(n > 0); retry: - low = (DWORD) offset; - high = (DWORD) offset_high; os_mutex_enter(os_file_count_mutex); os_n_pending_writes++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / write operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes--; - os_mutex_exit(os_file_count_mutex); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: File pointer positioning to" - " file %s failed at\n" - "InnoDB: offset %lu %lu. Operating system" - " error number %lu.\n" - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - REFMAN "operating-system-error-codes.html\n", - name, (ulong) offset_high, (ulong) offset, - (ulong) GetLastError()); - - return(FALSE); - } - - ret = WriteFile(file, buf, (DWORD) n, &len, NULL); + ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped); /* Always do fsync to reduce the probability that when the OS crashes, a database page is only partially physically written to disk. 
*/ @@ -2605,10 +2509,6 @@ retry: } # endif /* UNIV_DO_FLUSH */ -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - os_mutex_enter(os_file_count_mutex); os_n_pending_writes--; os_mutex_exit(os_file_count_mutex); @@ -3012,7 +2912,7 @@ os_aio_array_create( array->n_reserved = 0; array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); #ifdef __WIN__ - array->native_events = ut_malloc(n * sizeof(os_native_event_t)); + array->handles = ut_malloc(n * sizeof(HANDLE)); #endif for (i = 0; i < n; i++) { slot = os_aio_array_get_nth_slot(array, i); @@ -3020,13 +2920,14 @@ os_aio_array_create( slot->pos = i; slot->reserved = FALSE; #ifdef WIN_ASYNC_IO - slot->event = os_event_create(NULL); + slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL); + over = &(slot->control); - over->hEvent = slot->event->handle; + over->hEvent = slot->handle; - *((array->native_events) + i) = over->hEvent; + *((array->handles) + i) = over->hEvent; #endif } @@ -3046,12 +2947,12 @@ os_aio_array_free( for (i = 0; i < array->n_slots; i++) { os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); - os_event_free(slot->event); + CloseHandle(slot->handle); } #endif /* WIN_ASYNC_IO */ #ifdef __WIN__ - ut_free(array->native_events); + ut_free(array->handles); #endif /* __WIN__ */ os_mutex_free(array->mutex); os_event_free(array->not_full); @@ -3174,7 +3075,8 @@ os_aio_array_wake_win_aio_at_shutdown( for (i = 0; i < array->n_slots; i++) { - os_event_set((array->slots + i)->event); + SetEvent(array->slots[i].handle); + } } #endif @@ -3396,7 +3298,7 @@ found: control = &(slot->control); control->Offset = (DWORD)offset; control->OffsetHigh = (DWORD)offset_high; - os_event_reset(slot->event); + ResetEvent(slot->handle); #endif os_mutex_exit(array->mutex); @@ -3433,7 +3335,7 @@ os_aio_array_free_slot( } #ifdef WIN_ASYNC_IO - os_event_reset(slot->event); + ResetEvent(slot->handle); #endif os_mutex_exit(array->mutex); } @@ -3793,15 +3695,18 @@ os_aio_windows_handle( n = 
array->n_slots / array->n_segments; if (array == os_aio_sync_array) { - os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); + WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE); i = pos; } else { srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); - i = os_event_wait_multiple(n, - (array->native_events) - + segment * n); + i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE); } + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + os_thread_exit(NULL); + } + + os_mutex_enter(array->mutex); slot = os_aio_array_get_nth_slot(array, i + segment * n); diff --git a/storage/innodb_plugin/os/os0sync.c b/storage/innodb_plugin/os/os0sync.c index 60467242e14..3ffef2e3c01 100644 --- a/storage/innodb_plugin/os/os0sync.c +++ b/storage/innodb_plugin/os/os0sync.c @@ -31,6 +31,7 @@ Created 9/6/1995 Heikki Tuuri #ifdef __WIN__ #include +#include #endif #include "ut0mem.h" @@ -71,11 +72,225 @@ UNIV_INTERN ulint os_event_count = 0; UNIV_INTERN ulint os_mutex_count = 0; UNIV_INTERN ulint os_fast_mutex_count = 0; +/* The number of microseconds in a second. */ +static const ulint MICROSECS_IN_A_SECOND = 1000000; + /* Because a mutex is embedded inside an event and there is an event embedded inside a mutex, on free, this generates a recursive call. This version of the free event function doesn't acquire the global lock */ static void os_event_free_internal(os_event_t event); +/* On Windows (Vista and later), load function pointers for condition +variable handling. Those functions are not available in prior versions, +so we have to use them via runtime loading, as long as we support XP.
static void os_cond_module_init(void); + +#ifdef __WIN__ +/* Prototypes and function pointers for condition variable functions */ +typedef VOID (WINAPI* InitializeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static InitializeConditionVariableProc initialize_condition_variable; + +typedef BOOL (WINAPI* SleepConditionVariableCSProc) + (PCONDITION_VARIABLE ConditionVariable, + PCRITICAL_SECTION CriticalSection, + DWORD dwMilliseconds); +static SleepConditionVariableCSProc sleep_condition_variable; + +typedef VOID (WINAPI* WakeAllConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static WakeAllConditionVariableProc wake_all_condition_variable; + +typedef VOID (WINAPI* WakeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static WakeConditionVariableProc wake_condition_variable; +#endif + +/*********************************************************//** +Initialize condition variable */ +UNIV_INLINE +void +os_cond_init( +/*=========*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(initialize_condition_variable != NULL); + initialize_condition_variable(cond); +#else + ut_a(pthread_cond_init(cond, NULL) == 0); +#endif +} + +/*********************************************************//** +Do a timed wait on condition variable. +@return TRUE if timed out, FALSE otherwise */ +UNIV_INLINE +ibool +os_cond_wait_timed( +/*===============*/ + os_cond_t* cond, /*!< in: condition variable.
*/ + os_fast_mutex_t* mutex, /*!< in: fast mutex */ +#ifndef __WIN__ + const struct timespec* abstime /*!< in: timeout */ +#else + DWORD time_in_ms /*!< in: timeout in + milliseconds*/ +#endif /* !__WIN__ */ +) +{ +#ifdef __WIN__ + BOOL ret; + DWORD err; + + ut_a(sleep_condition_variable != NULL); + + ret = sleep_condition_variable(cond, mutex, time_in_ms); + + if (!ret) { + err = GetLastError(); + /* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx, + "Condition variables are subject to spurious wakeups + (those not associated with an explicit wake) and stolen wakeups + (another thread manages to run before the woken thread)." + Check for both types of timeouts. + Conditions are checked by the caller.*/ + if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) { + return(TRUE); + } + } + + ut_a(ret); + + return(FALSE); +#else + int ret; + + ret = pthread_cond_timedwait(cond, mutex, abstime); + + switch (ret) { + case 0: + case ETIMEDOUT: + /* We play it safe by checking for EINTR even though + according to the POSIX documentation it can't return EINTR. */ + case EINTR: + break; + + default: + fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: " + "%d: abstime={%lu,%lu}\n", + ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec); + ut_error; + } + + return(ret == ETIMEDOUT); +#endif +} +/*********************************************************//** +Wait on condition variable */ +UNIV_INLINE +void +os_cond_wait( +/*=========*/ + os_cond_t* cond, /*!< in: condition variable. 
*/ + os_fast_mutex_t* mutex) /*!< in: fast mutex */ +{ + ut_a(cond); + ut_a(mutex); + +#ifdef __WIN__ + ut_a(sleep_condition_variable != NULL); + ut_a(sleep_condition_variable(cond, mutex, INFINITE)); +#else + ut_a(pthread_cond_wait(cond, mutex) == 0); +#endif +} + +/*********************************************************//** +Wakes all threads waiting for condition variable */ +UNIV_INLINE +void +os_cond_broadcast( +/*==============*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(wake_all_condition_variable != NULL); + wake_all_condition_variable(cond); +#else + ut_a(pthread_cond_broadcast(cond) == 0); +#endif +} + +/*********************************************************//** +Wakes one thread waiting for condition variable */ +UNIV_INLINE +void +os_cond_signal( +/*==========*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(wake_condition_variable != NULL); + wake_condition_variable(cond); +#else + ut_a(pthread_cond_signal(cond) == 0); +#endif +} + +/*********************************************************//** +Destroys condition variable */ +UNIV_INLINE +void +os_cond_destroy( +/*============*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ +#ifdef __WIN__ + /* Do nothing */ +#else + ut_a(pthread_cond_destroy(cond) == 0); +#endif +} + +/*********************************************************//** +On Windows (Vista and later), load function pointers for condition variable +handling. Those functions are not available in prior versions, so we have to +use them via runtime loading, as long as we support XP. 
*/ +static +void +os_cond_module_init(void) +/*=====================*/ +{ +#ifdef __WIN__ + HMODULE h_dll; + + + h_dll = GetModuleHandle("kernel32"); + + initialize_condition_variable = (InitializeConditionVariableProc) + GetProcAddress(h_dll, "InitializeConditionVariable"); + sleep_condition_variable = (SleepConditionVariableCSProc) + GetProcAddress(h_dll, "SleepConditionVariableCS"); + wake_all_condition_variable = (WakeAllConditionVariableProc) + GetProcAddress(h_dll, "WakeAllConditionVariable"); + wake_condition_variable = (WakeConditionVariableProc) + GetProcAddress(h_dll, "WakeConditionVariable"); + + /* When using native condition variables, check function pointers */ + ut_a(initialize_condition_variable); + ut_a(sleep_condition_variable); + ut_a(wake_all_condition_variable); + ut_a(wake_condition_variable); +#endif +} + /*********************************************************//** Initializes global event and OS 'slow' mutex lists. */ UNIV_INTERN @@ -89,6 +304,9 @@ os_sync_init(void) os_sync_mutex = NULL; os_sync_mutex_inited = FALSE; + /* Now for Windows only */ + os_cond_module_init(); + os_sync_mutex = os_mutex_create(NULL); os_sync_mutex_inited = TRUE; @@ -143,42 +361,46 @@ os_event_create( const char* name) /*!< in: the name of the event, if NULL the event is created without a name */ { -#ifdef __WIN__ - os_event_t event; - - event = ut_malloc(sizeof(struct os_event_struct)); - - event->handle = CreateEvent(NULL, /* No security attributes */ - TRUE, /* Manual reset */ - FALSE, /* Initial state nonsignaled */ - (LPCTSTR) name); - if (!event->handle) { - fprintf(stderr, - "InnoDB: Could not create a Windows event semaphore;" - " Windows error %lu\n", - (ulong) GetLastError()); - } -#else /* Unix */ os_event_t event; - UT_NOT_USED(name); +#ifdef __WIN__ + if(!srv_use_native_conditions) { - event = ut_malloc(sizeof(struct os_event_struct)); + event = ut_malloc(sizeof(struct os_event_struct)); - os_fast_mutex_init(&(event->os_mutex)); + event->handle = 
CreateEvent(NULL, + TRUE, + FALSE, + (LPCTSTR) name); + if (!event->handle) { + fprintf(stderr, + "InnoDB: Could not create a Windows event" + " semaphore; Windows error %lu\n", + (ulong) GetLastError()); + } + } else /* Windows with condition variables */ - ut_a(0 == pthread_cond_init(&(event->cond_var), NULL)); +#endif - event->is_set = FALSE; + { + UT_NOT_USED(name); - /* We return this value in os_event_reset(), which can then be - be used to pass to the os_event_wait_low(). The value of zero - is reserved in os_event_wait_low() for the case when the - caller does not want to pass any signal_count value. To - distinguish between the two cases we initialize signal_count - to 1 here. */ - event->signal_count = 1; -#endif /* __WIN__ */ + event = ut_malloc(sizeof(struct os_event_struct)); + + os_fast_mutex_init(&(event->os_mutex)); + + os_cond_init(&(event->cond_var)); + + event->is_set = FALSE; + + /* We return this value in os_event_reset(), which can then be + be used to pass to the os_event_wait_low(). The value of zero + is reserved in os_event_wait_low() for the case when the + caller does not want to pass any signal_count value. To + distinguish between the two cases we initialize signal_count + to 1 here. 
*/ + event->signal_count = 1; + } /* The os_sync_mutex can be NULL because during startup an event can be created [ because it's embedded in the mutex/rwlock ] before @@ -208,10 +430,15 @@ os_event_set( /*=========*/ os_event_t event) /*!< in: event to set */ { -#ifdef __WIN__ ut_a(event); - ut_a(SetEvent(event->handle)); -#else + +#ifdef __WIN__ + if (!srv_use_native_conditions) { + ut_a(SetEvent(event->handle)); + return; + } +#endif + ut_a(event); os_fast_mutex_lock(&(event->os_mutex)); @@ -221,11 +448,10 @@ os_event_set( } else { event->is_set = TRUE; event->signal_count += 1; - ut_a(0 == pthread_cond_broadcast(&(event->cond_var))); + os_cond_broadcast(&(event->cond_var)); } os_fast_mutex_unlock(&(event->os_mutex)); -#endif } /**********************************************************//** @@ -244,12 +470,14 @@ os_event_reset( { ib_int64_t ret = 0; -#ifdef __WIN__ ut_a(event); - ut_a(ResetEvent(event->handle)); -#else - ut_a(event); +#ifdef __WIN__ + if(!srv_use_native_conditions) { + ut_a(ResetEvent(event->handle)); + return(0); + } +#endif os_fast_mutex_lock(&(event->os_mutex)); @@ -261,7 +489,6 @@ os_event_reset( ret = event->signal_count; os_fast_mutex_unlock(&(event->os_mutex)); -#endif return(ret); } @@ -274,19 +501,21 @@ os_event_free_internal( os_event_t event) /*!< in: event to free */ { #ifdef __WIN__ - ut_a(event); - - ut_a(CloseHandle(event->handle)); -#else - ut_a(event); - - /* This is to avoid freeing the mutex twice */ - os_fast_mutex_free(&(event->os_mutex)); - - ut_a(0 == pthread_cond_destroy(&(event->cond_var))); + if(!srv_use_native_conditions) { + ut_a(event); + ut_a(CloseHandle(event->handle)); + } else #endif - /* Remove from the list of events */ + { + ut_a(event); + /* This is to avoid freeing the mutex twice */ + os_fast_mutex_free(&(event->os_mutex)); + + os_cond_destroy(&(event->cond_var)); + } + + /* Remove from the list of events */ UT_LIST_REMOVE(os_event_list, os_event_list, event); os_event_count--; @@ -303,18 +532,19 @@ 
os_event_free( os_event_t event) /*!< in: event to free */ { + ut_a(event); #ifdef __WIN__ - ut_a(event); - - ut_a(CloseHandle(event->handle)); -#else - ut_a(event); - - os_fast_mutex_free(&(event->os_mutex)); - ut_a(0 == pthread_cond_destroy(&(event->cond_var))); + if(!srv_use_native_conditions){ + ut_a(CloseHandle(event->handle)); + } else /*Windows with condition variables */ #endif - /* Remove from the list of events */ + { + os_fast_mutex_free(&(event->os_mutex)); + os_cond_destroy(&(event->cond_var)); + } + + /* Remove from the list of events */ os_mutex_enter(os_sync_mutex); UT_LIST_REMOVE(os_event_list, os_event_list, event); @@ -327,10 +557,7 @@ os_event_free( } /**********************************************************//** -Waits for an event object until it is in the signaled state. If -srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the -waiting thread when the event becomes signaled (or immediately if the -event is already in the signaled state). +Waits for an event object until it is in the signaled state. Typically, if the event has been signalled after the os_event_reset() we'll return immediately because event->is_set == TRUE. @@ -355,23 +582,27 @@ os_event_wait_low( returned by previous call of os_event_reset(). 
*/ { -#ifdef __WIN__ - DWORD err; - - ut_a(event); - - UT_NOT_USED(reset_sig_count); - - /* Specify an infinite time limit for waiting */ - err = WaitForSingleObject(event->handle, INFINITE); - - ut_a(err == WAIT_OBJECT_0); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } -#else ib_int64_t old_signal_count; +#ifdef __WIN__ + if(!srv_use_native_conditions) { + DWORD err; + + ut_a(event); + + UT_NOT_USED(reset_sig_count); + + /* Specify an infinite wait */ + err = WaitForSingleObject(event->handle, INFINITE); + + ut_a(err == WAIT_OBJECT_0); + + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + os_thread_exit(NULL); + } + return; + } +#endif + os_fast_mutex_lock(&(event->os_mutex)); @@ -396,13 +627,12 @@ os_event_wait_low( return; } - pthread_cond_wait(&(event->cond_var), &(event->os_mutex)); + os_cond_wait(&(event->cond_var), &(event->os_mutex)); /* Solaris manual said that spurious wakeups may occur: we have to check if the event really has been signaled after we came here to wait */ } -#endif } /**********************************************************//** @@ -418,27 +648,29 @@ os_event_wait_time( OS_SYNC_INFINITE_TIME */ { #ifdef __WIN__ - DWORD err; + if(!srv_use_native_conditions) { + DWORD err; - ut_a(event); + ut_a(event); - if (time != OS_SYNC_INFINITE_TIME) { - err = WaitForSingleObject(event->handle, (DWORD) time / 1000); - } else { - err = WaitForSingleObject(event->handle, INFINITE); - } + if (time != OS_SYNC_INFINITE_TIME) { + err = WaitForSingleObject(event->handle, (DWORD) time / 1000); + } else { + err = WaitForSingleObject(event->handle, INFINITE); + } - if (err == WAIT_OBJECT_0) { + if (err == WAIT_OBJECT_0) { - return(0); - } else if (err == WAIT_TIMEOUT) { + return(0); + } else if (err == WAIT_TIMEOUT) { - return(OS_SYNC_TIME_EXCEEDED); - } else { - ut_error; - return(1000000); /* dummy value to eliminate compiler warn. 
*/ - } -#else + return(OS_SYNC_TIME_EXCEEDED); + } else { + ut_error; + return(1000000); /* dummy value to eliminate compiler warn. */ + } + } +#endif UT_NOT_USED(time); /* In Posix this is just an ordinary, infinite wait */ @@ -446,43 +678,8 @@ os_event_wait_time( os_event_wait(event); return(0); -#endif } -#ifdef __WIN__ -/**********************************************************//** -Waits for any event in an OS native event array. Returns if even a single -one is signaled or becomes signaled. -@return index of the event which was signaled */ -UNIV_INTERN -ulint -os_event_wait_multiple( -/*===================*/ - ulint n, /*!< in: number of events in the - array */ - os_native_event_t* native_event_array) - /*!< in: pointer to an array of event - handles */ -{ - DWORD index; - - ut_a(native_event_array); - ut_a(n > 0); - - index = WaitForMultipleObjects((DWORD) n, native_event_array, - FALSE, /* Wait for any 1 event */ - INFINITE); /* Infinite wait time - limit */ - ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */ - ut_a(index < WAIT_OBJECT_0 + n); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } - - return(index - WAIT_OBJECT_0); -} -#endif /*********************************************************//** Creates an operating system mutex semaphore. 
Because these are slow, the @@ -495,24 +692,12 @@ os_mutex_create( const char* name) /*!< in: the name of the mutex, if NULL the mutex is created without a name */ { -#ifdef __WIN__ - HANDLE mutex; - os_mutex_t mutex_str; - - mutex = CreateMutex(NULL, /* No security attributes */ - FALSE, /* Initial state: no owner */ - (LPCTSTR) name); - ut_a(mutex); -#else os_fast_mutex_t* mutex; os_mutex_t mutex_str; - UT_NOT_USED(name); - mutex = ut_malloc(sizeof(os_fast_mutex_t)); os_fast_mutex_init(mutex); -#endif mutex_str = ut_malloc(sizeof(os_mutex_str_t)); mutex_str->handle = mutex; @@ -543,25 +728,11 @@ os_mutex_enter( /*===========*/ os_mutex_t mutex) /*!< in: mutex to acquire */ { -#ifdef __WIN__ - DWORD err; - - ut_a(mutex); - - /* Specify infinite time limit for waiting */ - err = WaitForSingleObject(mutex->handle, INFINITE); - - ut_a(err == WAIT_OBJECT_0); - - (mutex->count)++; - ut_a(mutex->count == 1); -#else os_fast_mutex_lock(mutex->handle); (mutex->count)++; ut_a(mutex->count == 1); -#endif } /**********************************************************//** @@ -577,11 +748,7 @@ os_mutex_exit( ut_a(mutex->count == 1); (mutex->count)--; -#ifdef __WIN__ - ut_a(ReleaseMutex(mutex->handle)); -#else os_fast_mutex_unlock(mutex->handle); -#endif } /**********************************************************//** @@ -610,15 +777,9 @@ os_mutex_free( os_mutex_exit(os_sync_mutex); } -#ifdef __WIN__ - ut_a(CloseHandle(mutex->handle)); - - ut_free(mutex); -#else os_fast_mutex_free(mutex->handle); ut_free(mutex->handle); ut_free(mutex); -#endif } /*********************************************************//** diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c index b1fc1ac67fd..43d57c63a65 100644 --- a/storage/innodb_plugin/srv/srv0srv.c +++ b/storage/innodb_plugin/srv/srv0srv.c @@ -136,6 +136,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX; /** Place locks to records only i.e. 
do not use next-key locking except on duplicate key checking and foreign key checking */ UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; +#ifdef __WIN__ +/* Windows native condition variables. We use runtime loading / function +pointers, because they are not available on Windows Server 2003 and +Windows XP/2000. + +We use condition for events on Windows if possible, even if os_event +resembles Windows kernel event object well API-wise. The reason is +performance, kernel objects are heavyweights and WaitForSingleObject() is a +performance killer causing calling thread to context switch. Besides, Innodb +is preallocating large number (often millions) of os_events. With kernel event +objects it takes a big chunk out of non-paged pool, which is better suited +for tasks like IO than for storing idle event objects. */ +UNIV_INTERN ibool srv_use_native_conditions = FALSE; +#endif /* __WIN__ */ UNIV_INTERN ulint srv_n_data_files = 0; UNIV_INTERN char** srv_data_file_names = NULL; diff --git a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c index f8b5049ca65..b5892c59cfb 100644 --- a/storage/innodb_plugin/srv/srv0start.c +++ b/storage/innodb_plugin/srv/srv0start.c @@ -1132,19 +1132,25 @@ innobase_start_or_create_for_mysql(void) case OS_WIN95: case OS_WIN31: case OS_WINNT: - /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, - and NT use simulated aio. In NT Windows provides async i/o, - but when run in conjunction with InnoDB Hot Backup, it seemed - to corrupt the data files. */ + srv_use_native_conditions = FALSE; + break; - os_aio_use_native_aio = FALSE; - break; - default: - /* On Win 2000 and XP use async i/o */ + case OS_WIN2000: + case OS_WINXP: + /* On 2000 and XP, async IO is available, but no condition variables. 
*/ os_aio_use_native_aio = TRUE; - break; - } + srv_use_native_conditions = FALSE; + break; + + default: + /* On Win 2000 and XP use async i/o */ + /* Vista and later have both async IO and condition variables */ + os_aio_use_native_aio = TRUE; + srv_use_native_conditions = TRUE; + break; + } #endif + if (srv_file_flush_method_str == NULL) { /* These are the default options */ diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 509f7f0fe73..6e16c4ced32 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -15,20 +15,10 @@ # This is the CMakeLists for InnoDB Plugin - - -# Starting at 5.1.38, MySQL CMake files are simplified. But the plugin -# CMakeLists.txt still needs to work with previous versions of MySQL. -IF (MYSQL_VERSION_ID GREATER "50137") - INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake") -ENDIF (MYSQL_VERSION_ID GREATER "50137") - IF (CMAKE_SIZEOF_VOID_P MATCHES 8) SET(WIN64 TRUE) ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8) -ADD_DEFINITIONS(-D_WIN32 -D_LIB -DMYSQL_SERVER) - # Include directories under xtradb INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include ${CMAKE_SOURCE_DIR}/storage/xtradb/handler) @@ -89,9 +79,6 @@ SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c) -# Windows atomics do not perform well. Disable Windows atomics by default. -# See bug#52102 for details. -#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION) -ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION) + MYSQL_STORAGE_ENGINE(XTRADB) diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 732e930517b..ae656d92bb7 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -183,6 +183,10 @@ log. 
*/ #define OS_WIN95 2 /*!< Microsoft Windows 95 */ #define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */ #define OS_WIN2000 4 /*!< Microsoft Windows 2000 */ +#define OS_WINXP 5 /*!< Microsoft Windows XP */ +#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */ +#define OS_WIN7 7 /*!< Microsoft Windows 7 */ + extern ulint os_n_file_reads; extern ulint os_n_file_writes; diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 7366e2c3402..002abebcb0b 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri #include "univ.i" #include "ut0lst.h" -#ifdef __WIN__ - +#ifdef _WIN32 +/** Native event (slow)*/ +typedef HANDLE os_native_event_t; /** Native mutex */ -#define os_fast_mutex_t CRITICAL_SECTION - -/** Native event */ -typedef HANDLE os_native_event_t; - -/** Operating system event */ -typedef struct os_event_struct os_event_struct_t; -/** Operating system event handle */ -typedef os_event_struct_t* os_event_t; - -/** An asynchronous signal sent between threads */ -struct os_event_struct { - os_native_event_t handle; - /*!< Windows event */ - UT_LIST_NODE_T(os_event_struct_t) os_event_list; - /*!< list of all created events */ -}; +typedef CRITICAL_SECTION os_fast_mutex_t; +/** Native condition variable */ +typedef CONDITION_VARIABLE os_cond_t; #else /** Native mutex */ typedef pthread_mutex_t os_fast_mutex_t; +/** Native condition variable */ +typedef pthread_cond_t os_cond_t; +#endif /** Operating system event */ typedef struct os_event_struct os_event_struct_t; @@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t; /** An asynchronous signal sent between threads */ struct os_event_struct { +#ifdef _WIN32 + HANDLE handle; /*!< kernel event object, slow, used on older Windows */ +#endif os_fast_mutex_t os_mutex; /*!< this mutex protects the next fields */ ibool is_set; /*!< this is TRUE when the event is @@ -76,12 +69,14 @@ struct os_event_struct { this 
event */ ib_int64_t signal_count; /*!< this is incremented each time the event becomes signaled */ - pthread_cond_t cond_var; /*!< condition variable is used in + os_cond_t cond_var; /*!< condition variable is used in waiting for the event */ UT_LIST_NODE_T(os_event_struct_t) os_event_list; /*!< list of all created events */ }; -#endif + + + /** Operating system mutex */ typedef struct os_mutex_struct os_mutex_str_t; @@ -186,33 +181,23 @@ os_event_wait_low( os_event_reset(). */ #define os_event_wait(event) os_event_wait_low(event, 0) - +#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0) /**********************************************************//** Waits for an event object until it is in the signaled state or -a timeout is exceeded. +a timeout is exceeded. In Unix the timeout is always infinite. @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ UNIV_INTERN ulint -os_event_wait_time( -/*===============*/ - os_event_t event, /*!< in: event to wait */ - ulint wtime); /*!< in: timeout in microseconds, or - OS_SYNC_INFINITE_TIME */ -#ifdef __WIN__ -/**********************************************************//** -Waits for any event in an OS native event array. Returns if even a single -one is signaled or becomes signaled. -@return index of the event which was signaled */ -UNIV_INTERN -ulint -os_event_wait_multiple( +os_event_wait_time_low( /*===================*/ - ulint n, /*!< in: number of events in the - array */ - os_native_event_t* native_event_array); - /*!< in: pointer to an array of event - handles */ -#endif + os_event_t event, /*!< in: event to wait */ + ulint time_in_usec, /*!< in: timeout in + microseconds, or + OS_SYNC_INFINITE_TIME */ + ib_int64_t reset_sig_count); /*!< in: zero or the value + returned by previous call of + os_event_reset(). */ + /*********************************************************//** Creates an operating system mutex semaphore. 
Because these are slow, the mutex semaphore of InnoDB itself (mutex_t) should be used where possible. @@ -385,7 +370,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ # define os_atomic_test_and_set_byte(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) -#elif defined(HAVE_WINDOWS_ATOMICS) +#elif defined(_WIN32) #define HAVE_ATOMIC_BUILTINS diff --git a/storage/xtradb/include/os0sync.ic b/storage/xtradb/include/os0sync.ic index 1f3ce38fa65..2c6c1dbe629 100644 --- a/storage/xtradb/include/os0sync.ic +++ b/storage/xtradb/include/os0sync.ic @@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri #endif /**********************************************************//** -Acquires ownership of a fast mutex. Currently in Windows this is the same -as os_fast_mutex_lock! +Acquires ownership of a fast mutex. @return 0 if success, != 0 if was reserved by another thread */ UNIV_INLINE ulint @@ -38,9 +37,9 @@ os_fast_mutex_trylock( os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ { #ifdef __WIN__ - EnterCriticalSection(fast_mutex); - - return(0); + if (TryEnterCriticalSection(fast_mutex)) + return 0; + return(1); #else /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock so that it returns 0 on success. 
In the operating system diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index d4329d16a62..e47e10217ec 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -112,7 +112,9 @@ extern ulint srv_check_file_format_at_startup; on duplicate key checking and foreign key checking */ extern ibool srv_locks_unsafe_for_binlog; #endif /* !UNIV_HOTBACKUP */ - +#ifdef __WIN__ +extern ibool srv_use_native_conditions; +#endif extern ulint srv_n_data_files; extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index f2ff83101ab..6aaab1cc7d7 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri extern my_bool timed_mutexes; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ -#ifdef HAVE_WINDOWS_ATOMICS +#ifdef _WIN32 typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates on LONG variable */ #else diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index 5b8e656d8b2..5831b0bd0df 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -149,7 +149,7 @@ struct os_aio_slot_struct{ which pending aio operation was completed */ #ifdef WIN_ASYNC_IO - os_event_t event; /*!< event object we need in the + HANDLE handle; /*!< handle object we need in the OVERLAPPED struct */ OVERLAPPED control; /*!< Windows control block for the aio request */ @@ -183,7 +183,7 @@ struct os_aio_array_struct{ aio array outside the ibuf segment */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ #ifdef __WIN__ - os_native_event_t* native_events; + HANDLE* handles; /*!< Pointer to an array of OS native event handles where we copied the handles from slots, in the same @@ -270,10 +270,16 @@ os_get_os_version(void) } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { return(OS_WIN95); } else if 
(os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { - if (os_info.dwMajorVersion <= 4) { - return(OS_WINNT); - } else { - return(OS_WIN2000); + switch(os_info.dwMajorVersion){ + case 3: + case 4: + return OS_WINNT; + case 5: + return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP; + case 6: + return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7; + default: + return OS_WIN7; } } else { ut_error; @@ -2350,13 +2356,12 @@ _os_file_read( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2371,40 +2376,11 @@ try_again: ut_ad(buf); ut_ad(n > 0); - low = (DWORD) offset; - high = (DWORD) offset_high; - os_mutex_enter(os_file_count_mutex); os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; - } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ + ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped); os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2433,9 +2409,7 @@ try_again: (ulong)n, (ulong)offset_high, (ulong)offset, (long)ret); #endif /* __WIN__ */ -#ifdef 
__WIN__ -error_handling: -#endif + retry = os_file_handle_error(NULL, "read"); if (retry) { @@ -2477,13 +2451,13 @@ os_file_read_no_error_handling( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; + /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2498,40 +2472,11 @@ try_again: ut_ad(buf); ut_ad(n > 0); - low = (DWORD) offset; - high = (DWORD) offset_high; - os_mutex_enter(os_file_count_mutex); os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; - } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ + ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped); os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2554,9 +2499,6 @@ try_again: return(TRUE); } #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif retry = os_file_handle_error_no_exit(NULL, "read"); if (retry) { @@ -2609,14 +2551,13 @@ os_file_write( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ulint n_retries = 0; ulint err; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + 
+ memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2629,50 +2570,12 @@ os_file_write( ut_ad(buf); ut_ad(n > 0); retry: - low = (DWORD) offset; - high = (DWORD) offset_high; os_mutex_enter(os_file_count_mutex); os_n_pending_writes++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / write operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes--; - os_mutex_exit(os_file_count_mutex); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: File pointer positioning to" - " file %s failed at\n" - "InnoDB: offset %lu %lu. Operating system" - " error number %lu.\n" - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - REFMAN "operating-system-error-codes.html\n", - name, (ulong) offset_high, (ulong) offset, - (ulong) GetLastError()); - - return(FALSE); - } - - ret = WriteFile(file, buf, (DWORD) n, &len, NULL); + ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped); /* Always do fsync to reduce the probability that when the OS crashes, a database page is only partially physically written to disk. 
*/ @@ -2683,10 +2586,6 @@ retry: } # endif /* UNIV_DO_FLUSH */ -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - os_mutex_enter(os_file_count_mutex); os_n_pending_writes--; os_mutex_exit(os_file_count_mutex); @@ -3090,7 +2989,7 @@ os_aio_array_create( array->n_reserved = 0; array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); #ifdef __WIN__ - array->native_events = ut_malloc(n * sizeof(os_native_event_t)); + array->handles = ut_malloc(n * sizeof(HANDLE)); #endif for (i = 0; i < n; i++) { slot = os_aio_array_get_nth_slot(array, i); @@ -3098,13 +2997,14 @@ os_aio_array_create( slot->pos = i; slot->reserved = FALSE; #ifdef WIN_ASYNC_IO - slot->event = os_event_create(NULL); + slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL); + over = &(slot->control); - over->hEvent = slot->event->handle; + over->hEvent = slot->handle; - *((array->native_events) + i) = over->hEvent; + *((array->handles) + i) = over->hEvent; #endif } @@ -3124,12 +3024,12 @@ os_aio_array_free( for (i = 0; i < array->n_slots; i++) { os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); - os_event_free(slot->event); + CloseHandle(slot->handle); } #endif /* WIN_ASYNC_IO */ #ifdef __WIN__ - ut_free(array->native_events); + ut_free(array->handles); #endif /* __WIN__ */ os_mutex_free(array->mutex); os_event_free(array->not_full); @@ -3255,7 +3155,8 @@ os_aio_array_wake_win_aio_at_shutdown( for (i = 0; i < array->n_slots; i++) { - os_event_set((array->slots + i)->event); + SetEvent(array->slots[i].handle); + } } #endif @@ -3480,7 +3381,7 @@ found: control = &(slot->control); control->Offset = (DWORD)offset; control->OffsetHigh = (DWORD)offset_high; - os_event_reset(slot->event); + ResetEvent(slot->handle); #endif os_mutex_exit(array->mutex); @@ -3518,7 +3419,7 @@ os_aio_array_free_slot( } #ifdef WIN_ASYNC_IO - os_event_reset(slot->event); + ResetEvent(slot->handle); #endif os_mutex_exit(array->mutex); } @@ -3906,15 +3807,18 @@ os_aio_windows_handle( n = 
array->n_slots; if (array == os_aio_sync_array) { - os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); + WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE); i = pos; } else { srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); - i = os_event_wait_multiple(n, - (array->native_events) - ); + i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE); } + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + os_thread_exit(NULL); + } + + os_mutex_enter(array->mutex); slot = os_aio_array_get_nth_slot(array, i); diff --git a/storage/xtradb/os/os0sync.c b/storage/xtradb/os/os0sync.c index dba997927cb..75bd6d44c2e 100644 --- a/storage/xtradb/os/os0sync.c +++ b/storage/xtradb/os/os0sync.c @@ -38,6 +38,7 @@ Created 9/6/1995 Heikki Tuuri #include "ut0mem.h" #include "srv0start.h" +#include "srv0srv.h" /* Type definition for an operating system mutex struct */ struct os_mutex_struct{ @@ -74,11 +75,225 @@ UNIV_INTERN ulint os_event_count = 0; UNIV_INTERN ulint os_mutex_count = 0; UNIV_INTERN ulint os_fast_mutex_count = 0; +/* The number of microseconds in a second. */ +static const ulint MICROSECS_IN_A_SECOND = 1000000; + /* Because a mutex is embedded inside an event and there is an event embedded inside a mutex, on free, this generates a recursive call. This version of the free event function doesn't acquire the global lock */ static void os_event_free_internal(os_event_t event); +/* On Windows (Vista and later), load function pointers for condition +variable handling. Those functions are not available in prior versions, +so we have to use them via runtime loading, as long as we support XP.
*/ +static void os_cond_module_init(void); + +#ifdef __WIN__ +/* Prototypes and function pointers for condition variable functions */ +typedef VOID (WINAPI* InitializeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static InitializeConditionVariableProc initialize_condition_variable; + +typedef BOOL (WINAPI* SleepConditionVariableCSProc) + (PCONDITION_VARIABLE ConditionVariable, + PCRITICAL_SECTION CriticalSection, + DWORD dwMilliseconds); +static SleepConditionVariableCSProc sleep_condition_variable; + +typedef VOID (WINAPI* WakeAllConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static WakeAllConditionVariableProc wake_all_condition_variable; + +typedef VOID (WINAPI* WakeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static WakeConditionVariableProc wake_condition_variable; +#endif + +/*********************************************************//** +Initialize condition variable */ +UNIV_INLINE +void +os_cond_init( +/*=========*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(initialize_condition_variable != NULL); + initialize_condition_variable(cond); +#else + ut_a(pthread_cond_init(cond, NULL) == 0); +#endif +} + +/*********************************************************//** +Do a timed wait on condition variable. +@return TRUE if timed out, FALSE otherwise */ +UNIV_INLINE +ibool +os_cond_wait_timed( +/*===============*/ + os_cond_t* cond, /*!< in: condition variable.
*/ + os_fast_mutex_t* mutex, /*!< in: fast mutex */ +#ifndef __WIN__ + const struct timespec* abstime /*!< in: timeout */ +#else + DWORD time_in_ms /*!< in: timeout in + milliseconds*/ +#endif /* !__WIN__ */ +) +{ +#ifdef __WIN__ + BOOL ret; + DWORD err; + + ut_a(sleep_condition_variable != NULL); + + ret = sleep_condition_variable(cond, mutex, time_in_ms); + + if (!ret) { + err = GetLastError(); + /* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx, + "Condition variables are subject to spurious wakeups + (those not associated with an explicit wake) and stolen wakeups + (another thread manages to run before the woken thread)." + Check for both types of timeouts. + Conditions are checked by the caller.*/ + if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) { + return(TRUE); + } + } + + ut_a(ret); + + return(FALSE); +#else + int ret; + + ret = pthread_cond_timedwait(cond, mutex, abstime); + + switch (ret) { + case 0: + case ETIMEDOUT: + /* We play it safe by checking for EINTR even though + according to the POSIX documentation it can't return EINTR. */ + case EINTR: + break; + + default: + fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: " + "%d: abstime={%lu,%lu}\n", + ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec); + ut_error; + } + + return(ret == ETIMEDOUT); +#endif +} +/*********************************************************//** +Wait on condition variable */ +UNIV_INLINE +void +os_cond_wait( +/*=========*/ + os_cond_t* cond, /*!< in: condition variable. 
*/ + os_fast_mutex_t* mutex) /*!< in: fast mutex */ +{ + ut_a(cond); + ut_a(mutex); + +#ifdef __WIN__ + ut_a(sleep_condition_variable != NULL); + ut_a(sleep_condition_variable(cond, mutex, INFINITE)); +#else + ut_a(pthread_cond_wait(cond, mutex) == 0); +#endif +} + +/*********************************************************//** +Wakes all threads waiting for condition variable */ +UNIV_INLINE +void +os_cond_broadcast( +/*==============*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(wake_all_condition_variable != NULL); + wake_all_condition_variable(cond); +#else + ut_a(pthread_cond_broadcast(cond) == 0); +#endif +} + +/*********************************************************//** +Wakes one thread waiting for condition variable */ +UNIV_INLINE +void +os_cond_signal( +/*==========*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(wake_condition_variable != NULL); + wake_condition_variable(cond); +#else + ut_a(pthread_cond_signal(cond) == 0); +#endif +} + +/*********************************************************//** +Destroys condition variable */ +UNIV_INLINE +void +os_cond_destroy( +/*============*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ +#ifdef __WIN__ + /* Do nothing */ +#else + ut_a(pthread_cond_destroy(cond) == 0); +#endif +} + +/*********************************************************//** +On Windows (Vista and later), load function pointers for condition variable +handling. Those functions are not available in prior versions, so we have to +use them via runtime loading, as long as we support XP. 
*/ +static +void +os_cond_module_init(void) +/*=====================*/ +{ +#ifdef __WIN__ + HMODULE h_dll; + + + h_dll = GetModuleHandle("kernel32"); + + initialize_condition_variable = (InitializeConditionVariableProc) + GetProcAddress(h_dll, "InitializeConditionVariable"); + sleep_condition_variable = (SleepConditionVariableCSProc) + GetProcAddress(h_dll, "SleepConditionVariableCS"); + wake_all_condition_variable = (WakeAllConditionVariableProc) + GetProcAddress(h_dll, "WakeAllConditionVariable"); + wake_condition_variable = (WakeConditionVariableProc) + GetProcAddress(h_dll, "WakeConditionVariable"); + + /* When using native condition variables, check function pointers */ + ut_a(initialize_condition_variable); + ut_a(sleep_condition_variable); + ut_a(wake_all_condition_variable); + ut_a(wake_condition_variable); +#endif +} + /*********************************************************//** Initializes global event and OS 'slow' mutex lists. */ UNIV_INTERN @@ -92,6 +307,9 @@ os_sync_init(void) os_sync_mutex = NULL; os_sync_mutex_inited = FALSE; + /* Now for Windows only */ + os_cond_module_init(); + os_sync_mutex = os_mutex_create(NULL); os_sync_mutex_inited = TRUE; @@ -146,42 +364,45 @@ os_event_create( const char* name) /*!< in: the name of the event, if NULL the event is created without a name */ { -#ifdef __WIN__ - os_event_t event; - - event = ut_malloc(sizeof(struct os_event_struct)); - - event->handle = CreateEvent(NULL, /* No security attributes */ - TRUE, /* Manual reset */ - FALSE, /* Initial state nonsignaled */ - (LPCTSTR) name); - if (!event->handle) { - fprintf(stderr, - "InnoDB: Could not create a Windows event semaphore;" - " Windows error %lu\n", - (ulong) GetLastError()); - } -#else /* Unix */ os_event_t event; - UT_NOT_USED(name); +#ifdef __WIN__ + if(!srv_use_native_conditions) { - event = ut_malloc(sizeof(struct os_event_struct)); + event = ut_malloc(sizeof(struct os_event_struct)); - os_fast_mutex_init(&(event->os_mutex)); + event->handle = 
CreateEvent(NULL, + TRUE, + FALSE, + (LPCTSTR) name); + if (!event->handle) { + fprintf(stderr, + "InnoDB: Could not create a Windows event" + " semaphore; Windows error %lu\n", + (ulong) GetLastError()); + } + } else /* Windows with condition variables */ +#endif - ut_a(0 == pthread_cond_init(&(event->cond_var), NULL)); + { + UT_NOT_USED(name); - event->is_set = FALSE; + event = ut_malloc(sizeof(struct os_event_struct)); - /* We return this value in os_event_reset(), which can then be - be used to pass to the os_event_wait_low(). The value of zero - is reserved in os_event_wait_low() for the case when the - caller does not want to pass any signal_count value. To - distinguish between the two cases we initialize signal_count - to 1 here. */ - event->signal_count = 1; -#endif /* __WIN__ */ + os_fast_mutex_init(&(event->os_mutex)); + + os_cond_init(&(event->cond_var)); + + event->is_set = FALSE; + + /* We return this value in os_event_reset(), which can then be + be used to pass to the os_event_wait_low(). The value of zero + is reserved in os_event_wait_low() for the case when the + caller does not want to pass any signal_count value. To + distinguish between the two cases we initialize signal_count + to 1 here. 
*/ + event->signal_count = 1; + } /* The os_sync_mutex can be NULL because during startup an event can be created [ because it's embedded in the mutex/rwlock ] before @@ -211,10 +432,15 @@ os_event_set( /*=========*/ os_event_t event) /*!< in: event to set */ { -#ifdef __WIN__ ut_a(event); - ut_a(SetEvent(event->handle)); -#else + +#ifdef __WIN__ + if (!srv_use_native_conditions) { + ut_a(SetEvent(event->handle)); + return; + } +#endif + ut_a(event); os_fast_mutex_lock(&(event->os_mutex)); @@ -224,11 +450,10 @@ os_event_set( } else { event->is_set = TRUE; event->signal_count += 1; - ut_a(0 == pthread_cond_broadcast(&(event->cond_var))); + os_cond_broadcast(&(event->cond_var)); } os_fast_mutex_unlock(&(event->os_mutex)); -#endif } /**********************************************************//** @@ -247,12 +472,14 @@ os_event_reset( { ib_int64_t ret = 0; -#ifdef __WIN__ ut_a(event); - ut_a(ResetEvent(event->handle)); -#else - ut_a(event); +#ifdef __WIN__ + if(!srv_use_native_conditions) { + ut_a(ResetEvent(event->handle)); + return(0); + } +#endif os_fast_mutex_lock(&(event->os_mutex)); @@ -264,7 +491,6 @@ os_event_reset( ret = event->signal_count; os_fast_mutex_unlock(&(event->os_mutex)); -#endif return(ret); } @@ -277,17 +503,20 @@ os_event_free_internal( os_event_t event) /*!< in: event to free */ { #ifdef __WIN__ - ut_a(event); - - ut_a(CloseHandle(event->handle)); -#else - ut_a(event); - - /* This is to avoid freeing the mutex twice */ - os_fast_mutex_free(&(event->os_mutex)); - - ut_a(0 == pthread_cond_destroy(&(event->cond_var))); + if(!srv_use_native_conditions) { + ut_a(event); + ut_a(CloseHandle(event->handle)); + } else #endif + { + ut_a(event); + + /* This is to avoid freeing the mutex twice */ + os_fast_mutex_free(&(event->os_mutex)); + + os_cond_destroy(&(event->cond_var)); + } + /* Remove from the list of events */ UT_LIST_REMOVE(os_event_list, os_event_list, event); @@ -306,16 +535,18 @@ os_event_free( os_event_t event) /*!< in: event to free */ { + 
ut_a(event); #ifdef __WIN__ - ut_a(event); - - ut_a(CloseHandle(event->handle)); -#else - ut_a(event); - - os_fast_mutex_free(&(event->os_mutex)); - ut_a(0 == pthread_cond_destroy(&(event->cond_var))); + if(!srv_use_native_conditions){ + ut_a(CloseHandle(event->handle)); + } else /*Windows with condition variables */ #endif + { + os_fast_mutex_free(&(event->os_mutex)); + + os_cond_destroy(&(event->cond_var)); + } + /* Remove from the list of events */ os_mutex_enter(os_sync_mutex); @@ -358,24 +589,25 @@ os_event_wait_low( returned by previous call of os_event_reset(). */ { -#ifdef __WIN__ - DWORD err; - - ut_a(event); - - UT_NOT_USED(reset_sig_count); - - /* Specify an infinite time limit for waiting */ - err = WaitForSingleObject(event->handle, INFINITE); - - ut_a(err == WAIT_OBJECT_0); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } -#else + ib_int64_t old_signal_count; +#ifdef __WIN__ + if(!srv_use_native_conditions) { + DWORD err; + + ut_a(event); + + UT_NOT_USED(reset_sig_count); + + /* Specify an infinite wait */ + err = WaitForSingleObject(event->handle, INFINITE); + + ut_a(err == WAIT_OBJECT_0); + return; + } +#endif + os_fast_mutex_lock(&(event->os_mutex)); if (reset_sig_count) { @@ -399,13 +631,12 @@ os_event_wait_low( return; } - pthread_cond_wait(&(event->cond_var), &(event->os_mutex)); + os_cond_wait(&(event->cond_var), &(event->os_mutex)); /* Solaris manual said that spurious wakeups may occur: we have to check if the event really has been signaled after we came here to wait */ } -#endif } /**********************************************************//** @@ -414,112 +645,112 @@ a timeout is exceeded. 
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ UNIV_INTERN ulint -os_event_wait_time( -/*===============*/ - os_event_t event, /*!< in: event to wait */ - ulint wtime) /*!< in: timeout in microseconds, or - OS_SYNC_INFINITE_TIME */ +os_event_wait_time_low( +/*===================*/ + os_event_t event, /*!< in: event to wait */ + ulint time_in_usec, /*!< in: timeout in + microseconds, or + OS_SYNC_INFINITE_TIME */ + ib_int64_t reset_sig_count) /*!< in: zero or the value + returned by previous call of + os_event_reset(). */ + { + ibool timed_out = FALSE; + #ifdef __WIN__ - DWORD err; + DWORD time_in_ms; - ut_a(event); + if (!srv_use_native_conditions) { + DWORD err; - if (wtime != OS_SYNC_INFINITE_TIME) { - err = WaitForSingleObject(event->handle, (DWORD) wtime / 1000); - } else { - err = WaitForSingleObject(event->handle, INFINITE); - } + ut_a(event); - if (err == WAIT_OBJECT_0) { + if (time_in_usec != OS_SYNC_INFINITE_TIME) { + time_in_ms = time_in_usec / 1000; + err = WaitForSingleObject(event->handle, time_in_ms); + } else { + err = WaitForSingleObject(event->handle, INFINITE); + } - return(0); - } else if (err == WAIT_TIMEOUT) { + if (err == WAIT_OBJECT_0) { + return(0); + } else if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) { + return(OS_SYNC_TIME_EXCEEDED); + } - return(OS_SYNC_TIME_EXCEEDED); - } else { ut_error; - return(1000000); /* dummy value to eliminate compiler warn. */ - } -#else - int err; - int ret = 0; - ulint tmp; - ib_int64_t old_count; - struct timeval tv_start; - struct timespec timeout; + /* Dummy value to eliminate compiler warning. */ + return(42); + } else { + ut_a(sleep_condition_variable != NULL); - if (wtime == OS_SYNC_INFINITE_TIME) { - os_event_wait(event); - return 0; - } - - /* Compute the absolute point in time at which to time out. 
*/ - gettimeofday(&tv_start, NULL); - tmp = tv_start.tv_usec + wtime; - timeout.tv_sec = tv_start.tv_sec + (tmp / 1000000); - timeout.tv_nsec = (tmp % 1000000) * 1000; - - os_fast_mutex_lock(&(event->os_mutex)); - old_count = event->signal_count; - - for (;;) { - if (event->is_set == TRUE || event->signal_count != old_count) - break; - - err = pthread_cond_timedwait(&(event->cond_var), - &(event->os_mutex), &timeout); - if (err == ETIMEDOUT) { - ret = OS_SYNC_TIME_EXCEEDED; - break; + if (time_in_usec != OS_SYNC_INFINITE_TIME) { + time_in_ms = time_in_usec / 1000; + } else { + time_in_ms = INFINITE; } } +#else + struct timespec abstime; - os_fast_mutex_unlock(&(event->os_mutex)); + if (time_in_usec != OS_SYNC_INFINITE_TIME) { + struct timeval tv; + int ret; + ulint sec; + ulint usec; - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + ret = ut_usectime(&sec, &usec); + ut_a(ret == 0); - os_thread_exit(NULL); + tv.tv_sec = sec; + tv.tv_usec = usec; + + tv.tv_usec += time_in_usec; + + if ((ulint) tv.tv_usec >= MICROSECS_IN_A_SECOND) { + tv.tv_sec += time_in_usec / MICROSECS_IN_A_SECOND; + tv.tv_usec %= MICROSECS_IN_A_SECOND; + } + + abstime.tv_sec = tv.tv_sec; + abstime.tv_nsec = tv.tv_usec * 1000; + } else { + abstime.tv_nsec = 999999999; + abstime.tv_sec = (time_t) ULINT_MAX; } - return ret; -#endif -} + ut_a(abstime.tv_nsec <= 999999999); -#ifdef __WIN__ -/**********************************************************//** -Waits for any event in an OS native event array. Returns if even a single -one is signaled or becomes signaled. 
-@return index of the event which was signaled */ -UNIV_INTERN -ulint -os_event_wait_multiple( -/*===================*/ - ulint n, /*!< in: number of events in the - array */ - os_native_event_t* native_event_array) - /*!< in: pointer to an array of event - handles */ -{ - DWORD index; +#endif /* __WIN__ */ - ut_a(native_event_array); - ut_a(n > 0); + os_fast_mutex_lock(&event->os_mutex); - index = WaitForMultipleObjects((DWORD) n, native_event_array, - FALSE, /* Wait for any 1 event */ - INFINITE); /* Infinite wait time - limit */ - ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */ - ut_a(index < WAIT_OBJECT_0 + n); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); + if (!reset_sig_count) { + reset_sig_count = event->signal_count; } - return(index - WAIT_OBJECT_0); + do { + if (event->is_set || event->signal_count != reset_sig_count) { + + break; + } + + timed_out = os_cond_wait_timed( + &event->cond_var, &event->os_mutex, +#ifndef __WIN__ + &abstime +#else + time_in_ms +#endif /* !__WIN__ */ + ); + + } while (!timed_out); + + os_fast_mutex_unlock(&event->os_mutex); + + return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0); } -#endif /*********************************************************//** Creates an operating system mutex semaphore. 
Because these are slow, the @@ -532,15 +763,6 @@ os_mutex_create( const char* name) /*!< in: the name of the mutex, if NULL the mutex is created without a name */ { -#ifdef __WIN__ - HANDLE mutex; - os_mutex_t mutex_str; - - mutex = CreateMutex(NULL, /* No security attributes */ - FALSE, /* Initial state: no owner */ - (LPCTSTR) name); - ut_a(mutex); -#else os_fast_mutex_t* mutex; os_mutex_t mutex_str; @@ -549,7 +771,6 @@ os_mutex_create( mutex = ut_malloc(sizeof(os_fast_mutex_t)); os_fast_mutex_init(mutex); -#endif mutex_str = ut_malloc(sizeof(os_mutex_str_t)); mutex_str->handle = mutex; @@ -580,25 +801,11 @@ os_mutex_enter( /*===========*/ os_mutex_t mutex) /*!< in: mutex to acquire */ { -#ifdef __WIN__ - DWORD err; - - ut_a(mutex); - - /* Specify infinite time limit for waiting */ - err = WaitForSingleObject(mutex->handle, INFINITE); - - ut_a(err == WAIT_OBJECT_0); - - (mutex->count)++; - ut_a(mutex->count == 1); -#else os_fast_mutex_lock(mutex->handle); (mutex->count)++; ut_a(mutex->count == 1); -#endif } /**********************************************************//** @@ -614,11 +821,7 @@ os_mutex_exit( ut_a(mutex->count == 1); (mutex->count)--; -#ifdef __WIN__ - ut_a(ReleaseMutex(mutex->handle)); -#else os_fast_mutex_unlock(mutex->handle); -#endif } /**********************************************************//** @@ -647,15 +850,9 @@ os_mutex_free( os_mutex_exit(os_sync_mutex); } -#ifdef __WIN__ - ut_a(CloseHandle(mutex->handle)); - - ut_free(mutex); -#else os_fast_mutex_free(mutex->handle); ut_free(mutex->handle); ut_free(mutex); -#endif } /*********************************************************//** diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index f39d1b8a758..86a328e77c0 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -139,6 +139,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX; /** Place locks to records only i.e. 
do not use next-key locking except on duplicate key checking and foreign key checking */ UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; +#ifdef __WIN__ +/* Windows native condition variables. We use runtime loading / function +pointers, because they are not available on Windows Server 2003 and +Windows XP/2000. + +We use condition variables for events on Windows if possible, even though os_event +closely resembles a Windows kernel event object API-wise. The reason is +performance: kernel objects are heavyweight and WaitForSingleObject() is a +performance killer causing the calling thread to context switch. Besides, InnoDB +preallocates a large number (often millions) of os_events. With kernel event +objects this takes a big chunk out of the non-paged pool, which is better suited +for tasks like IO than for storing idle event objects. */ +UNIV_INTERN ibool srv_use_native_conditions = FALSE; +#endif /* __WIN__ */ UNIV_INTERN ulint srv_n_data_files = 0; UNIV_INTERN char** srv_data_file_names = NULL; diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index cef045d72e1..e2ff49ce390 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1265,23 +1265,23 @@ innobase_start_or_create_for_mysql(void) case OS_WIN95: case OS_WIN31: case OS_WINNT: - /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, - and NT use simulated aio. In NT Windows provides async i/o, - but when run in conjunction with InnoDB Hot Backup, it seemed - to corrupt the data files. */ + srv_use_native_conditions = FALSE; + break; - os_aio_use_native_aio = FALSE; - break; - default: + case OS_WIN2000: + case OS_WINXP: + /* On 2000 and XP, async IO is available, but no condition variables.
*/ + os_aio_use_native_aio = TRUE; + srv_use_native_conditions = FALSE; + break; + + default: /* On Win 2000 and XP use async i/o */ - //os_aio_use_native_aio = TRUE; - os_aio_use_native_aio = FALSE; - fprintf(stderr, - "InnoDB: Windows native async i/o is disabled as default.\n" - "InnoDB: It is not applicable for the current" - " multi io threads implementation.\n"); - break; - } + /* Vista and later have both async IO and condition variables */ + os_aio_use_native_aio = TRUE; + srv_use_native_conditions = TRUE; + break; + } #endif if (srv_file_flush_method_str == NULL) { /* These are the default options */ @@ -1289,6 +1289,10 @@ innobase_start_or_create_for_mysql(void) srv_unix_file_flush_method = SRV_UNIX_FSYNC; srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; +#ifdef __WIN__ + srv_n_read_io_threads = srv_n_write_io_threads = 1; +#endif + #ifndef __WIN__ } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) { srv_unix_file_flush_method = SRV_UNIX_FSYNC; @@ -1315,16 +1319,7 @@ innobase_start_or_create_for_mysql(void) } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) { srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; os_aio_use_native_aio = FALSE; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, - "async_unbuffered")) { - srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; - os_aio_use_native_aio = TRUE; - srv_n_read_io_threads = srv_n_write_io_threads = 1; - fprintf(stderr, - "InnoDB: 'async_unbuffered' was detected as innodb_flush_method.\n" - "InnoDB: Windows native async i/o is enabled.\n" - "InnoDB: And io threads are restricted.\n"); + } #endif } else { fprintf(stderr, From 4171483b539555f50336d4304d931ef743cf7011 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 12 Jun 2011 15:52:07 +0200 Subject: [PATCH 02/14] Backport Fix for Bug#24509 - 2048 file descriptor limit on windows needs increasing. 
The patch replaces the use of the POSIX I/O interfaces in mysys on Windows with the Win32 API calls (CreateFile, WriteFile, etc). The Windows HANDLE for the open file is stored in the my_file_info struct, along with a flag for append mode (because the Windows API does not support opening files in append mode in all cases). The default max open files has been increased to 16384 and can be increased further by setting --max-open-files= during the server start. A noteworthy benefit of this patch is that it removes limits from the table_cache size - allowing for more simultaneous users --- client/mysqlbinlog.cc | 2 +- client/mysqlslap.c | 2 +- client/readline.cc | 2 +- include/config-win.h | 3 +- include/my_dir.h | 4 + include/my_global.h | 41 +- include/my_sys.h | 23 +- libmysql/CMakeLists.txt | 1 + mysys/CMakeLists.txt | 2 +- mysys/default_modify.c | 8 +- mysys/mf_iocache.c | 3 - mysys/my_chsize.c | 19 +- mysys/my_create.c | 15 +- mysys/my_dup.c | 6 +- mysys/my_file.c | 1 + mysys/my_fopen.c | 48 +- mysys/my_fstream.c | 17 +- mysys/my_init.c | 3 +- mysys/my_lib.c | 26 +- mysys/my_lock.c | 137 ++++- mysys/my_mmap.c | 6 +- mysys/my_open.c | 227 +------ mysys/my_pread.c | 81 +-- mysys/my_quick.c | 22 +- mysys/my_read.c | 7 +- mysys/my_seek.c | 17 +- mysys/my_static.c | 2 +- mysys/my_sync.c | 2 +- mysys/my_winerr.c | 123 ++++ mysys/my_winfile.c | 681 +++++++++++++++++++++ mysys/my_write.c | 40 +- mysys/mysys_priv.h | 24 + sql/discover.cc | 2 +- sql/item_strfunc.cc | 6 +- storage/archive/ha_archive.cc | 6 +- storage/innobase/handler/ha_innodb.cc | 22 + storage/innodb_plugin/handler/ha_innodb.cc | 25 +- storage/maria/ma_extra.c | 3 +- storage/myisam/mi_locking.c | 8 +- storage/xtradb/handler/ha_innodb.cc | 25 +- 40 files changed, 1268 insertions(+), 424 deletions(-) create mode 100644 mysys/my_winerr.c create mode 100644 mysys/my_winfile.c diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc index 5d458090352..59e89f02128 100644 --- a/client/mysqlbinlog.cc +++ 
b/client/mysqlbinlog.cc @@ -2057,7 +2057,7 @@ static Exit_status dump_local_log_entries(PRINT_EVENT_INFO *print_event_info, return ERROR_STOP; } #endif - if (init_io_cache(file, fileno(stdin), 0, READ_CACHE, (my_off_t) 0, + if (init_io_cache(file, my_fileno(stdin), 0, READ_CACHE, (my_off_t) 0, 0, MYF(MY_WME | MY_NABP | MY_DONT_CHECK_FILESIZE))) { error("Failed to init IO cache."); diff --git a/client/mysqlslap.c b/client/mysqlslap.c index c38380c7306..de1992e2d57 100644 --- a/client/mysqlslap.c +++ b/client/mysqlslap.c @@ -1223,7 +1223,7 @@ get_options(int *argc,char ***argv) if (opt_csv_str[0] == '-') { - csv_file= fileno(stdout); + csv_file= my_fileno(stdout); } else { diff --git a/client/readline.cc b/client/readline.cc index 4edccebef39..5293f7546e4 100644 --- a/client/readline.cc +++ b/client/readline.cc @@ -43,7 +43,7 @@ LINE_BUFFER *batch_readline_init(ulong max_size,FILE *file) if (!(line_buff=(LINE_BUFFER*) my_malloc(sizeof(*line_buff),MYF(MY_WME | MY_ZEROFILL)))) return 0; - if (init_line_buffer(line_buff,fileno(file),IO_SIZE,max_size)) + if (init_line_buffer(line_buff,my_fileno(file),IO_SIZE,max_size)) { my_free(line_buff,MYF(0)); return 0; diff --git a/include/config-win.h b/include/config-win.h index 6d12bb0e33f..3fd2c6c3a43 100644 --- a/include/config-win.h +++ b/include/config-win.h @@ -66,7 +66,6 @@ #endif /* File and lock constants */ -#define O_SHARE 0x1000 /* Open file in sharing mode */ #ifdef __BORLANDC__ #define F_RDLCK LK_NBLCK /* read lock */ #define F_WRLCK LK_NBRLCK /* write lock */ @@ -374,7 +373,7 @@ inline ulonglong double2ulonglong(double d) #define FN_DEVCHAR ':' #define FN_NETWORK_DRIVES /* Uses \\ to indicate network drives */ #define FN_NO_CASE_SENCE /* Files are not case-sensitive */ -#define OS_FILE_LIMIT 2048 +#define OS_FILE_LIMIT UINT_MAX #define DO_NOT_REMOVE_THREAD_WRAPPERS #define thread_safe_increment(V,L) InterlockedIncrement((long*) &(V)) diff --git a/include/my_dir.h b/include/my_dir.h index 06509a3af19..90d708ac811 
100644 --- a/include/my_dir.h +++ b/include/my_dir.h @@ -69,7 +69,11 @@ typedef struct my_stat #else +#if(_MSC_VER) +#define MY_STAT struct _stati64 /* 64 bit file size */ +#else #define MY_STAT struct stat /* Orginal struct have what we need */ +#endif #endif /* USE_MY_STAT_STRUCT */ diff --git a/include/my_global.h b/include/my_global.h index 8b71410dbf0..60c53ac937c 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -784,7 +784,41 @@ typedef SOCKET_SIZE_TYPE size_socket; #define FN_DIRSEP "/" /* Valid directory separators */ #define FN_ROOTDIR "/" #endif -#define MY_NFILE 64 /* This is only used to save filenames */ + +/* + MY_FILE_MIN is a Windows speciality and is used to quickly detect + the mismatch of CRT and mysys file IO usage on Windows at runtime. + CRT file descriptors can be in the range 0-2047, whereas descriptors returned + by my_open() will start with 2048. If a file descriptor with value less than + MY_FILE_MIN is passed to a mysys IO function, chances are it stems from + open()/fileno() and not my_open()/my_fileno(). + + For Posix, mysys functions are light wrappers around libc, and MY_FILE_MIN + is logically 0. +*/ + +#ifdef _WIN32 +#define MY_FILE_MIN 2048 +#else +#define MY_FILE_MIN 0 +#endif + +/* + MY_NFILE is the default size of the my_file_info array. + + It is larger on Windows, because all file handles are stored in my_file_info. + Default size is 16384 and this should be enough for most cases. If it is not + enough, --max-open-files with a larger value can be used. + + For Posix, the my_file_info array is only used to store filenames for + error reporting and its size is not a limitation for number of open files.
+*/ +#ifdef _WIN32 +#define MY_NFILE (16384 + MY_FILE_MIN) +#else +#define MY_NFILE 64 +#endif + #ifndef OS_FILE_LIMIT #define OS_FILE_LIMIT UINT_MAX #endif @@ -821,9 +855,8 @@ typedef SOCKET_SIZE_TYPE size_socket; /* Some things that this system doesn't have */ #define NO_HASH /* Not needed anymore */ -#ifdef __WIN__ -#define NO_DIR_LIBRARY /* Not standar dir-library */ -#define USE_MY_STAT_STRUCT /* For my_lib */ +#ifdef _WIN32 +#define NO_DIR_LIBRARY /* Not standard dir-library */ #endif /* Some defines of functions for portability */ diff --git a/include/my_sys.h b/include/my_sys.h index d391492983d..3368236694d 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -342,11 +342,12 @@ enum file_type struct st_my_file_info { - char * name; - enum file_type type; -#if defined(THREAD) && !defined(HAVE_PREAD) - pthread_mutex_t mutex; + char *name; +#ifdef _WIN32 + HANDLE fhandle; /* win32 file handle */ + int oflag; /* open flags, e.g O_APPEND */ #endif + enum file_type type; }; extern struct st_my_file_info *my_file_info; @@ -650,12 +651,12 @@ extern void *my_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen); -#ifdef __WIN__ -extern int my_access(const char *path, int amode); -extern File my_sopen(const char *path, int oflag, int shflag, int pmode); +#ifdef _WIN32 +extern int my_access(const char *path, int amode); #else #define my_access access #endif + extern int check_if_legal_filename(const char *path); extern int check_if_legal_tablename(const char *path); @@ -666,6 +667,13 @@ extern int nt_share_delete(const char *name,myf MyFlags); #define my_delete_allow_opened(fname,flags) my_delete((fname),(flags)) #endif +#ifdef _WIN32 +/* Windows-only functions (CRT equivalents)*/ +extern File my_sopen(const char *path, int oflag, int shflag, int pmode); +extern HANDLE my_get_osfhandle(File fd); +extern void my_osmaperr(unsigned long last_error); +#endif + #ifndef TERMINATE extern void TERMINATE(FILE *file, uint flag); #endif @@ 
-675,6 +683,7 @@ extern FILE *my_fopen(const char *FileName,int Flags,myf MyFlags); extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags); extern FILE *my_freopen(const char *path, const char *mode, FILE *stream); extern int my_fclose(FILE *fd,myf MyFlags); +extern File my_fileno(FILE *fd); extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags); extern int my_chmod(const char *name, mode_t mode, myf my_flags); extern int my_sync(File fd, myf my_flags); diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt index 4ac0b9a01ee..7618a8367de 100755 --- a/libmysql/CMakeLists.txt +++ b/libmysql/CMakeLists.txt @@ -87,6 +87,7 @@ SET(CLIENT_SOURCES ../mysys/array.c ../strings/bchange.c ../strings/bmove.c ../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c ../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c ../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c + ../mysys/my_wincond.c ../mysys/my_winthread.c ../mysys/my_winfile.c ../mysys/my_winerr.c ../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c ../mysys/safemalloc.c ../mysys/sha1.c ../strings/str2int.c ../strings/str_alloc.c ../strings/strcend.c ../strings/strcont.c ../strings/strend.c diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 9ab19222caf..0cf10e6e993 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -38,7 +38,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c default_ my_mkdir.c my_mmap.c my_net.c my_once.c my_open.c my_pread.c my_pthread.c my_quick.c my_read.c my_realloc.c my_redel.c my_rename.c my_seek.c my_sleep.c my_static.c my_symlink.c my_symlink2.c my_sync.c my_thr_init.c my_wincond.c - my_windac.c my_winthread.c my_write.c ptr_cmp.c queues.c stacktrace.c + my_winerr.c my_winfile.c my_windac.c my_winthread.c my_write.c ptr_cmp.c queues.c stacktrace.c rijndael.c safemalloc.c sha1.c string.c thr_alarm.c thr_lock.c thr_mutex.c 
thr_rwlock.c tree.c typelib.c my_vle.c base64.c my_memmem.c my_getpagesize.c lf_alloc-pin.c lf_dynarray.c lf_hash.c diff --git a/mysys/default_modify.c b/mysys/default_modify.c index 88df0122da2..ccbf47176a6 100644 --- a/mysys/default_modify.c +++ b/mysys/default_modify.c @@ -21,7 +21,7 @@ #define BUFF_SIZE 1024 #define RESERVE 1024 /* Extend buffer with this extent */ -#ifdef __WIN__ +#ifdef _WIN32 #define NEWLINE "\r\n" #define NEWLINE_LEN 2 #else @@ -78,7 +78,7 @@ int modify_defaults_file(const char *file_location, const char *option, DBUG_RETURN(2); /* my_fstat doesn't use the flag parameter */ - if (my_fstat(fileno(cnf_file), &file_stat, MYF(0))) + if (my_fstat(my_fileno(cnf_file), &file_stat, MYF(0))) goto malloc_err; if (option && option_value) @@ -96,7 +96,7 @@ int modify_defaults_file(const char *file_location, const char *option, NEWLINE_LEN + /* Space for newline */ RESERVE); /* Some additional space */ - buffer_size= (file_stat.st_size + + buffer_size= (uint)(file_stat.st_size + 1); /* The ending zero */ /* @@ -213,7 +213,7 @@ int modify_defaults_file(const char *file_location, const char *option, if (opt_applied) { /* Don't write the file if there are no changes to be made */ - if (my_chsize(fileno(cnf_file), (my_off_t) (dst_ptr - file_buffer), 0, + if (my_chsize(my_fileno(cnf_file), (my_off_t) (dst_ptr - file_buffer), 0, MYF(MY_WME)) || my_fseek(cnf_file, 0, MY_SEEK_SET, MYF(0)) || my_fwrite(cnf_file, (uchar*) file_buffer, (size_t) (dst_ptr - file_buffer), diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index 3824669365f..1905437cc0a 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -1659,9 +1659,6 @@ int my_block_write(register IO_CACHE *info, const uchar *Buffer, size_t Count, Buffer+=length; pos+= length; Count-= length; -#ifndef HAVE_PREAD - info->seek_not_done=1; -#endif } /* Check if we want to write inside the used part of the buffer.*/ diff --git a/mysys/my_chsize.c b/mysys/my_chsize.c index b1dbb22c687..b9013811b34 100644 --- 
a/mysys/my_chsize.c +++ b/mysys/my_chsize.c @@ -52,20 +52,13 @@ int my_chsize(File fd, my_off_t newlength, int filler, myf MyFlags) if (oldsize > newlength) { -#if defined(HAVE_SETFILEPOINTER) - /* This is for the moment only true on windows */ - long is_success; - HANDLE win_file= (HANDLE) _get_osfhandle(fd); - long length_low, length_high; - length_low= (long) (ulong) newlength; - length_high= (long) ((ulonglong) newlength >> 32); - is_success= SetFilePointer(win_file, length_low, &length_high, FILE_BEGIN); - if (is_success == -1 && (my_errno= GetLastError()) != NO_ERROR) +#ifdef _WIN32 + if (my_win_chsize(fd, newlength)) + { + my_errno= errno; goto err; - if (SetEndOfFile(win_file)) - DBUG_RETURN(0); - my_errno= GetLastError(); - goto err; + } + DBUG_RETURN(0); #elif defined(HAVE_FTRUNCATE) if (ftruncate(fd, (off_t) newlength)) { diff --git a/mysys/my_create.c b/mysys/my_create.c index 5c9a1e027d2..d0436276d03 100644 --- a/mysys/my_create.c +++ b/mysys/my_create.c @@ -18,7 +18,7 @@ #include "mysys_err.h" #include #include -#if defined(__WIN__) +#if defined(_WIN32) #include #endif @@ -41,16 +41,12 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, FileName, CreateFlags, access_flags, MyFlags)); #if !defined(NO_OPEN_3) - fd = open((char *) FileName, access_flags | O_CREAT, + fd= open((char *) FileName, access_flags | O_CREAT, CreateFlags ? 
CreateFlags : my_umask); -#elif defined(VMS) - fd = open((char *) FileName, access_flags | O_CREAT, 0, - "ctx=stm","ctx=bin"); -#elif defined(__WIN__) - fd= my_sopen((char *) FileName, access_flags | O_CREAT | O_BINARY, - SH_DENYNO, MY_S_IREAD | MY_S_IWRITE); +#elif defined(_WIN32) + fd= my_win_open(FileName, access_flags | O_CREAT); #else - fd = open(FileName, access_flags); + fd= open(FileName, access_flags); #endif if ((MyFlags & MY_SYNC_DIR) && (fd >=0) && @@ -71,6 +67,7 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, if (unlikely(fd >= 0 && rc < 0)) { int tmp= my_errno; + my_close(fd, MyFlags); my_delete(FileName, MyFlags); my_errno= tmp; } diff --git a/mysys/my_dup.c b/mysys/my_dup.c index 55f5e0c0099..5fdd6e9f364 100644 --- a/mysys/my_dup.c +++ b/mysys/my_dup.c @@ -29,7 +29,11 @@ File my_dup(File file, myf MyFlags) const char *filename; DBUG_ENTER("my_dup"); DBUG_PRINT("my",("file: %d MyFlags: %d", file, MyFlags)); - fd = dup(file); +#ifdef _WIN32 + fd= my_win_dup(file); +#else + fd= dup(file); +#endif filename= (((uint) file < my_file_limit) ? 
my_file_info[(int) file].name : "Unknown"); DBUG_RETURN(my_register_filename(fd, filename, FILE_BY_DUP, diff --git a/mysys/my_file.c b/mysys/my_file.c index 594f361437f..d8d51b91ab2 100644 --- a/mysys/my_file.c +++ b/mysys/my_file.c @@ -97,6 +97,7 @@ uint my_set_max_open_files(uint files) DBUG_ENTER("my_set_max_open_files"); DBUG_PRINT("enter",("files: %u my_file_limit: %u", files, my_file_limit)); + files+= MY_FILE_MIN; files= set_max_open_files(min(files, OS_FILE_LIMIT)); if (files <= MY_NFILE) DBUG_RETURN(files); diff --git a/mysys/my_fopen.c b/mysys/my_fopen.c index a4b0c9f895d..ae631a59353 100644 --- a/mysys/my_fopen.c +++ b/mysys/my_fopen.c @@ -46,24 +46,14 @@ FILE *my_fopen(const char *filename, int flags, myf MyFlags) DBUG_ENTER("my_fopen"); DBUG_PRINT("my",("Name: '%s' flags: %d MyFlags: %d", filename, flags, MyFlags)); - /* - if we are not creating, then we need to use my_access to make sure - the file exists since Windows doesn't handle files like "com1.sym" - very well - */ -#ifdef __WIN__ - if (check_if_legal_filename(filename)) - { - errno= EACCES; - fd= 0; - } - else + + make_ftype(type,flags); + +#ifdef _WIN32 + fd= my_win_fopen(filename, type); +#else + fd= fopen(filename, type); #endif - { - make_ftype(type,flags); - fd = fopen(filename, type); - } - if (fd != 0) { /* @@ -71,18 +61,20 @@ FILE *my_fopen(const char *filename, int flags, myf MyFlags) on some OS (SUNOS). Actually the filename save isn't that important so we can ignore if this doesn't work. 
*/ - if ((uint) fileno(fd) >= my_file_limit) + + int filedesc= my_fileno(fd); + if ((uint)filedesc >= my_file_limit) { thread_safe_increment(my_stream_opened,&THR_LOCK_open); DBUG_RETURN(fd); /* safeguard */ } pthread_mutex_lock(&THR_LOCK_open); - if ((my_file_info[fileno(fd)].name = (char*) + if ((my_file_info[filedesc].name= (char*) my_strdup(filename,MyFlags))) { my_stream_opened++; my_file_total_opened++; - my_file_info[fileno(fd)].type = STREAM_BY_FOPEN; + my_file_info[filedesc].type= STREAM_BY_FOPEN; pthread_mutex_unlock(&THR_LOCK_open); DBUG_PRINT("exit",("stream: 0x%lx", (long) fd)); DBUG_RETURN(fd); @@ -240,8 +232,13 @@ int my_fclose(FILE *fd, myf MyFlags) DBUG_PRINT("my",("stream: 0x%lx MyFlags: %d", (long) fd, MyFlags)); pthread_mutex_lock(&THR_LOCK_open); - file=fileno(fd); - if ((err = fclose(fd)) < 0) + file= my_fileno(fd); +#ifndef _WIN32 + err= fclose(fd); +#else + err= my_win_fclose(fd); +#endif + if(err < 0) { my_errno=errno; if (MyFlags & (MY_FAE | MY_WME)) @@ -272,7 +269,12 @@ FILE *my_fdopen(File Filedes, const char *name, int Flags, myf MyFlags) Filedes, Flags, MyFlags)); make_ftype(type,Flags); - if ((fd = fdopen(Filedes, type)) == 0) +#ifdef _WIN32 + fd= my_win_fdopen(Filedes, type); +#else + fd= fdopen(Filedes, type); +#endif + if (!fd) { my_errno=errno; if (MyFlags & (MY_FAE | MY_WME)) diff --git a/mysys/my_fstream.c b/mysys/my_fstream.c index f3b5418b906..2059e1a9f18 100644 --- a/mysys/my_fstream.c +++ b/mysys/my_fstream.c @@ -56,11 +56,11 @@ size_t my_fread(FILE *stream, uchar *Buffer, size_t Count, myf MyFlags) { if (ferror(stream)) my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG), - my_filename(fileno(stream)),errno); + my_filename(my_fileno(stream)),errno); else if (MyFlags & (MY_NABP | MY_FNABP)) my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG), - my_filename(fileno(stream)),errno); + my_filename(my_fileno(stream)),errno); } my_errno=errno ? 
errno : -1; if (ferror(stream) || MyFlags & (MY_NABP | MY_FNABP)) @@ -142,7 +142,7 @@ size_t my_fwrite(FILE *stream, const uchar *Buffer, size_t Count, myf MyFlags) if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) { my_error(EE_WRITE, MYF(ME_BELL+ME_WAITTANG), - my_filename(fileno(stream)),errno); + my_filename(my_fileno(stream)),errno); } writtenbytes= (size_t) -1; /* Return that we got error */ break; @@ -182,3 +182,14 @@ my_off_t my_ftell(FILE *stream, myf MyFlags __attribute__((unused))) DBUG_PRINT("exit",("ftell: %lu",(ulong) pos)); DBUG_RETURN((my_off_t) pos); } /* my_ftell */ + + +/* Get a File corresponding to the stream*/ +int my_fileno(FILE *f) +{ +#ifdef _WIN32 + return my_win_fileno(f); +#else + return fileno(f); +#endif +} diff --git a/mysys/my_init.c b/mysys/my_init.c index e7ab9ba7a1f..7f174cea1f0 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -274,6 +274,7 @@ void my_parameter_handler(const wchar_t * expression, const wchar_t * function, { DBUG_PRINT("my",("Expression: %s function: %s file: %s, line: %d", expression, function, file, line)); + __debugbreak(); } @@ -298,7 +299,7 @@ int handle_rtc_failure(int err_type, const char *file, int line, fprintf(stderr, " At %s:%d\n", file, line); va_end(args); (void) fflush(stderr); - + __debugbreak(); return 0; /* Error is handled */ } #pragma runtime_checks("", restore) diff --git a/mysys/my_lib.c b/mysys/my_lib.c index c18d14fb549..033f8789b49 100644 --- a/mysys/my_lib.c +++ b/mysys/my_lib.c @@ -35,8 +35,7 @@ # if defined(HAVE_NDIR_H) # include # endif -# if defined(__WIN__) -# include +# if defined(_WIN32) # ifdef __BORLANDC__ # include # endif @@ -92,7 +91,7 @@ static int comp_names(struct fileinfo *a, struct fileinfo *b) } /* comp_names */ -#if !defined(__WIN__) +#if !defined(_WIN32) MY_DIR *my_dir(const char *path, myf MyFlags) { @@ -507,19 +506,24 @@ error: DBUG_RETURN((MY_DIR *) NULL); } /* my_dir */ -#endif /* __WIN__ */ +#endif /* _WIN32 */ 
/**************************************************************************** ** File status ** Note that MY_STAT is assumed to be same as struct stat ****************************************************************************/ -int my_fstat(int Filedes, MY_STAT *stat_area, + +int my_fstat(File Filedes, MY_STAT *stat_area, myf MyFlags __attribute__((unused))) { DBUG_ENTER("my_fstat"); DBUG_PRINT("my",("fd: %d MyFlags: %d", Filedes, MyFlags)); +#ifdef _WIN32 + DBUG_RETURN(my_win_fstat(Filedes, stat_area)); +#else DBUG_RETURN(fstat(Filedes, (struct stat *) stat_area)); +#endif } @@ -531,11 +535,15 @@ MY_STAT *my_stat(const char *path, MY_STAT *stat_area, myf my_flags) (long) stat_area, my_flags)); if ((m_used= (stat_area == NULL))) - if (!(stat_area = (MY_STAT *) my_malloc(sizeof(MY_STAT), my_flags))) + if (!(stat_area= (MY_STAT *) my_malloc(sizeof(MY_STAT), my_flags))) goto error; - if (! stat((char *) path, (struct stat *) stat_area) ) - DBUG_RETURN(stat_area); - +#ifndef _WIN32 + if (! stat((char *) path, (struct stat *) stat_area) ) + DBUG_RETURN(stat_area); +#else + if (! 
my_win_stat(path, stat_area) ) + DBUG_RETURN(stat_area); +#endif DBUG_PRINT("error",("Got errno: %d from stat", errno)); my_errno= errno; if (m_used) /* Free if new area */ diff --git a/mysys/my_lock.c b/mysys/my_lock.c index 8450fcfc30a..6bbb177e4b6 100644 --- a/mysys/my_lock.c +++ b/mysys/my_lock.c @@ -22,13 +22,113 @@ #undef NO_ALARM_LOOP #endif #include -#ifdef __WIN__ -#include -#endif #ifdef __NETWARE__ #include #endif +#ifdef _WIN32 +#define WIN_LOCK_INFINITE -1 +#define WIN_LOCK_SLEEP_MILLIS 100 + +static int win_lock(File fd, int locktype, my_off_t start, my_off_t length, + int timeout_sec) +{ + LARGE_INTEGER liOffset,liLength; + DWORD dwFlags; + OVERLAPPED ov= {0}; + HANDLE hFile= (HANDLE)my_get_osfhandle(fd); + DWORD lastError= 0; + int i; + int timeout_millis= timeout_sec * 1000; + + DBUG_ENTER("win_lock"); + + liOffset.QuadPart= start; + liLength.QuadPart= length; + + ov.Offset= liOffset.LowPart; + ov.OffsetHigh= liOffset.HighPart; + + if (locktype == F_UNLCK) + { + if (UnlockFileEx(hFile, 0, liLength.LowPart, liLength.HighPart, &ov)) + DBUG_RETURN(0); + /* + For compatibility with fcntl implementation, ignore error, + if region was not locked + */ + if (GetLastError() == ERROR_NOT_LOCKED) + { + SetLastError(0); + DBUG_RETURN(0); + } + goto error; + } + else if (locktype == F_RDLCK) + /* read lock is mapped to a shared lock. */ + dwFlags= 0; + else + /* write lock is mapped to an exclusive lock. */ + dwFlags= LOCKFILE_EXCLUSIVE_LOCK; + + /* + Drop old lock first to avoid double locking. + During analyze of Bug#38133 (Myisamlog test fails on Windows) + I met the situation that the program myisamlog locked the file + exclusively, then additionally shared, then did one unlock, and + then blocked on an attempt to lock it exclusively again. + Unlocking before every lock fixed the problem. + Note that this introduces a race condition. 
When the application + wants to convert an exclusive lock into a shared one, it will now + first unlock the file and then lock it shared. A waiting exclusive + lock could step in here. For reasons described in Bug#38133 and + Bug#41124 (Server hangs on Windows with --external-locking after + INSERT...SELECT) and in the review thread at + http://lists.mysql.com/commits/60721 it seems to be the better + option than not to unlock here. + If one day someone notices a way how to do file lock type changes + on Windows without unlocking before taking the new lock, please + change this code accordingly to fix the race condition. + */ + if (!UnlockFileEx(hFile, 0, liLength.LowPart, liLength.HighPart, &ov) && + (GetLastError() != ERROR_NOT_LOCKED)) + goto error; + + if (timeout_sec == WIN_LOCK_INFINITE) + { + if (LockFileEx(hFile, dwFlags, 0, liLength.LowPart, liLength.HighPart, &ov)) + DBUG_RETURN(0); + goto error; + } + + dwFlags|= LOCKFILE_FAIL_IMMEDIATELY; + timeout_millis= timeout_sec * 1000; + /* Try lock in a loop, until the lock is acquired or timeout happens */ + for(i= 0; ;i+= WIN_LOCK_SLEEP_MILLIS) + { + if (LockFileEx(hFile, dwFlags, 0, liLength.LowPart, liLength.HighPart, &ov)) + DBUG_RETURN(0); + + if (GetLastError() != ERROR_LOCK_VIOLATION) + goto error; + + if (i >= timeout_millis) + break; + Sleep(WIN_LOCK_SLEEP_MILLIS); + } + + /* timeout */ + errno= EAGAIN; + DBUG_RETURN(-1); + +error: + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); +} +#endif + + + /* Lock a part of a file @@ -97,29 +197,16 @@ int my_lock(File fd, int locktype, my_off_t start, my_off_t length, DBUG_RETURN(0); } } -#elif defined(HAVE_LOCKING) - /* Windows */ +#elif defined(_WIN32) { - my_bool error= FALSE; - pthread_mutex_lock(&my_file_info[fd].mutex); - if (MyFlags & MY_SEEK_NOT_DONE) - { - if( my_seek(fd,start,MY_SEEK_SET,MYF(MyFlags & ~MY_SEEK_NOT_DONE)) - == MY_FILEPOS_ERROR ) - { - /* - If my_seek fails my_errno will already contain an error code; - just unlock and return error 
code. - */ - DBUG_PRINT("error",("my_errno: %d (%d)",my_errno,errno)); - pthread_mutex_unlock(&my_file_info[fd].mutex); - DBUG_RETURN(-1); - } - } - error= locking(fd,locktype,(ulong) length) && errno != EINVAL; - pthread_mutex_unlock(&my_file_info[fd].mutex); - if (!error) - DBUG_RETURN(0); + int timeout_sec; + if (MyFlags & MY_NO_WAIT) + timeout_sec= 0; + else + timeout_sec= WIN_LOCK_INFINITE; + + if(win_lock(fd, locktype, start, length, timeout_sec) == 0) + DBUG_RETURN(0); } #else #if defined(HAVE_FCNTL) diff --git a/mysys/my_mmap.c b/mysys/my_mmap.c index 023a06fd896..303d8efaf30 100644 --- a/mysys/my_mmap.c +++ b/mysys/my_mmap.c @@ -27,17 +27,17 @@ int my_msync(int fd, void *addr, size_t len, int flags) return my_sync(fd, MYF(0)); } -#elif defined(__WIN__) +#elif defined(_WIN32) static SECURITY_ATTRIBUTES mmap_security_attributes= {sizeof(SECURITY_ATTRIBUTES), 0, TRUE}; void *my_mmap(void *addr, size_t len, int prot, - int flags, int fd, my_off_t offset) + int flags, File fd, my_off_t offset) { HANDLE hFileMap; LPVOID ptr; - HANDLE hFile= (HANDLE)_get_osfhandle(fd); + HANDLE hFile= (HANDLE)my_get_osfhandle(fd); if (hFile == INVALID_HANDLE_VALUE) return MAP_FAILED; diff --git a/mysys/my_open.c b/mysys/my_open.c index fe7f65c450b..b1788506832 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -17,9 +17,7 @@ #include "mysys_err.h" #include #include -#if defined(__WIN__) -#include -#endif + /* Open a file @@ -43,29 +41,8 @@ File my_open(const char *FileName, int Flags, myf MyFlags) DBUG_ENTER("my_open"); DBUG_PRINT("my",("Name: '%s' Flags: %d MyFlags: %d", FileName, Flags, MyFlags)); -#if defined(__WIN__) - /* - Check that we don't try to open or create a file name that may - cause problems for us in the future (like PRN) - */ - if (check_if_legal_filename(FileName)) - { - errno= EACCES; - DBUG_RETURN(my_register_filename(-1, FileName, FILE_BY_OPEN, - EE_FILENOTFOUND, MyFlags)); - } -#ifndef __WIN__ - if (Flags & O_SHARE) - fd = sopen((char *) FileName, (Flags 
& ~O_SHARE) | O_BINARY, SH_DENYNO, - MY_S_IREAD | MY_S_IWRITE); - else - fd = open((char *) FileName, Flags | O_BINARY, - MY_S_IREAD | MY_S_IWRITE); -#else - fd= my_sopen((char *) FileName, (Flags & ~O_SHARE) | O_BINARY, SH_DENYNO, - MY_S_IREAD | MY_S_IWRITE); -#endif - +#if defined(_WIN32) + fd= my_win_open(FileName, Flags); #elif !defined(NO_OPEN_3) fd = open(FileName, Flags, my_umask); /* Normal unix */ #else @@ -94,11 +71,14 @@ int my_close(File fd, myf MyFlags) DBUG_PRINT("my",("fd: %d MyFlags: %d",fd, MyFlags)); pthread_mutex_lock(&THR_LOCK_open); +#ifndef _WIN32 do { err= close(fd); } while (err == -1 && errno == EINTR); - +#else + err= my_win_close(fd); +#endif if (err) { DBUG_PRINT("error",("Got error %d on close",err)); @@ -109,9 +89,6 @@ int my_close(File fd, myf MyFlags) if ((uint) fd < my_file_limit && my_file_info[fd].type != UNOPEN) { my_free(my_file_info[fd].name, MYF(0)); -#if defined(THREAD) && !defined(HAVE_PREAD) - pthread_mutex_destroy(&my_file_info[fd].mutex); -#endif my_file_info[fd].type = UNOPEN; } my_file_opened--; @@ -141,16 +118,12 @@ File my_register_filename(File fd, const char *FileName, enum file_type type_of_file, uint error_message_number, myf MyFlags) { DBUG_ENTER("my_register_filename"); - if ((int) fd >= 0) + if ((int) fd >= MY_FILE_MIN) { if ((uint) fd >= my_file_limit) { -#if defined(THREAD) && !defined(HAVE_PREAD) - my_errno= EMFILE; -#else thread_safe_increment(my_file_opened,&THR_LOCK_open); DBUG_RETURN(fd); /* safeguard */ -#endif } else { @@ -160,9 +133,6 @@ File my_register_filename(File fd, const char *FileName, enum file_type my_file_opened++; my_file_total_opened++; my_file_info[fd].type = type_of_file; -#if defined(THREAD) && !defined(HAVE_PREAD) - pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST); -#endif pthread_mutex_unlock(&THR_LOCK_open); DBUG_PRINT("exit",("fd: %d",fd)); DBUG_RETURN(fd); @@ -187,188 +157,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type DBUG_RETURN(-1); } 
-#ifdef __WIN__ -extern void __cdecl _dosmaperr(unsigned long); - -/* - Open a file with sharing. Similar to _sopen() from libc, but allows managing - share delete on win32 - - SYNOPSIS - my_sopen() - path fully qualified file name - oflag operation flags - shflag share flag - pmode permission flags - - RETURN VALUE - File descriptor of opened file if success - -1 and sets errno if fails. -*/ - -File my_sopen(const char *path, int oflag, int shflag, int pmode) -{ - int fh; /* handle of opened file */ - int mask; - HANDLE osfh; /* OS handle of opened file */ - DWORD fileaccess; /* OS file access (requested) */ - DWORD fileshare; /* OS file sharing mode */ - DWORD filecreate; /* OS method of opening/creating */ - DWORD fileattrib; /* OS file attribute flags */ - SECURITY_ATTRIBUTES SecurityAttributes; - - SecurityAttributes.nLength= sizeof(SecurityAttributes); - SecurityAttributes.lpSecurityDescriptor= NULL; - SecurityAttributes.bInheritHandle= !(oflag & _O_NOINHERIT); - - /* - * decode the access flags - */ - switch (oflag & (_O_RDONLY | _O_WRONLY | _O_RDWR)) { - case _O_RDONLY: /* read access */ - fileaccess= GENERIC_READ; - break; - case _O_WRONLY: /* write access */ - fileaccess= GENERIC_WRITE; - break; - case _O_RDWR: /* read and write access */ - fileaccess= GENERIC_READ | GENERIC_WRITE; - break; - default: /* error, bad oflag */ - errno= EINVAL; - _doserrno= 0L; /* not an OS error */ - return -1; - } - - /* - * decode sharing flags - */ - switch (shflag) { - case _SH_DENYRW: /* exclusive access except delete */ - fileshare= FILE_SHARE_DELETE; - break; - case _SH_DENYWR: /* share read and delete access */ - fileshare= FILE_SHARE_READ | FILE_SHARE_DELETE; - break; - case _SH_DENYRD: /* share write and delete access */ - fileshare= FILE_SHARE_WRITE | FILE_SHARE_DELETE; - break; - case _SH_DENYNO: /* share read, write and delete access */ - fileshare= FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE; - break; - case _SH_DENYRWD: /* exclusive access */ - 
fileshare= 0L; - break; - case _SH_DENYWRD: /* share read access */ - fileshare= FILE_SHARE_READ; - break; - case _SH_DENYRDD: /* share write access */ - fileshare= FILE_SHARE_WRITE; - break; - case _SH_DENYDEL: /* share read and write access */ - fileshare= FILE_SHARE_READ | FILE_SHARE_WRITE; - break; - default: /* error, bad shflag */ - errno= EINVAL; - _doserrno= 0L; /* not an OS error */ - return -1; - } - - /* - * decode open/create method flags - */ - switch (oflag & (_O_CREAT | _O_EXCL | _O_TRUNC)) { - case 0: - case _O_EXCL: /* ignore EXCL w/o CREAT */ - filecreate= OPEN_EXISTING; - break; - - case _O_CREAT: - filecreate= OPEN_ALWAYS; - break; - - case _O_CREAT | _O_EXCL: - case _O_CREAT | _O_TRUNC | _O_EXCL: - filecreate= CREATE_NEW; - break; - - case _O_TRUNC: - case _O_TRUNC | _O_EXCL: /* ignore EXCL w/o CREAT */ - filecreate= TRUNCATE_EXISTING; - break; - - case _O_CREAT | _O_TRUNC: - filecreate= CREATE_ALWAYS; - break; - - default: - /* this can't happen ... all cases are covered */ - errno= EINVAL; - _doserrno= 0L; - return -1; - } - - /* - * decode file attribute flags if _O_CREAT was specified - */ - fileattrib= FILE_ATTRIBUTE_NORMAL; /* default */ - if (oflag & _O_CREAT) - { - _umask((mask= _umask(0))); - - if (!((pmode & ~mask) & _S_IWRITE)) - fileattrib= FILE_ATTRIBUTE_READONLY; - } - - /* - * Set temporary file (delete-on-close) attribute if requested. - */ - if (oflag & _O_TEMPORARY) - { - fileattrib|= FILE_FLAG_DELETE_ON_CLOSE; - fileaccess|= DELETE; - } - - /* - * Set temporary file (delay-flush-to-disk) attribute if requested. - */ - if (oflag & _O_SHORT_LIVED) - fileattrib|= FILE_ATTRIBUTE_TEMPORARY; - - /* - * Set sequential or random access attribute if requested. 
- */ - if (oflag & _O_SEQUENTIAL) - fileattrib|= FILE_FLAG_SEQUENTIAL_SCAN; - else if (oflag & _O_RANDOM) - fileattrib|= FILE_FLAG_RANDOM_ACCESS; - - /* - * try to open/create the file - */ - if ((osfh= CreateFile(path, fileaccess, fileshare, &SecurityAttributes, - filecreate, fileattrib, NULL)) == INVALID_HANDLE_VALUE) - { - /* - * OS call to open/create file failed! map the error, release - * the lock, and return -1. note that it's not necessary to - * call _free_osfhnd (it hasn't been used yet). - */ - _dosmaperr(GetLastError()); /* map error */ - return -1; /* return error to caller */ - } - - if ((fh= _open_osfhandle((intptr_t)osfh, - oflag & (_O_APPEND | _O_RDONLY | _O_TEXT))) == -1) - { - _dosmaperr(GetLastError()); /* map error */ - CloseHandle(osfh); - } - - return fh; /* return handle */ -} -#endif /* __WIN__ */ #ifdef EXTRA_DEBUG diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 836f5a92963..6421a2da601 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -18,7 +18,7 @@ #include "my_base.h" #include #include -#ifdef HAVE_PREAD +#ifndef _WIN32 #include #endif @@ -48,9 +48,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, { size_t readbytes; int error= 0; -#ifndef HAVE_PREAD - int save_errno; -#endif + #ifndef DBUG_OFF char llbuf[22]; DBUG_ENTER("my_pread"); @@ -61,20 +59,15 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, for (;;) { errno= 0; /* Linux, Windows don't reset this on EOF/success */ -#ifndef HAVE_PREAD - pthread_mutex_lock(&my_file_info[Filedes].mutex); - readbytes= (uint) -1; - error= (lseek(Filedes, offset, MY_SEEK_SET) == (my_off_t) -1 || - (readbytes= read(Filedes, Buffer, (uint) Count)) != Count); - save_errno= errno; - pthread_mutex_unlock(&my_file_info[Filedes].mutex); +#ifdef _WIN32 + readbytes= my_win_pread(Filedes, Buffer, Count, offset); +#else + readbytes= pread(Filedes, Buffer, Count, offset); +#endif + error = (readbytes != Count); + if (error) { - errno= 
save_errno; -#else - if ((error= ((readbytes= pread(Filedes, Buffer, Count, offset)) != Count))) - { -#endif my_errno= errno ? errno : -1; if (errno == 0 || (readbytes != (size_t) -1 && (MyFlags & (MY_NABP | MY_FNABP)))) @@ -91,19 +84,19 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, #endif if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) { - if (readbytes == (size_t) -1) - my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG), - my_filename(Filedes),my_errno); - else if (MyFlags & (MY_NABP | MY_FNABP)) - my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG), - my_filename(Filedes),my_errno); + if (readbytes == (size_t) -1) + my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG), + my_filename(Filedes),my_errno); + else if (MyFlags & (MY_NABP | MY_FNABP)) + my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG), + my_filename(Filedes),my_errno); } if (readbytes == (size_t) -1 || (MyFlags & (MY_FNABP | MY_NABP))) - DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ + DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ } if (MyFlags & (MY_NABP | MY_FNABP)) - DBUG_RETURN(0); /* Read went ok; Return 0 */ - DBUG_RETURN(readbytes); /* purecov: inspected */ + DBUG_RETURN(0); /* Read went ok; Return 0 */ + DBUG_RETURN(readbytes); /* purecov: inspected */ } } /* my_pread */ @@ -132,7 +125,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, my_off_t offset, myf MyFlags) { - size_t writenbytes, written; + size_t writtenbytes, written; uint errors; #ifndef DBUG_OFF char llbuf[22]; @@ -146,28 +139,22 @@ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, for (;;) { -#ifndef HAVE_PREAD - int error; - writenbytes= (size_t) -1; - pthread_mutex_lock(&my_file_info[Filedes].mutex); - error= (lseek(Filedes, offset, MY_SEEK_SET) != (my_off_t) -1 && - (writenbytes = write(Filedes, Buffer, (uint) Count)) == Count); - pthread_mutex_unlock(&my_file_info[Filedes].mutex); - if (error) - break; 
+#ifdef _WIN32 + writtenbytes= my_win_pwrite(Filedes, Buffer, Count,offset); #else - if ((writenbytes= pwrite(Filedes, Buffer, Count,offset)) == Count) + writtenbytes= pwrite(Filedes, Buffer, Count, offset); +#endif + if (writtenbytes == Count) break; my_errno= errno; -#endif - if (writenbytes != (size_t) -1) + if (writtenbytes != (size_t) -1) { /* Safegueard */ - written+=writenbytes; - Buffer+=writenbytes; - Count-=writenbytes; - offset+=writenbytes; + written+=writtenbytes; + Buffer+=writtenbytes; + Count-=writtenbytes; + offset+=writtenbytes; } - DBUG_PRINT("error",("Write only %u bytes", (uint) writenbytes)); + DBUG_PRINT("error",("Write only %u bytes", (uint) writtenbytes)); #ifndef NO_BACKGROUND #ifdef THREAD if (my_thread_var->abort) @@ -180,15 +167,15 @@ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, errors++; continue; } - if ((writenbytes && writenbytes != (size_t) -1) || my_errno == EINTR) + if ((writtenbytes && writtenbytes != (size_t) -1) || my_errno == EINTR) continue; /* Retry */ #endif if (MyFlags & (MY_NABP | MY_FNABP)) { if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) { - my_error(EE_WRITE, MYF(ME_BELL | ME_WAITTANG), - my_filename(Filedes),my_errno); + my_error(EE_WRITE, MYF(ME_BELL | ME_WAITTANG), + my_filename(Filedes),my_errno); } DBUG_RETURN(MY_FILE_ERROR); /* Error on read */ } @@ -198,5 +185,5 @@ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, DBUG_EXECUTE_IF("check", my_seek(Filedes, -1, SEEK_SET, MYF(0));); if (MyFlags & (MY_NABP | MY_FNABP)) DBUG_RETURN(0); /* Want only errors */ - DBUG_RETURN(writenbytes+written); /* purecov: inspected */ + DBUG_RETURN(writtenbytes+written); /* purecov: inspected */ } /* my_pwrite */ diff --git a/mysys/my_quick.c b/mysys/my_quick.c index 0ba20a5bdee..b93e7e17224 100644 --- a/mysys/my_quick.c +++ b/mysys/my_quick.c @@ -19,11 +19,19 @@ #include "my_nosys.h" +#ifdef _WIN32 +extern size_t my_win_read(File Filedes,uchar *Buffer,size_t Count); +#endif + size_t 
my_quick_read(File Filedes,uchar *Buffer,size_t Count,myf MyFlags) { size_t readbytes; - - if ((readbytes = read(Filedes, Buffer, (uint) Count)) != Count) +#ifdef _WIN32 + readbytes= my_win_read(Filedes, Buffer, Count); +#else + readbytes= read(Filedes, Buffer, Count); +#endif + if(readbytes != Count) { #ifndef DBUG_OFF if ((readbytes == 0 || readbytes == (size_t) -1) && errno == EINTR) @@ -40,8 +48,13 @@ size_t my_quick_read(File Filedes,uchar *Buffer,size_t Count,myf MyFlags) } -size_t my_quick_write(File Filedes,const uchar *Buffer,size_t Count) + +size_t my_quick_write(File Filedes, const uchar *Buffer, size_t Count) { +#ifdef _WIN32 + return my_win_write(Filedes, Buffer, Count); +#else + #ifndef DBUG_OFF size_t writtenbytes; #endif @@ -50,7 +63,7 @@ size_t my_quick_write(File Filedes,const uchar *Buffer,size_t Count) #ifndef DBUG_OFF writtenbytes = #endif - (size_t) write(Filedes,Buffer, (uint) Count)) != Count) + (size_t) write(Filedes,Buffer,Count)) != Count) { #ifndef DBUG_OFF if ((writtenbytes == 0 || writtenbytes == (size_t) -1) && errno == EINTR) @@ -64,4 +77,5 @@ size_t my_quick_write(File Filedes,const uchar *Buffer,size_t Count) return (size_t) -1; } return 0; +#endif } diff --git a/mysys/my_read.c b/mysys/my_read.c index 25ffe73d813..9c76193aa63 100644 --- a/mysys/my_read.c +++ b/mysys/my_read.c @@ -44,7 +44,12 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags) for (;;) { errno= 0; /* Linux, Windows don't reset this on EOF/success */ - if ((readbytes= read(Filedes, Buffer, (uint) Count)) != Count) +#ifdef _WIN32 + readbytes= my_win_read(Filedes, Buffer, Count); +#else + readbytes= read(Filedes, Buffer, Count); +#endif + if (readbytes != Count) { my_errno= errno; if (errno == 0 || (readbytes != (size_t) -1 && diff --git a/mysys/my_seek.c b/mysys/my_seek.c index d186d56869a..24941517487 100644 --- a/mysys/my_seek.c +++ b/mysys/my_seek.c @@ -56,16 +56,11 @@ my_off_t my_seek(File fd, my_off_t pos, int whence, myf MyFlags) Make 
sure we are using a valid file descriptor! */ DBUG_ASSERT(fd != -1); -#if defined(THREAD) && !defined(HAVE_PREAD) - if (MyFlags & MY_THREADSAFE) - { - pthread_mutex_lock(&my_file_info[fd].mutex); - newpos= lseek(fd, pos, whence); - pthread_mutex_unlock(&my_file_info[fd].mutex); - } - else +#ifdef _WIN32 + newpos= my_win_lseek(fd, pos, whence); +#else + newpos= lseek(fd, pos, whence); #endif - newpos= lseek(fd, pos, whence); if (newpos == (os_off_t) -1) { my_errno= errno; @@ -91,7 +86,9 @@ my_off_t my_tell(File fd, myf MyFlags) DBUG_ENTER("my_tell"); DBUG_PRINT("my",("fd: %d MyFlags: %d",fd, MyFlags)); DBUG_ASSERT(fd >= 0); -#ifdef HAVE_TELL +#ifdef _WIN32 + pos= my_seek(fd, 0, MY_SEEK_CUR,0); +#elif defined(HAVE_TELL) pos=tell(fd); #else pos=lseek(fd, 0L, MY_SEEK_CUR); diff --git a/mysys/my_static.c b/mysys/my_static.c index 08653d03d21..629ee26efb9 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -35,7 +35,7 @@ int NEAR my_umask=0664, NEAR my_umask_dir=0777; #ifndef THREAD int NEAR my_errno=0; #endif -struct st_my_file_info my_file_info_default[MY_NFILE]= {{0,UNOPEN}}; +struct st_my_file_info my_file_info_default[MY_NFILE]; uint my_file_limit= MY_NFILE; struct st_my_file_info *my_file_info= my_file_info_default; diff --git a/mysys/my_sync.c b/mysys/my_sync.c index dae20b0163e..9c5fbce7ab7 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -71,7 +71,7 @@ int my_sync(File fd, myf my_flags) if (res == -1 && errno == ENOLCK) res= 0; /* Result Bug in Old FreeBSD */ #elif defined(__WIN__) - res= _commit(fd); + res= my_win_fsync(fd); #else #error Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ diff --git a/mysys/my_winerr.c b/mysys/my_winerr.c new file mode 100644 index 00000000000..534078b6737 --- /dev/null +++ b/mysys/my_winerr.c @@ -0,0 +1,123 @@ +/* Copyright (C) 2008 MySQL AB + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published 
by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Convert Windows API error (GetLastError() to Posix equivalent (errno) + The exported function my_osmaperr() is modelled after and borrows + heavily from undocumented _dosmaperr()(found of the static Microsoft C runtime). +*/ + +#include +#include + + +struct errentry +{ + unsigned long oscode; /* OS return value */ + int sysv_errno; /* System V error code */ +}; + +static struct errentry errtable[]= { + { ERROR_INVALID_FUNCTION, EINVAL }, /* 1 */ + { ERROR_FILE_NOT_FOUND, ENOENT }, /* 2 */ + { ERROR_PATH_NOT_FOUND, ENOENT }, /* 3 */ + { ERROR_TOO_MANY_OPEN_FILES, EMFILE }, /* 4 */ + { ERROR_ACCESS_DENIED, EACCES }, /* 5 */ + { ERROR_INVALID_HANDLE, EBADF }, /* 6 */ + { ERROR_ARENA_TRASHED, ENOMEM }, /* 7 */ + { ERROR_NOT_ENOUGH_MEMORY, ENOMEM }, /* 8 */ + { ERROR_INVALID_BLOCK, ENOMEM }, /* 9 */ + { ERROR_BAD_ENVIRONMENT, E2BIG }, /* 10 */ + { ERROR_BAD_FORMAT, ENOEXEC }, /* 11 */ + { ERROR_INVALID_ACCESS, EINVAL }, /* 12 */ + { ERROR_INVALID_DATA, EINVAL }, /* 13 */ + { ERROR_INVALID_DRIVE, ENOENT }, /* 15 */ + { ERROR_CURRENT_DIRECTORY, EACCES }, /* 16 */ + { ERROR_NOT_SAME_DEVICE, EXDEV }, /* 17 */ + { ERROR_NO_MORE_FILES, ENOENT }, /* 18 */ + { ERROR_LOCK_VIOLATION, EACCES }, /* 33 */ + { ERROR_BAD_NETPATH, ENOENT }, /* 53 */ + { ERROR_NETWORK_ACCESS_DENIED, EACCES }, /* 65 */ + { ERROR_BAD_NET_NAME, ENOENT }, /* 67 */ + { ERROR_FILE_EXISTS, EEXIST }, /* 80 */ + { ERROR_CANNOT_MAKE, EACCES }, /* 82 */ + { ERROR_FAIL_I24, EACCES }, /* 83 */ + { 
ERROR_INVALID_PARAMETER, EINVAL }, /* 87 */ + { ERROR_NO_PROC_SLOTS, EAGAIN }, /* 89 */ + { ERROR_DRIVE_LOCKED, EACCES }, /* 108 */ + { ERROR_BROKEN_PIPE, EPIPE }, /* 109 */ + { ERROR_DISK_FULL, ENOSPC }, /* 112 */ + { ERROR_INVALID_TARGET_HANDLE, EBADF }, /* 114 */ + { ERROR_INVALID_LEVEL, EINVAL }, /* 124 */ + { ERROR_WAIT_NO_CHILDREN, ECHILD }, /* 128 */ + { ERROR_CHILD_NOT_COMPLETE, ECHILD }, /* 129 */ + { ERROR_DIRECT_ACCESS_HANDLE, EBADF }, /* 130 */ + { ERROR_NEGATIVE_SEEK, EINVAL }, /* 131 */ + { ERROR_SEEK_ON_DEVICE, EACCES }, /* 132 */ + { ERROR_DIR_NOT_EMPTY, ENOTEMPTY }, /* 145 */ + { ERROR_NOT_LOCKED, EACCES }, /* 158 */ + { ERROR_BAD_PATHNAME, ENOENT }, /* 161 */ + { ERROR_MAX_THRDS_REACHED, EAGAIN }, /* 164 */ + { ERROR_LOCK_FAILED, EACCES }, /* 167 */ + { ERROR_ALREADY_EXISTS, EEXIST }, /* 183 */ + { ERROR_FILENAME_EXCED_RANGE, ENOENT }, /* 206 */ + { ERROR_NESTING_NOT_ALLOWED, EAGAIN }, /* 215 */ + { ERROR_NOT_ENOUGH_QUOTA, ENOMEM } /* 1816 */ +}; + +/* size of the table */ +#define ERRTABLESIZE (sizeof(errtable)/sizeof(errtable[0])) + +/* The following two constants must be the minimum and maximum +values in the (contiguous) range of Exec Failure errors. */ +#define MIN_EXEC_ERROR ERROR_INVALID_STARTING_CODESEG +#define MAX_EXEC_ERROR ERROR_INFLOOP_IN_RELOC_CHAIN + +/* These are the low and high value in the range of errors that are +access violations */ +#define MIN_EACCES_RANGE ERROR_WRITE_PROTECT +#define MAX_EACCES_RANGE ERROR_SHARING_BUFFER_EXCEEDED + + +static int get_errno_from_oserr(unsigned long oserrno) +{ + int i; + + /* check the table for the OS error code */ + for (i= 0; i < ERRTABLESIZE; ++i) + { + if (oserrno == errtable[i].oscode) + { + return errtable[i].sysv_errno; + } + } + + /* The error code wasn't in the table. We check for a range of */ + /* EACCES errors or exec failure errors (ENOEXEC). Otherwise */ + /* EINVAL is returned. 
*/ + + if (oserrno >= MIN_EACCES_RANGE && oserrno <= MAX_EACCES_RANGE) + return EACCES; + else if (oserrno >= MIN_EXEC_ERROR && oserrno <= MAX_EXEC_ERROR) + return ENOEXEC; + else + return EINVAL; +} + +/* Set errno corresponsing to GetLastError() value */ +void my_osmaperr ( unsigned long oserrno) +{ + errno= get_errno_from_oserr(oserrno); +} diff --git a/mysys/my_winfile.c b/mysys/my_winfile.c new file mode 100644 index 00000000000..f63c35ba47b --- /dev/null +++ b/mysys/my_winfile.c @@ -0,0 +1,681 @@ +/* Copyright (C) 2008 MySQL AB, 2008-2009 Sun Microsystems, Inc + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + The purpose of this file is to provide implementation of file IO routines on + Windows that can be thought as drop-in replacement for corresponding C runtime + functionality. + + Compared to Windows CRT, this one + - does not have the same file descriptor + limitation (default is 16384 and can be increased further, whereas CRT poses + a hard limit of 2048 file descriptors) + - the file operations are not serialized + - positional IO pread/pwrite is ported here. + - no text mode for files, all IO is "binary" + + Naming convention: + All routines are prefixed with my_win_, e.g Posix open() is implemented with + my_win_open() + + Implemented are + - POSIX routines(e.g open, read, lseek ...) 
+ - Some ANSI C stream routines (fopen, fdopen, fileno, fclose) + - Windows CRT equvalients (my_get_osfhandle, open_osfhandle) + + Worth to note: + - File descriptors used here are located in a range that is not compatible + with CRT on purpose. Attempt to use a file descriptor from Windows CRT library + range in my_win_* function will be punished with DBUG_ASSERT() + + - File streams (FILE *) are actually from the C runtime. The routines provided + here are useful only in scernarios that use low-level IO with my_win_fileno() +*/ + +#ifdef _WIN32 + +#include "mysys_priv.h" +#include +#include + +/* Associates a file descriptor with an existing operating-system file handle.*/ +File my_open_osfhandle(HANDLE handle, int oflag) +{ + int offset= -1; + uint i; + DBUG_ENTER("my_open_osfhandle"); + + pthread_mutex_lock(&THR_LOCK_open); + for(i= MY_FILE_MIN; i < my_file_limit;i++) + { + if(my_file_info[i].fhandle == 0) + { + struct st_my_file_info *finfo= &(my_file_info[i]); + finfo->type= FILE_BY_OPEN; + finfo->fhandle= handle; + finfo->oflag= oflag; + offset= i; + break; + } + } + pthread_mutex_unlock(&THR_LOCK_open); + if(offset == -1) + errno= EMFILE; /* to many file handles open */ + DBUG_RETURN(offset); +} + + +static void invalidate_fd(File fd) +{ + DBUG_ENTER("invalidate_fd"); + DBUG_ASSERT(fd >= MY_FILE_MIN && fd < (int)my_file_limit); + my_file_info[fd].fhandle= 0; + DBUG_VOID_RETURN; +} + + +/* Get Windows handle for a file descriptor */ +HANDLE my_get_osfhandle(File fd) +{ + DBUG_ENTER("my_get_osfhandle"); + DBUG_ASSERT(fd >= MY_FILE_MIN && fd < (int)my_file_limit); + DBUG_RETURN(my_file_info[fd].fhandle); +} + + +static int my_get_open_flags(File fd) +{ + DBUG_ENTER("my_get_open_flags"); + DBUG_ASSERT(fd >= MY_FILE_MIN && fd < (int)my_file_limit); + DBUG_RETURN(my_file_info[fd].oflag); +} + + +/* + Open a file with sharing. 
Similar to _sopen() from libc, but allows managing + share delete on win32 + + SYNOPSIS + my_win_sopen() + path file name + oflag operation flags + shflag share flag + pmode permission flags + + RETURN VALUE + File descriptor of opened file if success + -1 and sets errno if fails. +*/ + +File my_win_sopen(const char *path, int oflag, int shflag, int pmode) +{ + int fh; /* handle of opened file */ + int mask; + HANDLE osfh; /* OS handle of opened file */ + DWORD fileaccess; /* OS file access (requested) */ + DWORD fileshare; /* OS file sharing mode */ + DWORD filecreate; /* OS method of opening/creating */ + DWORD fileattrib; /* OS file attribute flags */ + SECURITY_ATTRIBUTES SecurityAttributes; + + DBUG_ENTER("my_win_sopen"); + + if (check_if_legal_filename(path)) + { + errno= EACCES; + DBUG_RETURN(-1); + } + SecurityAttributes.nLength= sizeof(SecurityAttributes); + SecurityAttributes.lpSecurityDescriptor= NULL; + SecurityAttributes.bInheritHandle= !(oflag & _O_NOINHERIT); + + /* decode the access flags */ + switch (oflag & (_O_RDONLY | _O_WRONLY | _O_RDWR)) { + case _O_RDONLY: /* read access */ + fileaccess= GENERIC_READ; + break; + case _O_WRONLY: /* write access */ + fileaccess= GENERIC_WRITE; + break; + case _O_RDWR: /* read and write access */ + fileaccess= GENERIC_READ | GENERIC_WRITE; + break; + default: /* error, bad oflag */ + errno= EINVAL; + DBUG_RETURN(-1); + } + + /* decode sharing flags */ + switch (shflag) { + case _SH_DENYRW: /* exclusive access except delete */ + fileshare= FILE_SHARE_DELETE; + break; + case _SH_DENYWR: /* share read and delete access */ + fileshare= FILE_SHARE_READ | FILE_SHARE_DELETE; + break; + case _SH_DENYRD: /* share write and delete access */ + fileshare= FILE_SHARE_WRITE | FILE_SHARE_DELETE; + break; + case _SH_DENYNO: /* share read, write and delete access */ + fileshare= FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE; + break; + case _SH_DENYRWD: /* exclusive access */ + fileshare= 0L; + break; + case 
_SH_DENYWRD: /* share read access */ + fileshare= FILE_SHARE_READ; + break; + case _SH_DENYRDD: /* share write access */ + fileshare= FILE_SHARE_WRITE; + break; + case _SH_DENYDEL: /* share read and write access */ + fileshare= FILE_SHARE_READ | FILE_SHARE_WRITE; + break; + default: /* error, bad shflag */ + errno= EINVAL; + DBUG_RETURN(-1); + } + + /* decode open/create method flags */ + switch (oflag & (_O_CREAT | _O_EXCL | _O_TRUNC)) { + case 0: + case _O_EXCL: /* ignore EXCL w/o CREAT */ + filecreate= OPEN_EXISTING; + break; + + case _O_CREAT: + filecreate= OPEN_ALWAYS; + break; + + case _O_CREAT | _O_EXCL: + case _O_CREAT | _O_TRUNC | _O_EXCL: + filecreate= CREATE_NEW; + break; + + case _O_TRUNC: + case _O_TRUNC | _O_EXCL: /* ignore EXCL w/o CREAT */ + filecreate= TRUNCATE_EXISTING; + break; + + case _O_CREAT | _O_TRUNC: + filecreate= CREATE_ALWAYS; + break; + + default: + /* this can't happen ... all cases are covered */ + errno= EINVAL; + DBUG_RETURN(-1); + } + + /* decode file attribute flags if _O_CREAT was specified */ + fileattrib= FILE_ATTRIBUTE_NORMAL; /* default */ + if (oflag & _O_CREAT) + { + _umask((mask= _umask(0))); + + if (!((pmode & ~mask) & _S_IWRITE)) + fileattrib= FILE_ATTRIBUTE_READONLY; + } + + /* Set temporary file (delete-on-close) attribute if requested. */ + if (oflag & _O_TEMPORARY) + { + fileattrib|= FILE_FLAG_DELETE_ON_CLOSE; + fileaccess|= DELETE; + } + + /* Set temporary file (delay-flush-to-disk) attribute if requested.*/ + if (oflag & _O_SHORT_LIVED) + fileattrib|= FILE_ATTRIBUTE_TEMPORARY; + + /* Set sequential or random access attribute if requested. */ + if (oflag & _O_SEQUENTIAL) + fileattrib|= FILE_FLAG_SEQUENTIAL_SCAN; + else if (oflag & _O_RANDOM) + fileattrib|= FILE_FLAG_RANDOM_ACCESS; + + /* try to open/create the file */ + if ((osfh= CreateFile(path, fileaccess, fileshare, &SecurityAttributes, + filecreate, fileattrib, NULL)) == INVALID_HANDLE_VALUE) + { + /* + OS call to open/create file failed! 
map the error, release + the lock, and return -1. note that it's not necessary to + call _free_osfhnd (it hasn't been used yet). + */ + my_osmaperr(GetLastError()); /* map error */ + DBUG_RETURN(-1); /* return error to caller */ + } + + if ((fh= my_open_osfhandle(osfh, + oflag & (_O_APPEND | _O_RDONLY | _O_TEXT))) == -1) + { + CloseHandle(osfh); + } + + DBUG_RETURN(fh); /* return handle */ +} + + +File my_win_open(const char *path, int flags) +{ + DBUG_ENTER("my_win_open"); + DBUG_RETURN(my_win_sopen((char *) path, flags | _O_BINARY, _SH_DENYNO, + _S_IREAD | S_IWRITE)); +} + + +int my_win_close(File fd) +{ + DBUG_ENTER("my_win_close"); + if(CloseHandle(my_get_osfhandle(fd))) + { + invalidate_fd(fd); + DBUG_RETURN(0); + } + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); +} + + +size_t my_win_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset) +{ + DWORD nBytesRead; + HANDLE hFile; + OVERLAPPED ov= {0}; + LARGE_INTEGER li; + + DBUG_ENTER("my_win_pread"); + + if(!Count) + DBUG_RETURN(0); +#ifdef _WIN64 + if(Count > UINT_MAX) + Count= UINT_MAX; +#endif + + hFile= (HANDLE)my_get_osfhandle(Filedes); + li.QuadPart= offset; + ov.Offset= li.LowPart; + ov.OffsetHigh= li.HighPart; + + if(!ReadFile(hFile, Buffer, (DWORD)Count, &nBytesRead, &ov)) + { + DWORD lastError= GetLastError(); + /* + ERROR_BROKEN_PIPE is returned when no more data coming + through e.g. a command pipe in windows : see MSDN on ReadFile. 
+ */ + if(lastError == ERROR_HANDLE_EOF || lastError == ERROR_BROKEN_PIPE) + DBUG_RETURN(0); /*return 0 at EOF*/ + my_osmaperr(lastError); + DBUG_RETURN((size_t)-1); + } + DBUG_RETURN(nBytesRead); +} + + +size_t my_win_read(File Filedes, uchar *Buffer, size_t Count) +{ + DWORD nBytesRead; + HANDLE hFile; + + DBUG_ENTER("my_win_read"); + if(!Count) + DBUG_RETURN(0); +#ifdef _WIN64 + if(Count > UINT_MAX) + Count= UINT_MAX; +#endif + + hFile= (HANDLE)my_get_osfhandle(Filedes); + + if(!ReadFile(hFile, Buffer, (DWORD)Count, &nBytesRead, NULL)) + { + DWORD lastError= GetLastError(); + /* + ERROR_BROKEN_PIPE is returned when no more data coming + through e.g. a command pipe in windows : see MSDN on ReadFile. + */ + if(lastError == ERROR_HANDLE_EOF || lastError == ERROR_BROKEN_PIPE) + DBUG_RETURN(0); /*return 0 at EOF*/ + my_osmaperr(lastError); + DBUG_RETURN((size_t)-1); + } + DBUG_RETURN(nBytesRead); +} + + +size_t my_win_pwrite(File Filedes, const uchar *Buffer, size_t Count, + my_off_t offset) +{ + DWORD nBytesWritten; + HANDLE hFile; + OVERLAPPED ov= {0}; + LARGE_INTEGER li; + + DBUG_ENTER("my_win_pwrite"); + DBUG_PRINT("my",("Filedes: %d, Buffer: %p, Count: %llu, offset: %llu", + Filedes, Buffer, (ulonglong)Count, (ulonglong)offset)); + + if(!Count) + DBUG_RETURN(0); + +#ifdef _WIN64 + if(Count > UINT_MAX) + Count= UINT_MAX; +#endif + + hFile= (HANDLE)my_get_osfhandle(Filedes); + li.QuadPart= offset; + ov.Offset= li.LowPart; + ov.OffsetHigh= li.HighPart; + + if(!WriteFile(hFile, Buffer, (DWORD)Count, &nBytesWritten, &ov)) + { + my_osmaperr(GetLastError()); + DBUG_RETURN((size_t)-1); + } + else + DBUG_RETURN(nBytesWritten); +} + + +my_off_t my_win_lseek(File fd, my_off_t pos, int whence) +{ + LARGE_INTEGER offset; + LARGE_INTEGER newpos; + + DBUG_ENTER("my_win_lseek"); + + /* Check compatibility of Windows and Posix seek constants */ + compile_time_assert(FILE_BEGIN == SEEK_SET && FILE_CURRENT == SEEK_CUR + && FILE_END == SEEK_END); + + offset.QuadPart= pos; + 
if(!SetFilePointerEx(my_get_osfhandle(fd), offset, &newpos, whence)) + { + my_osmaperr(GetLastError()); + newpos.QuadPart= -1; + } + DBUG_RETURN(newpos.QuadPart); +} + + +#ifndef FILE_WRITE_TO_END_OF_FILE +#define FILE_WRITE_TO_END_OF_FILE 0xffffffff +#endif +size_t my_win_write(File fd, const uchar *Buffer, size_t Count) +{ + DWORD nWritten; + OVERLAPPED ov; + OVERLAPPED *pov= NULL; + HANDLE hFile; + + DBUG_ENTER("my_win_write"); + DBUG_PRINT("my",("Filedes: %d, Buffer: %p, Count %llu", fd, Buffer, + (ulonglong)Count)); + + if(!Count) + DBUG_RETURN(0); + +#ifdef _WIN64 + if(Count > UINT_MAX) + Count= UINT_MAX; +#endif + + if(my_get_open_flags(fd) & _O_APPEND) + { + /* + Atomic append to the end of file is is done by special initialization of + the OVERLAPPED structure. See MSDN WriteFile documentation for more info. + */ + memset(&ov, 0, sizeof(ov)); + ov.Offset= FILE_WRITE_TO_END_OF_FILE; + ov.OffsetHigh= -1; + pov= &ov; + } + + hFile= my_get_osfhandle(fd); + if(!WriteFile(hFile, Buffer, (DWORD)Count, &nWritten, pov)) + { + my_osmaperr(GetLastError()); + DBUG_RETURN((size_t)-1); + } + DBUG_RETURN(nWritten); +} + + +int my_win_chsize(File fd, my_off_t newlength) +{ + HANDLE hFile; + LARGE_INTEGER length; + DBUG_ENTER("my_win_chsize"); + + hFile= (HANDLE) my_get_osfhandle(fd); + length.QuadPart= newlength; + if (!SetFilePointerEx(hFile, length , NULL , FILE_BEGIN)) + goto err; + if (!SetEndOfFile(hFile)) + goto err; + DBUG_RETURN(0); +err: + my_osmaperr(GetLastError()); + my_errno= errno; + DBUG_RETURN(-1); +} + + +/* Get the file descriptor for stdin,stdout or stderr */ +static File my_get_stdfile_descriptor(FILE *stream) +{ + HANDLE hFile; + DWORD nStdHandle; + DBUG_ENTER("my_get_stdfile_descriptor"); + + if(stream == stdin) + nStdHandle= STD_INPUT_HANDLE; + else if(stream == stdout) + nStdHandle= STD_OUTPUT_HANDLE; + else if(stream == stderr) + nStdHandle= STD_ERROR_HANDLE; + else + DBUG_RETURN(-1); + + hFile= GetStdHandle(nStdHandle); + if(hFile != 
INVALID_HANDLE_VALUE) + DBUG_RETURN(my_open_osfhandle(hFile, 0)); + DBUG_RETURN(-1); +} + + +File my_win_fileno(FILE *file) +{ + HANDLE hFile= (HANDLE)_get_osfhandle(fileno(file)); + int retval= -1; + uint i; + + DBUG_ENTER("my_win_fileno"); + + for(i= MY_FILE_MIN; i < my_file_limit; i++) + { + if(my_file_info[i].fhandle == hFile) + { + retval= i; + break; + } + } + if(retval == -1) + /* try std stream */ + DBUG_RETURN(my_get_stdfile_descriptor(file)); + DBUG_RETURN(retval); +} + + +FILE *my_win_fopen(const char *filename, const char *type) +{ + FILE *file; + int flags= 0; + DBUG_ENTER("my_win_open"); + + /* + If we are not creating, then we need to use my_access to make sure + the file exists since Windows doesn't handle files like "com1.sym" + very well + */ + if (check_if_legal_filename(filename)) + { + errno= EACCES; + DBUG_RETURN(NULL); + } + + file= fopen(filename, type); + if(!file) + DBUG_RETURN(NULL); + + if(strchr(type,'a') != NULL) + flags= O_APPEND; + + /* + Register file handle in my_table_info. + Necessary for my_fileno() + */ + if(my_open_osfhandle((HANDLE)_get_osfhandle(fileno(file)), flags) < 0) + { + fclose(file); + DBUG_RETURN(NULL); + } + DBUG_RETURN(file); +} + + +FILE * my_win_fdopen(File fd, const char *type) +{ + FILE *file; + int crt_fd; + int flags= 0; + + DBUG_ENTER("my_win_fdopen"); + + if(strchr(type,'a') != NULL) + flags= O_APPEND; + /* Convert OS file handle to CRT file descriptor and then call fdopen*/ + crt_fd= _open_osfhandle((intptr_t)my_get_osfhandle(fd), flags); + if(crt_fd < 0) + file= NULL; + else + file= fdopen(crt_fd, type); + DBUG_RETURN(file); +} + + +int my_win_fclose(FILE *file) +{ + File fd; + + DBUG_ENTER("my_win_close"); + fd= my_fileno(file); + if(fd < 0) + DBUG_RETURN(-1); + if(fclose(file) < 0) + DBUG_RETURN(-1); + invalidate_fd(fd); + DBUG_RETURN(0); +} + + + +/* + Quick and dirty my_fstat() implementation for Windows. + Use CRT fstat on temporarily allocated file descriptor. 
+ Patch file size, because size that fstat returns is not + reliable (may be outdated) +*/ +int my_win_fstat(File fd, struct _stati64 *buf) +{ + int crt_fd; + int retval; + HANDLE hFile, hDup; + + DBUG_ENTER("my_win_fstat"); + + hFile= my_get_osfhandle(fd); + if(!DuplicateHandle( GetCurrentProcess(), hFile, GetCurrentProcess(), + &hDup ,0,FALSE,DUPLICATE_SAME_ACCESS)) + { + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); + } + if ((crt_fd= _open_osfhandle((intptr_t)hDup,0)) < 0) + DBUG_RETURN(-1); + + retval= _fstati64(crt_fd, buf); + if(retval == 0) + { + /* File size returned by stat is not accurate (may be outdated), fix it*/ + GetFileSizeEx(hDup, (PLARGE_INTEGER) (&(buf->st_size))); + } + _close(crt_fd); + DBUG_RETURN(retval); +} + + + +int my_win_stat( const char *path, struct _stati64 *buf) +{ + DBUG_ENTER("my_win_stat"); + if(_stati64( path, buf) == 0) + { + /* File size returned by stat is not accurate (may be outdated), fix it*/ + WIN32_FILE_ATTRIBUTE_DATA data; + if (GetFileAttributesEx(path, GetFileExInfoStandard, &data)) + { + LARGE_INTEGER li; + li.LowPart= data.nFileSizeLow; + li.HighPart= data.nFileSizeHigh; + buf->st_size= li.QuadPart; + } + DBUG_RETURN(0); + } + DBUG_RETURN(-1); +} + + + +int my_win_fsync(File fd) +{ + DBUG_ENTER("my_win_fsync"); + if(FlushFileBuffers(my_get_osfhandle(fd))) + DBUG_RETURN(0); + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); +} + + + +int my_win_dup(File fd) +{ + HANDLE hDup; + DBUG_ENTER("my_win_dup"); + if (DuplicateHandle(GetCurrentProcess(), my_get_osfhandle(fd), + GetCurrentProcess(), &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS)) + { + DBUG_RETURN(my_open_osfhandle(hDup, my_get_open_flags(fd))); + } + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); +} + +#endif /*_WIN32*/ diff --git a/mysys/my_write.c b/mysys/my_write.c index 52127545888..5b916d6cb65 100644 --- a/mysys/my_write.c +++ b/mysys/my_write.c @@ -22,7 +22,7 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) { - size_t 
writenbytes, written; + size_t writtenbytes, written; uint errors; DBUG_ENTER("my_write"); DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d", @@ -35,17 +35,27 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) for (;;) { - if ((writenbytes= write(Filedes, Buffer, Count)) == Count) - break; - if (writenbytes != (size_t) -1) - { /* Safeguard */ - written+=writenbytes; - Buffer+=writenbytes; - Count-=writenbytes; +#ifdef _WIN32 + if(Filedes < 0) + { + errno = EINVAL; + return -1; } - my_errno=errno; + writtenbytes= my_win_write(Filedes, Buffer, Count); +#else + writtenbytes= write(Filedes, Buffer, Count); +#endif + if (writtenbytes == Count) + break; + if (writtenbytes != (size_t) -1) + { /* Safeguard */ + written+= writtenbytes; + Buffer+= writtenbytes; + Count-= writtenbytes; + } + my_errno= errno; DBUG_PRINT("error",("Write only %ld bytes, error: %d", - (long) writenbytes, my_errno)); + (long) writtenbytes, my_errno)); #ifndef NO_BACKGROUND #ifdef THREAD if (my_thread_var->abort) @@ -59,19 +69,19 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) continue; } - if ((writenbytes == 0 || writenbytes == (size_t) -1)) + if ((writtenbytes == 0 || writtenbytes == (size_t) -1)) { if (my_errno == EINTR) { DBUG_PRINT("debug", ("my_write() was interrupted and returned %ld", - (long) writenbytes)); + (long) writtenbytes)); continue; /* Interrupted */ } - if (!writenbytes && !errors++) /* Retry once */ + if (!writtenbytes && !errors++) /* Retry once */ { /* We may come here if the file quota is exeeded */ - errno=EFBIG; /* Assume this is the error */ + errno= EFBIG; /* Assume this is the error */ continue; } } @@ -92,5 +102,5 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) } if (MyFlags & (MY_NABP | MY_FNABP)) DBUG_RETURN(0); /* Want only errors */ - DBUG_RETURN(writenbytes+written); + DBUG_RETURN(writtenbytes+written); } /* my_write */ diff --git a/mysys/mysys_priv.h 
b/mysys/mysys_priv.h index 113b64005f2..6d39999aa86 100644 --- a/mysys/mysys_priv.h +++ b/mysys/mysys_priv.h @@ -45,3 +45,27 @@ extern pthread_mutex_t THR_LOCK_charset, THR_LOCK_time; void my_error_unregister_all(void); void my_thread_destroy_mutex(void); my_bool my_wait_for_other_threads_to_die(uint number_of_threads); + +#ifdef _WIN32 +/* my_winfile.c exports, should not be used outside mysys */ +extern File my_win_open(const char *path, int oflag); +extern int my_win_close(File fd); +extern size_t my_win_read(File fd, uchar *buffer, size_t count); +extern size_t my_win_write(File fd, const uchar *buffer, size_t count); +extern size_t my_win_pread(File fd, uchar *buffer, size_t count, + my_off_t offset); +extern size_t my_win_pwrite(File fd, const uchar *buffer, size_t count, + my_off_t offset); +extern my_off_t my_win_lseek(File fd, my_off_t pos, int whence); +extern int my_win_chsize(File fd, my_off_t newlength); +extern FILE* my_win_fopen(const char *filename, const char *type); +extern File my_win_fclose(FILE *file); +extern File my_win_fileno(FILE *file); +extern FILE* my_win_fdopen(File Filedes, const char *type); +extern int my_win_stat(const char *path, struct _stat64 *buf); +extern int my_win_fstat(File fd, struct _stat64 *buf); +extern int my_win_fsync(File fd); +extern File my_win_dup(File fd); +extern File my_win_sopen(const char *path, int oflag, int shflag, int perm); +extern File my_open_osfhandle(HANDLE handle, int oflag); +#endif diff --git a/sql/discover.cc b/sql/discover.cc index 56dc00cc5c4..92af5d56016 100644 --- a/sql/discover.cc +++ b/sql/discover.cc @@ -67,7 +67,7 @@ int readfrm(const char *name, uchar **frmdata, size_t *len) error= 2; if (my_fstat(file, &state, MYF(0))) goto err; - read_len= state.st_size; + read_len= (size_t)state.st_size; // Read whole frm file error= 3; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index d6ac3a341a4..46182a96020 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -3025,16 +3025,16 @@ 
String *Item_load_file::val_str(String *str) func_name(), current_thd->variables.max_allowed_packet); goto err; } - if (tmp_value.alloc(stat_info.st_size)) + if (tmp_value.alloc((size_t)stat_info.st_size)) goto err; if ((file = my_open(file_name->ptr(), O_RDONLY, MYF(0))) < 0) goto err; - if (my_read(file, (uchar*) tmp_value.ptr(), stat_info.st_size, MYF(MY_NABP))) + if (my_read(file, (uchar*) tmp_value.ptr(), (size_t)stat_info.st_size, MYF(MY_NABP))) { my_close(file, MYF(0)); goto err; } - tmp_value.length(stat_info.st_size); + tmp_value.length((uint32)stat_info.st_size); my_close(file, MYF(0)); null_value = 0; DBUG_RETURN(&tmp_value); diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index d70de0dd13c..7ee411f94dc 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -684,11 +684,11 @@ int ha_archive::create(const char *name, TABLE *table_arg, { if (!my_fstat(frm_file, &file_stat, MYF(MY_WME))) { - frm_ptr= (uchar *)my_malloc(sizeof(uchar) * file_stat.st_size, MYF(0)); + frm_ptr= (uchar *)my_malloc(sizeof(uchar) * (size_t)file_stat.st_size, MYF(0)); if (frm_ptr) { - my_read(frm_file, frm_ptr, file_stat.st_size, MYF(0)); - azwrite_frm(&create_stream, (char *)frm_ptr, file_stat.st_size); + my_read(frm_file, frm_ptr, (size_t)file_stat.st_size, MYF(0)); + azwrite_frm(&create_stream, (char *)frm_ptr, (size_t)file_stat.st_size); my_free((uchar*)frm_ptr, MYF(0)); } } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 6f58fd70fbd..d5c92ff7c22 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1061,7 +1061,29 @@ innobase_mysql_tmpfile(void) will be passed to fdopen(), it will be closed by invoking fclose(), which in turn will invoke close() instead of my_close(). */ + +#ifdef _WIN32 + /* Note that on Windows, the integer returned by mysql_tmpfile + has no relation to C runtime file descriptor. 
Here, we need + to call my_get_osfhandle to get the HANDLE and then convert it + to C runtime filedescriptor. */ + { + HANDLE hFile = my_get_osfhandle(fd); + HANDLE hDup; + BOOL bOK = + DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(), + &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS); + if(bOK) { + fd2 = _open_osfhandle((intptr_t)hDup,0); + } + else { + my_osmaperr(GetLastError()); + fd2 = -1; + } + } +#else fd2 = dup(fd); +#endif if (fd2 < 0) { DBUG_PRINT("error",("Got error %d on dup",fd2)); my_errno=errno; diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index a9b81116a90..6ccdd9e3574 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -48,7 +48,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include - +#ifdef _WIN32 +#include +#endif /** @file ha_innodb.cc */ /* Include necessary InnoDB headers */ @@ -1172,7 +1174,28 @@ innobase_mysql_tmpfile(void) will be passed to fdopen(), it will be closed by invoking fclose(), which in turn will invoke close() instead of my_close(). */ +#ifdef _WIN32 + /* Note that on Windows, the integer returned by mysql_tmpfile + has no relation to C runtime file descriptor. Here, we need + to call my_get_osfhandle to get the HANDLE and then convert it + to C runtime filedescriptor. 
*/ + { + HANDLE hFile = my_get_osfhandle(fd); + HANDLE hDup; + BOOL bOK = + DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(), + &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS); + if(bOK) { + fd2 = _open_osfhandle((intptr_t)hDup,0); + } + else { + my_osmaperr(GetLastError()); + fd2 = -1; + } + } +#else fd2 = dup(fd); +#endif if (fd2 < 0) { DBUG_PRINT("error",("Got error %d on dup",fd2)); my_errno=errno; diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index d986525814e..28b7a93164d 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -415,9 +415,8 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, if (!share->temporary) error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX, FLUSH_KEEP, FLUSH_KEEP); -#ifdef HAVE_PREAD + _ma_decrement_open_count(info, 1); -#endif if (share->not_flushed) { share->not_flushed= 0; diff --git a/storage/myisam/mi_locking.c b/storage/myisam/mi_locking.c index 97011831af8..17c1fc83f13 100644 --- a/storage/myisam/mi_locking.c +++ b/storage/myisam/mi_locking.c @@ -239,7 +239,7 @@ int mi_lock_database(MI_INFO *info, int lock_type) break; /* Impossible */ } } -#ifdef __WIN__ +#ifdef _WIN32 else { /* @@ -521,11 +521,11 @@ int _mi_writeinfo(register MI_INFO *info, uint operation) share->state.update_count= info->last_loop= ++info->this_loop; if ((error=mi_state_info_write(share->kfile, &share->state, 1))) olderror=my_errno; -#ifdef __WIN__ +#ifdef _WIN32 if (myisam_flush) { - _commit(share->kfile); - _commit(info->dfile); + my_sync(share->kfile,0); + my_sync(info->dfile,0); } #endif } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index dfe98f09e4f..6d233a4219d 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -55,7 +55,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include - +#ifdef _WIN32 +#include +#endif /** @file ha_innodb.cc */ /* Include necessary InnoDB headers 
*/ @@ -1219,7 +1221,28 @@ innobase_mysql_tmpfile(void) will be passed to fdopen(), it will be closed by invoking fclose(), which in turn will invoke close() instead of my_close(). */ +#ifdef _WIN32 + /* Note that on Windows, the integer returned by mysql_tmpfile + has no relation to C runtime file descriptor. Here, we need + to call my_get_osfhandle to get the HANDLE and then convert it + to C runtime filedescriptor. */ + { + HANDLE hFile = my_get_osfhandle(fd); + HANDLE hDup; + BOOL bOK = + DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(), + &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS); + if(bOK) { + fd2 = _open_osfhandle((intptr_t)hDup,0); + } + else { + my_osmaperr(GetLastError()); + fd2 = -1; + } + } +#else fd2 = dup(fd); +#endif if (fd2 < 0) { DBUG_PRINT("error",("Got error %d on dup",fd2)); my_errno=errno; From 4bca1a786f9986cec7d0487059451e51e2b9479b Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 12 Jun 2011 16:07:18 +0200 Subject: [PATCH 03/14] Fix XtraDB LPBug #714143 : Windows native async io is disabled. The patch uses completion ports for asynchronous IO notification , instead of formerly used notification via event . 
This also removes the limit of 64 async IOs per background IO thread (this limit was forced by using WaitForMultipleObjects in previous AIO implementation) --- CMakeLists.txt | 2 +- storage/innodb_plugin/handler/ha_innodb.cc | 4 +- storage/xtradb/include/os0file.h | 4 +- storage/xtradb/os/os0file.c | 451 ++++++++------------- storage/xtradb/srv/srv0start.c | 13 +- 5 files changed, 181 insertions(+), 293 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c6a65505fe..9d4df51bcb1 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,7 +59,7 @@ IF (MSVC_VERSION GREATER 1400) ENDIF() -SET(CMAKE_INSTALL_PREFIX "C:/MariaDB${MYSQL_BASE_VERSION}") +SET(CMAKE_INSTALL_PREFIX "C:/MariaDB${MYSQL_BASE_VERSION}" CACHE PATH "Default installation directory") SET(INSTALL_ROOT "${CMAKE_INSTALL_PREFIX}") # Set standard options ADD_DEFINITIONS(-DHAVE_YASSL) diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index a9b81116a90..6ea751da332 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -10916,12 +10916,12 @@ static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads, static MYSQL_SYSVAR_ULONG(read_io_threads, innobase_read_io_threads, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of background read I/O threads in InnoDB.", - NULL, NULL, 4, 1, 64, 0); + NULL, NULL, IF_WIN(1,4), 1, 64, 0); static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of background write I/O threads in InnoDB.", - NULL, NULL, 4, 1, 64, 0); + NULL, NULL, IF_WIN(1,4), 1, 64, 0); static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 732e930517b..4a99eb6b0b5 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -152,8 +152,8 @@ log. 
*/ #define OS_FILE_LOG 256 /* This can be ORed to type */ /* @} */ -#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /*!< Win NT does not allow more - than 64 */ +#define OS_AIO_N_PENDING_IOS_PER_THREAD 256 /*!< Windows might be able to handle +more */ /** Modes for aio operations @{ */ #define OS_AIO_NORMAL 21 /*!< Normal asynchronous i/o not for ibuf diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index 5b8e656d8b2..9817edd7f0d 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -121,6 +121,12 @@ typedef struct os_aio_slot_struct os_aio_slot_t; /** The asynchronous i/o array slot structure */ struct os_aio_slot_struct{ +#ifdef WIN_ASYNC_IO + OVERLAPPED control; /*!< Windows control block for the + aio request, MUST be first element in the structure*/ + void *arr; /*!< Array this slot belongs to*/ +#endif + ibool is_read; /*!< TRUE if a read operation */ ulint pos; /*!< index of the slot in the aio array */ @@ -148,12 +154,6 @@ struct os_aio_slot_struct{ and which can be used to identify which pending aio operation was completed */ -#ifdef WIN_ASYNC_IO - os_event_t event; /*!< event object we need in the - OVERLAPPED struct */ - OVERLAPPED control; /*!< Windows control block for the - aio request */ -#endif }; /** The asynchronous i/o array structure */ @@ -182,15 +182,6 @@ struct os_aio_array_struct{ /*!< Number of reserved slots in the aio array outside the ibuf segment */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ -#ifdef __WIN__ - os_native_event_t* native_events; - /*!< Pointer to an array of OS native - event handles where we copied the - handles from slots, in the same - order. 
This can be used in - WaitForMultipleObjects; used only in - Windows */ -#endif }; /** Array of events used in simulated aio */ @@ -250,6 +241,14 @@ UNIV_INTERN ulint os_n_pending_writes = 0; /** Number of pending read operations */ UNIV_INTERN ulint os_n_pending_reads = 0; + +#ifdef _WIN32 +/** IO completion port used by background io threads */ +static HANDLE completion_port; +/** Thread local storage index for the per-thread event used for synchronous IO */ +static DWORD tls_sync_io = TLS_OUT_OF_INDEXES; +#endif + /***********************************************************************//** Gets the operating system version. Currently works only on Windows. @return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */ @@ -286,6 +285,86 @@ os_get_os_version(void) #endif } + +#ifdef _WIN32 +/* +Windows : Handling synchronous IO on files opened asynchronously. + +If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to +a completion port, then every IO on this file would normally be enqueued to the +completion port. Sometimes however we would like to do a synchronous IO. This is +possible if we initialitze have overlapped.hEvent with a valid event and set its +lowest order bit to 1 (see MSDN ReadFile and WriteFile description for more info) + +We'll create this special event once for each thread and store in thread local +storage. +*/ + + +/***********************************************************************//** +Initialize tls index.for event handle used for synchronized IO on files that +might be opened with FILE_FLAG_OVERLAPPED. 
+*/ +static void win_init_syncio_event() +{ + tls_sync_io = TlsAlloc(); + ut_a(tls_sync_io != TLS_OUT_OF_INDEXES); +} + +/***********************************************************************//** +Retrieve per-thread event for doing synchronous io on asyncronously opened files +*/ +static HANDLE win_get_syncio_event() +{ + HANDLE h; + if(tls_sync_io == TLS_OUT_OF_INDEXES){ + win_init_syncio_event(); + } + + h = (HANDLE)TlsGetValue(tls_sync_io); + if (h) + return h; + h = CreateEventA(NULL, FALSE, FALSE, NULL); + ut_a(h); + h = (HANDLE)((uintptr_t)h | 1); + TlsSetValue(tls_sync_io, h); + return h; +} + +/* + TLS destructor, inspired by Chromium code + http://src.chromium.org/svn/trunk/src/base/threading/thread_local_storage_win.cc +*/ + +static void win_free_syncio_event() +{ + HANDLE h = win_get_syncio_event(); + if (h) { + CloseHandle(h); + } +} + +static void NTAPI win_tls_thread_exit(PVOID module, DWORD reason, PVOID reserved) { + if (DLL_THREAD_DETACH == reason || DLL_PROCESS_DETACH == reason) + win_free_syncio_event(); +} + +#ifdef _WIN64 +#pragma comment(linker, "/INCLUDE:_tls_used") +#pragma comment(linker, "/INCLUDE:p_thread_callback_base") +#pragma const_seg(".CRT$XLB") +extern const PIMAGE_TLS_CALLBACK p_thread_callback_base; +const PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit; +#pragma data_seg() +#else +#pragma comment(linker, "/INCLUDE:__tls_used") +#pragma comment(linker, "/INCLUDE:_p_thread_callback_base") +#pragma data_seg(".CRT$XLB") +PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit; +#pragma data_seg() +#endif +#endif /*_WIN32 */ + /***********************************************************************//** Retrieves the last error number if an error occurs in a file io function. 
The number should be retrieved before any other OS calls (because they may @@ -611,6 +690,9 @@ os_io_init_simple(void) for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { os_file_seek_mutexes[i] = os_mutex_create(NULL); } +#ifdef _WIN32 + win_init_syncio_event(); +#endif } /***********************************************************************//** @@ -1325,6 +1407,8 @@ try_again: #endif #ifdef UNIV_NON_BUFFERED_IO # ifndef UNIV_HOTBACKUP + if (type == OS_LOG_FILE) + attributes = attributes | FILE_FLAG_SEQUENTIAL_SCAN; if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) { /* Do not use unbuffered i/o to log files because value 2 denotes that we do not flush the log at every @@ -1402,6 +1486,9 @@ try_again: } } else { *success = TRUE; + if (os_aio_use_native_aio && ((attributes & FILE_FLAG_OVERLAPPED) != 0)) { + ut_a(CreateIoCompletionPort(file, completion_port, 0, 0)); + } } return(file); @@ -2350,13 +2437,10 @@ _os_file_read( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; DWORD low; DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. 
*/ @@ -2378,33 +2462,18 @@ try_again: os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + memset (&overlapped, 0, sizeof (overlapped)); + overlapped.Offset = low; + overlapped.OffsetHigh = high; + overlapped.hEvent = win_get_syncio_event(); - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; + ret = ReadFile(file, buf, n, NULL, &overlapped); + if (ret) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); + } + else if(GetLastError() == ERROR_IO_PENDING) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2433,9 +2502,6 @@ try_again: (ulong)n, (ulong)offset_high, (ulong)offset, (long)ret); #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif retry = os_file_handle_error(NULL, "read"); if (retry) { @@ -2477,13 +2543,10 @@ os_file_read_no_error_handling( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; DWORD low; DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. 
*/ @@ -2505,33 +2568,19 @@ try_again: os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ + memset (&overlapped, 0, sizeof (overlapped)); + overlapped.Offset = low; + overlapped.OffsetHigh = high; + overlapped.hEvent = win_get_syncio_event(); - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; + ret = ReadFile(file, buf, n, NULL, &overlapped); + if (ret) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); + } + else if(GetLastError() == ERROR_IO_PENDING) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2554,9 +2603,6 @@ try_again: return(TRUE); } #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif retry = os_file_handle_error_no_exit(NULL, "read"); if (retry) { @@ -2609,14 +2655,11 @@ os_file_write( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; DWORD low; DWORD high; ulint n_retries = 0; ulint err; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. 
*/ @@ -2636,56 +2679,18 @@ retry: os_n_pending_writes++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / write operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + memset (&overlapped, 0, sizeof (overlapped)); + overlapped.Offset = low; + overlapped.OffsetHigh = high; + overlapped.hEvent = win_get_syncio_event(); - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes--; - os_mutex_exit(os_file_count_mutex); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: File pointer positioning to" - " file %s failed at\n" - "InnoDB: offset %lu %lu. Operating system" - " error number %lu.\n" - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - REFMAN "operating-system-error-codes.html\n", - name, (ulong) offset_high, (ulong) offset, - (ulong) GetLastError()); - - return(FALSE); + ret = WriteFile(file, buf, n, NULL, &overlapped); + if (ret) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); } - - ret = WriteFile(file, buf, (DWORD) n, &len, NULL); - - /* Always do fsync to reduce the probability that when the OS crashes, - a database page is only partially physically written to disk. 
*/ - -# ifdef UNIV_DO_FLUSH - if (!os_do_not_call_flush_at_each_write) { - ut_a(TRUE == os_file_flush(file)); + else if(GetLastError() == ERROR_IO_PENDING) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); } -# endif /* UNIV_DO_FLUSH */ - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ os_mutex_enter(os_file_count_mutex); os_n_pending_writes--; @@ -3071,9 +3076,6 @@ os_aio_array_create( os_aio_array_t* array; ulint i; os_aio_slot_t* slot; -#ifdef WIN_ASYNC_IO - OVERLAPPED* over; -#endif ut_a(n > 0); ut_a(n_segments > 0); @@ -3089,23 +3091,11 @@ os_aio_array_create( array->n_segments = n_segments; array->n_reserved = 0; array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); -#ifdef __WIN__ - array->native_events = ut_malloc(n * sizeof(os_native_event_t)); -#endif + for (i = 0; i < n; i++) { slot = os_aio_array_get_nth_slot(array, i); - slot->pos = i; slot->reserved = FALSE; -#ifdef WIN_ASYNC_IO - slot->event = os_event_create(NULL); - - over = &(slot->control); - - over->hEvent = slot->event->handle; - - *((array->native_events) + i) = over->hEvent; -#endif } return(array); @@ -3119,18 +3109,7 @@ os_aio_array_free( /*==============*/ os_aio_array_t* array) /*!< in, own: array to free */ { -#ifdef WIN_ASYNC_IO - ulint i; - for (i = 0; i < array->n_slots; i++) { - os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); - os_event_free(slot->event); - } -#endif /* WIN_ASYNC_IO */ - -#ifdef __WIN__ - ut_free(array->native_events); -#endif /* __WIN__ */ os_mutex_free(array->mutex); os_event_free(array->not_full); os_event_free(array->is_empty); @@ -3209,7 +3188,11 @@ os_aio_init( } os_last_printout = time(NULL); - +#ifdef _WIN32 + ut_a(completion_port == 0); + completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); + ut_a(completion_port); +#endif } /*********************************************************************** @@ -3251,11 +3234,10 @@ os_aio_array_wake_win_aio_at_shutdown( 
/*==================================*/ os_aio_array_t* array) /*!< in: aio array */ { - ulint i; - - for (i = 0; i < array->n_slots; i++) { - - os_event_set((array->slots + i)->event); + if(completion_port) + { + ut_a(CloseHandle(completion_port)); + completion_port = 0; } } #endif @@ -3480,7 +3462,8 @@ found: control = &(slot->control); control->Offset = (DWORD)offset; control->OffsetHigh = (DWORD)offset_high; - os_event_reset(slot->event); + control->hEvent = 0; + slot->arr = array; #endif os_mutex_exit(array->mutex); @@ -3517,9 +3500,6 @@ os_aio_array_free_slot( os_event_set(array->is_empty); } -#ifdef WIN_ASYNC_IO - os_event_reset(slot->event); -#endif os_mutex_exit(array->mutex); } @@ -3689,12 +3669,8 @@ os_aio( os_aio_array_t* array; os_aio_slot_t* slot; #ifdef WIN_ASYNC_IO - ibool retval; - BOOL ret = TRUE; DWORD len = (DWORD) n; - struct fil_node_struct * dummy_mess1; - void* dummy_mess2; - ulint dummy_type; + BOOL ret; #endif ulint err = 0; ibool retry; @@ -3713,26 +3689,23 @@ os_aio( wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); - if (mode == OS_AIO_SYNC -#ifdef WIN_ASYNC_IO - && !os_aio_use_native_aio -#endif - ) { + if (mode == OS_AIO_SYNC) + { + ibool ret; /* This is actually an ordinary synchronous read or write: - no need to use an i/o-handler thread. NOTE that if we use - Windows async i/o, Windows does not allow us to use - ordinary synchronous os_file_read etc. on the same file, - therefore we have built a special mechanism for synchronous - wait in the Windows case. 
*/ + no need to use an i/o-handler thread */ if (type == OS_FILE_READ) { - return(_os_file_read(file, buf, offset, - offset_high, n, trx)); + ret = _os_file_read(file, buf, offset, + offset_high, n, trx); } + else { + ut_a(type == OS_FILE_WRITE); - ut_a(type == OS_FILE_WRITE); - - return(os_file_write(name, file, buf, offset, offset_high, n)); + ret = os_file_write(name, file, buf, offset, offset_high, n); + } + ut_a(ret); + return ret; } try_again: @@ -3775,6 +3748,8 @@ try_again: ret = ReadFile(file, buf, (DWORD)n, &len, &(slot->control)); + if(!ret && GetLastError() != ERROR_IO_PENDING) + err = 1; #endif } else { if (!wake_later) { @@ -3789,6 +3764,8 @@ try_again: os_n_file_writes++; ret = WriteFile(file, buf, (DWORD)n, &len, &(slot->control)); + if(!ret && GetLastError() != ERROR_IO_PENDING) + err = 1; #endif } else { if (!wake_later) { @@ -3801,34 +3778,7 @@ try_again: ut_error; } -#ifdef WIN_ASYNC_IO - if (os_aio_use_native_aio) { - if ((ret && len == n) - || (!ret && GetLastError() == ERROR_IO_PENDING)) { - /* aio was queued successfully! */ - if (mode == OS_AIO_SYNC) { - /* We want a synchronous i/o operation on a - file where we also use async i/o: in Windows - we must use the same wait mechanism as for - async i/o */ - - retval = os_aio_windows_handle(ULINT_UNDEFINED, - slot->pos, - &dummy_mess1, - &dummy_mess2, - &dummy_type, - &space_id); - - return(retval); - } - - return(TRUE); - } - - err = 1; /* Fall through the next if */ - } -#endif if (err == 0) { /* aio was queued successfully! 
*/ @@ -3881,52 +3831,21 @@ os_aio_windows_handle( ulint* space_id) { ulint orig_seg = segment; - os_aio_array_t* array; os_aio_slot_t* slot; - ulint n; - ulint i; ibool ret_val; BOOL ret; DWORD len; BOOL retry = FALSE; + ULONG_PTR dummy_key; - if (segment == ULINT_UNDEFINED) { - array = os_aio_sync_array; - segment = 0; - } else { - segment = os_aio_get_array_and_local_segment(&array, segment); + + ret = GetQueuedCompletionStatus(completion_port, &len, &dummy_key, + (OVERLAPPED **)&slot, INFINITE); + + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + os_thread_exit(NULL); } - /* NOTE! We only access constant fields in os_aio_array. Therefore - we do not have to acquire the protecting mutex yet */ - - ut_ad(os_aio_validate()); - ut_ad(segment < array->n_segments); - - n = array->n_slots; - - if (array == os_aio_sync_array) { - os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); - i = pos; - } else { - srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); - i = os_event_wait_multiple(n, - (array->native_events) - ); - } - - os_mutex_enter(array->mutex); - - slot = os_aio_array_get_nth_slot(array, i); - - ut_a(slot->reserved); - - if (orig_seg != ULINT_UNDEFINED) { - srv_set_io_thread_op_info(orig_seg, - "get windows aio return value"); - } - - ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE); *message1 = slot->message1; *message2 = slot->message2; @@ -3951,8 +3870,6 @@ os_aio_windows_handle( ret_val = FALSE; } - os_mutex_exit(array->mutex); - if (retry) { /* retry failed read/write operation synchronously. No need to hold array->mutex. 
*/ @@ -3961,37 +3878,19 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - ret = WriteFile(slot->file, slot->buf, - (DWORD) slot->len, &len, - &(slot->control)); - + ret_val = os_file_write(slot->name, slot->file, slot->buf, + slot->control.Offset, slot->control.OffsetHigh, slot->len); break; case OS_FILE_READ: - ret = ReadFile(slot->file, slot->buf, - (DWORD) slot->len, &len, - &(slot->control)); - + ret_val = os_file_read(slot->file, slot->buf, + slot->control.Offset, slot->control.OffsetHigh, slot->len); break; default: ut_error; } - - if (!ret && GetLastError() == ERROR_IO_PENDING) { - /* aio was queued successfully! - We want a synchronous i/o operation on a - file where we also use async i/o: in Windows - we must use the same wait mechanism as for - async i/o */ - - ret = GetOverlappedResult(slot->file, - &(slot->control), - &len, TRUE); - } - - ret_val = ret && len == slot->len; } - os_aio_array_free_slot(array, slot); + os_aio_array_free_slot((os_aio_array_t *)slot->arr, slot); return(ret_val); } diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index cef045d72e1..60ad0904633 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1274,13 +1274,7 @@ innobase_start_or_create_for_mysql(void) break; default: /* On Win 2000 and XP use async i/o */ - //os_aio_use_native_aio = TRUE; - os_aio_use_native_aio = FALSE; - fprintf(stderr, - "InnoDB: Windows native async i/o is disabled as default.\n" - "InnoDB: It is not applicable for the current" - " multi io threads implementation.\n"); - break; + os_aio_use_native_aio = TRUE; } #endif if (srv_file_flush_method_str == NULL) { @@ -1320,11 +1314,6 @@ innobase_start_or_create_for_mysql(void) "async_unbuffered")) { srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; os_aio_use_native_aio = TRUE; - srv_n_read_io_threads = srv_n_write_io_threads = 1; - fprintf(stderr, - "InnoDB: 'async_unbuffered' was detected as innodb_flush_method.\n" - "InnoDB: 
Windows native async i/o is enabled.\n" - "InnoDB: And io threads are restricted.\n"); #endif } else { fprintf(stderr, From fe054adfcaecbd891da802517826a951bb4ca377 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 12 Jun 2011 16:09:28 +0200 Subject: [PATCH 04/14] Backport fix for MySQL bug #56405 : use native windows condition variables and rwlocks in mysys, if Windows supports it. --- include/my_pthread.h | 76 +++++-- mysys/my_wincond.c | 232 +++++++++++++++++---- mysys/my_winthread.c | 15 +- mysys/thr_rwlock.c | 160 ++++++++++++++ storage/innodb_plugin/handler/ha_innodb.cc | 22 ++ storage/pbxt/src/pthread_xt.cc | 43 +--- 6 files changed, 450 insertions(+), 98 deletions(-) diff --git a/include/my_pthread.h b/include/my_pthread.h index fffb883912a..aa9eb8fc807 100644 --- a/include/my_pthread.h +++ b/include/my_pthread.h @@ -48,19 +48,30 @@ typedef struct st_pthread_link { struct st_pthread_link *next; } pthread_link; -typedef struct { - uint32 waiting; - CRITICAL_SECTION lock_waiting; - - enum { - SIGNAL= 0, - BROADCAST= 1, - MAX_EVENTS= 2 - } EVENTS; - - HANDLE events[MAX_EVENTS]; - HANDLE broadcast_block_event; +/** + Implementation of Windows condition variables. + We use native conditions on Vista and later, and fallback to own + implementation on earlier OS version. +*/ +typedef union +{ + /* Native condition (used on Vista and later) */ + CONDITION_VARIABLE native_cond; + /* Own implementation (used on XP) */ + struct + { + uint32 waiting; + CRITICAL_SECTION lock_waiting; + enum + { + SIGNAL= 0, + BROADCAST= 1, + MAX_EVENTS= 2 + } EVENTS; + HANDLE events[MAX_EVENTS]; + HANDLE broadcast_block_event; + }; } pthread_cond_t; @@ -632,6 +643,45 @@ int my_pthread_fastmutex_lock(my_pthread_fastmutex_t *mp); #endif #define my_rwlock_init(A,B) rwlock_init((A),USYNC_THREAD,0) #else +#ifdef _WIN32 +/** + Implementation of Windows rwlock. + + We use native (slim) rwlocks on Win7 and later, and fallback to portable + implementation on earlier Windows. 
+ + slim rwlock are also available on Vista/WS2008, but we do not use it + ("trylock" APIs are missing on Vista) +*/ +typedef union +{ + /* Native rwlock (is_srwlock == TRUE) */ + struct + { + SRWLOCK srwlock; /* native reader writer lock */ + BOOL have_exclusive_srwlock; /* used for unlock */ + }; + + /* + Portable implementation (is_srwlock == FALSE) + Fields are identical with Unix my_rw_lock_t fields. + */ + struct + { + pthread_mutex_t lock; /* lock for structure */ + pthread_cond_t readers; /* waiting readers */ + pthread_cond_t writers; /* waiting writers */ + int state; /* -1:writer,0:free,>0:readers */ + int waiters; /* number of waiting writers */ +#ifdef SAFE_MUTEX + pthread_t write_thread; +#endif + }; +} my_rw_lock_t; + + +#else /* _WIN32 */ + /* Use our own version of read/write locks */ typedef struct _my_rw_lock_t { pthread_mutex_t lock; /* lock for structure */ @@ -641,6 +691,8 @@ typedef struct _my_rw_lock_t { int waiters; /* number of waiting writers */ } my_rw_lock_t; +#endif /* _WIN32 */ + #define rw_lock_t my_rw_lock_t #define rw_rdlock(A) my_rw_rdlock((A)) #define rw_wrlock(A) my_rw_wrlock((A)) diff --git a/mysys/my_wincond.c b/mysys/my_wincond.c index b869b22bdea..13bf0b78766 100644 --- a/mysys/my_wincond.c +++ b/mysys/my_wincond.c @@ -26,7 +26,108 @@ #include #include -int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) + +/* + Windows native condition variables. 
We use runtime loading / function + pointers, because they are not available on XP +*/ + +/* Prototypes and function pointers for condition variable functions */ +typedef VOID (WINAPI * InitializeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); + +typedef BOOL (WINAPI * SleepConditionVariableCSProc) + (PCONDITION_VARIABLE ConditionVariable, + PCRITICAL_SECTION CriticalSection, + DWORD dwMilliseconds); + +typedef VOID (WINAPI * WakeAllConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); + +typedef VOID (WINAPI * WakeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); + +static InitializeConditionVariableProc my_InitializeConditionVariable; +static SleepConditionVariableCSProc my_SleepConditionVariableCS; +static WakeAllConditionVariableProc my_WakeAllConditionVariable; +static WakeConditionVariableProc my_WakeConditionVariable; + + +/** + Indicates if we have native condition variables, + initialized first time pthread_cond_init is called. +*/ + +static BOOL have_native_conditions= FALSE; + + +/** + Check if native conditions can be used, load function pointers +*/ + +static void check_native_cond_availability(void) +{ + HMODULE module= GetModuleHandle("kernel32"); + + my_InitializeConditionVariable= (InitializeConditionVariableProc) + GetProcAddress(module, "InitializeConditionVariable"); + my_SleepConditionVariableCS= (SleepConditionVariableCSProc) + GetProcAddress(module, "SleepConditionVariableCS"); + my_WakeAllConditionVariable= (WakeAllConditionVariableProc) + GetProcAddress(module, "WakeAllConditionVariable"); + my_WakeConditionVariable= (WakeConditionVariableProc) + GetProcAddress(module, "WakeConditionVariable"); + + if (my_InitializeConditionVariable) + have_native_conditions= TRUE; +} + + + +/** + Convert abstime to milliseconds +*/ + +static DWORD get_milliseconds(const struct timespec *abstime) +{ + long long millis; + union ft64 now; + + if (abstime == NULL) + return INFINITE; + + 
GetSystemTimeAsFileTime(&now.ft); + + /* + Calculate time left to abstime + - subtract start time from current time(values are in 100ns units) + - convert to millisec by dividing with 10000 + */ + millis= (abstime->tv.i64 - now.i64) / 10000; + + /* Don't allow the timeout to be negative */ + if (millis < 0) + return 0; + + /* + Make sure the calculated timeout does not exceed original timeout + value which could cause "wait for ever" if system time changes + */ + if (millis > abstime->max_timeout_msec) + millis= abstime->max_timeout_msec; + + if (millis > UINT_MAX) + millis= UINT_MAX; + + return (DWORD)millis; +} + + +/* + Old (pre-vista) implementation using events +*/ + +static int legacy_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) { cond->waiting= 0; InitializeCriticalSection(&cond->lock_waiting); @@ -55,7 +156,8 @@ int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) return 0; } -int pthread_cond_destroy(pthread_cond_t *cond) + +static int legacy_cond_destroy(pthread_cond_t *cond) { DeleteCriticalSection(&cond->lock_waiting); @@ -67,48 +169,13 @@ int pthread_cond_destroy(pthread_cond_t *cond) } -int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) -{ - return pthread_cond_timedwait(cond,mutex,NULL); -} - - -int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, +static int legacy_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, struct timespec *abstime) { int result; - long timeout; - union ft64 now; - - if( abstime != NULL ) - { - GetSystemTimeAsFileTime(&now.ft); - - /* - Calculate time left to abstime - - subtract start time from current time(values are in 100ns units) - - convert to millisec by dividing with 10000 - */ - timeout= (long)((abstime->tv.i64 - now.i64) / 10000); - - /* Don't allow the timeout to be negative */ - if (timeout < 0) - timeout= 0L; - - /* - Make sure the calucated timeout does not exceed original timeout - value which could cause "wait for ever" 
if system time changes - */ - if (timeout > abstime->max_timeout_msec) - timeout= abstime->max_timeout_msec; - - } - else - { - /* No time specified; don't expire */ - timeout= INFINITE; - } + DWORD timeout; + timeout= get_milliseconds(abstime); /* Block access if previous broadcast hasn't finished. This is just for safety and should normally not @@ -144,7 +211,7 @@ int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, return result == WAIT_TIMEOUT ? ETIMEDOUT : 0; } -int pthread_cond_signal(pthread_cond_t *cond) +static int legacy_cond_signal(pthread_cond_t *cond) { EnterCriticalSection(&cond->lock_waiting); @@ -157,7 +224,7 @@ int pthread_cond_signal(pthread_cond_t *cond) } -int pthread_cond_broadcast(pthread_cond_t *cond) +static int legacy_cond_broadcast(pthread_cond_t *cond) { EnterCriticalSection(&cond->lock_waiting); /* @@ -179,6 +246,87 @@ int pthread_cond_broadcast(pthread_cond_t *cond) } +/* + Posix API functions. Just choose between native and legacy implementation. +*/ + +int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) +{ + /* + Once initialization is used here rather than in my_init(), to + 1) avoid my_init() pitfalls- undefined order in which initialization should + run + 2) be potentially useful C++ (in static constructors that run before main()) + 3) just to simplify the API. + Also, the overhead of my_pthread_once is very small. 
+ */ + static my_pthread_once_t once_control= MY_PTHREAD_ONCE_INIT; + my_pthread_once(&once_control, check_native_cond_availability); + + if (have_native_conditions) + { + my_InitializeConditionVariable(&cond->native_cond); + return 0; + } + else + return legacy_cond_init(cond, attr); +} + + +int pthread_cond_destroy(pthread_cond_t *cond) +{ + if (have_native_conditions) + return 0; /* no destroy function */ + else + return legacy_cond_destroy(cond); +} + + +int pthread_cond_broadcast(pthread_cond_t *cond) +{ + if (have_native_conditions) + { + my_WakeAllConditionVariable(&cond->native_cond); + return 0; + } + else + return legacy_cond_broadcast(cond); +} + + +int pthread_cond_signal(pthread_cond_t *cond) +{ + if (have_native_conditions) + { + my_WakeConditionVariable(&cond->native_cond); + return 0; + } + else + return legacy_cond_signal(cond); +} + + +int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, + struct timespec *abstime) +{ + if (have_native_conditions) + { + DWORD timeout= get_milliseconds(abstime); + if (!my_SleepConditionVariableCS(&cond->native_cond, mutex, timeout)) + return ETIMEDOUT; + return 0; + } + else + return legacy_cond_timedwait(cond, mutex, abstime); +} + + +int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ + return pthread_cond_timedwait(cond, mutex, NULL); +} + + int pthread_attr_init(pthread_attr_t *connect_att) { connect_att->dwStackSize = 0; diff --git a/mysys/my_winthread.c b/mysys/my_winthread.c index 6adf24ef543..5fbad39597c 100644 --- a/mysys/my_winthread.c +++ b/mysys/my_winthread.c @@ -156,8 +156,19 @@ int win_pthread_setspecific(void *a,void *b,uint length) int my_pthread_once(my_pthread_once_t *once_control, void (*init_routine)(void)) { - LONG state= InterlockedCompareExchange(once_control, MY_PTHREAD_ONCE_INPROGRESS, - MY_PTHREAD_ONCE_INIT); + LONG state; + + /* + Do "dirty" read to find out if initialization is already done, to + save an interlocked operation in common case. 
Memory barriers are ensured by + Visual C++ volatile implementation. + */ + if (*once_control == MY_PTHREAD_ONCE_DONE) + return 0; + + state= InterlockedCompareExchange(once_control, MY_PTHREAD_ONCE_INPROGRESS, + MY_PTHREAD_ONCE_INIT); + switch(state) { case MY_PTHREAD_ONCE_INIT: diff --git a/mysys/thr_rwlock.c b/mysys/thr_rwlock.c index ea98a854a4d..2978d91090d 100644 --- a/mysys/thr_rwlock.c +++ b/mysys/thr_rwlock.c @@ -19,6 +19,119 @@ #if defined(THREAD) && !defined(HAVE_PTHREAD_RWLOCK_RDLOCK) && !defined(HAVE_RWLOCK_INIT) #include +#ifdef _WIN32 + +static BOOL have_srwlock= FALSE; +/* Prototypes and function pointers for windows functions */ +typedef VOID (WINAPI* srw_func) (PSRWLOCK SRWLock); +typedef BOOL (WINAPI* srw_bool_func) (PSRWLOCK SRWLock); + +static srw_func my_InitializeSRWLock; +static srw_func my_AcquireSRWLockExclusive; +static srw_func my_ReleaseSRWLockExclusive; +static srw_func my_AcquireSRWLockShared; +static srw_func my_ReleaseSRWLockShared; + +static srw_bool_func my_TryAcquireSRWLockExclusive; +static srw_bool_func my_TryAcquireSRWLockShared; + +/** + Check for presence of Windows slim reader writer lock function. + Load function pointers. +*/ + +static void check_srwlock_availability(void) +{ + HMODULE module= GetModuleHandle("kernel32"); + + my_InitializeSRWLock= (srw_func) GetProcAddress(module, + "InitializeSRWLock"); + my_AcquireSRWLockExclusive= (srw_func) GetProcAddress(module, + "AcquireSRWLockExclusive"); + my_AcquireSRWLockShared= (srw_func) GetProcAddress(module, + "AcquireSRWLockShared"); + my_ReleaseSRWLockExclusive= (srw_func) GetProcAddress(module, + "ReleaseSRWLockExclusive"); + my_ReleaseSRWLockShared= (srw_func) GetProcAddress(module, + "ReleaseSRWLockShared"); + my_TryAcquireSRWLockExclusive= (srw_bool_func) GetProcAddress(module, + "TryAcquireSRWLockExclusive"); + my_TryAcquireSRWLockShared= (srw_bool_func) GetProcAddress(module, + "TryAcquireSRWLockShared"); + + /* + We currently require TryAcquireSRWLockExclusive. 
This API is missing on + Vista, this means SRWLock are only used starting with Win7. + + If "trylock" usage for rwlocks is eliminated from server codebase (it is used + in a single place currently, in query cache), then SRWLock can be enabled on + Vista too. In this case condition below needs to be changed to e.g check + for my_InitializeSRWLock. + */ + + if (my_TryAcquireSRWLockExclusive) + have_srwlock= TRUE; + +} + + +static int srw_init(my_rw_lock_t *rwp) +{ + my_InitializeSRWLock(&rwp->srwlock); + rwp->have_exclusive_srwlock = FALSE; + return 0; +} + + +static int srw_rdlock(my_rw_lock_t *rwp) +{ + my_AcquireSRWLockShared(&rwp->srwlock); + return 0; +} + + +static int srw_tryrdlock(my_rw_lock_t *rwp) +{ + + if (!my_TryAcquireSRWLockShared(&rwp->srwlock)) + return EBUSY; + return 0; +} + + +static int srw_wrlock(my_rw_lock_t *rwp) +{ + my_AcquireSRWLockExclusive(&rwp->srwlock); + rwp->have_exclusive_srwlock= TRUE; + return 0; +} + + +static int srw_trywrlock(my_rw_lock_t *rwp) +{ + if (!my_TryAcquireSRWLockExclusive(&rwp->srwlock)) + return EBUSY; + rwp->have_exclusive_srwlock= TRUE; + return 0; +} + + +static int srw_unlock(my_rw_lock_t *rwp) +{ + if (rwp->have_exclusive_srwlock) + { + rwp->have_exclusive_srwlock= FALSE; + my_ReleaseSRWLockExclusive(&rwp->srwlock); + } + else + { + my_ReleaseSRWLockShared(&rwp->srwlock); + } + return 0; +} + +#endif /*_WIN32 */ + /* Source base from Sun Microsystems SPILT, simplified for MySQL use -- Joshua Chamas @@ -62,6 +175,22 @@ int my_rwlock_init(rw_lock_t *rwp, void *arg __attribute__((unused))) { pthread_condattr_t cond_attr; +#ifdef _WIN32 + /* + Once initialization is used here rather than in my_init(), in order to + - avoid my_init() pitfalls- (undefined order in which initialization should + run) + - be potentially useful C++ (static constructors) + - just to simplify the API. + Also, the overhead is of my_pthread_once is very small. 
+ */ + static my_pthread_once_t once_control= MY_PTHREAD_ONCE_INIT; + my_pthread_once(&once_control, check_srwlock_availability); + + if (have_srwlock) + return srw_init(rwp); +#endif + pthread_mutex_init( &rwp->lock, MY_MUTEX_INIT_FAST); pthread_condattr_init( &cond_attr ); pthread_cond_init( &rwp->readers, &cond_attr ); @@ -77,6 +206,10 @@ int my_rwlock_init(rw_lock_t *rwp, void *arg __attribute__((unused))) int my_rwlock_destroy(rw_lock_t *rwp) { +#ifdef _WIN32 + if (have_srwlock) + return 0; /* no destroy function */ +#endif pthread_mutex_destroy( &rwp->lock ); pthread_cond_destroy( &rwp->readers ); pthread_cond_destroy( &rwp->writers ); @@ -86,6 +219,11 @@ int my_rwlock_destroy(rw_lock_t *rwp) int my_rw_rdlock(rw_lock_t *rwp) { +#ifdef _WIN32 + if (have_srwlock) + return srw_rdlock(rwp); +#endif + pthread_mutex_lock(&rwp->lock); /* active or queued writers */ @@ -100,6 +238,12 @@ int my_rw_rdlock(rw_lock_t *rwp) int my_rw_tryrdlock(rw_lock_t *rwp) { int res; + +#ifdef _WIN32 + if (have_srwlock) + return srw_tryrdlock(rwp); +#endif + pthread_mutex_lock(&rwp->lock); if ((rwp->state < 0 ) || rwp->waiters) res= EBUSY; /* Can't get lock */ @@ -115,6 +259,11 @@ int my_rw_tryrdlock(rw_lock_t *rwp) int my_rw_wrlock(rw_lock_t *rwp) { +#ifdef _WIN32 + if (have_srwlock) + return srw_wrlock(rwp); +#endif + pthread_mutex_lock(&rwp->lock); rwp->waiters++; /* another writer queued */ @@ -130,6 +279,12 @@ int my_rw_wrlock(rw_lock_t *rwp) int my_rw_trywrlock(rw_lock_t *rwp) { int res; + +#ifdef _WIN32 + if (have_srwlock) + return srw_trywrlock(rwp); +#endif + pthread_mutex_lock(&rwp->lock); if (rwp->state) res= EBUSY; /* Can't get lock */ @@ -145,6 +300,11 @@ int my_rw_trywrlock(rw_lock_t *rwp) int my_rw_unlock(rw_lock_t *rwp) { +#ifdef _WIN32 + if (have_srwlock) + return srw_unlock(rwp); +#endif + DBUG_PRINT("rw_unlock", ("state: %d waiters: %d", rwp->state, rwp->waiters)); pthread_mutex_lock(&rwp->lock); diff --git a/storage/innodb_plugin/handler/ha_innodb.cc 
b/storage/innodb_plugin/handler/ha_innodb.cc index a9b81116a90..e0c7d2081f0 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -1172,7 +1172,29 @@ innobase_mysql_tmpfile(void) will be passed to fdopen(), it will be closed by invoking fclose(), which in turn will invoke close() instead of my_close(). */ + +#ifdef _WIN32 + /* Note that on Windows, the integer returned by mysql_tmpfile + has no relation to C runtime file descriptor. Here, we need + to call my_get_osfhandle to get the HANDLE and then convert it + to C runtime filedescriptor. */ + { + HANDLE hFile = my_get_osfhandle(fd); + HANDLE hDup; + BOOL bOK = + DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(), + &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS); + if(bOK) { + fd2 = _open_osfhandle((intptr_t)hDup,0); + } + else { + my_osmaperr(GetLastError()); + fd2 = -1; + } + } +#else fd2 = dup(fd); +#endif if (fd2 < 0) { DBUG_PRINT("error",("Got error %d on dup",fd2)); my_errno=errno; diff --git a/storage/pbxt/src/pthread_xt.cc b/storage/pbxt/src/pthread_xt.cc index c5dc2e41fdd..284672d2c79 100755 --- a/storage/pbxt/src/pthread_xt.cc +++ b/storage/pbxt/src/pthread_xt.cc @@ -396,48 +396,7 @@ xtPublic int xt_p_cond_wait(xt_cond_type *cond, xt_mutex_type *mutex) xtPublic int xt_p_cond_timedwait(xt_cond_type *cond, xt_mutex_type *mt, struct timespec *abstime) { - pthread_mutex_t *mutex = &mt->mt_cs; - int result; - long timeout; - union ft64 now; - - if (abstime != NULL) { - GetSystemTimeAsFileTime(&now.ft); - - timeout = (long)((abstime->tv.i64 - now.i64) / 10000); - if (timeout < 0) - timeout = 0L; - if (timeout > abstime->max_timeout_msec) - timeout = abstime->max_timeout_msec; - } - else - timeout= INFINITE; - - WaitForSingleObject(cond->broadcast_block_event, INFINITE); - - EnterCriticalSection(&cond->lock_waiting); - cond->waiting++; - LeaveCriticalSection(&cond->lock_waiting); - - LeaveCriticalSection(mutex); - - result= WaitForMultipleObjects(2, 
cond->events, FALSE, timeout); - - EnterCriticalSection(&cond->lock_waiting); - cond->waiting--; - - if (cond->waiting == 0) { - /* The last waiter must reset the broadcast - * state (whther there was a broadcast or not)! - */ - ResetEvent(cond->events[xt_cond_type::BROADCAST]); - SetEvent(cond->broadcast_block_event); - } - LeaveCriticalSection(&cond->lock_waiting); - - EnterCriticalSection(mutex); - - return result == WAIT_TIMEOUT ? ETIMEDOUT : 0; + return pthread_cond_timedwait(cond, &(mt->mt_cs), abstime); } xtPublic int xt_p_join(pthread_t thread, void **value) From 8cde9162460cc2c3a7eddb972c6aa766f89d4965 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 12 Jun 2011 16:44:41 +0200 Subject: [PATCH 05/14] fix mismerge --- storage/xtradb/os/os0file.c | 27 +++++++++++---------------- storage/xtradb/srv/srv0start.c | 4 +--- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index f0547f2e243..edf4e5d2744 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -2445,8 +2445,7 @@ _os_file_read( DWORD len; ibool retry; OVERLAPPED overlapped; - overlapped.Offset = (DWORD)offset; - overlapped.OffsetHigh = (DWORD)offset_high; + /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. 
*/ @@ -2466,8 +2465,8 @@ try_again: os_mutex_exit(os_file_count_mutex); memset (&overlapped, 0, sizeof (overlapped)); - overlapped.Offset = low; - overlapped.OffsetHigh = high; + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; overlapped.hEvent = win_get_syncio_event(); ret = ReadFile(file, buf, n, NULL, &overlapped); if (ret) { @@ -2475,7 +2474,7 @@ try_again: } else if(GetLastError() == ERROR_IO_PENDING) { ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); - + } os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); @@ -2568,8 +2567,8 @@ try_again: os_mutex_exit(os_file_count_mutex); memset (&overlapped, 0, sizeof (overlapped)); - overlapped.Offset = low; - overlapped.OffsetHigh = high; + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; overlapped.hEvent = win_get_syncio_event(); ret = ReadFile(file, buf, n, NULL, &overlapped); if (ret) { @@ -2577,7 +2576,7 @@ try_again: } else if(GetLastError() == ERROR_IO_PENDING) { ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); - + } os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); @@ -2654,8 +2653,6 @@ os_file_write( ulint n_retries = 0; ulint err; OVERLAPPED overlapped; - overlapped.Offset = (DWORD)offset; - overlapped.OffsetHigh = (DWORD)offset_high; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. 
*/ @@ -2674,12 +2671,14 @@ retry: os_mutex_exit(os_file_count_mutex); memset (&overlapped, 0, sizeof (overlapped)); - overlapped.Offset = low; - overlapped.OffsetHigh = high; + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; + overlapped.hEvent = win_get_syncio_event(); ret = WriteFile(file, buf, n, NULL, &overlapped); if (ret) { ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); + } else if(GetLastError() == ERROR_IO_PENDING) { ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); } @@ -3831,10 +3830,6 @@ os_aio_windows_handle( BOOL retry = FALSE; ULONG_PTR dummy_key; - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - - ret = GetQueuedCompletionStatus(completion_port, &len, &dummy_key, (OVERLAPPED **)&slot, INFINITE); diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index e235bab3e02..bb1beb06ef5 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1284,10 +1284,8 @@ innobase_start_or_create_for_mysql(void) default: os_aio_use_native_aio = TRUE; srv_use_native_conditions = TRUE; - default: - /* On Win 2000 and XP use async i/o */ - os_aio_use_native_aio = TRUE; } +#endif if (srv_file_flush_method_str == NULL) { /* These are the default options */ From 06490b208acdf56b1999617f7fd7d7ec2613f059 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Mon, 13 Jun 2011 02:38:16 +0200 Subject: [PATCH 06/14] fix warnings --- client/mysqlslap.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/client/mysqlslap.c b/client/mysqlslap.c index de1992e2d57..a876390db27 100644 --- a/client/mysqlslap.c +++ b/client/mysqlslap.c @@ -1382,9 +1382,9 @@ get_options(int *argc,char ***argv) fprintf(stderr,"%s: Could not open create file\n", my_progname); exit(1); } - tmp_string= (char *)my_malloc(sbuf.st_size + 1, + tmp_string= (char *)my_malloc((size_t)sbuf.st_size + 1, 
MYF(MY_ZEROFILL|MY_FAE|MY_WME)); - my_read(data_file, (uchar*) tmp_string, sbuf.st_size, MYF(0)); + my_read(data_file, (uchar*) tmp_string, (size_t)sbuf.st_size, MYF(0)); tmp_string[sbuf.st_size]= '\0'; my_close(data_file,MYF(0)); parse_delimiter(tmp_string, &create_statements, delimiter[0]); @@ -1409,9 +1409,9 @@ get_options(int *argc,char ***argv) fprintf(stderr,"%s: Could not open query supplied file\n", my_progname); exit(1); } - tmp_string= (char *)my_malloc(sbuf.st_size + 1, + tmp_string= (char *)my_malloc((size_t)sbuf.st_size + 1, MYF(MY_ZEROFILL|MY_FAE|MY_WME)); - my_read(data_file, (uchar*) tmp_string, sbuf.st_size, MYF(0)); + my_read(data_file, (uchar*) tmp_string, (size_t)sbuf.st_size, MYF(0)); tmp_string[sbuf.st_size]= '\0'; my_close(data_file,MYF(0)); if (user_supplied_query) @@ -1440,9 +1440,9 @@ get_options(int *argc,char ***argv) fprintf(stderr,"%s: Could not open query supplied file\n", my_progname); exit(1); } - tmp_string= (char *)my_malloc(sbuf.st_size + 1, + tmp_string= (char *)my_malloc((size_t)sbuf.st_size + 1, MYF(MY_ZEROFILL|MY_FAE|MY_WME)); - my_read(data_file, (uchar*) tmp_string, sbuf.st_size, MYF(0)); + my_read(data_file, (uchar*) tmp_string, (size_t)sbuf.st_size, MYF(0)); tmp_string[sbuf.st_size]= '\0'; my_close(data_file,MYF(0)); if (user_supplied_pre_statements) @@ -1471,9 +1471,9 @@ get_options(int *argc,char ***argv) fprintf(stderr,"%s: Could not open query supplied file\n", my_progname); exit(1); } - tmp_string= (char *)my_malloc(sbuf.st_size + 1, + tmp_string= (char *)my_malloc((size_t)sbuf.st_size + 1, MYF(MY_ZEROFILL|MY_FAE|MY_WME)); - my_read(data_file, (uchar*) tmp_string, sbuf.st_size, MYF(0)); + my_read(data_file, (uchar*) tmp_string, (size_t)sbuf.st_size, MYF(0)); tmp_string[sbuf.st_size]= '\0'; my_close(data_file,MYF(0)); if (user_supplied_post_statements) From 2b1044d8a4d3d71e9ed03e89b17cd63f3e525b1d Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Thu, 16 Jun 2011 14:33:09 +0200 Subject: [PATCH 07/14] Accept 
innodb_flush_method values previously allowed on Unix only, and map them to the corresponding Windows CreateFile flags: O_DSYNC=>FILE_FLAG_WRITE_THROUGH, ALL_O_DIRECT=>FILE_FLAG_NO_BUFFERING. The ability to specify innodb_flush_method=O_DSYNC fixes MySQL Bug#31876 (InnoDB commit performance slow on Windows XP) by removing the overhead of an extra FlushFileBuffers() call. --- storage/xtradb/os/os0file.c | 12 ++++++++++-- storage/xtradb/srv/srv0start.c | 4 +--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index edf4e5d2744..7a8e5802b19 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -1413,8 +1413,6 @@ try_again: #endif #ifdef UNIV_NON_BUFFERED_IO # ifndef UNIV_HOTBACKUP - if (type == OS_LOG_FILE) - attributes = attributes | FILE_FLAG_SEQUENTIAL_SCAN; if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) { /* Do not use unbuffered i/o to log files because value 2 denotes that we do not flush the log at every @@ -1448,6 +1446,16 @@ try_again: ut_error; } + if (type == OS_LOG_FILE) { + if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { + /* Map O_DSYNC to WRITE_THROUGH */ + attributes |= FILE_FLAG_WRITE_THROUGH; + } else if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { + /* Open log file without buffering */ + attributes |= FILE_FLAG_NO_BUFFERING; + } + } + file = CreateFile((LPCTSTR) name, GENERIC_READ | GENERIC_WRITE, /* read and write access */ diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index bb1beb06ef5..d002a1bb682 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1293,7 +1293,6 @@ innobase_start_or_create_for_mysql(void) srv_unix_file_flush_method = SRV_UNIX_FSYNC; srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; -#ifndef __WIN__ } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) { srv_unix_file_flush_method = SRV_UNIX_FSYNC; @@ -1311,7 +1310,7 @@
innobase_start_or_create_for_mysql(void) } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) { srv_unix_file_flush_method = SRV_UNIX_NOSYNC; -#else +#ifdef _WIN32 } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) { srv_win_file_flush_method = SRV_WIN_IO_NORMAL; os_aio_use_native_aio = FALSE; @@ -1319,7 +1318,6 @@ innobase_start_or_create_for_mysql(void) } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) { srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; os_aio_use_native_aio = FALSE; - } else if (0 == ut_strcmp(srv_file_flush_method_str, "async_unbuffered")) { srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; From 3e748505de096195821ece6018187b82e1b979c1 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Thu, 16 Jun 2011 14:51:50 +0200 Subject: [PATCH 08/14] Fix MySQL Bug #21978 : 'flush_time' value set for 1800 sec. This setting is obsolete now. It could make sense in the past, in situations where the open file handles limit was low. It does not make sense anymore to flush all files every half hour, now that the 2048 MyISAM file limit is removed as the fix for MySQL bug #24509.
--- sql/mysql_priv.h | 3 --- sql/mysqld.cc | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index bd2f31fbe1d..db1c385c992 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -401,7 +401,6 @@ protected: #define DELAYED_LIMIT 100 /**< pause after xxx inserts */ #define DELAYED_QUEUE_SIZE 1000 #define DELAYED_WAIT_TIMEOUT 5*60 /**< Wait for delayed insert */ -#define FLUSH_TIME 0 /**< Don't flush tables */ #define MAX_CONNECT_ERRORS 10 ///< errors before disabling host #ifdef __NETWARE__ @@ -411,8 +410,6 @@ protected: #endif #if defined(__WIN__) -#undef FLUSH_TIME -#define FLUSH_TIME 1800 /**< Flush every half hour */ #define INTERRUPT_PRIOR -2 #define CONNECT_PRIOR -1 diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 3fee65fe963..19d5a84e67d 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -7104,7 +7104,7 @@ each time the SQL thread starts.", { "flush_time", OPT_FLUSH_TIME, "A dedicated thread is created to flush all tables at the given interval.", &flush_time, &flush_time, 0, GET_ULONG, REQUIRED_ARG, - FLUSH_TIME, 0, LONG_TIMEOUT, 0, 1, 0}, + 0 , 0, LONG_TIMEOUT, 0, 1, 0}, { "ft_boolean_syntax", OPT_FT_BOOLEAN_SYNTAX, "List of operators for MATCH ... AGAINST ( ... IN BOOLEAN MODE).", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, From b8eac6a7193af4fa43e6ba03f091f6ec81187925 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Fri, 17 Jun 2011 00:29:22 +0200 Subject: [PATCH 09/14] Point to the correct documentation on building in our KB. --- INSTALL-WIN-SOURCE | 289 +-------------------------------------------- 1 file changed, 1 insertion(+), 288 deletions(-) diff --git a/INSTALL-WIN-SOURCE b/INSTALL-WIN-SOURCE index 8faf511936b..a61c12e6502 100644 --- a/INSTALL-WIN-SOURCE +++ b/INSTALL-WIN-SOURCE @@ -1,289 +1,2 @@ +Up-to-date instructions on MariaDB building on Windows can be found in http://kb.askmonty.org/en/building-mariadb-on-windows. -2.5.10. 
Installing MySQL from Source on Windows - - These instructions describe how to build binaries from source for - MySQL 5.1 on Windows. Instructions are provided for building - binaries from a standard source distribution or from the Bazaar - tree that contains the latest development source. - -Note - - The instructions here are strictly for users who want to test - MySQL on Microsoft Windows from the latest source distribution or - from the Bazaar tree. For production use, we do not advise using a - MySQL server built by yourself from source. Normally, it is best - to use precompiled binary distributions of MySQL that are built - specifically for optimal performance on Windows by Oracle - Corporation. Instructions for installing binary distributions are - available in Section 2.5, "Installing MySQL on Windows." - - To build MySQL on Windows from source, you must satisfy the - following system, compiler, and resource requirements: - - * Windows 2000, Windows XP, or newer version. - Windows Vista is supported when using Visual Studio 2005 - provided you have installed the following updates: - - + Microsoft Visual Studio 2005 Professional Edition - ENU - Service Pack 1 (KB926601) - (http://support.microsoft.com/?kbid=926601) - - + Security Update for Microsoft Visual Studio 2005 - Professional Edition - ENU (KB937061) - (http://support.microsoft.com/?kbid=937061) - - + Update for Microsoft Visual Studio 2005 Professional - Edition - ENU (KB932232) - (http://support.microsoft.com/?kbid=932232) - - * CMake, which can be downloaded from http://www.cmake.org. - After installing, modify your path to include the cmake - binary. - - * Microsoft Visual C++ 2005 Express Edition, Visual Studio .Net - 2003 (7.1), or Visual Studio 2005 (8.0) compiler system. - - * If you are using Visual C++ 2005 Express Edition, you must - also install an appropriate Platform SDK. 
More information and - links to downloads for various Windows platforms is available - from - http://www.microsoft.com/downloads/details.aspx?familyid=0baf2 - b35-c656-4969-ace8-e4c0c0716adb. - - * If you are compiling from a Bazaar tree or making changes to - the parser, you need bison for Windows, which can be - downloaded from - http://gnuwin32.sourceforge.net/packages/bison.htm. Download - the package labeled "Complete package, excluding sources". - After installing the package, modify your path to include the - bison binary and ensure that this binary is accessible from - Visual Studio. - - * Cygwin might be necessary if you want to run the test script - or package the compiled binaries and support files into a Zip - archive. (Cygwin is needed only to test or package the - distribution, not to build it.) Cygwin is available from - http://cygwin.com. - - * 3GB to 5GB of disk space. - - The exact system requirements for Visual Studio can be found here: - http://msdn.microsoft.com/vstudio/Previous/2003/sysreqs/default.as - px and - http://msdn.microsoft.com/vstudio/products/sysreqs/default.aspx - - You also need a MySQL source distribution for Windows, which can - be obtained two ways: - - * Obtain a source distribution packaged by Oracle Corporation. - These are available from http://dev.mysql.com/downloads/. - - * Package a source distribution yourself from the latest Bazaar - developer source tree. For instructions on pulling the latest - source files, see Section 2.3.3, "Installing from the - Development Source Tree." - - If you find something not working as expected, or you have - suggestions about ways to improve the current build process on - Windows, please send a message to the win32 mailing list. See - Section 1.6.1, "MySQL Mailing Lists." - -2.5.10.1. 
Building MySQL from Source Using CMake and Visual Studio - - You can build MySQL on Windows by using a combination of cmake and - Microsoft Visual Studio .NET 2003 (7.1), Microsoft Visual Studio - 2005 (8.0), Microsoft Visual Studio 2008 (9.0) or Microsoft Visual - C++ 2005 Express Edition. You must have the appropriate Microsoft - Platform SDK installed. - -Note - - To compile from the source code on Windows you must use the - standard source distribution (for example, mysql-5.1.46.tar.gz). - You build from the same distribution as used to build MySQL on - Unix, Linux and other platforms. Do not use the Windows Source - distributions as they do not contain the necessary configuration - script and other files. - - Follow this procedure to build MySQL: - - 1. If you are installing from a packaged source distribution, - create a work directory (for example, C:\workdir), and unpack - the source distribution there using WinZip or another Windows - tool that can read .zip files. This directory is the work - directory in the following instructions. - -Note - You must run the commands in the win directory from the - top-level source directory. Do not change into the win - directory, as the commands will not be executed correctly. - - 2. Start a command shell. If you have not configured the PATH and - other environment variables for all command shells, you may be - able to start a command shell from the Start Menu within the - Windows Visual Studio menu that contains the necessary - environment changes. - - 3. 
Within the command shell, navigate to the work directory and - run the following command: -C:\workdir>win\configure.js options - If you have associated the .js file extension with an - application such as a text editor, then you may need to use - the following command to force configure.js to be executed as - a script: -C:\workdir>cscript win\configure.js options - These options are available for configure.js: - - + WITH_INNOBASE_STORAGE_ENGINE: Enable the InnoDB storage - engine. - - + WITH_PARTITION_STORAGE_ENGINE: Enable user-defined - partitioning. - - + WITH_ARCHIVE_STORAGE_ENGINE: Enable the ARCHIVE storage - engine. - - + WITH_BLACKHOLE_STORAGE_ENGINE: Enable the BLACKHOLE - storage engine. - - + WITH_EXAMPLE_STORAGE_ENGINE: Enable the EXAMPLE storage - engine. - - + WITH_FEDERATED_STORAGE_ENGINE: Enable the FEDERATED - storage engine. - - + WITH_NDBCLUSTER_STORAGE_ENGINE (experimental): Enable the - NDBCLUSTER storage engine in the MySQL server; cause - binaries for the MySQL Cluster management and data node, - management client, and other programs to be built. - This option is supported only in MySQL Cluster NDB 7.0 - (NDBCLUSTER storage engine versions 6.4.0 and later) - using the MySQL Cluster sources. It cannot be used to - enable clustering support in other MySQL source trees or - distributions. - - + MYSQL_SERVER_SUFFIX=suffix: Server suffix, default none. - - + COMPILATION_COMMENT=comment: Server comment, default - "Source distribution". - - + MYSQL_TCP_PORT=port: Server port, default 3306. - - + DISABLE_GRANT_OPTIONS: Disables the --bootstrap, - --skip-grant-tables, and --init-file options for mysqld. - This option is available as of MySQL 5.1.15. - For example (type the command on one line): -C:\workdir>win\configure.js WITH_INNOBASE_STORAGE_ENGINE - WITH_PARTITION_STORAGE_ENGINE MYSQL_SERVER_SUFFIX=-pro - - 4. 
From the work directory, execute the win\build-vs9.bat - (Windows Visual Studio 2008), win\build-vs8.bat (Windows - Visual Studio 2005), or win\build-vs71.bat (Windows Visual - Stidion 2003) script, depending on the version of Visual - Studio you have installed. The script invokes CMake, which - generates the mysql.sln solution file. - You can also use the corresponding 64-bit file (for example - win\build-vs8_x64.bat or win\build-vs9_x64.bat) to build the - 64-bit version of MySQL. However, you cannot build the 64-bit - version with Visual Studio Express Edition. You must use - Visual Studio 2005 (8.0) or higher. - - 5. From the work directory, open the generated mysql.sln file - with Visual Studio and select the proper configuration using - the Configuration menu. The menu provides Debug, Release, - RelwithDebInfo, MinRelInfo options. Then select Solution > - Build to build the solution. - Remember the configuration that you use in this step. It is - important later when you run the test script because that - script needs to know which configuration you used. - - 6. Test the server. The server built using the preceding - instructions expects that the MySQL base directory and data - directory are C:\mysql and C:\mysql\data by default. If you - want to test your server using the source tree root directory - and its data directory as the base directory and data - directory, you need to tell the server their path names. You - can either do this on the command line with the --basedir and - --datadir options, or by placing appropriate options in an - option file. (See Section 4.2.3.3, "Using Option Files.") If - you have an existing data directory elsewhere that you want to - use, you can specify its path name instead. - When the server is running in standalone fashion or as a - service based on your configuration, try to connect to it from - the mysql interactive command-line utility. - You can also run the standard test script, mysql-test-run.pl. 
- This script is written in Perl, so you'll need either Cygwin - or ActiveState Perl to run it. You may also need to install - the modules required by the script. To run the test script, - change location into the mysql-test directory under the work - directory, set the MTR_VS_CONFIG environment variable to the - configuration you selected earlier (or use the --vs-config - option), and invoke mysql-test-run.pl. For example (using - Cygwin and the bash shell): -shell> cd mysql-test -shell> export MTR_VS_CONFIG=debug -shell> ./mysql-test-run.pl --force --timer -shell> ./mysql-test-run.pl --force --timer --ps-protocol - - When you are satisfied that the programs you have built are - working correctly, stop the server. Now you can install the - distribution. One way to do this is to use the make_win_bin_dist - script in the scripts directory of the MySQL source distribution - (see Section 4.4.2, "make_win_bin_dist --- Package MySQL - Distribution as ZIP Archive"). This is a shell script, so you must - have Cygwin installed if you want to use it. It creates a Zip - archive of the built executables and support files that you can - unpack in the location at which you want to install MySQL. - - It is also possible to install MySQL by copying directories and - files directly: - - 1. Create the directories where you want to install MySQL. For - example, to install into C:\mysql, use these commands: -C:\> mkdir C:\mysql -C:\> mkdir C:\mysql\bin -C:\> mkdir C:\mysql\data -C:\> mkdir C:\mysql\share -C:\> mkdir C:\mysql\scripts - If you want to compile other clients and link them to MySQL, - you should also create several additional directories: -C:\> mkdir C:\mysql\include -C:\> mkdir C:\mysql\lib -C:\> mkdir C:\mysql\lib\debug -C:\> mkdir C:\mysql\lib\opt - If you want to benchmark MySQL, create this directory: -C:\> mkdir C:\mysql\sql-bench - Benchmarking requires Perl support. See Section 2.15, "Perl - Installation Notes." - - 2. 
From the work directory, copy into the C:\mysql directory the - following files and directories: -C:\> cd \workdir -C:\workdir> mkdir C:\mysql -C:\workdir> mkdir C:\mysql\bin -C:\workdir> copy client\Release\*.exe C:\mysql\bin -C:\workdir> copy sql\Release\mysqld.exe C:\mysql\bin\mysqld.exe -C:\workdir> xcopy scripts\*.* C:\mysql\scripts /E -C:\workdir> xcopy share\*.* C:\mysql\share /E - If you want to compile other clients and link them to MySQL, - you should also copy several libraries and header files: -C:\workdir> copy lib\Release\mysqlclient.lib C:\mysql\lib\debug -C:\workdir> copy lib\Release\libmysql.* C:\mysql\lib\debug -C:\workdir> copy lib\Release\zlib.* C:\mysql\lib\debug -C:\workdir> copy lib\Release\mysqlclient.lib C:\mysql\lib\opt -C:\workdir> copy lib\Release\libmysql.* C:\mysql\lib\opt -C:\workdir> copy lib\Release\zlib.* C:\mysql\lib\opt -C:\workdir> copy include\*.h C:\mysql\include -C:\workdir> copy libmysql\libmysql.def C:\mysql\include - -Note - If you have compiled a Debug, rather than Release solution, - you can replace Release with Debug in the source file names - shown above. - If you want to benchmark MySQL, you should also do this: -C:\workdir> xcopy sql-bench\*.* C:\mysql\bench /E - - After installation, set up and start the server in the same way as - for binary Windows distributions. This includes creating the - system tables by running mysql_install_db. For more information, - see Section 2.5, "Installing MySQL on Windows." 
From 8fa05f426657e57ed6fe3f89f6f1f2ae0e622865 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sat, 18 Jun 2011 21:56:47 +0200 Subject: [PATCH 10/14] dummy change to trigger the buildbot --- INSTALL-WIN-SOURCE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/INSTALL-WIN-SOURCE b/INSTALL-WIN-SOURCE index a61c12e6502..2e77959d69c 100644 --- a/INSTALL-WIN-SOURCE +++ b/INSTALL-WIN-SOURCE @@ -1,2 +1,2 @@ -Up-to-date instructions on MariaDB building on Windows can be found in http://kb.askmonty.org/en/building-mariadb-on-windows. +Up-to-date instructions on MariaDB building on Windows can be found in http://kb.askmonty.org/en/building-mariadb-on-windows. From 6cf9e92e74089dcc700d6f0577ef9e7208027442 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 19 Jun 2011 00:29:49 +0200 Subject: [PATCH 11/14] fix compile error on *nix --- mysys/my_read.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mysys/my_read.c b/mysys/my_read.c index 9c76193aa63..5fef4455f62 100644 --- a/mysys/my_read.c +++ b/mysys/my_read.c @@ -45,9 +45,9 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags) { errno= 0; /* Linux, Windows don't reset this on EOF/success */ #ifdef _WIN32 - readbytes= my_win_read(Filedes, Buffer, Count); + readbytes= my_win_read(Filedes, Buffer, Count); #else - readbytes= my_read(Fildes, Buffer, Count); + readbytes= read(Filedes, Buffer, Count); #endif if (readbytes != Count) { From d1d19a68830c71d5aa6dfd71ecdfab5170904783 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 19 Jun 2011 00:51:41 +0200 Subject: [PATCH 12/14] add missing DBUG_RETURN --- mysys/my_write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysys/my_write.c b/mysys/my_write.c index 5b916d6cb65..1d59c62a005 100644 --- a/mysys/my_write.c +++ b/mysys/my_write.c @@ -39,7 +39,7 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) if(Filedes < 0) { errno = EINVAL; - return -1; + 
DBUG_RETURN(-1); } writtenbytes= my_win_write(Filedes, Buffer, Count); #else From 4243aee728dd038e23f05a69d60208e9cebd8635 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 19 Jun 2011 17:19:22 +0200 Subject: [PATCH 13/14] Fix "make dist" : add my_winfile.c and my_winerr.c to EXTRA_DIST list --- mysys/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 1dd4cc0f780..edeba73a5ee 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -74,7 +74,7 @@ endif EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ thr_mutex.c thr_rwlock.c waiting_threads.c \ CMakeLists.txt mf_soundex.c \ - my_conio.c my_wincond.c my_winthread.c + my_conio.c my_wincond.c my_winthread.c my_winfile.c my_winerr.c # test_dir_DEPENDENCIES= $(LIBRARIES) # testhash_DEPENDENCIES= $(LIBRARIES) # test_charset_DEPENDENCIES= $(LIBRARIES) From b48dc8306f2b729bef09f9cdf30d7897726b873e Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 26 Jun 2011 01:07:39 +0200 Subject: [PATCH 14/14] set errno to EBADF, if file descriptor < 0 in my_write() --- mysys/my_write.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mysys/my_write.c b/mysys/my_write.c index 1d59c62a005..be493049d5c 100644 --- a/mysys/my_write.c +++ b/mysys/my_write.c @@ -38,8 +38,8 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) #ifdef _WIN32 if(Filedes < 0) { - errno = EINVAL; - DBUG_RETURN(-1); + my_errno= errno= EBADF; + DBUG_RETURN((size_t)-1); } writtenbytes= my_win_write(Filedes, Buffer, Count); #else