mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Add support for basic NUMA awareness
Add basic NUMA awareness routines, using a minimal src/port/pg_numa.c portability wrapper and an optional build dependency, enabled by the --with-libnuma configure option. For now this is Linux-only; other platforms may be supported later. A built-in SQL function pg_numa_available() allows checking NUMA support, i.e. that the server was built/linked with the NUMA library. The main function introduced is pg_numa_query_pages(), which allows determining the NUMA node for individual memory pages. Internally the function uses the move_pages(2) syscall, as it allows batching and is more efficient than get_mempolicy(2). Author: Jakub Wartak <jakub.wartak@enterprisedb.com> Co-authored-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org> Reviewed-by: Tomas Vondra <tomas@vondra.me> Discussion: https://postgr.es/m/CAKZiRmxh6KWo0aqRqvmcoaX2jUxZYb4kGp3N%3Dq1w%2BDiH-696Xw%40mail.gmail.com
This commit is contained in:
@ -196,6 +196,7 @@ with_gssapi = @with_gssapi@
|
||||
with_krb_srvnam = @with_krb_srvnam@
|
||||
with_ldap = @with_ldap@
|
||||
with_libcurl = @with_libcurl@
|
||||
with_libnuma = @with_libnuma@
|
||||
with_liburing = @with_liburing@
|
||||
with_libxml = @with_libxml@
|
||||
with_libxslt = @with_libxslt@
|
||||
@ -223,6 +224,9 @@ krb_srvtab = @krb_srvtab@
|
||||
ICU_CFLAGS = @ICU_CFLAGS@
|
||||
ICU_LIBS = @ICU_LIBS@
|
||||
|
||||
LIBNUMA_CFLAGS = @LIBNUMA_CFLAGS@
|
||||
LIBNUMA_LIBS = @LIBNUMA_LIBS@
|
||||
|
||||
LIBURING_CFLAGS = @LIBURING_CFLAGS@
|
||||
LIBURING_LIBS = @LIBURING_LIBS@
|
||||
|
||||
@ -250,7 +254,7 @@ CPP = @CPP@
|
||||
CPPFLAGS = @CPPFLAGS@
|
||||
PG_SYSROOT = @PG_SYSROOT@
|
||||
|
||||
override CPPFLAGS := $(ICU_CFLAGS) $(LIBURING_CFLAGS) $(CPPFLAGS)
|
||||
override CPPFLAGS := $(ICU_CFLAGS) $(LIBNUMA_CFLAGS) $(LIBURING_CFLAGS) $(CPPFLAGS)
|
||||
|
||||
ifdef PGXS
|
||||
override CPPFLAGS := -I$(includedir_server) -I$(includedir_internal) $(CPPFLAGS)
|
||||
|
@ -566,7 +566,7 @@ static int ssl_renegotiation_limit;
|
||||
*/
|
||||
int huge_pages = HUGE_PAGES_TRY;
|
||||
int huge_page_size;
|
||||
static int huge_pages_status = HUGE_PAGES_UNKNOWN;
|
||||
int huge_pages_status = HUGE_PAGES_UNKNOWN;
|
||||
|
||||
/*
|
||||
* These variables are all dummies that don't do anything, except in some
|
||||
|
@ -57,6 +57,6 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 202504071
|
||||
#define CATALOG_VERSION_NO 202504072
|
||||
|
||||
#endif
|
||||
|
@ -8542,6 +8542,10 @@
|
||||
proargnames => '{name,off,size,allocated_size}',
|
||||
prosrc => 'pg_get_shmem_allocations' },
|
||||
|
||||
{ oid => '4099', descr => 'Is NUMA support available?',
|
||||
proname => 'pg_numa_available', provolatile => 's', prorettype => 'bool',
|
||||
proargtypes => '', prosrc => 'pg_numa_available' },
|
||||
|
||||
# memory context of local backend
|
||||
{ oid => '2282',
|
||||
descr => 'information about all memory contexts of local backend',
|
||||
|
@ -689,6 +689,9 @@
|
||||
/* Define to 1 to build with libcurl support. (--with-libcurl) */
|
||||
#undef USE_LIBCURL
|
||||
|
||||
/* Define to 1 to build with NUMA support. (--with-libnuma) */
|
||||
#undef USE_LIBNUMA
|
||||
|
||||
/* Define to build with io_uring support. (--with-liburing) */
|
||||
#undef USE_LIBURING
|
||||
|
||||
|
40
src/include/port/pg_numa.h
Normal file
40
src/include/port/pg_numa.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_numa.h
|
||||
* Basic NUMA portability routines
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2025, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/include/port/pg_numa.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PG_NUMA_H
|
||||
#define PG_NUMA_H
|
||||
|
||||
#include "fmgr.h"
|
||||
|
||||
extern PGDLLIMPORT int pg_numa_init(void);
|
||||
extern PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status);
|
||||
extern PGDLLIMPORT int pg_numa_get_max_node(void);
|
||||
extern PGDLLIMPORT Size pg_numa_get_pagesize(void);
|
||||
|
||||
#ifdef USE_LIBNUMA
|
||||
|
||||
/*
 * Touch the memory at "ptr" by reading one uint64 through a volatile pointer
 * and storing the value in "ro_volatile_var".
 *
 * This is required on Linux before calling pg_numa_query_pages(), because a
 * page has to be faulted in before the move_pages(2) syscall returns valid
 * results for it.
 *
 * Both arguments and the whole expansion are parenthesized, so an expression
 * argument such as "base + offset" is dereferenced as a whole rather than
 * having only "base" dereferenced and "offset" added to the loaded value.
 */
#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
	((ro_volatile_var) = *(volatile uint64 *) (ptr))
|
||||
|
||||
#else
|
||||
|
||||
#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
|
||||
do {} while(0)
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* PG_NUMA_H */
|
@ -45,6 +45,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */
|
||||
extern PGDLLIMPORT int shared_memory_type;
|
||||
extern PGDLLIMPORT int huge_pages;
|
||||
extern PGDLLIMPORT int huge_page_size;
|
||||
extern PGDLLIMPORT int huge_pages_status;
|
||||
|
||||
/* Possible values for huge_pages and huge_pages_status */
|
||||
typedef enum
|
||||
|
@ -200,6 +200,8 @@ pgxs_empty = [
|
||||
|
||||
'ICU_LIBS',
|
||||
|
||||
'LIBNUMA_CFLAGS', 'LIBNUMA_LIBS',
|
||||
|
||||
'LIBURING_CFLAGS', 'LIBURING_LIBS',
|
||||
]
|
||||
|
||||
@ -232,6 +234,7 @@ pgxs_deps = {
|
||||
'icu': icu,
|
||||
'ldap': ldap,
|
||||
'libcurl': libcurl,
|
||||
'libnuma': libnuma,
|
||||
'liburing': liburing,
|
||||
'libxml': libxml,
|
||||
'libxslt': libxslt,
|
||||
|
@ -45,6 +45,7 @@ OBJS = \
|
||||
path.o \
|
||||
pg_bitutils.o \
|
||||
pg_localeconv_r.o \
|
||||
pg_numa.o \
|
||||
pg_popcount_aarch64.o \
|
||||
pg_popcount_avx512.o \
|
||||
pg_strong_random.o \
|
||||
|
@ -8,6 +8,7 @@ pgport_sources = [
|
||||
'path.c',
|
||||
'pg_bitutils.c',
|
||||
'pg_localeconv_r.c',
|
||||
'pg_numa.c',
|
||||
'pg_popcount_aarch64.c',
|
||||
'pg_popcount_avx512.c',
|
||||
'pg_strong_random.c',
|
||||
|
120
src/port/pg_numa.c
Normal file
120
src/port/pg_numa.c
Normal file
@ -0,0 +1,120 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_numa.c
|
||||
* Basic NUMA portability routines
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2025, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/port/pg_numa.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
#include <unistd.h>
|
||||
|
||||
#ifdef WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include "fmgr.h"
|
||||
#include "miscadmin.h"
|
||||
#include "port/pg_numa.h"
|
||||
#include "storage/pg_shmem.h"
|
||||
|
||||
/*
|
||||
* At this point we provide support only for Linux thanks to libnuma, but in
|
||||
* future support for other platforms e.g. Win32 or FreeBSD might be possible
|
||||
* too. For Win32 NUMA APIs see
|
||||
* https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
|
||||
*/
|
||||
#ifdef USE_LIBNUMA
|
||||
|
||||
#include <numa.h>
|
||||
#include <numaif.h>
|
||||
|
||||
Datum pg_numa_available(PG_FUNCTION_ARGS);
|
||||
|
||||
/*
 * Per numa(3), libnuma has to be initialized on Linux before other calls are
 * made.  The result of numa_available() is handed back to the caller as is.
 */
int
pg_numa_init(void)
{
	return numa_available();
}
|
||||
|
||||
/*
 * Query NUMA placement for "count" pages of process "pid" in one batched call.
 *
 * We go through move_pages(2) rather than get_mempolicy(2), because the former
 * accepts many pages per call, which is much faster than asking page by page.
 * No target nodes are passed (NULL) and no flags are set.  Per-page results
 * are expected in "status" (see move_pages(2)); the library's return code is
 * handed back unchanged.
 */
int
pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
{
	int			rc;

	rc = numa_move_pages(pid, count, pages, NULL, status, 0);

	return rc;
}
|
||||
|
||||
/*
 * Thin wrapper that returns whatever libnuma's numa_max_node() reports.
 */
int
pg_numa_get_max_node(void)
{
	int			max_node = numa_max_node();

	return max_node;
}
|
||||
|
||||
#else
|
||||
|
||||
Datum pg_numa_available(PG_FUNCTION_ARGS);
|
||||
|
||||
/* Empty wrappers */
|
||||
/*
 * Stub used when the server is built without libnuma.
 *
 * Always returns -1, which pg_numa_available() interprets as "NUMA is not
 * available".
 */
int
pg_numa_init(void)
{
	const int	numa_not_available = -1;

	return numa_not_available;
}
|
||||
|
||||
/*
 * Stub used when the server is built without libnuma: nothing is queried, the
 * arguments are ignored, "status" is left untouched, and 0 is returned.
 */
int
pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
{
	/* arguments are intentionally unused in this build */
	(void) pid;
	(void) count;
	(void) pages;
	(void) status;

	return 0;
}
|
||||
|
||||
/*
 * Stub used when the server is built without libnuma: always reports 0.
 */
int
pg_numa_get_max_node(void)
{
	const int	max_node = 0;

	return max_node;
}
|
||||
|
||||
#endif
|
||||
|
||||
Datum
|
||||
pg_numa_available(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_BOOL(pg_numa_init() != -1);
|
||||
}
|
||||
|
||||
/* This should be used only after the server is started */
|
||||
Size
|
||||
pg_numa_get_pagesize(void)
|
||||
{
|
||||
Size os_page_size;
|
||||
#ifdef WIN32
|
||||
SYSTEM_INFO sysinfo;
|
||||
|
||||
GetSystemInfo(&sysinfo);
|
||||
os_page_size = sysinfo.dwPageSize;
|
||||
#else
|
||||
os_page_size = sysconf(_SC_PAGESIZE);
|
||||
#endif
|
||||
|
||||
Assert(IsUnderPostmaster);
|
||||
Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);
|
||||
|
||||
if (huge_pages_status == HUGE_PAGES_ON)
|
||||
GetHugePageSize(&os_page_size, NULL);
|
||||
|
||||
return os_page_size;
|
||||
}
|
Reference in New Issue
Block a user