1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Add support for basic NUMA awareness

Add basic NUMA awareness routines, using a minimal src/port/pg_numa.c
portability wrapper and an optional build dependency, enabled by
--with-libnuma configure option. For now this is Linux-only, other
platforms may be supported later.

A built-in SQL function pg_numa_available() allows checking NUMA
support, i.e. that the server was built/linked with the NUMA library.

The main function introduced is pg_numa_query_pages(), which allows
determining the NUMA node for individual memory pages. Internally the
function uses move_pages(2) syscall, as it allows batching, and is more
efficient than get_mempolicy(2).

Author: Jakub Wartak <jakub.wartak@enterprisedb.com>
Co-authored-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org>
Reviewed-by: Tomas Vondra <tomas@vondra.me>
Discussion: https://postgr.es/m/CAKZiRmxh6KWo0aqRqvmcoaX2jUxZYb4kGp3N%3Dq1w%2BDiH-696Xw%40mail.gmail.com
This commit is contained in:
Tomas Vondra
2025-04-07 22:51:49 +02:00
parent 17bcf4f545
commit 65c298f61f
18 changed files with 444 additions and 3 deletions

View File

@ -196,6 +196,7 @@ with_gssapi = @with_gssapi@
with_krb_srvnam = @with_krb_srvnam@
with_ldap = @with_ldap@
with_libcurl = @with_libcurl@
with_libnuma = @with_libnuma@
with_liburing = @with_liburing@
with_libxml = @with_libxml@
with_libxslt = @with_libxslt@
@ -223,6 +224,9 @@ krb_srvtab = @krb_srvtab@
ICU_CFLAGS = @ICU_CFLAGS@
ICU_LIBS = @ICU_LIBS@
LIBNUMA_CFLAGS = @LIBNUMA_CFLAGS@
LIBNUMA_LIBS = @LIBNUMA_LIBS@
LIBURING_CFLAGS = @LIBURING_CFLAGS@
LIBURING_LIBS = @LIBURING_LIBS@
@ -250,7 +254,7 @@ CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
PG_SYSROOT = @PG_SYSROOT@
override CPPFLAGS := $(ICU_CFLAGS) $(LIBURING_CFLAGS) $(CPPFLAGS)
override CPPFLAGS := $(ICU_CFLAGS) $(LIBNUMA_CFLAGS) $(LIBURING_CFLAGS) $(CPPFLAGS)
ifdef PGXS
override CPPFLAGS := -I$(includedir_server) -I$(includedir_internal) $(CPPFLAGS)

View File

@ -566,7 +566,7 @@ static int ssl_renegotiation_limit;
*/
int huge_pages = HUGE_PAGES_TRY;
int huge_page_size;
static int huge_pages_status = HUGE_PAGES_UNKNOWN;
int huge_pages_status = HUGE_PAGES_UNKNOWN;
/*
* These variables are all dummies that don't do anything, except in some

View File

@ -57,6 +57,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202504071
#define CATALOG_VERSION_NO 202504072
#endif

View File

@ -8542,6 +8542,10 @@
proargnames => '{name,off,size,allocated_size}',
prosrc => 'pg_get_shmem_allocations' },
{ oid => '4099', descr => 'Is NUMA support available?',
proname => 'pg_numa_available', provolatile => 's', prorettype => 'bool',
proargtypes => '', prosrc => 'pg_numa_available' },
# memory context of local backend
{ oid => '2282',
descr => 'information about all memory contexts of local backend',

View File

@ -689,6 +689,9 @@
/* Define to 1 to build with libcurl support. (--with-libcurl) */
#undef USE_LIBCURL
/* Define to 1 to build with NUMA support. (--with-libnuma) */
#undef USE_LIBNUMA
/* Define to build with io_uring support. (--with-liburing) */
#undef USE_LIBURING

View File

@ -0,0 +1,40 @@
/*-------------------------------------------------------------------------
*
* pg_numa.h
* Basic NUMA portability routines
*
*
* Copyright (c) 2025, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/include/port/pg_numa.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_NUMA_H
#define PG_NUMA_H
#include "fmgr.h"
/*
 * NUMA portability wrappers, implemented in src/port/pg_numa.c.
 *
 * The first three have two implementations: one built on libnuma when
 * USE_LIBNUMA is defined, and a stub otherwise.
 */

/*
 * With libnuma this returns the result of numa_available(); the stub returns
 * -1.  pg_numa_available() treats -1 as "NUMA not available".
 */
extern PGDLLIMPORT int pg_numa_init(void);
/*
 * With libnuma this forwards to numa_move_pages() with no target nodes, so
 * 'count' entries of 'pages' are only queried, with 'status' passed along to
 * receive the per-page results.  The stub returns 0 and does not touch
 * 'status'.
 */
extern PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status);
/* With libnuma this returns numa_max_node(); the stub returns 0. */
extern PGDLLIMPORT int pg_numa_get_max_node(void);
/*
 * Returns the OS page size, after passing it through GetHugePageSize() when
 * huge_pages_status is HUGE_PAGES_ON.  Should be used only after the server
 * is started.
 */
extern PGDLLIMPORT Size pg_numa_get_pagesize(void);
#ifdef USE_LIBNUMA

/*
 * pg_numa_touch_mem_if_required
 *		Read one uint64 at 'ptr' into 'ro_volatile_var'.
 *
 * This is required on Linux before pg_numa_query_pages(), because the page
 * has to be faulted in before the move_pages(2) syscall returns valid
 * results.  The read goes through a volatile-qualified pointer.
 *
 * Both arguments are parenthesized so that an expression argument such as
 * "base + offset" is cast and dereferenced as a whole; without that, the
 * cast would bind to "base" only and the wrong address would be read.
 *
 * The macro is a statement in both variants (do/while(0)), so it behaves
 * the same in if/else bodies whether or not USE_LIBNUMA is defined.
 */
#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
	do { \
		(ro_volatile_var) = *(volatile uint64 *) (ptr); \
	} while (0)

#else

/* Without libnuma this is a no-op; neither argument is evaluated. */
#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
	do {} while (0)

#endif
#endif /* PG_NUMA_H */

View File

@ -45,6 +45,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */
extern PGDLLIMPORT int shared_memory_type;
extern PGDLLIMPORT int huge_pages;
extern PGDLLIMPORT int huge_page_size;
extern PGDLLIMPORT int huge_pages_status;
/* Possible values for huge_pages and huge_pages_status */
typedef enum

View File

@ -200,6 +200,8 @@ pgxs_empty = [
'ICU_LIBS',
'LIBNUMA_CFLAGS', 'LIBNUMA_LIBS',
'LIBURING_CFLAGS', 'LIBURING_LIBS',
]
@ -232,6 +234,7 @@ pgxs_deps = {
'icu': icu,
'ldap': ldap,
'libcurl': libcurl,
'libnuma': libnuma,
'liburing': liburing,
'libxml': libxml,
'libxslt': libxslt,

View File

@ -45,6 +45,7 @@ OBJS = \
path.o \
pg_bitutils.o \
pg_localeconv_r.o \
pg_numa.o \
pg_popcount_aarch64.o \
pg_popcount_avx512.o \
pg_strong_random.o \

View File

@ -8,6 +8,7 @@ pgport_sources = [
'path.c',
'pg_bitutils.c',
'pg_localeconv_r.c',
'pg_numa.c',
'pg_popcount_aarch64.c',
'pg_popcount_avx512.c',
'pg_strong_random.c',

120
src/port/pg_numa.c Normal file
View File

@ -0,0 +1,120 @@
/*-------------------------------------------------------------------------
*
* pg_numa.c
* Basic NUMA portability routines
*
*
* Copyright (c) 2025, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* src/port/pg_numa.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <unistd.h>
#ifdef WIN32
#include <windows.h>
#endif
#include "fmgr.h"
#include "miscadmin.h"
#include "port/pg_numa.h"
#include "storage/pg_shmem.h"
/*
 * At this point we provide NUMA support only on Linux, through libnuma.
 * Support for other platforms, e.g. Win32 or FreeBSD, may be added in the
 * future. For the Win32 NUMA APIs see
 * https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
 */
#ifdef USE_LIBNUMA
#include <numa.h>
#include <numaif.h>
Datum pg_numa_available(PG_FUNCTION_ARGS);
/*
 * pg_numa_init
 *		libnuma has to be initialized before use, as per numa(3) on Linux.
 *
 * Hands back whatever numa_available() reports.
 */
int
pg_numa_init(void)
{
	return numa_available();
}
/*
 * pg_numa_query_pages
 *		Query 'count' pages of process 'pid' in a single call.
 *
 * We go through the move_pages(2) syscall rather than get_mempolicy(2),
 * because the former lets us batch many memory pages into one system call,
 * which is way faster.  No target nodes (NULL) and no flags (0) are passed;
 * 'status' is handed over to receive the per-page results.
 */
int
pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
{
	const int  *no_target_nodes = NULL;
	int			no_flags = 0;

	return numa_move_pages(pid, count, pages, no_target_nodes, status, no_flags);
}
/*
 * pg_numa_get_max_node
 *		Report the value returned by libnuma's numa_max_node().
 */
int
pg_numa_get_max_node(void)
{
	int			max_node = numa_max_node();

	return max_node;
}
#else
Datum pg_numa_available(PG_FUNCTION_ARGS);
/* Empty wrappers */
int
pg_numa_init(void)
{
	/* Stub used when USE_LIBNUMA is not defined: NUMA is not available */
	const int	numa_unavailable = -1;

	return numa_unavailable;
}
int
pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
{
	/* Stub: nothing is queried, and 'status' is left untouched */
	(void) pid;
	(void) count;
	(void) pages;
	(void) status;

	return 0;
}
int
pg_numa_get_max_node(void)
{
	/* Stub: always reports 0 */
	const int	stub_max_node = 0;

	return stub_max_node;
}
#endif
/*
 * pg_numa_available
 *		SQL-callable check for NUMA support.
 *
 * Returns true unless pg_numa_init() returns -1.
 */
Datum
pg_numa_available(PG_FUNCTION_ARGS)
{
	bool		available = (pg_numa_init() != -1);

	PG_RETURN_BOOL(available);
}
/*
 * pg_numa_get_pagesize
 *		Return the memory page size: the OS page size, handed to
 *		GetHugePageSize() when huge pages are in use.
 *
 * This should be used only after the server is started; the assertions
 * require IsUnderPostmaster and a known huge_pages_status.
 */
Size
pg_numa_get_pagesize(void)
{
	Size		page_size;

	Assert(IsUnderPostmaster);
	Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);

	/* Start from the regular OS page size */
#ifdef WIN32
	{
		SYSTEM_INFO info;

		GetSystemInfo(&info);
		page_size = info.dwPageSize;
	}
#else
	page_size = sysconf(_SC_PAGESIZE);
#endif

	/*
	 * With huge pages on, pass the value to GetHugePageSize(), which
	 * presumably replaces it with the huge page size.
	 */
	if (huge_pages_status == HUGE_PAGES_ON)
		GetHugePageSize(&page_size, NULL);

	return page_size;
}