1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-22 12:22:45 +03:00
Files
postgres/src/backend/utils/adt/genfile.c
Tom Lane 2333803d84 Use "data directory" not "current directory" in error messages.
The user receiving the message might not understand where the
server's "current directory" is.  "Data directory" seems clearer.
(This would not be good for frontend code, but both of these
messages are only issued in the backend.)

Kyotaro Horiguchi

Discussion: https://postgr.es/m/20230316.111646.1564684434328830712.horikyota.ntt@gmail.com
2023-03-16 12:04:08 -04:00

780 lines
19 KiB
C

/*-------------------------------------------------------------------------
*
* genfile.c
* Functions for direct access to files
*
*
* Copyright (c) 2004-2023, PostgreSQL Global Development Group
*
* Author: Andreas Pflug <pgadmin@pse-consulting.de>
*
* IDENTIFICATION
* src/backend/utils/adt/genfile.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <sys/file.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>
#include "access/htup_details.h"
#include "access/xlog_internal.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_tablespace_d.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "postmaster/syslogger.h"
#include "replication/slot.h"
#include "storage/fd.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
/*
 * Turn a "text" filename argument into a canonicalized C string and verify
 * that the current user may read it.
 *
 * Users with the privileges of 'pg_read_server_files' may name any path.
 * Everyone else may use either a relative path that stays in or below the
 * data directory, or an absolute path located under DataDir or under
 * Log_directory.
 *
 * The privilege test is specific to 'read' access.  Do not reuse this
 * function for 'write' or 'program' checks without first teaching it to
 * take the kind of access as an argument and to test the matching role.
 *
 * Returns the canonicalized path; raises ERROR if the path is not allowed.
 */
static char *
convert_and_check_filename(text *arg)
{
	char	   *path = text_to_cstring(arg);

	/* Canonicalization works in place and may shorten the string */
	canonicalize_path(path);

	/*
	 * Holders of pg_read_server_files can read anything the server's OS user
	 * can, so there is nothing further to verify for them.
	 */
	if (has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES))
		return path;

	if (!is_absolute_path(path))
	{
		/* Relative paths must not escape the data directory */
		if (!path_is_relative_and_below_cwd(path))
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("path must be in or below the data directory")));
	}
	else
	{
		bool		permitted;

		/*
		 * An absolute path is acceptable when it lies under DataDir, or
		 * under Log_directory (which may itself be outside DataDir, but only
		 * counts here when it is an absolute path).
		 */
		permitted = path_is_prefix_of_path(DataDir, path);
		if (!permitted && is_absolute_path(Log_directory))
			permitted = path_is_prefix_of_path(Log_directory, path);

		if (!permitted)
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("absolute path not allowed")));
	}

	return path;
}
/*
* Read a section of a file, returning it as bytea
*
* Caller is responsible for all permissions checking.
*
* filename: path handed to AllocateFile().
* seek_offset: starting position; a non-negative value is taken from the
* start of the file (SEEK_SET), a negative value from its end (SEEK_END).
* bytes_to_read: maximum number of bytes to return. When it is negative we
* read from the seek position through the end of the file.
* missing_ok: if true, a file that cannot be opened because it does not
* exist (ENOENT) makes us return NULL instead of raising an error.
*
* Returns a varlena whose size word covers the bytes actually read, which
* can be fewer than bytes_to_read, or NULL in the missing_ok case above.
* All other failures (open, seek, read, oversized request or file) raise
* ERROR.
*/
static bytea *
read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
bool missing_ok)
{
bytea *buf;
size_t nbytes = 0;
FILE *file;
/*
* Reject (not clamp) an explicit request that could not fit, together with
* the varlena header, in MaxAllocSize bytes.
*/
if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length too large")));
if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
{
/* Only a nonexistent file is forgiven by missing_ok */
if (missing_ok && errno == ENOENT)
return NULL;
else
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\" for reading: %m",
filename)));
}
/* Negative offsets are measured back from the end of the file */
if (fseeko(file, (off_t) seek_offset,
(seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not seek in file \"%s\": %m", filename)));
if (bytes_to_read >= 0)
{
/* If passed explicit read size just do it */
buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
/* nbytes may end up smaller than requested (short read or error) */
nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
}
else
{
/* Negative read size, read rest of file */
StringInfoData sbuf;
initStringInfo(&sbuf);
/* Leave room in the buffer for the varlena length word */
sbuf.len += VARHDRSZ;
Assert(sbuf.len < sbuf.maxlen);
/* Keep reading until end of file or a read error is flagged */
while (!(feof(file) || ferror(file)))
{
size_t rbytes;
/* Minimum amount to read at a time */
#define MIN_READ_SIZE 4096
/*
* If not at end of file, and sbuf.len is equal to MaxAllocSize -
* 1, then either the file is too large, or there is nothing left
* to read. Attempt to read one more byte to see if the end of
* file has been reached. If not, the file is too large; we'd
* rather give the error message for that ourselves.
*/
if (sbuf.len == MaxAllocSize - 1)
{
char rbuf[1];
if (fread(rbuf, 1, 1, file) != 0 || !feof(file))
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("file length too large")));
else
break;
}
/* OK, ensure that we can read at least MIN_READ_SIZE */
enlargeStringInfo(&sbuf, MIN_READ_SIZE);
/*
* stringinfo.c likes to allocate in powers of 2, so it's likely
* that much more space is available than we asked for. Use all
* of it, rather than making more fread calls than necessary.
*/
rbytes = fread(sbuf.data + sbuf.len, 1,
(size_t) (sbuf.maxlen - sbuf.len - 1), file);
/* sbuf.len counts the header too; nbytes counts file data only */
sbuf.len += rbytes;
nbytes += rbytes;
}
/* Now we can commandeer the stringinfo's buffer as the result */
buf = (bytea *) sbuf.data;
}
/* Covers read errors from either branch above */
if (ferror(file))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m", filename)));
/* Record the total varlena size: data actually read plus the header */
SET_VARSIZE(buf, nbytes + VARHDRSZ);
FreeFile(file);
return buf;
}
/*
 * Same contract as read_binary_file(), except that the bytes read are also
 * checked for validity in the database encoding before being handed back
 * as text.
 *
 * Returns NULL when read_binary_file() returns NULL.
 */
static text *
read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
			   bool missing_ok)
{
	bytea	   *contents = read_binary_file(filename, seek_offset,
											bytes_to_read, missing_ok);

	/* Nothing was read (missing file tolerated by the caller) */
	if (contents == NULL)
		return NULL;

	/* Complain, rather than return false, if the data is not valid */
	pg_verifymbstr(VARDATA(contents), VARSIZE(contents) - VARHDRSZ, false);

	/* Verified, so the buffer can be relabeled as text */
	return (text *) contents;
}
/*
* Read a section of a file, returning it as text
*
* This function is kept to support adminpack 1.0.
*/
Datum
pg_read_file(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
int64 seek_offset = 0;
int64 bytes_to_read = -1;
bool missing_ok = false;
char *filename;
text *result;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser to read files with adminpack 1.0"),
/* translator: %s is a SQL function name */
errhint("Consider using %s, which is part of core, instead.",
"pg_read_file()")));
/* handle optional arguments */
if (PG_NARGS() >= 3)
{
seek_offset = PG_GETARG_INT64(1);
bytes_to_read = PG_GETARG_INT64(2);
if (bytes_to_read < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length cannot be negative")));
}
if (PG_NARGS() >= 4)
missing_ok = PG_GETARG_BOOL(3);
filename = convert_and_check_filename(filename_t);
result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
if (result)
PG_RETURN_TEXT_P(result);
else
PG_RETURN_NULL();
}
/*
 * Shared implementation for the pg_read_file() SQL variants: read a section
 * of a file and return it as text.
 *
 * There is no superuser check here; access is governed by the GRANT system
 * plus the path checks in convert_and_check_filename().
 *
 * When read_to_eof is true the caller must pass bytes_to_read = -1.
 * Otherwise a negative bytes_to_read is rejected with an error.
 */
static text *
pg_read_file_common(text *filename_t, int64 seek_offset, int64 bytes_to_read,
					bool read_to_eof, bool missing_ok)
{
	char	   *filename;

	if (!read_to_eof)
	{
		if (bytes_to_read < 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("requested length cannot be negative")));
	}
	else
		Assert(bytes_to_read == -1);

	filename = convert_and_check_filename(filename_t);

	return read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
}
/*
 * Shared implementation for the pg_read_binary_file() SQL variants: read a
 * section of a file and return it as bytea.
 *
 * The arguments follow the same rules as in pg_read_file_common(): with
 * read_to_eof the caller passes bytes_to_read = -1, and without it a
 * negative bytes_to_read is an error.
 */
static bytea *
pg_read_binary_file_common(text *filename_t,
						   int64 seek_offset, int64 bytes_to_read,
						   bool read_to_eof, bool missing_ok)
{
	char	   *filename;

	if (!read_to_eof)
	{
		if (bytes_to_read < 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("requested length cannot be negative")));
	}
	else
		Assert(bytes_to_read == -1);

	filename = convert_and_check_filename(filename_t);

	return read_binary_file(filename, seek_offset, bytes_to_read, missing_ok);
}
/*
* Wrapper functions for the variants of SQL functions pg_read_file() and
* pg_read_binary_file().
*
* These are necessary to pass the sanity check in opr_sanity, which checks
* that all built-in functions that share the implementing C function take
* the same number of arguments.
*/
Datum
pg_read_file_off_len(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
int64 seek_offset = PG_GETARG_INT64(1);
int64 bytes_to_read = PG_GETARG_INT64(2);
text *ret;
ret = pg_read_file_common(filename_t, seek_offset, bytes_to_read,
false, false);
if (!ret)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(ret);
}
Datum
pg_read_file_off_len_missing(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
int64 seek_offset = PG_GETARG_INT64(1);
int64 bytes_to_read = PG_GETARG_INT64(2);
bool missing_ok = PG_GETARG_BOOL(3);
text *ret;
ret = pg_read_file_common(filename_t, seek_offset, bytes_to_read,
false, missing_ok);
if (!ret)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(ret);
}
Datum
pg_read_file_all(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
text *ret;
ret = pg_read_file_common(filename_t, 0, -1, true, false);
if (!ret)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(ret);
}
Datum
pg_read_file_all_missing(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
bool missing_ok = PG_GETARG_BOOL(1);
text *ret;
ret = pg_read_file_common(filename_t, 0, -1, true, missing_ok);
if (!ret)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(ret);
}
Datum
pg_read_binary_file_off_len(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
int64 seek_offset = PG_GETARG_INT64(1);
int64 bytes_to_read = PG_GETARG_INT64(2);
text *ret;
ret = pg_read_binary_file_common(filename_t, seek_offset, bytes_to_read,
false, false);
if (!ret)
PG_RETURN_NULL();
PG_RETURN_BYTEA_P(ret);
}
Datum
pg_read_binary_file_off_len_missing(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
int64 seek_offset = PG_GETARG_INT64(1);
int64 bytes_to_read = PG_GETARG_INT64(2);
bool missing_ok = PG_GETARG_BOOL(3);
text *ret;
ret = pg_read_binary_file_common(filename_t, seek_offset, bytes_to_read,
false, missing_ok);
if (!ret)
PG_RETURN_NULL();
PG_RETURN_BYTEA_P(ret);
}
Datum
pg_read_binary_file_all(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
text *ret;
ret = pg_read_binary_file_common(filename_t, 0, -1, true, false);
if (!ret)
PG_RETURN_NULL();
PG_RETURN_BYTEA_P(ret);
}
Datum
pg_read_binary_file_all_missing(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
bool missing_ok = PG_GETARG_BOOL(1);
text *ret;
ret = pg_read_binary_file_common(filename_t, 0, -1, true, missing_ok);
if (!ret)
PG_RETURN_NULL();
PG_RETURN_BYTEA_P(ret);
}
/*
* stat a file
*/
Datum
pg_stat_file(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
char *filename;
struct stat fst;
Datum values[6];
bool isnull[6];
HeapTuple tuple;
TupleDesc tupdesc;
bool missing_ok = false;
/* check the optional argument */
if (PG_NARGS() == 2)
missing_ok = PG_GETARG_BOOL(1);
filename = convert_and_check_filename(filename_t);
if (stat(filename, &fst) < 0)
{
if (missing_ok && errno == ENOENT)
PG_RETURN_NULL();
else
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", filename)));
}
/*
* This record type had better match the output parameters declared for me
* in pg_proc.h.
*/
tupdesc = CreateTemplateTupleDesc(6);
TupleDescInitEntry(tupdesc, (AttrNumber) 1,
"size", INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2,
"access", TIMESTAMPTZOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 3,
"modification", TIMESTAMPTZOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 4,
"change", TIMESTAMPTZOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 5,
"creation", TIMESTAMPTZOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 6,
"isdir", BOOLOID, -1, 0);
BlessTupleDesc(tupdesc);
memset(isnull, false, sizeof(isnull));
values[0] = Int64GetDatum((int64) fst.st_size);
values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime));
values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime));
/* Unix has file status change time, while Win32 has creation time */
#if !defined(WIN32) && !defined(__CYGWIN__)
values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
isnull[4] = true;
#else
isnull[3] = true;
values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
#endif
values[5] = BoolGetDatum(S_ISDIR(fst.st_mode));
tuple = heap_form_tuple(tupdesc, values, isnull);
pfree(filename);
PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
}
/*
 * One-argument form of pg_stat_file().
 *
 * A separate C symbol is required because the opr_sanity regression check
 * insists that built-in SQL functions sharing one C implementation all take
 * the same number of arguments.
 */
Datum
pg_stat_file_1arg(PG_FUNCTION_ARGS)
{
	Datum		result = pg_stat_file(fcinfo);

	return result;
}
/*
 * List the entries of a directory, returning one row per file name.
 *
 * In the three-argument form the second argument is missing_ok (return an
 * empty set if the directory does not exist) and the third is
 * include_dot_dirs (also report "." and "..").  A NULL in either position
 * leaves the corresponding default of false in effect.
 */
Datum
pg_ls_dir(PG_FUNCTION_ARGS)
{
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	char	   *dirpath;
	bool		missing_ok = false;
	bool		show_dot_dirs = false;
	DIR		   *dir;
	struct dirent *entry;

	dirpath = convert_and_check_filename(PG_GETARG_TEXT_PP(0));

	/* Optional flags are present only in the three-argument form */
	if (PG_NARGS() == 3)
	{
		missing_ok = !PG_ARGISNULL(1) && PG_GETARG_BOOL(1);
		show_dot_dirs = !PG_ARGISNULL(2) && PG_GETARG_BOOL(2);
	}

	InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);

	dir = AllocateDir(dirpath);

	/*
	 * A tolerated missing directory yields the empty tuplestore.  Any other
	 * open failure is left for ReadDir() to report.
	 */
	if (dir == NULL && missing_ok && errno == ENOENT)
		return (Datum) 0;

	for (entry = ReadDir(dir, dirpath);
		 entry != NULL;
		 entry = ReadDir(dir, dirpath))
	{
		const char *name = entry->d_name;
		Datum		values[1];
		bool		nulls[1];

		if (!show_dot_dirs &&
			(strcmp(name, ".") == 0 || strcmp(name, "..") == 0))
			continue;

		values[0] = CStringGetTextDatum(name);
		nulls[0] = false;

		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
							 values, nulls);
	}

	FreeDir(dir);

	return (Datum) 0;
}
/*
 * One-argument form of pg_ls_dir().
 *
 * A separate C symbol is required because the opr_sanity regression check
 * insists that built-in SQL functions sharing one C implementation all take
 * the same number of arguments.
 */
Datum
pg_ls_dir_1arg(PG_FUNCTION_ARGS)
{
	Datum		result = pg_ls_dir(fcinfo);

	return result;
}
/*
 * Common worker for the pg_ls_*dir() functions: emit one row of
 * (name, size, modification time) for each regular, non-hidden file in
 * "dir".
 *
 * A nonexistent directory silently produces an empty set when missing_ok is
 * true; every other failure to read the directory raises an error.
 */
static Datum
pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
{
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	DIR		   *handle;
	struct dirent *entry;

	InitMaterializedSRF(fcinfo, 0);

	/*
	 * The whole directory is scanned inside this one call.  We cannot keep
	 * it open across calls, since nothing guarantees the SRF will be run to
	 * completion.
	 */
	handle = AllocateDir(dir);

	/*
	 * A tolerated missing directory yields the empty tuplestore.  Any other
	 * open failure is left for ReadDir() to report.
	 */
	if (handle == NULL && missing_ok && errno == ENOENT)
		return (Datum) 0;

	for (entry = ReadDir(handle, dir);
		 entry != NULL;
		 entry = ReadDir(handle, dir))
	{
		char		fullpath[MAXPGPATH * 2];
		struct stat st;
		Datum		values[3];
		bool		nulls[3];

		/* Names starting with a dot are treated as hidden */
		if (entry->d_name[0] == '.')
			continue;

		snprintf(fullpath, sizeof(fullpath), "%s/%s", dir, entry->d_name);

		if (stat(fullpath, &st) < 0)
		{
			/* A file removed since we read its name is skipped quietly */
			if (errno != ENOENT)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file \"%s\": %m", fullpath)));
			continue;
		}

		/* Directories, symlink targets that are not files, etc. are skipped */
		if (!S_ISREG(st.st_mode))
			continue;

		nulls[0] = false;
		nulls[1] = false;
		nulls[2] = false;

		values[0] = CStringGetTextDatum(entry->d_name);
		values[1] = Int64GetDatum((int64) st.st_size);
		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(st.st_mtime));

		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
	}

	FreeDir(handle);

	return (Datum) 0;
}
/* Function to return the list of files in the log directory */
Datum
pg_ls_logdir(PG_FUNCTION_ARGS)
{
return pg_ls_dir_files(fcinfo, Log_directory, false);
}
/* Function to return the list of files in the WAL directory */
Datum
pg_ls_waldir(PG_FUNCTION_ARGS)
{
return pg_ls_dir_files(fcinfo, XLOGDIR, false);
}
/*
 * Common worker for listing the files in a tablespace's pgsql_tmp
 * directory.
 *
 * Raises an error if no tablespace with the given OID exists.  A missing
 * temp directory is not an error; it produces an empty set.
 */
static Datum
pg_ls_tmpdir(FunctionCallInfo fcinfo, Oid tblspc)
{
	char		tmppath[MAXPGPATH];
	bool		known;

	known = SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tblspc));
	if (!known)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("tablespace with OID %u does not exist",
						tblspc)));

	TempTablespacePath(tmppath, tblspc);

	return pg_ls_dir_files(fcinfo, tmppath, true);
}
/*
* Function to return the list of temporary files in the pg_default tablespace's
* pgsql_tmp directory
*/
Datum
pg_ls_tmpdir_noargs(PG_FUNCTION_ARGS)
{
return pg_ls_tmpdir(fcinfo, DEFAULTTABLESPACE_OID);
}
/*
* Function to return the list of temporary files in the specified tablespace's
* pgsql_tmp directory
*/
Datum
pg_ls_tmpdir_1arg(PG_FUNCTION_ARGS)
{
return pg_ls_tmpdir(fcinfo, PG_GETARG_OID(0));
}
/*
* Function to return the list of files in the WAL archive status directory.
*/
Datum
pg_ls_archive_statusdir(PG_FUNCTION_ARGS)
{
return pg_ls_dir_files(fcinfo, XLOGDIR "/archive_status", true);
}
/*
* Function to return the list of files in the pg_logical/snapshots directory.
*/
Datum
pg_ls_logicalsnapdir(PG_FUNCTION_ARGS)
{
return pg_ls_dir_files(fcinfo, "pg_logical/snapshots", false);
}
/*
* Function to return the list of files in the pg_logical/mappings directory.
*/
Datum
pg_ls_logicalmapdir(PG_FUNCTION_ARGS)
{
return pg_ls_dir_files(fcinfo, "pg_logical/mappings", false);
}
/*
 * List the files in the pg_replslot/<replication_slot> directory of the
 * replication slot named by the single argument.
 *
 * Raises an error if no slot with that name is found.
 */
Datum
pg_ls_replslotdir(PG_FUNCTION_ARGS)
{
	char	   *slotname = text_to_cstring(PG_GETARG_TEXT_PP(0));
	char		slotdir[MAXPGPATH];

	/* Verify the slot exists before building a path from its name */
	if (!SearchNamedReplicationSlot(slotname, true))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("replication slot \"%s\" does not exist",
						slotname)));

	snprintf(slotdir, sizeof(slotdir), "pg_replslot/%s", slotname);

	return pg_ls_dir_files(fcinfo, slotdir, false);
}