mirror of
https://github.com/postgres/postgres.git
synced 2025-08-25 20:23:07 +03:00
tzparse() would attempt to load the "posixrules" timezone database file on each call. That might seem like it would only be an issue when selecting a POSIX-style zone name rather than a zone defined in the timezone database, but it turns out that each zone definition file contains a POSIX-style zone string and tzload() will call tzparse() to parse that. Thus, when scanning the whole timezone file tree as we do in the pg_timezone_names view, "posixrules" was read repetitively for each zone definition file. Fix that by caching the file on first use within any given process. (We cache other zone definitions for the life of the process, so there seems little reason not to cache this one as well.) This probably won't help much in processes that never run pg_timezone_names, but even one additional SET of the timezone GUC would come out ahead. An even worse problem for pg_timezone_names is that pg_open_tzfile() has an inefficient way of identifying the canonical case of a zone name: it basically re-descends the directory tree to the zone file. That's not awful for an individual "SET timezone" operation, but it's pretty horrid when we're inspecting every zone in the database. And it's pointless too because we already know the canonical spelling, having just read it from the filesystem. Fix by teaching pg_open_tzfile() to avoid the directory search if it's not asked for the canonical name, and backfilling the proper result in pg_tzenumerate_next(). In combination these changes seem to make the pg_timezone_names view about 3x faster to read, for me. Since a scan of pg_timezone_names has up to now been one of the slowest queries in the regression tests, this should help some little bit for buildfarm cycle times. Back-patch to all supported branches, not so much because it's likely that users will care much about the view's performance as because tracking changes in the upstream IANA timezone code is really painful if we don't keep all the branches in sync. 
Discussion: https://postgr.es/m/27962.1493671706@sss.pgh.pa.us
511 lines
13 KiB
C
511 lines
13 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* pgtz.c
|
|
* Timezone Library Integration Functions
|
|
*
|
|
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/timezone/pgtz.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include <ctype.h>
|
|
#include <fcntl.h>
|
|
#include <sys/stat.h>
|
|
#include <time.h>
|
|
|
|
#include "datatype/timestamp.h"
|
|
#include "miscadmin.h"
|
|
#include "pgtz.h"
|
|
#include "storage/fd.h"
|
|
#include "utils/hsearch.h"
|
|
|
|
|
|
/* Current session timezone (controlled by TimeZone GUC) */
|
|
pg_tz *session_timezone = NULL;
|
|
|
|
/* Current log timezone (controlled by log_timezone GUC) */
|
|
pg_tz *log_timezone = NULL;
|
|
|
|
|
|
/* Forward declaration of the case-insensitive directory lookup helper. */
static bool scan_directory_ci(const char *dirname,
							  const char *fname, int fnamelen,
							  char *canonname, int canonnamelen);
|
|
|
|
|
|
/*
|
|
* Return full pathname of timezone data directory
|
|
*/
|
|
static const char *
|
|
pg_TZDIR(void)
|
|
{
|
|
#ifndef SYSTEMTZDIR
|
|
/* normal case: timezone stuff is under our share dir */
|
|
static bool done_tzdir = false;
|
|
static char tzdir[MAXPGPATH];
|
|
|
|
if (done_tzdir)
|
|
return tzdir;
|
|
|
|
get_share_path(my_exec_path, tzdir);
|
|
strlcpy(tzdir + strlen(tzdir), "/timezone", MAXPGPATH - strlen(tzdir));
|
|
|
|
done_tzdir = true;
|
|
return tzdir;
|
|
#else
|
|
/* we're configured to use system's timezone database */
|
|
return SYSTEMTZDIR;
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
* Given a timezone name, open() the timezone data file. Return the
|
|
* file descriptor if successful, -1 if not.
|
|
*
|
|
* The input name is searched for case-insensitively (we assume that the
|
|
* timezone database does not contain case-equivalent names).
|
|
*
|
|
* If "canonname" is not NULL, then on success the canonical spelling of the
|
|
* given name is stored there (the buffer must be > TZ_STRLEN_MAX bytes!).
|
|
*/
|
|
int
|
|
pg_open_tzfile(const char *name, char *canonname)
|
|
{
|
|
const char *fname;
|
|
char fullname[MAXPGPATH];
|
|
int fullnamelen;
|
|
int orignamelen;
|
|
|
|
/* Initialize fullname with base name of tzdata directory */
|
|
strlcpy(fullname, pg_TZDIR(), sizeof(fullname));
|
|
orignamelen = fullnamelen = strlen(fullname);
|
|
|
|
if (fullnamelen + 1 + strlen(name) >= MAXPGPATH)
|
|
return -1; /* not gonna fit */
|
|
|
|
/*
|
|
* If the caller doesn't need the canonical spelling, first just try to
|
|
* open the name as-is. This can be expected to succeed if the given name
|
|
* is already case-correct, or if the filesystem is case-insensitive; and
|
|
* we don't need to distinguish those situations if we aren't tasked with
|
|
* reporting the canonical spelling.
|
|
*/
|
|
if (canonname == NULL)
|
|
{
|
|
int result;
|
|
|
|
fullname[fullnamelen] = '/';
|
|
/* test above ensured this will fit: */
|
|
strcpy(fullname + fullnamelen + 1, name);
|
|
result = open(fullname, O_RDONLY | PG_BINARY, 0);
|
|
if (result >= 0)
|
|
return result;
|
|
/* If that didn't work, fall through to do it the hard way */
|
|
}
|
|
|
|
/*
|
|
* Loop to split the given name into directory levels; for each level,
|
|
* search using scan_directory_ci().
|
|
*/
|
|
fname = name;
|
|
for (;;)
|
|
{
|
|
const char *slashptr;
|
|
int fnamelen;
|
|
|
|
slashptr = strchr(fname, '/');
|
|
if (slashptr)
|
|
fnamelen = slashptr - fname;
|
|
else
|
|
fnamelen = strlen(fname);
|
|
if (!scan_directory_ci(fullname, fname, fnamelen,
|
|
fullname + fullnamelen + 1,
|
|
MAXPGPATH - fullnamelen - 1))
|
|
return -1;
|
|
fullname[fullnamelen++] = '/';
|
|
fullnamelen += strlen(fullname + fullnamelen);
|
|
if (slashptr)
|
|
fname = slashptr + 1;
|
|
else
|
|
break;
|
|
}
|
|
|
|
if (canonname)
|
|
strlcpy(canonname, fullname + orignamelen + 1, TZ_STRLEN_MAX + 1);
|
|
|
|
return open(fullname, O_RDONLY | PG_BINARY, 0);
|
|
}
|
|
|
|
|
|
/*
|
|
* Scan specified directory for a case-insensitive match to fname
|
|
* (of length fnamelen --- fname may not be null terminated!). If found,
|
|
* copy the actual filename into canonname and return true.
|
|
*/
|
|
static bool
|
|
scan_directory_ci(const char *dirname, const char *fname, int fnamelen,
|
|
char *canonname, int canonnamelen)
|
|
{
|
|
bool found = false;
|
|
DIR *dirdesc;
|
|
struct dirent *direntry;
|
|
|
|
dirdesc = AllocateDir(dirname);
|
|
if (!dirdesc)
|
|
{
|
|
ereport(LOG,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not open directory \"%s\": %m", dirname)));
|
|
return false;
|
|
}
|
|
|
|
while ((direntry = ReadDir(dirdesc, dirname)) != NULL)
|
|
{
|
|
/*
|
|
* Ignore . and .., plus any other "hidden" files. This is a security
|
|
* measure to prevent access to files outside the timezone directory.
|
|
*/
|
|
if (direntry->d_name[0] == '.')
|
|
continue;
|
|
|
|
if (strlen(direntry->d_name) == fnamelen &&
|
|
pg_strncasecmp(direntry->d_name, fname, fnamelen) == 0)
|
|
{
|
|
/* Found our match */
|
|
strlcpy(canonname, direntry->d_name, canonnamelen);
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
FreeDir(dirdesc);
|
|
|
|
return found;
|
|
}
|
|
|
|
|
|
/*
|
|
* We keep loaded timezones in a hashtable so we don't have to
|
|
* load and parse the TZ definition file every time one is selected.
|
|
* Because we want timezone names to be found case-insensitively,
|
|
* the hash key is the uppercased name of the zone.
|
|
*/
|
|
typedef struct
|
|
{
|
|
/* tznameupper contains the all-upper-case name of the timezone */
|
|
char tznameupper[TZ_STRLEN_MAX + 1];
|
|
pg_tz tz;
|
|
} pg_tz_cache;
|
|
|
|
static HTAB *timezone_cache = NULL;
|
|
|
|
|
|
static bool
|
|
init_timezone_hashtable(void)
|
|
{
|
|
HASHCTL hash_ctl;
|
|
|
|
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
|
|
|
|
hash_ctl.keysize = TZ_STRLEN_MAX + 1;
|
|
hash_ctl.entrysize = sizeof(pg_tz_cache);
|
|
|
|
timezone_cache = hash_create("Timezones",
|
|
4,
|
|
&hash_ctl,
|
|
HASH_ELEM);
|
|
if (!timezone_cache)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Load a timezone from file or from cache.
|
|
* Does not verify that the timezone is acceptable!
|
|
*
|
|
* "GMT" is always interpreted as the tzparse() definition, without attempting
|
|
* to load a definition from the filesystem. This has a number of benefits:
|
|
* 1. It's guaranteed to succeed, so we don't have the failure mode wherein
|
|
* the bootstrap default timezone setting doesn't work (as could happen if
|
|
* the OS attempts to supply a leap-second-aware version of "GMT").
|
|
* 2. Because we aren't accessing the filesystem, we can safely initialize
|
|
* the "GMT" zone definition before my_exec_path is known.
|
|
* 3. It's quick enough that we don't waste much time when the bootstrap
|
|
* default timezone setting is later overridden from postgresql.conf.
|
|
*/
|
|
pg_tz *
|
|
pg_tzset(const char *name)
|
|
{
|
|
pg_tz_cache *tzp;
|
|
struct state tzstate;
|
|
char uppername[TZ_STRLEN_MAX + 1];
|
|
char canonname[TZ_STRLEN_MAX + 1];
|
|
char *p;
|
|
|
|
if (strlen(name) > TZ_STRLEN_MAX)
|
|
return NULL; /* not going to fit */
|
|
|
|
if (!timezone_cache)
|
|
if (!init_timezone_hashtable())
|
|
return NULL;
|
|
|
|
/*
|
|
* Upcase the given name to perform a case-insensitive hashtable search.
|
|
* (We could alternatively downcase it, but we prefer upcase so that we
|
|
* can get consistently upcased results from tzparse() in case the name is
|
|
* a POSIX-style timezone spec.)
|
|
*/
|
|
p = uppername;
|
|
while (*name)
|
|
*p++ = pg_toupper((unsigned char) *name++);
|
|
*p = '\0';
|
|
|
|
tzp = (pg_tz_cache *) hash_search(timezone_cache,
|
|
uppername,
|
|
HASH_FIND,
|
|
NULL);
|
|
if (tzp)
|
|
{
|
|
/* Timezone found in cache, nothing more to do */
|
|
return &tzp->tz;
|
|
}
|
|
|
|
/*
|
|
* "GMT" is always sent to tzparse(), as per discussion above.
|
|
*/
|
|
if (strcmp(uppername, "GMT") == 0)
|
|
{
|
|
if (!tzparse(uppername, &tzstate, true))
|
|
{
|
|
/* This really, really should not happen ... */
|
|
elog(ERROR, "could not initialize GMT time zone");
|
|
}
|
|
/* Use uppercase name as canonical */
|
|
strcpy(canonname, uppername);
|
|
}
|
|
else if (tzload(uppername, canonname, &tzstate, true) != 0)
|
|
{
|
|
if (uppername[0] == ':' || !tzparse(uppername, &tzstate, false))
|
|
{
|
|
/* Unknown timezone. Fail our call instead of loading GMT! */
|
|
return NULL;
|
|
}
|
|
/* For POSIX timezone specs, use uppercase name as canonical */
|
|
strcpy(canonname, uppername);
|
|
}
|
|
|
|
/* Save timezone in the cache */
|
|
tzp = (pg_tz_cache *) hash_search(timezone_cache,
|
|
uppername,
|
|
HASH_ENTER,
|
|
NULL);
|
|
|
|
/* hash_search already copied uppername into the hash key */
|
|
strcpy(tzp->tz.TZname, canonname);
|
|
memcpy(&tzp->tz.state, &tzstate, sizeof(tzstate));
|
|
|
|
return &tzp->tz;
|
|
}
|
|
|
|
/*
|
|
* Load a fixed-GMT-offset timezone.
|
|
* This is used for SQL-spec SET TIME ZONE INTERVAL 'foo' cases.
|
|
* It's otherwise equivalent to pg_tzset().
|
|
*
|
|
* The GMT offset is specified in seconds, positive values meaning west of
|
|
* Greenwich (ie, POSIX not ISO sign convention). However, we use ISO
|
|
* sign convention in the displayable abbreviation for the zone.
|
|
*
|
|
* Caution: this can fail (return NULL) if the specified offset is outside
|
|
* the range allowed by the zic library.
|
|
*/
|
|
pg_tz *
|
|
pg_tzset_offset(long gmtoffset)
|
|
{
|
|
long absoffset = (gmtoffset < 0) ? -gmtoffset : gmtoffset;
|
|
char offsetstr[64];
|
|
char tzname[128];
|
|
|
|
snprintf(offsetstr, sizeof(offsetstr),
|
|
"%02ld", absoffset / SECS_PER_HOUR);
|
|
absoffset %= SECS_PER_HOUR;
|
|
if (absoffset != 0)
|
|
{
|
|
snprintf(offsetstr + strlen(offsetstr),
|
|
sizeof(offsetstr) - strlen(offsetstr),
|
|
":%02ld", absoffset / SECS_PER_MINUTE);
|
|
absoffset %= SECS_PER_MINUTE;
|
|
if (absoffset != 0)
|
|
snprintf(offsetstr + strlen(offsetstr),
|
|
sizeof(offsetstr) - strlen(offsetstr),
|
|
":%02ld", absoffset);
|
|
}
|
|
if (gmtoffset > 0)
|
|
snprintf(tzname, sizeof(tzname), "<-%s>+%s",
|
|
offsetstr, offsetstr);
|
|
else
|
|
snprintf(tzname, sizeof(tzname), "<+%s>-%s",
|
|
offsetstr, offsetstr);
|
|
|
|
return pg_tzset(tzname);
|
|
}
|
|
|
|
|
|
/*
|
|
* Initialize timezone library
|
|
*
|
|
* This is called before GUC variable initialization begins. Its purpose
|
|
* is to ensure that log_timezone has a valid value before any logging GUC
|
|
* variables could become set to values that require elog.c to provide
|
|
* timestamps (e.g., log_line_prefix). We may as well initialize
|
|
* session_timestamp to something valid, too.
|
|
*/
|
|
void
|
|
pg_timezone_initialize(void)
|
|
{
|
|
/*
|
|
* We may not yet know where PGSHAREDIR is (in particular this is true in
|
|
* an EXEC_BACKEND subprocess). So use "GMT", which pg_tzset forces to be
|
|
* interpreted without reference to the filesystem. This corresponds to
|
|
* the bootstrap default for these variables in guc.c, although in
|
|
* principle it could be different.
|
|
*/
|
|
session_timezone = pg_tzset("GMT");
|
|
log_timezone = session_timezone;
|
|
}
|
|
|
|
|
|
/*
|
|
* Functions to enumerate available timezones
|
|
*
|
|
* Note that pg_tzenumerate_next() will return a pointer into the pg_tzenum
|
|
* structure, so the data is only valid up to the next call.
|
|
*
|
|
* All data is allocated using palloc in the current context.
|
|
*/
|
|
#define MAX_TZDIR_DEPTH 10
|
|
|
|
struct pg_tzenum
|
|
{
|
|
int baselen;
|
|
int depth;
|
|
DIR *dirdesc[MAX_TZDIR_DEPTH];
|
|
char *dirname[MAX_TZDIR_DEPTH];
|
|
struct pg_tz tz;
|
|
};
|
|
|
|
/* typedef pg_tzenum is declared in pgtime.h */
|
|
|
|
pg_tzenum *
|
|
pg_tzenumerate_start(void)
|
|
{
|
|
pg_tzenum *ret = (pg_tzenum *) palloc0(sizeof(pg_tzenum));
|
|
char *startdir = pstrdup(pg_TZDIR());
|
|
|
|
ret->baselen = strlen(startdir) + 1;
|
|
ret->depth = 0;
|
|
ret->dirname[0] = startdir;
|
|
ret->dirdesc[0] = AllocateDir(startdir);
|
|
if (!ret->dirdesc[0])
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not open directory \"%s\": %m", startdir)));
|
|
return ret;
|
|
}
|
|
|
|
void
|
|
pg_tzenumerate_end(pg_tzenum *dir)
|
|
{
|
|
while (dir->depth >= 0)
|
|
{
|
|
FreeDir(dir->dirdesc[dir->depth]);
|
|
pfree(dir->dirname[dir->depth]);
|
|
dir->depth--;
|
|
}
|
|
pfree(dir);
|
|
}
|
|
|
|
pg_tz *
|
|
pg_tzenumerate_next(pg_tzenum *dir)
|
|
{
|
|
while (dir->depth >= 0)
|
|
{
|
|
struct dirent *direntry;
|
|
char fullname[MAXPGPATH];
|
|
struct stat statbuf;
|
|
|
|
direntry = ReadDir(dir->dirdesc[dir->depth], dir->dirname[dir->depth]);
|
|
|
|
if (!direntry)
|
|
{
|
|
/* End of this directory */
|
|
FreeDir(dir->dirdesc[dir->depth]);
|
|
pfree(dir->dirname[dir->depth]);
|
|
dir->depth--;
|
|
continue;
|
|
}
|
|
|
|
if (direntry->d_name[0] == '.')
|
|
continue;
|
|
|
|
snprintf(fullname, MAXPGPATH, "%s/%s",
|
|
dir->dirname[dir->depth], direntry->d_name);
|
|
if (stat(fullname, &statbuf) != 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not stat \"%s\": %m", fullname)));
|
|
|
|
if (S_ISDIR(statbuf.st_mode))
|
|
{
|
|
/* Step into the subdirectory */
|
|
if (dir->depth >= MAX_TZDIR_DEPTH - 1)
|
|
ereport(ERROR,
|
|
(errmsg_internal("timezone directory stack overflow")));
|
|
dir->depth++;
|
|
dir->dirname[dir->depth] = pstrdup(fullname);
|
|
dir->dirdesc[dir->depth] = AllocateDir(fullname);
|
|
if (!dir->dirdesc[dir->depth])
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not open directory \"%s\": %m",
|
|
fullname)));
|
|
|
|
/* Start over reading in the new directory */
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Load this timezone using tzload() not pg_tzset(), so we don't fill
|
|
* the cache. Also, don't ask for the canonical spelling: we already
|
|
* know it, and pg_open_tzfile's way of finding it out is pretty
|
|
* inefficient.
|
|
*/
|
|
if (tzload(fullname + dir->baselen, NULL, &dir->tz.state, true) != 0)
|
|
{
|
|
/* Zone could not be loaded, ignore it */
|
|
continue;
|
|
}
|
|
|
|
if (!pg_tz_acceptable(&dir->tz))
|
|
{
|
|
/* Ignore leap-second zones */
|
|
continue;
|
|
}
|
|
|
|
/* OK, return the canonical zone name spelling. */
|
|
strlcpy(dir->tz.TZname, fullname + dir->baselen,
|
|
sizeof(dir->tz.TZname));
|
|
|
|
/* Timezone loaded OK. */
|
|
return &dir->tz;
|
|
}
|
|
|
|
/* Nothing more found */
|
|
return NULL;
|
|
}
|