Seek zone abbreviations in the IANA data before timezone_abbreviations.

If a time zone abbreviation used in datetime input is defined in the currently active timezone, use that definition in preference to looking in the timezone_abbreviations list. That allows us to correctly handle abbreviations that have different meanings in different timezones. Also, it eliminates an inconsistency between datetime input and datetime output: the non-ISO datestyles for timestamptz have always printed abbreviations taken from the IANA data, not from timezone_abbreviations. Before this fix, it was possible to demonstrate cases where casting a timestamp to text and back fails or changes the value significantly because of that inconsistency. While this change removes the ability to override the IANA data about an abbreviation known in the current zone, it's not clear that there's any real use-case for doing so. But it is clear that this makes life a lot easier for dealing with abbreviations that have conflicts across different time zones. Also update the pg_timezone_abbrevs view to report abbreviations that are recognized via the IANA data, and *not* report any timezone_abbreviations entries that are thereby overridden. Under the hood, there are now two SRFs, one that pulls the IANA data and one that pulls timezone_abbreviations entries. They're combined by logic in the view. This approach was useful for debugging (since the functions can be called on their own). While I don't intend to document the functions explicitly, they might be useful to call directly. Also improve DecodeTimezoneAbbrev's caching logic so that it can cache zone abbreviations found in the IANA data. Without that, this patch would have caused a noticeable degradation of the runtime of timestamptz_in. Per report from Aleksander Alekseev and additional investigation. Discussion: https://postgr.es/m/CAJ7c6TOATjJqvhnYsui0=CO5XFMF4dvTGH+skzB--jNhqSQu5g@mail.gmail.com
2025-11-22 12:22:45 +03:00 · 2025-01-16 14:11:19 -05:00
parent bc10219b9c
commit d7674c9fab
20 changed files with 521 additions and 35 deletions
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -634,7 +634,12 @@ REVOKE ALL ON pg_ident_file_mappings FROM PUBLIC;
 REVOKE EXECUTE ON FUNCTION pg_ident_file_mappings() FROM PUBLIC;

 CREATE VIEW pg_timezone_abbrevs AS
-    SELECT * FROM pg_timezone_abbrevs();
+    SELECT * FROM pg_timezone_abbrevs_zone() z
+    UNION ALL
+    (SELECT * FROM pg_timezone_abbrevs_abbrevs() a
+     WHERE NOT EXISTS (SELECT 1 FROM pg_timezone_abbrevs_zone() z2
+                       WHERE z2.abbrev = a.abbrev))
+    ORDER BY abbrev;

 CREATE VIEW pg_timezone_names AS
    SELECT * FROM pg_timezone_names();
--- a/src/backend/commands/variable.c
+++ b/src/backend/commands/variable.c
@@ -381,6 +381,8 @@ void
 assign_timezone(const char *newval, void *extra)
 {
 	session_timezone = *((pg_tz **) extra);
+	/* datetime.c's cache of timezone abbrevs may now be obsolete */
+	ClearTimeZoneAbbrevCache();
 }

 /*
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -259,7 +259,17 @@ static const datetkn *datecache[MAXDATEFIELDS] = {NULL};

 static const datetkn *deltacache[MAXDATEFIELDS] = {NULL};

-static const datetkn *abbrevcache[MAXDATEFIELDS] = {NULL};
+/* Cache for results of timezone abbreviation lookups */
+
+typedef struct TzAbbrevCache
+{
+	char		abbrev[TOKMAXLEN + 1];	/* always NUL-terminated */
+	char		ftype;			/* TZ, DTZ, or DYNTZ */
+	int			offset;			/* GMT offset, if fixed-offset */
+	pg_tz	   *tz;				/* relevant zone, if variable-offset */
+} TzAbbrevCache;
+
+static TzAbbrevCache tzabbrevcache[MAXDATEFIELDS];


 /*
@@ -1845,6 +1855,40 @@ DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr, pg_tz *tzp,
 }


+/* TimeZoneAbbrevIsKnown()
+ *
+ * Detect whether the given string is a time zone abbreviation that's known
+ * in the specified TZDB timezone, and if so whether it's fixed or varying
+ * meaning.  The match is not case-sensitive.
+ */
+static bool
+TimeZoneAbbrevIsKnown(const char *abbr, pg_tz *tzp,
+					  bool *isfixed, int *offset, int *isdst)
+{
+	char		upabbr[TZ_STRLEN_MAX + 1];
+	unsigned char *p;
+	long int	gmtoff;
+
+	/* We need to force the abbrev to upper case */
+	strlcpy(upabbr, abbr, sizeof(upabbr));
+	for (p = (unsigned char *) upabbr; *p; p++)
+		*p = pg_toupper(*p);
+
+	/* Look up the abbrev's meaning in this zone */
+	if (pg_timezone_abbrev_is_known(upabbr,
+									isfixed,
+									&gmtoff,
+									isdst,
+									tzp))
+	{
+		/* Change sign to agree with DetermineTimeZoneOffset() */
+		*offset = (int) -gmtoff;
+		return true;
+	}
+	return false;
+}
+
+
 /* DecodeTimeOnly()
 * Interpret parsed string as time fields only.
 * Returns 0 if successful, DTERR code if bogus input detected.
@@ -3092,27 +3136,60 @@ DecodeTimezoneAbbrev(int field, const char *lowtoken,
 					 int *ftype, int *offset, pg_tz **tz,
 					 DateTimeErrorExtra *extra)
 {
+	TzAbbrevCache *tzc = &tzabbrevcache[field];
+	bool		isfixed;
+	int			isdst;
 	const datetkn *tp;

-	tp = abbrevcache[field];
-	/* use strncmp so that we match truncated tokens */
-	if (tp == NULL || strncmp(lowtoken, tp->token, TOKMAXLEN) != 0)
+	/*
+	 * Do we have a cached result?  Use strncmp so that we match truncated
+	 * names, although we shouldn't really see that happen with normal
+	 * abbreviations.
+	 */
+	if (strncmp(lowtoken, tzc->abbrev, TOKMAXLEN) == 0)
 	{
-		if (zoneabbrevtbl)
-			tp = datebsearch(lowtoken, zoneabbrevtbl->abbrevs,
-							 zoneabbrevtbl->numabbrevs);
-		else
-			tp = NULL;
+		*ftype = tzc->ftype;
+		*offset = tzc->offset;
+		*tz = tzc->tz;
+		return 0;
 	}
+
+	/*
+	 * See if the current session_timezone recognizes it.  Checking this
+	 * before zoneabbrevtbl allows us to correctly handle abbreviations whose
+	 * meaning varies across zones, such as "LMT".
+	 */
+	if (session_timezone &&
+		TimeZoneAbbrevIsKnown(lowtoken, session_timezone,
+							  &isfixed, offset, &isdst))
+	{
+		*ftype = (isfixed ? (isdst ? DTZ : TZ) : DYNTZ);
+		*tz = (isfixed ? NULL : session_timezone);
+		/* flip sign to agree with the convention used in zoneabbrevtbl */
+		*offset = -(*offset);
+		/* cache result; use strlcpy to truncate name if necessary */
+		strlcpy(tzc->abbrev, lowtoken, TOKMAXLEN + 1);
+		tzc->ftype = *ftype;
+		tzc->offset = *offset;
+		tzc->tz = *tz;
+		return 0;
+	}
+
+	/* Nope, so look in zoneabbrevtbl */
+	if (zoneabbrevtbl)
+		tp = datebsearch(lowtoken, zoneabbrevtbl->abbrevs,
+						 zoneabbrevtbl->numabbrevs);
+	else
+		tp = NULL;
 	if (tp == NULL)
 	{
 		*ftype = UNKNOWN_FIELD;
 		*offset = 0;
 		*tz = NULL;
+		/* failure results are not cached */
 	}
 	else
 	{
-		abbrevcache[field] = tp;
 		*ftype = tp->type;
 		if (tp->type == DYNTZ)
 		{
@@ -3126,11 +3203,26 @@ DecodeTimezoneAbbrev(int field, const char *lowtoken,
 			*offset = tp->value;
 			*tz = NULL;
 		}
+
+		/* cache result; use strlcpy to truncate name if necessary */
+		strlcpy(tzc->abbrev, lowtoken, TOKMAXLEN + 1);
+		tzc->ftype = *ftype;
+		tzc->offset = *offset;
+		tzc->tz = *tz;
 	}

 	return 0;
 }

+/*
+ * Reset tzabbrevcache after a change in session_timezone.
+ */
+void
+ClearTimeZoneAbbrevCache(void)
+{
+	memset(tzabbrevcache, 0, sizeof(tzabbrevcache));
+}
+

 /* DecodeSpecial()
 * Decode text string using lookup table.
@@ -3278,9 +3370,6 @@ DecodeTimezoneAbbrevPrefix(const char *str, int *offset, pg_tz **tz)
 	*offset = 0;				/* avoid uninitialized vars on failure */
 	*tz = NULL;

-	if (!zoneabbrevtbl)
-		return -1;				/* no abbrevs known, so fail immediately */
-
 	/* Downcase as much of the string as we could need */
 	for (len = 0; len < TOKMAXLEN; len++)
 	{
@@ -3299,9 +3388,34 @@ DecodeTimezoneAbbrevPrefix(const char *str, int *offset, pg_tz **tz)
 	 */
 	while (len > 0)
 	{
-		const datetkn *tp = datebsearch(lowtoken, zoneabbrevtbl->abbrevs,
-										zoneabbrevtbl->numabbrevs);
+		bool		isfixed;
+		int			isdst;
+		const datetkn *tp;

+		/* See if the current session_timezone recognizes it. */
+		if (session_timezone &&
+			TimeZoneAbbrevIsKnown(lowtoken, session_timezone,
+								  &isfixed, offset, &isdst))
+		{
+			if (isfixed)
+			{
+				/* flip sign to agree with the convention in zoneabbrevtbl */
+				*offset = -(*offset);
+			}
+			else
+			{
+				/* Caller must resolve the abbrev's current meaning */
+				*tz = session_timezone;
+			}
+			return len;
+		}
+
+		/* Known in zoneabbrevtbl? */
+		if (zoneabbrevtbl)
+			tp = datebsearch(lowtoken, zoneabbrevtbl->abbrevs,
+							 zoneabbrevtbl->numabbrevs);
+		else
+			tp = NULL;
 		if (tp != NULL)
 		{
 			if (tp->type == DYNTZ)
@@ -3324,6 +3438,8 @@ DecodeTimezoneAbbrevPrefix(const char *str, int *offset, pg_tz **tz)
 				return len;
 			}
 		}
+
+		/* Nope, try the next shorter string. */
 		lowtoken[--len] = '\0';
 	}

@@ -4957,8 +5073,8 @@ void
 InstallTimeZoneAbbrevs(TimeZoneAbbrevTable *tbl)
 {
 	zoneabbrevtbl = tbl;
-	/* reset abbrevcache, which may contain pointers into old table */
-	memset(abbrevcache, 0, sizeof(abbrevcache));
+	/* reset tzabbrevcache, which may contain results from old table */
+	memset(tzabbrevcache, 0, sizeof(tzabbrevcache));
 }

 /*
@@ -4994,11 +5110,99 @@ FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp,


 /*
- * This set-returning function reads all the available time zone abbreviations
+ * This set-returning function reads all the time zone abbreviations
+ * defined by the IANA data for the current timezone setting,
 * and returns a set of (abbrev, utc_offset, is_dst).
 */
 Datum
-pg_timezone_abbrevs(PG_FUNCTION_ARGS)
+pg_timezone_abbrevs_zone(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	int		   *pindex;
+	Datum		result;
+	HeapTuple	tuple;
+	Datum		values[3];
+	bool		nulls[3] = {0};
+	TimestampTz now = GetCurrentTransactionStartTimestamp();
+	pg_time_t	t = timestamptz_to_time_t(now);
+	const char *abbrev;
+	long int	gmtoff;
+	int			isdst;
+	struct pg_itm_in itm_in;
+	Interval   *resInterval;
+
+	/* stuff done only on the first call of the function */
+	if (SRF_IS_FIRSTCALL())
+	{
+		TupleDesc	tupdesc;
+		MemoryContext oldcontext;
+
+		/* create a function context for cross-call persistence */
+		funcctx = SRF_FIRSTCALL_INIT();
+
+		/*
+		 * switch to memory context appropriate for multiple function calls
+		 */
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		/* allocate memory for user context */
+		pindex = (int *) palloc(sizeof(int));
+		*pindex = 0;
+		funcctx->user_fctx = pindex;
+
+		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+			elog(ERROR, "return type must be a row type");
+		funcctx->tuple_desc = tupdesc;
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/* stuff done on every call of the function */
+	funcctx = SRF_PERCALL_SETUP();
+	pindex = (int *) funcctx->user_fctx;
+
+	while ((abbrev = pg_get_next_timezone_abbrev(pindex,
+												 session_timezone)) != NULL)
+	{
+		/* Ignore abbreviations that aren't all-alphabetic */
+		if (strspn(abbrev, "ABCDEFGHIJKLMNOPQRSTUVWXYZ") != strlen(abbrev))
+			continue;
+
+		/* Determine the current meaning of the abbrev */
+		if (!pg_interpret_timezone_abbrev(abbrev,
+										  &t,
+										  &gmtoff,
+										  &isdst,
+										  session_timezone))
+			continue;			/* hm, not actually used in this zone? */
+
+		values[0] = CStringGetTextDatum(abbrev);
+
+		/* Convert offset (in seconds) to an interval; can't overflow */
+		MemSet(&itm_in, 0, sizeof(struct pg_itm_in));
+		itm_in.tm_usec = (int64) gmtoff * USECS_PER_SEC;
+		resInterval = (Interval *) palloc(sizeof(Interval));
+		(void) itmin2interval(&itm_in, resInterval);
+		values[1] = IntervalPGetDatum(resInterval);
+
+		values[2] = BoolGetDatum(isdst);
+
+		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+		result = HeapTupleGetDatum(tuple);
+
+		SRF_RETURN_NEXT(funcctx, result);
+	}
+
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * This set-returning function reads all the time zone abbreviations
+ * defined by the timezone_abbreviations setting,
+ * and returns a set of (abbrev, utc_offset, is_dst).
+ */
+Datum
+pg_timezone_abbrevs_abbrevs(PG_FUNCTION_ARGS)
 {
 	FuncCallContext *funcctx;
 	int		   *pindex;