From 7da4e7ac6a0680210a2df8b711d8c765d2014e06 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sat, 21 Jan 2006 04:38:46 +0000
Subject: [PATCH] Repair longstanding bug in slru/clog logic: it is possible
 for two backends to try to create a log segment file concurrently, but the
 code erroneously specified O_EXCL to open(), resulting in a needless
 failure. Before 7.4, it was even a PANIC condition :-(. Correct code is
 actually simpler than what we had, because we can just say O_CREAT to start
 with and not need a second open() call. I believe this accounts for several
 recent reports of hard-to-reproduce "could not create file ...: File exists"
 errors in both pg_clog and pg_subtrans.

---
 src/backend/access/transam/slru.c | 35 +++++++++----------------------
 1 file changed, 10 insertions(+), 25 deletions(-)

diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 0385cf1c2e1..094955ac27c 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Header: /cvsroot/pgsql/src/backend/access/transam/slru.c,v 1.7.2.2 2005/11/03 00:23:50 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/slru.c,v 1.7.2.3 2006/01/21 04:38:46 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -133,7 +133,6 @@ typedef SlruSharedData *SlruShared;
 typedef enum
 {
 	SLRU_OPEN_FAILED,
-	SLRU_CREATE_FAILED,
 	SLRU_SEEK_FAILED,
 	SLRU_READ_FAILED,
 	SLRU_WRITE_FAILED
@@ -554,25 +553,18 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno)
 	 * transactions that have already been truncated from the commit log.
 	 * Easiest way to deal with that is to accept references to
 	 * nonexistent files here and in SlruPhysicalReadPage.)
+	 *
+	 * Note: it is possible for more than one backend to be executing
+	 * this code simultaneously for different pages of the same file.
+	 * Hence, don't use O_EXCL or O_TRUNC or anything like that.
 	 */
-	fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
+	fd = BasicOpenFile(path, O_RDWR | O_CREAT | PG_BINARY,
+					   S_IRUSR | S_IWUSR);
 	if (fd < 0)
 	{
-		if (errno != ENOENT)
-		{
-			slru_errcause = SLRU_OPEN_FAILED;
-			slru_errno = errno;
-			return false;
-		}
-
-		fd = BasicOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
-						   S_IRUSR | S_IWUSR);
-		if (fd < 0)
-		{
-			slru_errcause = SLRU_CREATE_FAILED;
-			slru_errno = errno;
-			return false;
-		}
+		slru_errcause = SLRU_OPEN_FAILED;
+		slru_errno = errno;
+		return false;
 	}
 
 	if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
@@ -620,13 +612,6 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
 					 errdetail("could not open file \"%s\": %m",
 							   path)));
 			break;
-		case SLRU_CREATE_FAILED:
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not access status of transaction %u", xid),
-					 errdetail("could not create file \"%s\": %m",
-							   path)));
-			break;
 		case SLRU_SEEK_FAILED:
 			ereport(ERROR,
 					(errcode_for_file_access(),