1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

Improve table locking behavior in the face of concurrent DDL.

In the previous coding, callers were faced with an awkward choice:
look up the name, do permissions checks, and then lock the table; or
look up the name, lock the table, and then do permissions checks.
The first choice was wrong because the results of the name lookup
and permissions checks might be out-of-date by the time the table
lock was acquired, while the second allowed a user with no privileges
to interfere with access to a table by users who do have privileges
(e.g. if a malicious backend queues up for an AccessExclusiveLock on
a table on which AccessShareLock is already held, further attempts
to access the table will be blocked until the AccessExclusiveLock
is obtained and the malicious backend's transaction rolls back).

To fix, allow callers of RangeVarGetRelid() to pass a callback which
gets executed after performing the name lookup but before acquiring
the relation lock.  If the name lookup is retried (because
invalidation messages are received), the callback will be re-executed
as well, so we get the best of both worlds.  RangeVarGetRelid() is
renamed to RangeVarGetRelidExtended(); callers not wishing to supply
a callback can continue to invoke it as RangeVarGetRelid(), which is
now a macro.  Since the only caller that uses nowait = true now
passes a callback anyway, the RangeVarGetRelid() macro defaults nowait
as well.  The callback can also be used for supplemental locking - for
example, REINDEX INDEX needs to acquire the table lock before the index
lock to reduce deadlock possibilities.

There's a lot more work to be done here to fix all the cases where this
can be a problem, but this commit provides the general infrastructure
and fixes the following specific cases: REINDEX INDEX, REINDEX TABLE,
LOCK TABLE, and DROP TABLE/INDEX/SEQUENCE/VIEW/FOREIGN TABLE.

Per discussion with Noah Misch and Alvaro Herrera.
This commit is contained in:
Robert Haas
2011-11-30 10:12:27 -05:00
parent a87ebace19
commit 2ad36c4e44
21 changed files with 350 additions and 210 deletions

View File

@ -235,6 +235,12 @@ static const struct dropmsgstrings dropmsgstringarray[] = {
{'\0', 0, NULL, NULL, NULL, NULL}
};
/*
 * State shared between RemoveRelations() and
 * RangeVarCallbackForDropRelation(); a pointer to it is passed as the
 * callback's opaque "arg".
 */
struct DropRelationCallbackState
{
/* relkind the DROP command expects; the callback compares pg_class.relkind against it */
char relkind;
/* heap locked by the callback for DROP INDEX; InvalidOid when none is held */
Oid heapOid;
};
/* Alter table target-type flags for ATSimplePermissions */
#define ATT_TABLE 0x0001
#define ATT_VIEW 0x0002
@ -375,6 +381,9 @@ static void copy_relation_data(SMgrRelation rel, SMgrRelation dst,
ForkNumber forkNum, char relpersistence);
static const char *storage_name(char c);
/* Callback passed to RangeVarGetRelidExtended() by RemoveRelations() */
static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid,
Oid oldRelOid, void *arg);
/* ----------------------------------------------------------------
* DefineRelation
@ -751,9 +760,8 @@ RemoveRelations(DropStmt *drop)
{
RangeVar *rel = makeRangeVarFromNameList((List *) lfirst(cell));
Oid relOid;
HeapTuple tuple;
Form_pg_class classform;
ObjectAddress obj;
struct DropRelationCallbackState state;
/*
* These next few steps are a great deal like relation_openrv, but we
@ -767,15 +775,13 @@ RemoveRelations(DropStmt *drop)
*/
AcceptInvalidationMessages();
/*
* Look up the appropriate relation using namespace search.
*
* XXX: Doing this without a lock is unsafe in the presence of
* concurrent DDL, but acquiring a lock here might violate the rule
* that a table must be locked before its corresponding index.
* So, for now, we ignore the hazard.
*/
relOid = RangeVarGetRelid(rel, NoLock, true, false);
/* Look up the appropriate relation using namespace search. */
state.relkind = relkind;
state.heapOid = InvalidOid;
relOid = RangeVarGetRelidExtended(rel, AccessExclusiveLock, true,
false,
RangeVarCallbackForDropRelation,
(void *) &state);
/* Not there? */
if (!OidIsValid(relOid))
@ -784,57 +790,12 @@ RemoveRelations(DropStmt *drop)
continue;
}
/*
* In DROP INDEX, attempt to acquire lock on the parent table before
* locking the index. index_drop() will need this anyway, and since
* regular queries lock tables before their indexes, we risk deadlock
* if we do it the other way around. No error if we don't find a
* pg_index entry, though --- that most likely means it isn't an
* index, and we'll fail below.
*/
if (relkind == RELKIND_INDEX)
{
tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(relOid));
if (HeapTupleIsValid(tuple))
{
Form_pg_index index = (Form_pg_index) GETSTRUCT(tuple);
LockRelationOid(index->indrelid, AccessExclusiveLock);
ReleaseSysCache(tuple);
}
}
/* Get the lock before trying to fetch the syscache entry */
LockRelationOid(relOid, AccessExclusiveLock);
tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for relation %u", relOid);
classform = (Form_pg_class) GETSTRUCT(tuple);
if (classform->relkind != relkind)
DropErrorMsgWrongType(rel->relname, classform->relkind, relkind);
/* Allow DROP to either table owner or schema owner */
if (!pg_class_ownercheck(relOid, GetUserId()) &&
!pg_namespace_ownercheck(classform->relnamespace, GetUserId()))
aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
rel->relname);
if (!allowSystemTableMods && IsSystemClass(classform))
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("permission denied: \"%s\" is a system catalog",
rel->relname)));
/* OK, we're ready to delete this one */
obj.classId = RelationRelationId;
obj.objectId = relOid;
obj.objectSubId = 0;
add_exact_object_address(&obj, objects);
ReleaseSysCache(tuple);
}
performMultipleDeletions(objects, drop->behavior);
@ -842,6 +803,74 @@ RemoveRelations(DropStmt *drop)
free_object_addresses(objects);
}
/*
 * RangeVarCallbackForDropRelation
 *		Callback handed to RangeVarGetRelidExtended() by RemoveRelations().
 *
 * Checks that the relation the name currently resolves to has the expected
 * relkind and that the current user may drop it.  For DROP INDEX, it also
 * locks the index's parent table, so that the table is locked before the
 * index.
 *
 * rel:			the name being looked up (used for error messages)
 * relOid:		OID the name resolves to now, or InvalidOid if none
 * oldRelOid:	OID passed by the caller for comparison against relOid;
 *				a differing value is treated as "the name now refers to a
 *				different relation"
 * arg:			pointer to a struct DropRelationCallbackState
 */
static void
RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
								void *arg)
{
	struct DropRelationCallbackState *cbstate =
		(struct DropRelationCallbackState *) arg;
	char		wantedKind = cbstate->relkind;
	HeapTuple	classTup;
	Form_pg_class classRow;

	/*
	 * A heap lock taken for a different index is useless once the name no
	 * longer resolves to that index, so give it back.
	 */
	if (OidIsValid(cbstate->heapOid) && relOid != oldRelOid)
	{
		UnlockRelationOid(cbstate->heapOid, AccessExclusiveLock);
		cbstate->heapOid = InvalidOid;
	}

	/* Didn't find a relation, so no need for locking or permission checks. */
	if (!OidIsValid(relOid))
		return;

	classTup = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid));
	if (!HeapTupleIsValid(classTup))
		return;					/* concurrently dropped, so nothing to do */
	classRow = (Form_pg_class) GETSTRUCT(classTup);

	/* The relation must be of the kind this DROP command targets. */
	if (wantedKind != classRow->relkind)
		DropErrorMsgWrongType(rel->relname, classRow->relkind, wantedKind);

	/* Allow DROP to either table owner or schema owner */
	if (!(pg_class_ownercheck(relOid, GetUserId()) ||
		  pg_namespace_ownercheck(classRow->relnamespace, GetUserId())))
		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
					   rel->relname);

	/* System catalogs are off limits unless allowSystemTableMods is set. */
	if (!allowSystemTableMods && IsSystemClass(classRow))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						rel->relname)));

	ReleaseSysCache(classTup);

	/*
	 * Only DROP INDEX needs the extra lock, and only when the name resolved
	 * to a relation other than oldRelOid.
	 */
	if (relOid == oldRelOid || wantedKind != RELKIND_INDEX)
		return;

	/*
	 * In DROP INDEX, attempt to acquire lock on the parent table before
	 * locking the index.  index_drop() will need this anyway, and since
	 * regular queries lock tables before their indexes, we risk deadlock if
	 * we do it the other way around.  No error if we don't find a pg_index
	 * entry, though --- the relation may have been dropped.
	 */
	cbstate->heapOid = IndexGetRelation(relOid, true);
	if (OidIsValid(cbstate->heapOid))
		LockRelationOid(cbstate->heapOid, AccessExclusiveLock);
}
/*
* ExecuteTruncate
* Executes a TRUNCATE command.