1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-24 10:47:04 +03:00

Fix REALLOCATE_BITMAPSETS code

7d58f2342 added a compile-time option to have bitmapset.c reallocate the
set before returning when a set is modified.  That commit failed to do
its job in various cases and returned the input set when it shouldn't
have in these cases.  Here we fix those missing cases.

This commit also adds some documentation about what
REALLOCATE_BITMAPSETS is for.  This is important as future functions
that go inside bitmapset.c need to know if they need to do anything
special when this compile-time option is defined.

Also, between 71a3e8c43 and 7d58f2342 some Asserts seem to have become
duplicated.  Tidy these up.  Rather than having the Assert check each
aspect of what makes a set invalid, here we introduce a helper function
which returns false when a set is invalid and have the Asserts use this
instead.

Also, make a pass on improving the comments in bitmapset.c.  Various
comments mentioned the input sets being "recycled".  This could be
interpreted to mean that the output set will always point to the same
memory as the given input parameter.  Here we try to make it clear that
this must not be relied upon and that callers must ensure that all
references to a given set are updated on each modification.

In passing, improve comments for bms_union(), bms_intersect() and
bms_difference() to detail what they do.  I (David) have too often had to
remind myself by reading the code each time to find out if I need, for
example, to use bms_union() or bms_join().  I also removed some
low-value comments that were trying to convey information about "these
operations" without mentioning which operations it was talking about.
It seems better to document these things in the function header comment
instead.

Author: Richard Guo, David Rowley
Discussion: https://postgr.es/m/CAMbWs4-djy9qYux2gZrtmxA0StrYXJjvB-oqLxn-d7J88t=PQQ@mail.gmail.com
This commit is contained in:
David Rowley 2024-01-17 09:30:21 +13:00
parent 45d395cd75
commit c7e5e994b2

View File

@ -16,6 +16,18 @@
* containing only a single word (likely the majority of them) this halves the
* number of loop condition checks.
*
* Callers must ensure that the set returned by functions in this file which
* adjust the members of an existing set is assigned to all pointers pointing
* to that existing set. No guarantees are made that we'll ever modify the
* existing set in-place and return it.
*
* To help find bugs caused by callers failing to record the return value of
* the function which manipulates an existing set, we support building with
* REALLOCATE_BITMAPSETS. This results in the set being reallocated each time
* the set is altered and the existing being pfreed. This is useful as if any
* references still exist to the old set, we're more likely to notice as
* any users of the old set will be accessing pfree'd memory. This option is
* only intended to be used for debugging.
*
* Copyright (c) 2003-2024, PostgreSQL Global Development Group
*
@ -72,6 +84,49 @@
#error "invalid BITS_PER_BITMAPWORD"
#endif
#ifdef USE_ASSERT_CHECKING
/*
* bms_is_valid_set - for cassert builds to check for valid sets
*/
static bool
bms_is_valid_set(const Bitmapset *a)
{
/* NULL is the correct representation of an empty set */
if (a == NULL)
return true;
/* check the node tag is set correctly. pfree'd pointer, maybe? */
if (!IsA(a, Bitmapset))
return false;
/* trailing zero words are not allowed */
if (a->words[a->nwords - 1] == 0)
return false;
return true;
}
#endif
#ifdef REALLOCATE_BITMAPSETS
/*
* bms_copy_and_free
* Only required in REALLOCATE_BITMAPSETS builds. Provide a simple way
* to return a freshly allocated set and pfree the original.
*
* Note: callers which accept multiple sets must be careful when calling this
* function to clone one parameter as other parameters may point to the same
* set. A good option is to call this just before returning the resulting
* set.
*/
static Bitmapset *
bms_copy_and_free(Bitmapset *a)
{
Bitmapset *c = bms_copy(a);
bms_free(a);
return c;
}
#endif
/*
* bms_copy - make a palloc'd copy of a bitmapset
@ -82,9 +137,11 @@ bms_copy(const Bitmapset *a)
Bitmapset *result;
size_t size;
Assert(bms_is_valid_set(a));
if (a == NULL)
return NULL;
Assert(IsA(a, Bitmapset));
size = BITMAPSET_SIZE(a->nwords);
result = (Bitmapset *) palloc(size);
memcpy(result, a, size);
@ -99,8 +156,8 @@ bms_equal(const Bitmapset *a, const Bitmapset *b)
{
int i;
Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0));
Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
@ -140,8 +197,8 @@ bms_compare(const Bitmapset *a, const Bitmapset *b)
{
int i;
Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0));
Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
@ -200,13 +257,8 @@ bms_free(Bitmapset *a)
/*
* These operations all make a freshly palloc'd result,
* leaving their inputs untouched
*/
/*
* bms_union - set union
* bms_union - create and return a new set containing all members from both
* input sets. Both inputs are left unmodified.
*/
Bitmapset *
bms_union(const Bitmapset *a, const Bitmapset *b)
@ -216,8 +268,8 @@ bms_union(const Bitmapset *a, const Bitmapset *b)
int otherlen;
int i;
Assert(a == NULL || IsA(a, Bitmapset));
Assert(b == NULL || IsA(b, Bitmapset));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
@ -246,7 +298,8 @@ bms_union(const Bitmapset *a, const Bitmapset *b)
}
/*
* bms_intersect - set intersection
* bms_intersect - create and return a new set containing members which both
* input sets have in common. Both inputs are left unmodified.
*/
Bitmapset *
bms_intersect(const Bitmapset *a, const Bitmapset *b)
@ -257,8 +310,8 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b)
int resultlen;
int i;
Assert(a == NULL || IsA(a, Bitmapset));
Assert(b == NULL || IsA(b, Bitmapset));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL || b == NULL)
@ -299,7 +352,8 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b)
}
/*
* bms_difference - set difference (ie, A without members of B)
* bms_difference - create and return a new set containing all the members of
* 'a' without the members of 'b'.
*/
Bitmapset *
bms_difference(const Bitmapset *a, const Bitmapset *b)
@ -307,8 +361,8 @@ bms_difference(const Bitmapset *a, const Bitmapset *b)
Bitmapset *result;
int i;
Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0));
Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
@ -316,8 +370,6 @@ bms_difference(const Bitmapset *a, const Bitmapset *b)
if (b == NULL)
return bms_copy(a);
Assert(IsA(a, Bitmapset) && IsA(b, Bitmapset));
/*
* In Postgres' usage, an empty result is a very common case, so it's
* worth optimizing for that by testing bms_nonempty_difference(). This
@ -374,8 +426,8 @@ bms_is_subset(const Bitmapset *a, const Bitmapset *b)
{
int i;
Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0));
Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
@ -383,8 +435,6 @@ bms_is_subset(const Bitmapset *a, const Bitmapset *b)
if (b == NULL)
return false;
Assert(IsA(a, Bitmapset) && IsA(b, Bitmapset));
/* 'a' can't be a subset of 'b' if it contains more words */
if (a->nwords > b->nwords)
return false;
@ -411,8 +461,8 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
int shortlen;
int i;
Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0));
Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
@ -424,8 +474,6 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
if (b == NULL)
return BMS_SUBSET2;
Assert(IsA(a, Bitmapset) && IsA(b, Bitmapset));
/* Check common words */
result = BMS_EQUAL; /* status so far */
shortlen = Min(a->nwords, b->nwords);
@ -477,14 +525,14 @@ bms_is_member(int x, const Bitmapset *a)
int wordnum,
bitnum;
Assert(bms_is_valid_set(a));
/* XXX better to just return false for x<0 ? */
if (x < 0)
elog(ERROR, "negative bitmapset member not allowed");
if (a == NULL)
return false;
Assert(IsA(a, Bitmapset));
wordnum = WORDNUM(x);
bitnum = BITNUM(x);
if (wordnum >= a->nwords)
@ -509,12 +557,12 @@ bms_member_index(Bitmapset *a, int x)
int result = 0;
bitmapword mask;
Assert(bms_is_valid_set(a));
/* return -1 if not a member of the bitmap */
if (!bms_is_member(x, a))
return -1;
Assert(IsA(a, Bitmapset));
wordnum = WORDNUM(x);
bitnum = BITNUM(x);
@ -549,8 +597,8 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
int shortlen;
int i;
Assert(a == NULL || IsA(a, Bitmapset));
Assert(b == NULL || IsA(b, Bitmapset));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL || b == NULL)
@ -576,7 +624,7 @@ bms_overlap_list(const Bitmapset *a, const List *b)
int wordnum,
bitnum;
Assert(a == NULL || IsA(a, Bitmapset));
Assert(bms_is_valid_set(a));
if (a == NULL || b == NIL)
return false;
@ -607,8 +655,8 @@ bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
{
int i;
Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0));
Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
@ -640,11 +688,11 @@ bms_singleton_member(const Bitmapset *a)
int nwords;
int wordnum;
Assert(bms_is_valid_set(a));
if (a == NULL)
elog(ERROR, "bitmapset is empty");
Assert(IsA(a, Bitmapset));
nwords = a->nwords;
wordnum = 0;
do
@ -683,9 +731,11 @@ bms_get_singleton_member(const Bitmapset *a, int *member)
int nwords;
int wordnum;
Assert(bms_is_valid_set(a));
if (a == NULL)
return false;
Assert(IsA(a, Bitmapset));
nwords = a->nwords;
wordnum = 0;
do
@ -717,9 +767,11 @@ bms_num_members(const Bitmapset *a)
int nwords;
int wordnum;
Assert(bms_is_valid_set(a));
if (a == NULL)
return 0;
Assert(IsA(a, Bitmapset));
nwords = a->nwords;
wordnum = 0;
do
@ -745,9 +797,11 @@ bms_membership(const Bitmapset *a)
int nwords;
int wordnum;
Assert(bms_is_valid_set(a));
if (a == NULL)
return BMS_EMPTY_SET;
Assert(IsA(a, Bitmapset));
nwords = a->nwords;
wordnum = 0;
do
@ -765,20 +819,10 @@ bms_membership(const Bitmapset *a)
}
/*
* These operations all "recycle" their non-const inputs, ie, either
* return the modified input or pfree it if it can't hold the result.
*
* These should generally be used in the style
*
* foo = bms_add_member(foo, x);
*/
/*
* bms_add_member - add a specified member to set
*
* Input set is modified or recycled!
* 'a' is recycled when possible.
*/
Bitmapset *
bms_add_member(Bitmapset *a, int x)
@ -786,11 +830,13 @@ bms_add_member(Bitmapset *a, int x)
int wordnum,
bitnum;
Assert(bms_is_valid_set(a));
if (x < 0)
elog(ERROR, "negative bitmapset member not allowed");
if (a == NULL)
return bms_make_singleton(x);
Assert(IsA(a, Bitmapset));
wordnum = WORDNUM(x);
bitnum = BITNUM(x);
@ -799,15 +845,8 @@ bms_add_member(Bitmapset *a, int x)
{
int oldnwords = a->nwords;
int i;
#ifdef REALLOCATE_BITMAPSETS
Bitmapset *tmp = a;
a = (Bitmapset *) palloc(BITMAPSET_SIZE(wordnum + 1));
memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords));
pfree(tmp);
#else
a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1));
#endif
a->nwords = wordnum + 1;
/* zero out the enlarged portion */
i = oldnwords;
@ -816,18 +855,18 @@ bms_add_member(Bitmapset *a, int x)
a->words[i] = 0;
} while (++i < a->nwords);
}
#ifdef REALLOCATE_BITMAPSETS
else
{
Bitmapset *tmp = a;
a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords));
memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords));
pfree(tmp);
}
#endif
a->words[wordnum] |= ((bitmapword) 1 << bitnum);
#ifdef REALLOCATE_BITMAPSETS
/*
* There's no guarantee that the repalloc returned a new pointer, so copy
* and free unconditionally here.
*/
a = bms_copy_and_free(a);
#endif
return a;
}
@ -836,31 +875,26 @@ bms_add_member(Bitmapset *a, int x)
*
* No error if x is not currently a member of set
*
* Input set is modified in-place!
* 'a' is recycled when possible.
*/
Bitmapset *
bms_del_member(Bitmapset *a, int x)
{
int wordnum,
bitnum;
#ifdef REALLOCATE_BITMAPSETS
Bitmapset *tmp = a;
#endif
Assert(bms_is_valid_set(a));
if (x < 0)
elog(ERROR, "negative bitmapset member not allowed");
if (a == NULL)
return NULL;
Assert(IsA(a, Bitmapset));
wordnum = WORDNUM(x);
bitnum = BITNUM(x);
#ifdef REALLOCATE_BITMAPSETS
a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords));
memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords));
pfree(tmp);
a = bms_copy_and_free(a);
#endif
/* member can't exist. Return 'a' unmodified */
@ -890,7 +924,7 @@ bms_del_member(Bitmapset *a, int x)
}
/*
* bms_add_members - like bms_union, but left input is recycled
* bms_add_members - like bms_union, but left input is recycled when possible
*/
Bitmapset *
bms_add_members(Bitmapset *a, const Bitmapset *b)
@ -900,14 +934,20 @@ bms_add_members(Bitmapset *a, const Bitmapset *b)
int otherlen;
int i;
Assert(a == NULL || IsA(a, Bitmapset));
Assert(b == NULL || IsA(b, Bitmapset));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
return bms_copy(b);
if (b == NULL)
{
#ifdef REALLOCATE_BITMAPSETS
a = bms_copy_and_free(a);
#endif
return a;
}
/* Identify shorter and longer input; copy the longer one if needed */
if (a->nwords < b->nwords)
{
@ -916,13 +956,6 @@ bms_add_members(Bitmapset *a, const Bitmapset *b)
}
else
{
#ifdef REALLOCATE_BITMAPSETS
Bitmapset *tmp = a;
a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords));
memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords));
pfree(tmp);
#endif
result = a;
other = b;
}
@ -935,6 +968,11 @@ bms_add_members(Bitmapset *a, const Bitmapset *b)
} while (++i < otherlen);
if (result != a)
pfree(a);
#ifdef REALLOCATE_BITMAPSETS
else
result = bms_copy_and_free(result);
#endif
return result;
}
@ -955,11 +993,17 @@ bms_add_range(Bitmapset *a, int lower, int upper)
ushiftbits,
wordnum;
Assert(a == NULL || IsA(a, Bitmapset));
Assert(bms_is_valid_set(a));
/* do nothing if nothing is called for, without further checking */
if (upper < lower)
{
#ifdef REALLOCATE_BITMAPSETS
a = bms_copy_and_free(a);
#endif
return a;
}
if (lower < 0)
elog(ERROR, "negative bitmapset member not allowed");
@ -975,16 +1019,9 @@ bms_add_range(Bitmapset *a, int lower, int upper)
{
int oldnwords = a->nwords;
int i;
#ifdef REALLOCATE_BITMAPSETS
Bitmapset *tmp = a;
a = (Bitmapset *) palloc(BITMAPSET_SIZE(uwordnum + 1));
memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords));
pfree(tmp);
#else
/* ensure we have enough words to store the upper bit */
a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(uwordnum + 1));
#endif
a->nwords = uwordnum + 1;
/* zero out the enlarged portion */
i = oldnwords;
@ -1021,11 +1058,21 @@ bms_add_range(Bitmapset *a, int lower, int upper)
a->words[uwordnum] |= (~(bitmapword) 0) >> ushiftbits;
}
#ifdef REALLOCATE_BITMAPSETS
/*
* There's no guarantee that the repalloc returned a new pointer, so copy
* and free unconditionally here.
*/
a = bms_copy_and_free(a);
#endif
return a;
}
/*
* bms_int_members - like bms_intersect, but left input is recycled
* bms_int_members - like bms_intersect, but left input is recycled when
* possible
*/
Bitmapset *
bms_int_members(Bitmapset *a, const Bitmapset *b)
@ -1033,15 +1080,9 @@ bms_int_members(Bitmapset *a, const Bitmapset *b)
int lastnonzero;
int shortlen;
int i;
#ifdef REALLOCATE_BITMAPSETS
Bitmapset *tmp = a;
#endif
Assert(a == NULL || IsA(a, Bitmapset));
Assert(b == NULL || IsA(b, Bitmapset));
Assert(a == NULL || IsA(a, Bitmapset));
Assert(b == NULL || IsA(b, Bitmapset));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
@ -1052,12 +1093,6 @@ bms_int_members(Bitmapset *a, const Bitmapset *b)
return NULL;
}
#ifdef REALLOCATE_BITMAPSETS
a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords));
memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords));
pfree(tmp);
#endif
/* Intersect b into a; we need never copy */
shortlen = Min(a->nwords, b->nwords);
lastnonzero = -1;
@ -1079,35 +1114,38 @@ bms_int_members(Bitmapset *a, const Bitmapset *b)
/* get rid of trailing zero words */
a->nwords = lastnonzero + 1;
#ifdef REALLOCATE_BITMAPSETS
a = bms_copy_and_free(a);
#endif
return a;
}
/*
* bms_del_members - like bms_difference, but left input is recycled
* bms_del_members - delete members in 'a' that are set in 'b'. 'a' is
* recycled when possible.
*/
Bitmapset *
bms_del_members(Bitmapset *a, const Bitmapset *b)
{
int i;
#ifdef REALLOCATE_BITMAPSETS
Bitmapset *tmp = a;
#endif
Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0));
Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
return NULL;
if (b == NULL)
return a;
{
#ifdef REALLOCATE_BITMAPSETS
a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords));
memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords));
pfree(tmp);
a = bms_copy_and_free(a);
#endif
return a;
}
/* Remove b's bits from a; we need never copy */
if (a->nwords > b->nwords)
{
@ -1147,11 +1185,15 @@ bms_del_members(Bitmapset *a, const Bitmapset *b)
a->nwords = lastnonzero + 1;
}
#ifdef REALLOCATE_BITMAPSETS
a = bms_copy_and_free(a);
#endif
return a;
}
/*
* bms_join - like bms_union, but *both* inputs are recycled
* bms_join - like bms_union, but *either* input *may* be recycled
*/
Bitmapset *
bms_join(Bitmapset *a, Bitmapset *b)
@ -1160,28 +1202,28 @@ bms_join(Bitmapset *a, Bitmapset *b)
Bitmapset *other;
int otherlen;
int i;
#ifdef REALLOCATE_BITMAPSETS
Bitmapset *tmp = a;
#endif
Assert(a == NULL || IsA(a, Bitmapset));
Assert(b == NULL || IsA(b, Bitmapset));
Assert(a == NULL || IsA(a, Bitmapset));
Assert(b == NULL || IsA(b, Bitmapset));
Assert(bms_is_valid_set(a));
Assert(bms_is_valid_set(b));
/* Handle cases where either input is NULL */
if (a == NULL)
return b;
if (b == NULL)
return a;
{
#ifdef REALLOCATE_BITMAPSETS
a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords));
memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords));
pfree(tmp);
b = bms_copy_and_free(b);
#endif
return b;
}
if (b == NULL)
{
#ifdef REALLOCATE_BITMAPSETS
a = bms_copy_and_free(a);
#endif
return a;
}
/* Identify shorter and longer input; use longer one as result */
if (a->nwords < b->nwords)
{
@ -1202,6 +1244,11 @@ bms_join(Bitmapset *a, Bitmapset *b)
} while (++i < otherlen);
if (other != result) /* pure paranoia */
pfree(other);
#ifdef REALLOCATE_BITMAPSETS
result = bms_copy_and_free(result);
#endif
return result;
}
@ -1231,7 +1278,7 @@ bms_next_member(const Bitmapset *a, int prevbit)
int wordnum;
bitmapword mask;
Assert(a == NULL || IsA(a, Bitmapset));
Assert(bms_is_valid_set(a));
if (a == NULL)
return -2;
@ -1292,7 +1339,7 @@ bms_prev_member(const Bitmapset *a, int prevbit)
int ushiftbits;
bitmapword mask;
Assert(a == NULL || IsA(a, Bitmapset));
Assert(bms_is_valid_set(a));
/*
* If set is NULL or if there are no more bits to the right then we've
@ -1337,6 +1384,8 @@ bms_prev_member(const Bitmapset *a, int prevbit)
uint32
bms_hash_value(const Bitmapset *a)
{
Assert(bms_is_valid_set(a));
if (a == NULL)
return 0; /* All empty sets hash to 0 */
return DatumGetUInt32(hash_any((const unsigned char *) a->words,