mirror of
https://github.com/postgres/postgres.git
synced 2025-04-22 23:02:54 +03:00
Add two HyperLogLog functions
New functions initHyperLogLogError() and freeHyperLogLog() simplify using this module from elsewhere. Author: Tomáš Vondra Review: Peter Geoghegan
This commit is contained in:
parent
9ff60273e3
commit
948c97958b
@ -56,7 +56,7 @@
|
|||||||
static inline uint8 rho(uint32 x, uint8 b);
|
static inline uint8 rho(uint32 x, uint8 b);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize HyperLogLog track state
|
* Initialize HyperLogLog track state, by bit width
|
||||||
*
|
*
|
||||||
* bwidth is bit width (so register size will be 2 to the power of bwidth).
|
* bwidth is bit width (so register size will be 2 to the power of bwidth).
|
||||||
* Must be between 4 and 16 inclusive.
|
* Must be between 4 and 16 inclusive.
|
||||||
@ -107,6 +107,52 @@ initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
|
|||||||
cState->alphaMM = alpha * cState->nRegisters * cState->nRegisters;
|
cState->alphaMM = alpha * cState->nRegisters * cState->nRegisters;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize HyperLogLog track state, by error rate
|
||||||
|
*
|
||||||
|
* Instead of specifying bwidth (number of bits used for addressing the
|
||||||
|
* register), this method allows sizing the counter for particular error
|
||||||
|
* rate using a simple formula from the paper:
|
||||||
|
*
|
||||||
|
* e = 1.04 / sqrt(m)
|
||||||
|
*
|
||||||
|
* where 'm' is the number of registers, i.e. (2^bwidth). The method
|
||||||
|
* finds the lowest bwidth with 'e' below the requested error rate, and
|
||||||
|
* then uses it to initialize the counter.
|
||||||
|
*
|
||||||
|
* As bwidth has to be between 4 and 16, the worst possible error rate
|
||||||
|
* is between ~25% (bwidth=4) and 0.4% (bwidth=16).
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
initHyperLogLogError(hyperLogLogState *cState, double error)
|
||||||
|
{
|
||||||
|
uint8 bwidth = 4;
|
||||||
|
|
||||||
|
while (bwidth < 16)
|
||||||
|
{
|
||||||
|
double m = (Size) 1 << bwidth;
|
||||||
|
|
||||||
|
if (1.04 / sqrt(m) < error)
|
||||||
|
break;
|
||||||
|
bwidth++;
|
||||||
|
}
|
||||||
|
|
||||||
|
initHyperLogLog(cState, bwidth);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Free HyperLogLog track state
|
||||||
|
*
|
||||||
|
* Releases allocated resources, but not the state itself (in case it's not
|
||||||
|
* allocated by palloc).
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
freeHyperLogLog(hyperLogLogState *cState)
|
||||||
|
{
|
||||||
|
Assert(cState->hashesArr != NULL);
|
||||||
|
pfree(cState->hashesArr);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Adds element to the estimator, from caller-supplied hash.
|
* Adds element to the estimator, from caller-supplied hash.
|
||||||
*
|
*
|
||||||
|
@ -60,8 +60,10 @@ typedef struct hyperLogLogState
|
|||||||
} hyperLogLogState;
|
} hyperLogLogState;
|
||||||
|
|
||||||
extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth);
|
extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth);
|
||||||
|
extern void initHyperLogLogError(hyperLogLogState *cState, double error);
|
||||||
extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash);
|
extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash);
|
||||||
extern double estimateHyperLogLog(hyperLogLogState *cState);
|
extern double estimateHyperLogLog(hyperLogLogState *cState);
|
||||||
extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState);
|
extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState);
|
||||||
|
extern void freeHyperLogLog(hyperLogLogState *cState);
|
||||||
|
|
||||||
#endif /* HYPERLOGLOG_H */
|
#endif /* HYPERLOGLOG_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user