mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Adjust bytea get_bit/set_bit to use int8 not int4 for bit numbering.
Since the existing bit number argument can't exceed INT32_MAX, it's not possible for these functions to manipulate bits beyond the first 256MB of a bytea value. Lift that restriction by redeclaring the bit number arguments as int8 (which requires a catversion bump, hence is not back-patchable). The similarly-named functions for bit/varbit don't really have a problem because we restrict those types to at most VARBITMAXLEN bits; hence leave them alone. While here, extend the encode/decode functions in utils/adt/encode.c to allow dealing with values wider than 1GB. This is not a live bug or restriction in current usage, because no input could be more than 1GB, and since none of the encoders can expand a string more than 4X, the result size couldn't overflow uint32. But it might be desirable to support more in future, so make the input length values size_t and the potential-output-length values uint64. Also add some test cases to improve the miserable code coverage of these functions. Movead Li, editorialized some by me; also reviewed by Ashutosh Bapat Discussion: https://postgr.es/m/20200312115135445367128@highgo.ca
This commit is contained in:
		| @@ -16,14 +16,24 @@ | ||||
| #include <ctype.h> | ||||
|  | ||||
| #include "utils/builtins.h" | ||||
| #include "utils/memutils.h" | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Encoding conversion API. | ||||
|  * encode_len() and decode_len() compute the amount of space needed, while | ||||
|  * encode() and decode() perform the actual conversions.  It is okay for | ||||
|  * the _len functions to return an overestimate, but not an underestimate. | ||||
|  * (Having said that, large overestimates could cause unnecessary errors, | ||||
|  * so it's better to get it right.)  The conversion routines write to the | ||||
|  * buffer at *res and return the true length of their output. | ||||
|  */ | ||||
| struct pg_encoding | ||||
| { | ||||
| 	unsigned	(*encode_len) (const char *data, unsigned dlen); | ||||
| 	unsigned	(*decode_len) (const char *data, unsigned dlen); | ||||
| 	unsigned	(*encode) (const char *data, unsigned dlen, char *res); | ||||
| 	unsigned	(*decode) (const char *data, unsigned dlen, char *res); | ||||
| 	uint64		(*encode_len) (const char *data, size_t dlen); | ||||
| 	uint64		(*decode_len) (const char *data, size_t dlen); | ||||
| 	uint64		(*encode) (const char *data, size_t dlen, char *res); | ||||
| 	uint64		(*decode) (const char *data, size_t dlen, char *res); | ||||
| }; | ||||
|  | ||||
| static const struct pg_encoding *pg_find_encoding(const char *name); | ||||
| @@ -39,13 +49,12 @@ binary_encode(PG_FUNCTION_ARGS) | ||||
| 	Datum		name = PG_GETARG_DATUM(1); | ||||
| 	text	   *result; | ||||
| 	char	   *namebuf; | ||||
| 	int			datalen, | ||||
| 				resultlen, | ||||
| 				res; | ||||
| 	char	   *dataptr; | ||||
| 	size_t		datalen; | ||||
| 	uint64		resultlen; | ||||
| 	uint64		res; | ||||
| 	const struct pg_encoding *enc; | ||||
|  | ||||
| 	datalen = VARSIZE_ANY_EXHDR(data); | ||||
|  | ||||
| 	namebuf = TextDatumGetCString(name); | ||||
|  | ||||
| 	enc = pg_find_encoding(namebuf); | ||||
| @@ -54,10 +63,23 @@ binary_encode(PG_FUNCTION_ARGS) | ||||
| 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE), | ||||
| 				 errmsg("unrecognized encoding: \"%s\"", namebuf))); | ||||
|  | ||||
| 	resultlen = enc->encode_len(VARDATA_ANY(data), datalen); | ||||
| 	dataptr = VARDATA_ANY(data); | ||||
| 	datalen = VARSIZE_ANY_EXHDR(data); | ||||
|  | ||||
| 	resultlen = enc->encode_len(dataptr, datalen); | ||||
|  | ||||
| 	/* | ||||
| 	 * resultlen possibly overflows uint32, therefore on 32-bit machines it's | ||||
| 	 * unsafe to rely on palloc's internal check. | ||||
| 	 */ | ||||
| 	if (resultlen > MaxAllocSize - VARHDRSZ) | ||||
| 		ereport(ERROR, | ||||
| 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), | ||||
| 				 errmsg("result of encoding conversion is too large"))); | ||||
|  | ||||
| 	result = palloc(VARHDRSZ + resultlen); | ||||
|  | ||||
| 	res = enc->encode(VARDATA_ANY(data), datalen, VARDATA(result)); | ||||
| 	res = enc->encode(dataptr, datalen, VARDATA(result)); | ||||
|  | ||||
| 	/* Make this FATAL 'cause we've trodden on memory ... */ | ||||
| 	if (res > resultlen) | ||||
| @@ -75,13 +97,12 @@ binary_decode(PG_FUNCTION_ARGS) | ||||
| 	Datum		name = PG_GETARG_DATUM(1); | ||||
| 	bytea	   *result; | ||||
| 	char	   *namebuf; | ||||
| 	int			datalen, | ||||
| 				resultlen, | ||||
| 				res; | ||||
| 	char	   *dataptr; | ||||
| 	size_t		datalen; | ||||
| 	uint64		resultlen; | ||||
| 	uint64		res; | ||||
| 	const struct pg_encoding *enc; | ||||
|  | ||||
| 	datalen = VARSIZE_ANY_EXHDR(data); | ||||
|  | ||||
| 	namebuf = TextDatumGetCString(name); | ||||
|  | ||||
| 	enc = pg_find_encoding(namebuf); | ||||
| @@ -90,10 +111,23 @@ binary_decode(PG_FUNCTION_ARGS) | ||||
| 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE), | ||||
| 				 errmsg("unrecognized encoding: \"%s\"", namebuf))); | ||||
|  | ||||
| 	resultlen = enc->decode_len(VARDATA_ANY(data), datalen); | ||||
| 	dataptr = VARDATA_ANY(data); | ||||
| 	datalen = VARSIZE_ANY_EXHDR(data); | ||||
|  | ||||
| 	resultlen = enc->decode_len(dataptr, datalen); | ||||
|  | ||||
| 	/* | ||||
| 	 * resultlen possibly overflows uint32, therefore on 32-bit machines it's | ||||
| 	 * unsafe to rely on palloc's internal check. | ||||
| 	 */ | ||||
| 	if (resultlen > MaxAllocSize - VARHDRSZ) | ||||
| 		ereport(ERROR, | ||||
| 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), | ||||
| 				 errmsg("result of decoding conversion is too large"))); | ||||
|  | ||||
| 	result = palloc(VARHDRSZ + resultlen); | ||||
|  | ||||
| 	res = enc->decode(VARDATA_ANY(data), datalen, VARDATA(result)); | ||||
| 	res = enc->decode(dataptr, datalen, VARDATA(result)); | ||||
|  | ||||
| 	/* Make this FATAL 'cause we've trodden on memory ... */ | ||||
| 	if (res > resultlen) | ||||
| @@ -122,8 +156,8 @@ static const int8 hexlookup[128] = { | ||||
| 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||||
| }; | ||||
|  | ||||
| unsigned | ||||
| hex_encode(const char *src, unsigned len, char *dst) | ||||
| uint64 | ||||
| hex_encode(const char *src, size_t len, char *dst) | ||||
| { | ||||
| 	const char *end = src + len; | ||||
|  | ||||
| @@ -133,7 +167,7 @@ hex_encode(const char *src, unsigned len, char *dst) | ||||
| 		*dst++ = hextbl[*src & 0xF]; | ||||
| 		src++; | ||||
| 	} | ||||
| 	return len * 2; | ||||
| 	return (uint64) len * 2; | ||||
| } | ||||
|  | ||||
| static inline char | ||||
| @@ -152,8 +186,8 @@ get_hex(char c) | ||||
| 	return (char) res; | ||||
| } | ||||
|  | ||||
| unsigned | ||||
| hex_decode(const char *src, unsigned len, char *dst) | ||||
| uint64 | ||||
| hex_decode(const char *src, size_t len, char *dst) | ||||
| { | ||||
| 	const char *s, | ||||
| 			   *srcend; | ||||
| @@ -184,16 +218,16 @@ hex_decode(const char *src, unsigned len, char *dst) | ||||
| 	return p - dst; | ||||
| } | ||||
|  | ||||
| static unsigned | ||||
| hex_enc_len(const char *src, unsigned srclen) | ||||
| static uint64 | ||||
| hex_enc_len(const char *src, size_t srclen) | ||||
| { | ||||
| 	return srclen << 1; | ||||
| 	return (uint64) srclen << 1; | ||||
| } | ||||
|  | ||||
| static unsigned | ||||
| hex_dec_len(const char *src, unsigned srclen) | ||||
| static uint64 | ||||
| hex_dec_len(const char *src, size_t srclen) | ||||
| { | ||||
| 	return srclen >> 1; | ||||
| 	return (uint64) srclen >> 1; | ||||
| } | ||||
|  | ||||
| /* | ||||
| @@ -214,8 +248,8 @@ static const int8 b64lookup[128] = { | ||||
| 	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, | ||||
| }; | ||||
|  | ||||
| static unsigned | ||||
| pg_base64_encode(const char *src, unsigned len, char *dst) | ||||
| static uint64 | ||||
| pg_base64_encode(const char *src, size_t len, char *dst) | ||||
| { | ||||
| 	char	   *p, | ||||
| 			   *lend = dst + 76; | ||||
| @@ -261,8 +295,8 @@ pg_base64_encode(const char *src, unsigned len, char *dst) | ||||
| 	return p - dst; | ||||
| } | ||||
|  | ||||
| static unsigned | ||||
| pg_base64_decode(const char *src, unsigned len, char *dst) | ||||
| static uint64 | ||||
| pg_base64_decode(const char *src, size_t len, char *dst) | ||||
| { | ||||
| 	const char *srcend = src + len, | ||||
| 			   *s = src; | ||||
| @@ -331,17 +365,17 @@ pg_base64_decode(const char *src, unsigned len, char *dst) | ||||
| } | ||||
|  | ||||
|  | ||||
| static unsigned | ||||
| pg_base64_enc_len(const char *src, unsigned srclen) | ||||
| static uint64 | ||||
| pg_base64_enc_len(const char *src, size_t srclen) | ||||
| { | ||||
| 	/* 3 bytes will be converted to 4, linefeed after 76 chars */ | ||||
| 	return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4); | ||||
| 	return ((uint64) srclen + 2) * 4 / 3 + (uint64) srclen / (76 * 3 / 4); | ||||
| } | ||||
|  | ||||
| static unsigned | ||||
| pg_base64_dec_len(const char *src, unsigned srclen) | ||||
| static uint64 | ||||
| pg_base64_dec_len(const char *src, size_t srclen) | ||||
| { | ||||
| 	return (srclen * 3) >> 2; | ||||
| 	return ((uint64) srclen * 3) >> 2; | ||||
| } | ||||
|  | ||||
| /* | ||||
| @@ -361,12 +395,12 @@ pg_base64_dec_len(const char *src, unsigned srclen) | ||||
| #define VAL(CH)			((CH) - '0') | ||||
| #define DIG(VAL)		((VAL) + '0') | ||||
|  | ||||
| static unsigned | ||||
| esc_encode(const char *src, unsigned srclen, char *dst) | ||||
| static uint64 | ||||
| esc_encode(const char *src, size_t srclen, char *dst) | ||||
| { | ||||
| 	const char *end = src + srclen; | ||||
| 	char	   *rp = dst; | ||||
| 	int			len = 0; | ||||
| 	uint64		len = 0; | ||||
|  | ||||
| 	while (src < end) | ||||
| 	{ | ||||
| @@ -400,12 +434,12 @@ esc_encode(const char *src, unsigned srclen, char *dst) | ||||
| 	return len; | ||||
| } | ||||
|  | ||||
| static unsigned | ||||
| esc_decode(const char *src, unsigned srclen, char *dst) | ||||
| static uint64 | ||||
| esc_decode(const char *src, size_t srclen, char *dst) | ||||
| { | ||||
| 	const char *end = src + srclen; | ||||
| 	char	   *rp = dst; | ||||
| 	int			len = 0; | ||||
| 	uint64		len = 0; | ||||
|  | ||||
| 	while (src < end) | ||||
| 	{ | ||||
| @@ -448,11 +482,11 @@ esc_decode(const char *src, unsigned srclen, char *dst) | ||||
| 	return len; | ||||
| } | ||||
|  | ||||
| static unsigned | ||||
| esc_enc_len(const char *src, unsigned srclen) | ||||
| static uint64 | ||||
| esc_enc_len(const char *src, size_t srclen) | ||||
| { | ||||
| 	const char *end = src + srclen; | ||||
| 	int			len = 0; | ||||
| 	uint64		len = 0; | ||||
|  | ||||
| 	while (src < end) | ||||
| 	{ | ||||
| @@ -469,11 +503,11 @@ esc_enc_len(const char *src, unsigned srclen) | ||||
| 	return len; | ||||
| } | ||||
|  | ||||
| static unsigned | ||||
| esc_dec_len(const char *src, unsigned srclen) | ||||
| static uint64 | ||||
| esc_dec_len(const char *src, size_t srclen) | ||||
| { | ||||
| 	const char *end = src + srclen; | ||||
| 	int			len = 0; | ||||
| 	uint64		len = 0; | ||||
|  | ||||
| 	while (src < end) | ||||
| 	{ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user