mirror of
https://github.com/postgres/postgres.git
synced 2025-10-18 04:29:09 +03:00
Speed up byteain by not parsing traditional-style input twice.
Instead of laboriously computing the exact output length, use strlen to get an upper bound cheaply. (This is still O(N) of course, but the constant factor is a lot less.) This will typically result in overallocating the output datum, but that's of little concern since it's a short-lived allocation in just about all use-cases.

A simple microbenchmark showed about 40% speedup for long input strings.

While here, make some cosmetic cleanups and add a test case that covers the double-backslash code path in byteain and byteaout.

Author: Steven Niu <niushiji@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Stepan Neretin <slpmcf@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/ca315729-140b-426e-81a6-6cd5cfe7ecc5@gmail.com
This commit is contained in:
@@ -182,27 +182,21 @@ bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
|
|||||||
*
|
*
|
||||||
* Non-printable characters must be passed as '\nnn' (octal) and are
|
* Non-printable characters must be passed as '\nnn' (octal) and are
|
||||||
* converted to internal form. '\' must be passed as '\\'.
|
* converted to internal form. '\' must be passed as '\\'.
|
||||||
* ereport(ERROR, ...) if bad form.
|
|
||||||
*
|
|
||||||
* BUGS:
|
|
||||||
* The input is scanned twice.
|
|
||||||
* The error checking of input is minimal.
|
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
byteain(PG_FUNCTION_ARGS)
|
byteain(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
char *inputText = PG_GETARG_CSTRING(0);
|
char *inputText = PG_GETARG_CSTRING(0);
|
||||||
Node *escontext = fcinfo->context;
|
Node *escontext = fcinfo->context;
|
||||||
|
size_t len = strlen(inputText);
|
||||||
|
size_t bc;
|
||||||
char *tp;
|
char *tp;
|
||||||
char *rp;
|
char *rp;
|
||||||
int bc;
|
|
||||||
bytea *result;
|
bytea *result;
|
||||||
|
|
||||||
/* Recognize hex input */
|
/* Recognize hex input */
|
||||||
if (inputText[0] == '\\' && inputText[1] == 'x')
|
if (inputText[0] == '\\' && inputText[1] == 'x')
|
||||||
{
|
{
|
||||||
size_t len = strlen(inputText);
|
|
||||||
|
|
||||||
bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
|
bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
|
||||||
result = palloc(bc);
|
result = palloc(bc);
|
||||||
bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
|
bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
|
||||||
@@ -213,18 +207,33 @@ byteain(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Else, it's the traditional escaped style */
|
/* Else, it's the traditional escaped style */
|
||||||
for (bc = 0, tp = inputText; *tp != '\0'; bc++)
|
result = (bytea *) palloc(len + VARHDRSZ); /* maximum possible length */
|
||||||
|
|
||||||
|
tp = inputText;
|
||||||
|
rp = VARDATA(result);
|
||||||
|
while (*tp != '\0')
|
||||||
{
|
{
|
||||||
if (tp[0] != '\\')
|
if (tp[0] != '\\')
|
||||||
tp++;
|
*rp++ = *tp++;
|
||||||
else if ((tp[0] == '\\') &&
|
else if ((tp[1] >= '0' && tp[1] <= '3') &&
|
||||||
(tp[1] >= '0' && tp[1] <= '3') &&
|
|
||||||
(tp[2] >= '0' && tp[2] <= '7') &&
|
(tp[2] >= '0' && tp[2] <= '7') &&
|
||||||
(tp[3] >= '0' && tp[3] <= '7'))
|
(tp[3] >= '0' && tp[3] <= '7'))
|
||||||
|
{
|
||||||
|
int v;
|
||||||
|
|
||||||
|
v = VAL(tp[1]);
|
||||||
|
v <<= 3;
|
||||||
|
v += VAL(tp[2]);
|
||||||
|
v <<= 3;
|
||||||
|
*rp++ = v + VAL(tp[3]);
|
||||||
|
|
||||||
tp += 4;
|
tp += 4;
|
||||||
else if ((tp[0] == '\\') &&
|
}
|
||||||
(tp[1] == '\\'))
|
else if (tp[1] == '\\')
|
||||||
|
{
|
||||||
|
*rp++ = '\\';
|
||||||
tp += 2;
|
tp += 2;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@@ -236,46 +245,8 @@ byteain(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bc += VARHDRSZ;
|
bc = rp - VARDATA(result); /* actual length */
|
||||||
|
SET_VARSIZE(result, bc + VARHDRSZ);
|
||||||
result = (bytea *) palloc(bc);
|
|
||||||
SET_VARSIZE(result, bc);
|
|
||||||
|
|
||||||
tp = inputText;
|
|
||||||
rp = VARDATA(result);
|
|
||||||
while (*tp != '\0')
|
|
||||||
{
|
|
||||||
if (tp[0] != '\\')
|
|
||||||
*rp++ = *tp++;
|
|
||||||
else if ((tp[0] == '\\') &&
|
|
||||||
(tp[1] >= '0' && tp[1] <= '3') &&
|
|
||||||
(tp[2] >= '0' && tp[2] <= '7') &&
|
|
||||||
(tp[3] >= '0' && tp[3] <= '7'))
|
|
||||||
{
|
|
||||||
bc = VAL(tp[1]);
|
|
||||||
bc <<= 3;
|
|
||||||
bc += VAL(tp[2]);
|
|
||||||
bc <<= 3;
|
|
||||||
*rp++ = bc + VAL(tp[3]);
|
|
||||||
|
|
||||||
tp += 4;
|
|
||||||
}
|
|
||||||
else if ((tp[0] == '\\') &&
|
|
||||||
(tp[1] == '\\'))
|
|
||||||
{
|
|
||||||
*rp++ = '\\';
|
|
||||||
tp += 2;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* We should never get here. The first pass should not allow it.
|
|
||||||
*/
|
|
||||||
ereturn(escontext, (Datum) 0,
|
|
||||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
||||||
errmsg("invalid input syntax for type %s", "bytea")));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
PG_RETURN_BYTEA_P(result);
|
PG_RETURN_BYTEA_P(result);
|
||||||
}
|
}
|
||||||
|
@@ -236,6 +236,12 @@ SELECT E'De\\678dBeEf'::bytea;
|
|||||||
ERROR: invalid input syntax for type bytea
|
ERROR: invalid input syntax for type bytea
|
||||||
LINE 1: SELECT E'De\\678dBeEf'::bytea;
|
LINE 1: SELECT E'De\\678dBeEf'::bytea;
|
||||||
^
|
^
|
||||||
|
SELECT E'DeAd\\\\BeEf'::bytea;
|
||||||
|
bytea
|
||||||
|
----------------------
|
||||||
|
\x446541645c42654566
|
||||||
|
(1 row)
|
||||||
|
|
||||||
SELECT reverse(''::bytea);
|
SELECT reverse(''::bytea);
|
||||||
reverse
|
reverse
|
||||||
---------
|
---------
|
||||||
@@ -291,6 +297,12 @@ SELECT E'De\\123dBeEf'::bytea;
|
|||||||
DeSdBeEf
|
DeSdBeEf
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT E'DeAd\\\\BeEf'::bytea;
|
||||||
|
bytea
|
||||||
|
------------
|
||||||
|
DeAd\\BeEf
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- Test non-error-throwing API too
|
-- Test non-error-throwing API too
|
||||||
SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');
|
SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');
|
||||||
pg_input_is_valid
|
pg_input_is_valid
|
||||||
|
@@ -76,6 +76,7 @@ SELECT E'De\\000dBeEf'::bytea;
|
|||||||
SELECT E'De\123dBeEf'::bytea;
|
SELECT E'De\123dBeEf'::bytea;
|
||||||
SELECT E'De\\123dBeEf'::bytea;
|
SELECT E'De\\123dBeEf'::bytea;
|
||||||
SELECT E'De\\678dBeEf'::bytea;
|
SELECT E'De\\678dBeEf'::bytea;
|
||||||
|
SELECT E'DeAd\\\\BeEf'::bytea;
|
||||||
|
|
||||||
SELECT reverse(''::bytea);
|
SELECT reverse(''::bytea);
|
||||||
SELECT reverse('\xaa'::bytea);
|
SELECT reverse('\xaa'::bytea);
|
||||||
@@ -88,6 +89,7 @@ SELECT E'\\xDe00BeEf'::bytea;
|
|||||||
SELECT E'DeAdBeEf'::bytea;
|
SELECT E'DeAdBeEf'::bytea;
|
||||||
SELECT E'De\\000dBeEf'::bytea;
|
SELECT E'De\\000dBeEf'::bytea;
|
||||||
SELECT E'De\\123dBeEf'::bytea;
|
SELECT E'De\\123dBeEf'::bytea;
|
||||||
|
SELECT E'DeAd\\\\BeEf'::bytea;
|
||||||
|
|
||||||
-- Test non-error-throwing API too
|
-- Test non-error-throwing API too
|
||||||
SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');
|
SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');
|
||||||
|
Reference in New Issue
Block a user