1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-29 10:41:53 +03:00

> Here's a revised patch. Changes:

>
> 1. Now outputs '\\' instead of '\134' when using encode(bytea, 'escape')
> Note that I ended up leaving \0 as \000 so that there are no ambiguities
> when decoding something like, for example, \0123.
>
> 2. Fixed a bug in byteain that allowed input values that were not valid
> octal escapes (e.g. \789) to be parsed as if they were octal.
>
> Joe
>

Here's rev 2 of the bytea string support patch. Changes:

1. Added missing declaration for MatchBytea function
2. Added PQescapeBytea to fe-exec.c
3. Applies cleanly on cvs tip from this afternoon

I'm hoping that someone can review/approve/apply this before beta starts, so
I guess I'd vote (not that it counts for much) to delay beta a few days :-)

Joe Conway
This commit is contained in:
Bruce Momjian
2001-09-14 17:46:40 +00:00
parent e8d5b8d290
commit c1fbf06654
11 changed files with 799 additions and 38 deletions

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/encode.c,v 1.1 2001/07/12 14:05:31 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/adt/encode.c,v 1.2 2001/09/14 17:46:40 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@ -19,8 +19,8 @@
/*
 * Function table describing one binary <-> text encoding.
 *
 * encode_len / decode_len return the size of the result buffer that the
 * caller allocates before invoking encode / decode.  encode and decode
 * read dlen bytes from data, write their output to res, and return an
 * unsigned count.
 */
struct pg_encoding
{
unsigned (*encode_len) (unsigned dlen);
unsigned (*decode_len) (unsigned dlen);
unsigned (*encode_len) (const uint8 *data, unsigned dlen);
unsigned (*decode_len) (const uint8 *data, unsigned dlen);
unsigned (*encode) (const uint8 *data, unsigned dlen, uint8 *res);
unsigned (*decode) (const uint8 *data, unsigned dlen, uint8 *res);
};
@ -50,7 +50,7 @@ binary_encode(PG_FUNCTION_ARGS)
if (enc == NULL)
elog(ERROR, "No such encoding");
resultlen = enc->encode_len(datalen);
resultlen = enc->encode_len(VARDATA(data), datalen);
result = palloc(VARHDRSZ + resultlen);
res = enc->encode(VARDATA(data), datalen, VARDATA(result));
@ -81,7 +81,7 @@ binary_decode(PG_FUNCTION_ARGS)
if (enc == NULL)
elog(ERROR, "No such encoding");
resultlen = enc->decode_len(datalen);
resultlen = enc->decode_len(VARDATA(data), datalen);
result = palloc(VARHDRSZ + resultlen);
res = enc->decode(VARDATA(data), datalen, VARDATA(result));
@ -169,13 +169,13 @@ hex_decode(const uint8 * src, unsigned len, uint8 * dst)
}
/*
 * Result length for hex encoding: twice the input length.
 * Only srclen is used in the computation.
 */
static unsigned
hex_enc_len(unsigned srclen)
hex_enc_len(const uint8 * src, unsigned srclen)
{
return srclen << 1;
}
/*
 * Result length for hex decoding: half the input length, rounded down.
 * Only srclen is used in the computation.
 */
static unsigned
hex_dec_len(unsigned srclen)
hex_dec_len(const uint8 * src, unsigned srclen)
{
return srclen >> 1;
}
@ -308,18 +308,188 @@ b64_decode(const uint8 * src, unsigned len, uint8 * dst)
/*
 * Result length for base64 encoding, computed from srclen alone.
 */
static unsigned
b64_enc_len(unsigned srclen)
b64_enc_len(const uint8 * src, unsigned srclen)
{
/* 3 bytes will be converted to 4, linefeed after 76 chars */
return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4);
}
/*
 * Result length for base64 decoding: three quarters of srclen, rounded
 * down.  Only srclen is used in the computation.
 */
static unsigned
b64_dec_len(unsigned srclen)
b64_dec_len(const uint8 * src, unsigned srclen)
{
return (srclen * 3) >> 2;
}
/*
* Escape
* Minimally escape bytea to text.
* De-escape text to bytea.
*
* Only two characters are escaped:
* \0 (null) and \\ (backslash)
*
* De-escapes \\ and any \### octal
*/
/* VAL: numeric value of an ASCII digit character, e.g. '7' -> 7 */
#define VAL(CH) ((CH) - '0')
/* DIG: ASCII digit character for a single-digit value, e.g. 7 -> '7' */
#define DIG(VAL) ((VAL) + '0')
/*
 * esc_encode
 *		Write the "escape" text form of the srclen bytes at src into dst.
 *
 * A zero byte is emitted as the four characters \000, a backslash as the
 * two characters \\, and every other byte is copied unchanged.  A
 * terminating '\0' is stored after the output but is not counted in the
 * return value, so dst must have room for one byte more than the
 * returned length.
 *
 * Returns the number of bytes written, excluding the terminator.
 */
static unsigned
esc_encode(const uint8 *src, unsigned srclen, uint8 *dst)
{
	const uint8 *end = src + srclen;
	uint8	   *rp = dst;
	unsigned	len = 0;

	while (src < end)
	{
		if (*src == '\0')
		{
			/* always three octal digits, so a following digit is unambiguous */
			rp[0] = '\\';
			rp[1] = '0';
			rp[2] = '0';
			rp[3] = '0';
			rp += 4;
			len += 4;
		}
		else if (*src == '\\')
		{
			rp[0] = '\\';
			rp[1] = '\\';
			rp += 2;
			len += 2;
		}
		else
		{
			*rp++ = *src;
			len++;
		}

		src++;
	}

	/* terminator is written but deliberately not included in len */
	*rp = '\0';

	return len;
}
/*
 * esc_decode
 *		De-escape "escape"-format text (srclen bytes at src) into dst.
 *
 * Recognized sequences:
 *		\\		-> a single backslash
 *		\###	-> the byte with octal value ### (first digit 0-3,
 *				   remaining digits 0-7)
 * Any other byte is copied unchanged.  A backslash that does not start
 * one of the sequences above raises an error via elog(ERROR).
 *
 * The input is not assumed to be null-terminated: bytes following a
 * backslash are examined only when they lie before src + srclen.
 *
 * Returns the number of bytes written to dst.
 */
static unsigned
esc_decode(const uint8 *src, unsigned srclen, uint8 *dst)
{
	const uint8 *end = src + srclen;
	uint8	   *rp = dst;
	unsigned	len = 0;

	while (src < end)
	{
		if (src[0] != '\\')
		{
			*rp++ = *src++;
		}
		else if (end - src >= 4 &&
				 (src[1] >= '0' && src[1] <= '3') &&
				 (src[2] >= '0' && src[2] <= '7') &&
				 (src[3] >= '0' && src[3] <= '7'))
		{
			int			val;

			/* backslash + three octal digits: assemble the byte value */
			val = VAL(src[1]);
			val <<= 3;
			val += VAL(src[2]);
			val <<= 3;
			*rp++ = val + VAL(src[3]);
			src += 4;
		}
		else if (end - src >= 2 &&
				 src[1] == '\\')
		{
			/* two backslashes = one backslash */
			*rp++ = '\\';
			src += 2;
		}
		else
		{
			/*
			 * One backslash, not followed by another backslash or a
			 * valid ### octal (possibly because the input ends first).
			 * Should never get here, since esc_dec_len does same check.
			 */
			elog(ERROR, "decode: Bad input string for type bytea");
		}

		len++;
	}

	return len;
}
/*
 * esc_enc_len
 *		Size of the buffer esc_encode needs for the srclen bytes at src.
 *
 * Each zero byte counts as 4 output bytes, each backslash as 2, and any
 * other byte as 1; one further byte is added for the null terminator.
 */
static unsigned
esc_enc_len(const uint8 *src, unsigned srclen)
{
	const uint8 *stop = src + srclen;
	const uint8 *p;
	int			total = 0;

	for (p = src; p < stop; p++)
	{
		switch (*p)
		{
			case '\0':
				total += 4;
				break;
			case '\\':
				total += 2;
				break;
			default:
				total += 1;
				break;
		}
	}

	/* one extra byte for the null terminator */
	return total + 1;
}
/*
 * esc_dec_len
 *		Number of bytes esc_decode will produce for the srclen bytes of
 *		"escape"-format text at src.
 *
 * Walks the input with the same rules as esc_decode: \\ and \### (first
 * digit 0-3, remaining digits 0-7) each yield one byte, as does every
 * non-backslash byte.  A backslash that starts neither sequence raises
 * an error via elog(ERROR).
 *
 * The input is not assumed to be null-terminated: bytes following a
 * backslash are examined only when they lie before src + srclen.
 */
static unsigned
esc_dec_len(const uint8 *src, unsigned srclen)
{
	const uint8 *end = src + srclen;
	unsigned	len = 0;

	while (src < end)
	{
		if (src[0] != '\\')
		{
			src++;
		}
		else if (end - src >= 4 &&
				 (src[1] >= '0' && src[1] <= '3') &&
				 (src[2] >= '0' && src[2] <= '7') &&
				 (src[3] >= '0' && src[3] <= '7'))
		{
			/*
			 * backslash + valid octal
			 */
			src += 4;
		}
		else if (end - src >= 2 &&
				 src[1] == '\\')
		{
			/*
			 * two backslashes = backslash
			 */
			src += 2;
		}
		else
		{
			/*
			 * one backslash, not followed by another backslash or a
			 * valid ### octal (possibly because the input ends first)
			 */
			elog(ERROR, "decode: Bad input string for type bytea");
		}

		len++;
	}

	return len;
}
/*
* Common
*/
@ -330,6 +500,7 @@ static struct {
} enclist[] = {
{"hex", { hex_enc_len, hex_dec_len, hex_encode, hex_decode }},
{"base64", { b64_enc_len, b64_dec_len, b64_encode, b64_decode }},
{"escape", { esc_enc_len, esc_dec_len, esc_encode, esc_decode }},
{NULL, { NULL, NULL, NULL, NULL } }
};