mirror of
https://github.com/postgres/postgres.git
synced 2025-11-29 23:43:17 +03:00
Get rid of VARATT_SIZE and VARATT_DATA, which were simply redundant with VARSIZE and VARDATA, and as a consequence almost no code was using the longer names. Rename the length fields of struct varlena and various derived structures to catch anyplace that was accessing them directly; and clean up various places so caught. In itself this patch doesn't change any behavior at all, but it is necessary infrastructure if we hope to play any games with the representation of varlena headers. Greg Stark and Tom Lane
557 lines
10 KiB
C
557 lines
10 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* encode.c
|
|
* Various data encoding/decoding things.
|
|
*
|
|
* Copyright (c) 2001-2007, PostgreSQL Global Development Group
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.19 2007/02/27 23:48:08 tgl Exp $
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include <ctype.h>
|
|
|
|
#include "utils/builtins.h"
|
|
|
|
|
|
struct pg_encoding
|
|
{
|
|
unsigned (*encode_len) (const char *data, unsigned dlen);
|
|
unsigned (*decode_len) (const char *data, unsigned dlen);
|
|
unsigned (*encode) (const char *data, unsigned dlen, char *res);
|
|
unsigned (*decode) (const char *data, unsigned dlen, char *res);
|
|
};
|
|
|
|
static struct pg_encoding *pg_find_encoding(const char *name);
|
|
|
|
/*
|
|
* SQL functions.
|
|
*/
|
|
|
|
Datum
|
|
binary_encode(PG_FUNCTION_ARGS)
|
|
{
|
|
bytea *data = PG_GETARG_BYTEA_P(0);
|
|
Datum name = PG_GETARG_DATUM(1);
|
|
text *result;
|
|
char *namebuf;
|
|
int datalen,
|
|
resultlen,
|
|
res;
|
|
struct pg_encoding *enc;
|
|
|
|
datalen = VARSIZE(data) - VARHDRSZ;
|
|
|
|
namebuf = DatumGetCString(DirectFunctionCall1(textout, name));
|
|
|
|
enc = pg_find_encoding(namebuf);
|
|
if (enc == NULL)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("unrecognized encoding: \"%s\"", namebuf)));
|
|
|
|
resultlen = enc->encode_len(VARDATA(data), datalen);
|
|
result = palloc(VARHDRSZ + resultlen);
|
|
|
|
res = enc->encode(VARDATA(data), datalen, VARDATA(result));
|
|
|
|
/* Make this FATAL 'cause we've trodden on memory ... */
|
|
if (res > resultlen)
|
|
elog(FATAL, "overflow - encode estimate too small");
|
|
|
|
SET_VARSIZE(result, VARHDRSZ + res);
|
|
|
|
PG_RETURN_TEXT_P(result);
|
|
}
|
|
|
|
Datum
|
|
binary_decode(PG_FUNCTION_ARGS)
|
|
{
|
|
text *data = PG_GETARG_TEXT_P(0);
|
|
Datum name = PG_GETARG_DATUM(1);
|
|
bytea *result;
|
|
char *namebuf;
|
|
int datalen,
|
|
resultlen,
|
|
res;
|
|
struct pg_encoding *enc;
|
|
|
|
datalen = VARSIZE(data) - VARHDRSZ;
|
|
|
|
namebuf = DatumGetCString(DirectFunctionCall1(textout, name));
|
|
|
|
enc = pg_find_encoding(namebuf);
|
|
if (enc == NULL)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("unrecognized encoding: \"%s\"", namebuf)));
|
|
|
|
resultlen = enc->decode_len(VARDATA(data), datalen);
|
|
result = palloc(VARHDRSZ + resultlen);
|
|
|
|
res = enc->decode(VARDATA(data), datalen, VARDATA(result));
|
|
|
|
/* Make this FATAL 'cause we've trodden on memory ... */
|
|
if (res > resultlen)
|
|
elog(FATAL, "overflow - decode estimate too small");
|
|
|
|
SET_VARSIZE(result, VARHDRSZ + res);
|
|
|
|
PG_RETURN_BYTEA_P(result);
|
|
}
|
|
|
|
|
|
/*
|
|
* HEX
|
|
*/
|
|
|
|
static const char *hextbl = "0123456789abcdef";
|
|
|
|
static const int8 hexlookup[128] = {
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
|
|
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
};
|
|
|
|
static unsigned
|
|
hex_encode(const char *src, unsigned len, char *dst)
|
|
{
|
|
const char *end = src + len;
|
|
|
|
while (src < end)
|
|
{
|
|
*dst++ = hextbl[(*src >> 4) & 0xF];
|
|
*dst++ = hextbl[*src & 0xF];
|
|
src++;
|
|
}
|
|
return len * 2;
|
|
}
|
|
|
|
static char
|
|
get_hex(char c)
|
|
{
|
|
int res = -1;
|
|
|
|
if (c > 0 && c < 127)
|
|
res = hexlookup[(unsigned char) c];
|
|
|
|
if (res < 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid hexadecimal digit: \"%c\"", c)));
|
|
|
|
return (char) res;
|
|
}
|
|
|
|
static unsigned
|
|
hex_decode(const char *src, unsigned len, char *dst)
|
|
{
|
|
const char *s,
|
|
*srcend;
|
|
char v1,
|
|
v2,
|
|
*p = dst;
|
|
|
|
srcend = src + len;
|
|
s = src;
|
|
p = dst;
|
|
while (s < srcend)
|
|
{
|
|
if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
|
|
{
|
|
s++;
|
|
continue;
|
|
}
|
|
v1 = get_hex(*s++) << 4;
|
|
if (s >= srcend)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid hexadecimal data: odd number of digits")));
|
|
|
|
v2 = get_hex(*s++);
|
|
*p++ = v1 | v2;
|
|
}
|
|
|
|
return p - dst;
|
|
}
|
|
|
|
static unsigned
|
|
hex_enc_len(const char *src, unsigned srclen)
|
|
{
|
|
return srclen << 1;
|
|
}
|
|
|
|
static unsigned
|
|
hex_dec_len(const char *src, unsigned srclen)
|
|
{
|
|
return srclen >> 1;
|
|
}
|
|
|
|
/*
|
|
* BASE64
|
|
*/
|
|
|
|
static const char _base64[] =
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
|
|
|
static const int8 b64lookup[128] = {
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
|
|
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
|
|
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
|
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
|
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
|
|
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
|
|
};
|
|
|
|
static unsigned
|
|
b64_encode(const char *src, unsigned len, char *dst)
|
|
{
|
|
char *p,
|
|
*lend = dst + 76;
|
|
const char *s,
|
|
*end = src + len;
|
|
int pos = 2;
|
|
uint32 buf = 0;
|
|
|
|
s = src;
|
|
p = dst;
|
|
|
|
while (s < end)
|
|
{
|
|
buf |= (unsigned char) *s << (pos << 3);
|
|
pos--;
|
|
s++;
|
|
|
|
/* write it out */
|
|
if (pos < 0)
|
|
{
|
|
*p++ = _base64[(buf >> 18) & 0x3f];
|
|
*p++ = _base64[(buf >> 12) & 0x3f];
|
|
*p++ = _base64[(buf >> 6) & 0x3f];
|
|
*p++ = _base64[buf & 0x3f];
|
|
|
|
pos = 2;
|
|
buf = 0;
|
|
}
|
|
if (p >= lend)
|
|
{
|
|
*p++ = '\n';
|
|
lend = p + 76;
|
|
}
|
|
}
|
|
if (pos != 2)
|
|
{
|
|
*p++ = _base64[(buf >> 18) & 0x3f];
|
|
*p++ = _base64[(buf >> 12) & 0x3f];
|
|
*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
|
|
*p++ = '=';
|
|
}
|
|
|
|
return p - dst;
|
|
}
|
|
|
|
static unsigned
|
|
b64_decode(const char *src, unsigned len, char *dst)
|
|
{
|
|
const char *srcend = src + len,
|
|
*s = src;
|
|
char *p = dst;
|
|
char c;
|
|
int b = 0;
|
|
uint32 buf = 0;
|
|
int pos = 0,
|
|
end = 0;
|
|
|
|
while (s < srcend)
|
|
{
|
|
c = *s++;
|
|
|
|
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
|
|
continue;
|
|
|
|
if (c == '=')
|
|
{
|
|
/* end sequence */
|
|
if (!end)
|
|
{
|
|
if (pos == 2)
|
|
end = 1;
|
|
else if (pos == 3)
|
|
end = 2;
|
|
else
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("unexpected \"=\"")));
|
|
}
|
|
b = 0;
|
|
}
|
|
else
|
|
{
|
|
b = -1;
|
|
if (c > 0 && c < 127)
|
|
b = b64lookup[(unsigned char) c];
|
|
if (b < 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid symbol")));
|
|
}
|
|
/* add it to buffer */
|
|
buf = (buf << 6) + b;
|
|
pos++;
|
|
if (pos == 4)
|
|
{
|
|
*p++ = (buf >> 16) & 255;
|
|
if (end == 0 || end > 1)
|
|
*p++ = (buf >> 8) & 255;
|
|
if (end == 0 || end > 2)
|
|
*p++ = buf & 255;
|
|
buf = 0;
|
|
pos = 0;
|
|
}
|
|
}
|
|
|
|
if (pos != 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid end sequence")));
|
|
|
|
return p - dst;
|
|
}
|
|
|
|
|
|
static unsigned
|
|
b64_enc_len(const char *src, unsigned srclen)
|
|
{
|
|
/* 3 bytes will be converted to 4, linefeed after 76 chars */
|
|
return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4);
|
|
}
|
|
|
|
static unsigned
|
|
b64_dec_len(const char *src, unsigned srclen)
|
|
{
|
|
return (srclen * 3) >> 2;
|
|
}
|
|
|
|
/*
|
|
* Escape
|
|
* Minimally escape bytea to text.
|
|
* De-escape text to bytea.
|
|
*
|
|
* Only two characters are escaped:
|
|
* \0 (null) and \\ (backslash)
|
|
*
|
|
* De-escapes \\ and any \### octal
|
|
*/
|
|
|
|
#define VAL(CH) ((CH) - '0')
|
|
#define DIG(VAL) ((VAL) + '0')
|
|
|
|
static unsigned
|
|
esc_encode(const char *src, unsigned srclen, char *dst)
|
|
{
|
|
const char *end = src + srclen;
|
|
char *rp = dst;
|
|
int len = 0;
|
|
|
|
while (src < end)
|
|
{
|
|
if (*src == '\0')
|
|
{
|
|
rp[0] = '\\';
|
|
rp[1] = '0';
|
|
rp[2] = '0';
|
|
rp[3] = '0';
|
|
rp += 4;
|
|
len += 4;
|
|
}
|
|
else if (*src == '\\')
|
|
{
|
|
rp[0] = '\\';
|
|
rp[1] = '\\';
|
|
rp += 2;
|
|
len += 2;
|
|
}
|
|
else
|
|
{
|
|
*rp++ = *src;
|
|
len++;
|
|
}
|
|
|
|
src++;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static unsigned
|
|
esc_decode(const char *src, unsigned srclen, char *dst)
|
|
{
|
|
const char *end = src + srclen;
|
|
char *rp = dst;
|
|
int len = 0;
|
|
|
|
while (src < end)
|
|
{
|
|
if (src[0] != '\\')
|
|
*rp++ = *src++;
|
|
else if (src + 3 < end &&
|
|
(src[1] >= '0' && src[1] <= '3') &&
|
|
(src[2] >= '0' && src[2] <= '7') &&
|
|
(src[3] >= '0' && src[3] <= '7'))
|
|
{
|
|
int val;
|
|
|
|
val = VAL(src[1]);
|
|
val <<= 3;
|
|
val += VAL(src[2]);
|
|
val <<= 3;
|
|
*rp++ = val + VAL(src[3]);
|
|
src += 4;
|
|
}
|
|
else if (src + 1 < end &&
|
|
(src[1] == '\\'))
|
|
{
|
|
*rp++ = '\\';
|
|
src += 2;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* One backslash, not followed by ### valid octal. Should never
|
|
* get here, since esc_dec_len does same check.
|
|
*/
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type bytea")));
|
|
}
|
|
|
|
len++;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static unsigned
|
|
esc_enc_len(const char *src, unsigned srclen)
|
|
{
|
|
const char *end = src + srclen;
|
|
int len = 0;
|
|
|
|
while (src < end)
|
|
{
|
|
if (*src == '\0')
|
|
len += 4;
|
|
else if (*src == '\\')
|
|
len += 2;
|
|
else
|
|
len++;
|
|
|
|
src++;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static unsigned
|
|
esc_dec_len(const char *src, unsigned srclen)
|
|
{
|
|
const char *end = src + srclen;
|
|
int len = 0;
|
|
|
|
while (src < end)
|
|
{
|
|
if (src[0] != '\\')
|
|
src++;
|
|
else if (src + 3 < end &&
|
|
(src[1] >= '0' && src[1] <= '3') &&
|
|
(src[2] >= '0' && src[2] <= '7') &&
|
|
(src[3] >= '0' && src[3] <= '7'))
|
|
{
|
|
/*
|
|
* backslash + valid octal
|
|
*/
|
|
src += 4;
|
|
}
|
|
else if (src + 1 < end &&
|
|
(src[1] == '\\'))
|
|
{
|
|
/*
|
|
* two backslashes = backslash
|
|
*/
|
|
src += 2;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* one backslash, not followed by ### valid octal
|
|
*/
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type bytea")));
|
|
}
|
|
|
|
len++;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* Common
|
|
*/
|
|
|
|
static struct
|
|
{
|
|
const char *name;
|
|
struct pg_encoding enc;
|
|
} enclist[] =
|
|
|
|
{
|
|
{
|
|
"hex",
|
|
{
|
|
hex_enc_len, hex_dec_len, hex_encode, hex_decode
|
|
}
|
|
},
|
|
{
|
|
"base64",
|
|
{
|
|
b64_enc_len, b64_dec_len, b64_encode, b64_decode
|
|
}
|
|
},
|
|
{
|
|
"escape",
|
|
{
|
|
esc_enc_len, esc_dec_len, esc_encode, esc_decode
|
|
}
|
|
},
|
|
{
|
|
NULL,
|
|
{
|
|
NULL, NULL, NULL, NULL
|
|
}
|
|
}
|
|
};
|
|
|
|
static struct pg_encoding *
|
|
pg_find_encoding(const char *name)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; enclist[i].name; i++)
|
|
if (pg_strcasecmp(enclist[i].name, name) == 0)
|
|
return &enclist[i].enc;
|
|
|
|
return NULL;
|
|
}
|