1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-09 06:21:09 +03:00

Support hex-string input and output for type BYTEA.

Both hex format and the traditional "escape" format are automatically
handled on input.  The output format is selected by the new GUC variable
bytea_output.

As committed, bytea_output defaults to HEX, which is an *incompatible
change*.  We will keep it this way for awhile for testing purposes, but
should consider whether to switch to the more backwards-compatible
default of ESCAPE before 8.5 is released.

Peter Eisentraut
This commit is contained in:
Tom Lane
2009-08-04 16:08:37 +00:00
parent f192e4a5d0
commit a2a8c7a662
21 changed files with 442 additions and 111 deletions

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.23 2009/01/01 17:23:49 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.24 2009/08/04 16:08:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -109,7 +109,7 @@ binary_decode(PG_FUNCTION_ARGS)
* HEX
*/
static const char *hextbl = "0123456789abcdef";
static const char hextbl[] = "0123456789abcdef";
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -122,7 +122,7 @@ static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
static unsigned
unsigned
hex_encode(const char *src, unsigned len, char *dst)
{
const char *end = src + len;
@@ -136,7 +136,7 @@ hex_encode(const char *src, unsigned len, char *dst)
return len * 2;
}
static char
static inline char
get_hex(char c)
{
int res = -1;
@@ -152,7 +152,7 @@ get_hex(char c)
return (char) res;
}
static unsigned
unsigned
hex_decode(const char *src, unsigned len, char *dst)
{
const char *s,

View File

@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.261 2009/06/11 14:49:04 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.262 2009/08/04 16:08:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -109,6 +109,7 @@
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.171 2009/06/11 14:49:04 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.172 2009/08/04 16:08:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -24,10 +24,14 @@
#include "parser/scansup.h"
#include "regex/regex.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
/* GUC variable */
int bytea_output = BYTEA_OUTPUT_HEX;
typedef struct varlena unknown;
typedef struct
@@ -186,10 +190,24 @@ byteain(PG_FUNCTION_ARGS)
char *inputText = PG_GETARG_CSTRING(0);
char *tp;
char *rp;
int byte;
int bc;
bytea *result;
for (byte = 0, tp = inputText; *tp != '\0'; byte ++)
/* Recognize hex input */
if (inputText[0] == '\\' && inputText[1] == 'x')
{
size_t len = strlen(inputText);
bc = (len - 2)/2 + VARHDRSZ; /* maximum possible length */
result = palloc(bc);
bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
PG_RETURN_BYTEA_P(result);
}
/* Else, it's the traditional escaped style */
for (bc = 0, tp = inputText; *tp != '\0'; bc++)
{
if (tp[0] != '\\')
tp++;
@@ -204,7 +222,7 @@ byteain(PG_FUNCTION_ARGS)
else
{
/*
* one backslash, not followed by 0 or ### valid octal
* one backslash, not followed by another or ### valid octal
*/
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
@@ -212,10 +230,10 @@ byteain(PG_FUNCTION_ARGS)
}
}
byte +=VARHDRSZ;
bc += VARHDRSZ;
result = (bytea *) palloc(byte);
SET_VARSIZE(result, byte);
result = (bytea *) palloc(bc);
SET_VARSIZE(result, bc);
tp = inputText;
rp = VARDATA(result);
@@ -228,11 +246,11 @@ byteain(PG_FUNCTION_ARGS)
(tp[2] >= '0' && tp[2] <= '7') &&
(tp[3] >= '0' && tp[3] <= '7'))
{
byte = VAL(tp[1]);
byte <<=3;
byte +=VAL(tp[2]);
byte <<=3;
*rp++ = byte +VAL(tp[3]);
bc = VAL(tp[1]);
bc <<= 3;
bc += VAL(tp[2]);
bc <<= 3;
*rp++ = bc + VAL(tp[3]);
tp += 4;
}
@@ -259,21 +277,30 @@ byteain(PG_FUNCTION_ARGS)
/*
* byteaout - converts to printable representation of byte array
*
* Non-printable characters are inserted as '\nnn' (octal) and '\' as
* '\\'.
*
* NULL vlena should be an error--returning string with NULL for now.
* In the traditional escaped format, non-printable characters are
* printed as '\nnn' (octal) and '\' as '\\'.
*/
Datum
byteaout(PG_FUNCTION_ARGS)
{
bytea *vlena = PG_GETARG_BYTEA_PP(0);
char *result;
char *vp;
char *rp;
int val; /* holds unprintable chars */
int i;
if (bytea_output == BYTEA_OUTPUT_HEX)
{
/* Print hex format */
rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
*rp++ = '\\';
*rp++ = 'x';
rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
}
else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
{
/* Print traditional escaped format */
char *vp;
int len;
int i;
len = 1; /* empty string has 1 char */
vp = VARDATA_ANY(vlena);
@@ -297,6 +324,8 @@ byteaout(PG_FUNCTION_ARGS)
}
else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
{
int val; /* holds unprintable chars */
val = *vp;
rp[0] = '\\';
rp[3] = DIG(val & 07);
@@ -309,6 +338,13 @@ byteaout(PG_FUNCTION_ARGS)
else
*rp++ = *vp;
}
}
else
{
elog(ERROR, "unrecognized bytea_output setting: %d",
bytea_output);
rp = result = NULL; /* keep compiler quiet */
}
*rp = '\0';
PG_RETURN_CSTRING(result);
}

View File

@@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.509 2009/07/22 17:00:23 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.510 2009/08/04 16:08:36 tgl Exp $
*
*--------------------------------------------------------------------
*/
@@ -61,6 +61,7 @@
#include "tcop/tcopprot.h"
#include "tsearch/ts_cache.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/guc_tables.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
@@ -180,6 +181,12 @@ static char *config_enum_get_options(struct config_enum * record,
* NOTE! Option values may not contain double quotes!
*/
static const struct config_enum_entry bytea_output_options[] = {
{"escape", BYTEA_OUTPUT_ESCAPE, false},
{"hex", BYTEA_OUTPUT_HEX, false},
{NULL, 0, false}
};
/*
* We have different sets for client and server message level options because
* they sort slightly different (see "log" level)
@@ -2540,6 +2547,15 @@ static struct config_enum ConfigureNamesEnum[] =
BACKSLASH_QUOTE_SAFE_ENCODING, backslash_quote_options, NULL, NULL
},
{
{"bytea_output", PGC_USERSET, CLIENT_CONN_STATEMENT,
gettext_noop("Sets the output format for bytea."),
NULL
},
&bytea_output,
BYTEA_OUTPUT_HEX, bytea_output_options, NULL, NULL
},
{
{"client_min_messages", PGC_USERSET, LOGGING_WHEN,
gettext_noop("Sets the message levels that are sent to the client."),

View File

@@ -424,6 +424,7 @@
#statement_timeout = 0 # in milliseconds, 0 is disabled
#vacuum_freeze_min_age = 50000000
#vacuum_freeze_table_age = 150000000
#bytea_output = 'hex' # hex, escape
#xmlbinary = 'base64'
#xmloption = 'content'