mirror of
https://github.com/postgres/postgres.git
synced 2025-11-09 06:21:09 +03:00
Support hex-string input and output for type BYTEA.
Both hex format and the traditional "escape" format are automatically handled on input. The output format is selected by the new GUC variable bytea_output. As committed, bytea_output defaults to HEX, which is an *incompatible change*. We will keep it this way for a while for testing purposes, but should consider whether to switch to the more backwards-compatible default of ESCAPE before 8.5 is released. — Peter Eisentraut
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.23 2009/01/01 17:23:49 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.24 2009/08/04 16:08:36 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -109,7 +109,7 @@ binary_decode(PG_FUNCTION_ARGS)
|
||||
* HEX
|
||||
*/
|
||||
|
||||
static const char *hextbl = "0123456789abcdef";
|
||||
static const char hextbl[] = "0123456789abcdef";
|
||||
|
||||
static const int8 hexlookup[128] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
@@ -122,7 +122,7 @@ static const int8 hexlookup[128] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
static unsigned
|
||||
unsigned
|
||||
hex_encode(const char *src, unsigned len, char *dst)
|
||||
{
|
||||
const char *end = src + len;
|
||||
@@ -136,7 +136,7 @@ hex_encode(const char *src, unsigned len, char *dst)
|
||||
return len * 2;
|
||||
}
|
||||
|
||||
static char
|
||||
static inline char
|
||||
get_hex(char c)
|
||||
{
|
||||
int res = -1;
|
||||
@@ -152,7 +152,7 @@ get_hex(char c)
|
||||
return (char) res;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
unsigned
|
||||
hex_decode(const char *src, unsigned len, char *dst)
|
||||
{
|
||||
const char *s,
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.261 2009/06/11 14:49:04 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.262 2009/08/04 16:08:36 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -109,6 +109,7 @@
|
||||
#include "parser/parse_coerce.h"
|
||||
#include "parser/parsetree.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/bytea.h"
|
||||
#include "utils/date.h"
|
||||
#include "utils/datum.h"
|
||||
#include "utils/fmgroids.h"
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.171 2009/06/11 14:49:04 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.172 2009/08/04 16:08:36 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -24,10 +24,14 @@
|
||||
#include "parser/scansup.h"
|
||||
#include "regex/regex.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/bytea.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/pg_locale.h"
|
||||
|
||||
|
||||
/* GUC variable */
|
||||
int bytea_output = BYTEA_OUTPUT_HEX;
|
||||
|
||||
typedef struct varlena unknown;
|
||||
|
||||
typedef struct
|
||||
@@ -186,10 +190,24 @@ byteain(PG_FUNCTION_ARGS)
|
||||
char *inputText = PG_GETARG_CSTRING(0);
|
||||
char *tp;
|
||||
char *rp;
|
||||
int byte;
|
||||
int bc;
|
||||
bytea *result;
|
||||
|
||||
for (byte = 0, tp = inputText; *tp != '\0'; byte ++)
|
||||
/* Recognize hex input */
|
||||
if (inputText[0] == '\\' && inputText[1] == 'x')
|
||||
{
|
||||
size_t len = strlen(inputText);
|
||||
|
||||
bc = (len - 2)/2 + VARHDRSZ; /* maximum possible length */
|
||||
result = palloc(bc);
|
||||
bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
|
||||
SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
|
||||
|
||||
PG_RETURN_BYTEA_P(result);
|
||||
}
|
||||
|
||||
/* Else, it's the traditional escaped style */
|
||||
for (bc = 0, tp = inputText; *tp != '\0'; bc++)
|
||||
{
|
||||
if (tp[0] != '\\')
|
||||
tp++;
|
||||
@@ -204,7 +222,7 @@ byteain(PG_FUNCTION_ARGS)
|
||||
else
|
||||
{
|
||||
/*
|
||||
* one backslash, not followed by 0 or ### valid octal
|
||||
* one backslash, not followed by another or ### valid octal
|
||||
*/
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
@@ -212,10 +230,10 @@ byteain(PG_FUNCTION_ARGS)
|
||||
}
|
||||
}
|
||||
|
||||
byte +=VARHDRSZ;
|
||||
bc += VARHDRSZ;
|
||||
|
||||
result = (bytea *) palloc(byte);
|
||||
SET_VARSIZE(result, byte);
|
||||
result = (bytea *) palloc(bc);
|
||||
SET_VARSIZE(result, bc);
|
||||
|
||||
tp = inputText;
|
||||
rp = VARDATA(result);
|
||||
@@ -228,11 +246,11 @@ byteain(PG_FUNCTION_ARGS)
|
||||
(tp[2] >= '0' && tp[2] <= '7') &&
|
||||
(tp[3] >= '0' && tp[3] <= '7'))
|
||||
{
|
||||
byte = VAL(tp[1]);
|
||||
byte <<=3;
|
||||
byte +=VAL(tp[2]);
|
||||
byte <<=3;
|
||||
*rp++ = byte +VAL(tp[3]);
|
||||
bc = VAL(tp[1]);
|
||||
bc <<= 3;
|
||||
bc += VAL(tp[2]);
|
||||
bc <<= 3;
|
||||
*rp++ = bc + VAL(tp[3]);
|
||||
|
||||
tp += 4;
|
||||
}
|
||||
@@ -259,21 +277,30 @@ byteain(PG_FUNCTION_ARGS)
|
||||
/*
|
||||
* byteaout - converts to printable representation of byte array
|
||||
*
|
||||
* Non-printable characters are inserted as '\nnn' (octal) and '\' as
|
||||
* '\\'.
|
||||
*
|
||||
* NULL vlena should be an error--returning string with NULL for now.
|
||||
* In the traditional escaped format, non-printable characters are
|
||||
* printed as '\nnn' (octal) and '\' as '\\'.
|
||||
*/
|
||||
Datum
|
||||
byteaout(PG_FUNCTION_ARGS)
|
||||
{
|
||||
bytea *vlena = PG_GETARG_BYTEA_PP(0);
|
||||
char *result;
|
||||
char *vp;
|
||||
char *rp;
|
||||
int val; /* holds unprintable chars */
|
||||
int i;
|
||||
|
||||
if (bytea_output == BYTEA_OUTPUT_HEX)
|
||||
{
|
||||
/* Print hex format */
|
||||
rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
|
||||
*rp++ = '\\';
|
||||
*rp++ = 'x';
|
||||
rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
|
||||
}
|
||||
else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
|
||||
{
|
||||
/* Print traditional escaped format */
|
||||
char *vp;
|
||||
int len;
|
||||
int i;
|
||||
|
||||
len = 1; /* empty string has 1 char */
|
||||
vp = VARDATA_ANY(vlena);
|
||||
@@ -297,6 +324,8 @@ byteaout(PG_FUNCTION_ARGS)
|
||||
}
|
||||
else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
|
||||
{
|
||||
int val; /* holds unprintable chars */
|
||||
|
||||
val = *vp;
|
||||
rp[0] = '\\';
|
||||
rp[3] = DIG(val & 07);
|
||||
@@ -309,6 +338,13 @@ byteaout(PG_FUNCTION_ARGS)
|
||||
else
|
||||
*rp++ = *vp;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
elog(ERROR, "unrecognized bytea_output setting: %d",
|
||||
bytea_output);
|
||||
rp = result = NULL; /* keep compiler quiet */
|
||||
}
|
||||
*rp = '\0';
|
||||
PG_RETURN_CSTRING(result);
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
* Written by Peter Eisentraut <peter_e@gmx.net>.
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.509 2009/07/22 17:00:23 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.510 2009/08/04 16:08:36 tgl Exp $
|
||||
*
|
||||
*--------------------------------------------------------------------
|
||||
*/
|
||||
@@ -61,6 +61,7 @@
|
||||
#include "tcop/tcopprot.h"
|
||||
#include "tsearch/ts_cache.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/bytea.h"
|
||||
#include "utils/guc_tables.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/pg_locale.h"
|
||||
@@ -180,6 +181,12 @@ static char *config_enum_get_options(struct config_enum * record,
|
||||
* NOTE! Option values may not contain double quotes!
|
||||
*/
|
||||
|
||||
static const struct config_enum_entry bytea_output_options[] = {
|
||||
{"escape", BYTEA_OUTPUT_ESCAPE, false},
|
||||
{"hex", BYTEA_OUTPUT_HEX, false},
|
||||
{NULL, 0, false}
|
||||
};
|
||||
|
||||
/*
|
||||
* We have different sets for client and server message level options because
|
||||
* they sort slightly differently (see "log" level)
|
||||
@@ -2540,6 +2547,15 @@ static struct config_enum ConfigureNamesEnum[] =
|
||||
BACKSLASH_QUOTE_SAFE_ENCODING, backslash_quote_options, NULL, NULL
|
||||
},
|
||||
|
||||
{
|
||||
{"bytea_output", PGC_USERSET, CLIENT_CONN_STATEMENT,
|
||||
gettext_noop("Sets the output format for bytea."),
|
||||
NULL
|
||||
},
|
||||
&bytea_output,
|
||||
BYTEA_OUTPUT_HEX, bytea_output_options, NULL, NULL
|
||||
},
|
||||
|
||||
{
|
||||
{"client_min_messages", PGC_USERSET, LOGGING_WHEN,
|
||||
gettext_noop("Sets the message levels that are sent to the client."),
|
||||
|
||||
@@ -424,6 +424,7 @@
|
||||
#statement_timeout = 0 # in milliseconds, 0 is disabled
|
||||
#vacuum_freeze_min_age = 50000000
|
||||
#vacuum_freeze_table_age = 150000000
|
||||
#bytea_output = 'hex' # hex, escape
|
||||
#xmlbinary = 'base64'
|
||||
#xmloption = 'content'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user