1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-08 22:02:03 +03:00

plperl: Skip setting UTF8 flag when in SQL_ASCII encoding

When in SQL_ASCII encoding, strings passed around are not necessarily
UTF8-safe.  We had already fixed this in some places, but it looks like
we missed some.

I had to backpatch Peter Eisentraut's a8b92b60 to 9.1 in order for this
patch to cherry-pick more cleanly.

Patch from Alex Hunsaker, tweaked by Kyotaro HORIGUCHI and myself.

Some desultory cleanup and comment addition by me, during patch review.

Per bug report from Christoph Berg in
20120209102116.GA14429@msgid.df7cb.de
This commit is contained in:
Alvaro Herrera 2012-07-09 17:36:29 -04:00
parent 1fbe7d377c
commit fc661f78c6
5 changed files with 72 additions and 48 deletions

View File

@ -44,7 +44,7 @@ PERLCHUNKS = plc_perlboot.pl plc_trusted.pl
SHLIB_LINK = $(perl_embed_ldflags) SHLIB_LINK = $(perl_embed_ldflags)
REGRESS_OPTS = --dbname=$(PL_TESTDB) --load-extension=plperl --load-extension=plperlu REGRESS_OPTS = --dbname=$(PL_TESTDB) --load-extension=plperl --load-extension=plperlu
REGRESS = plperl plperl_trigger plperl_shared plperl_elog plperl_util plperl_init plperlu plperl_array REGRESS = plperl plperl_lc plperl_trigger plperl_shared plperl_elog plperl_util plperl_init plperlu plperl_array
# if Perl can support two interpreters in one backend, # if Perl can support two interpreters in one backend,
# test plperl-and-plperlu cases # test plperl-and-plperlu cases
ifneq ($(PERL),) ifneq ($(PERL),)

View File

@ -67,8 +67,11 @@ static text *
sv2text(SV *sv) sv2text(SV *sv)
{ {
char *str = sv2cstr(sv); char *str = sv2cstr(sv);
text *text;
return cstring_to_text(str); text = cstring_to_text(str);
pfree(str);
return text;
} }
MODULE = PostgreSQL::InServer::Util PREFIX = util_ MODULE = PostgreSQL::InServer::Util PREFIX = util_
@ -113,8 +116,11 @@ util_quote_literal(sv)
} }
else { else {
text *arg = sv2text(sv); text *arg = sv2text(sv);
text *ret = DatumGetTextP(DirectFunctionCall1(quote_literal, PointerGetDatum(arg))); text *quoted = DatumGetTextP(DirectFunctionCall1(quote_literal, PointerGetDatum(arg)));
char *str = text_to_cstring(ret); char *str;
pfree(arg);
str = text_to_cstring(quoted);
RETVAL = cstr2sv(str); RETVAL = cstr2sv(str);
pfree(str); pfree(str);
} }
@ -132,8 +138,11 @@ util_quote_nullable(sv)
else else
{ {
text *arg = sv2text(sv); text *arg = sv2text(sv);
text *ret = DatumGetTextP(DirectFunctionCall1(quote_nullable, PointerGetDatum(arg))); text *quoted = DatumGetTextP(DirectFunctionCall1(quote_nullable, PointerGetDatum(arg)));
char *str = text_to_cstring(ret); char *str;
pfree(arg);
str = text_to_cstring(quoted);
RETVAL = cstr2sv(str); RETVAL = cstr2sv(str);
pfree(str); pfree(str);
} }
@ -145,12 +154,14 @@ util_quote_ident(sv)
SV *sv SV *sv
PREINIT: PREINIT:
text *arg; text *arg;
text *ret; text *quoted;
char *str; char *str;
CODE: CODE:
arg = sv2text(sv); arg = sv2text(sv);
ret = DatumGetTextP(DirectFunctionCall1(quote_ident, PointerGetDatum(arg))); quoted = DatumGetTextP(DirectFunctionCall1(quote_ident, PointerGetDatum(arg)));
str = text_to_cstring(ret);
pfree(arg);
str = text_to_cstring(quoted);
RETVAL = cstr2sv(str); RETVAL = cstr2sv(str);
pfree(str); pfree(str);
OUTPUT: OUTPUT:

View File

@ -650,16 +650,6 @@ CONTEXT: PL/Perl anonymous code block
DO $do$ use warnings FATAL => qw(void) ; my @y; my $x = sort @y; 1; $do$ LANGUAGE plperl; DO $do$ use warnings FATAL => qw(void) ; my @y; my $x = sort @y; 1; $do$ LANGUAGE plperl;
ERROR: Useless use of sort in scalar context at line 1. ERROR: Useless use of sort in scalar context at line 1.
CONTEXT: PL/Perl anonymous code block CONTEXT: PL/Perl anonymous code block
--
-- Make sure strings are validated
-- Should fail for all encodings, as nul bytes are never permitted.
--
CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$
return "abcd\0efg";
$$ LANGUAGE plperl;
SELECT perl_zerob();
ERROR: invalid byte sequence for encoding "UTF8": 0x00
CONTEXT: PL/Perl function "perl_zerob"
-- make sure functions marked as VOID without an explicit return work -- make sure functions marked as VOID without an explicit return work
CREATE OR REPLACE FUNCTION myfuncs() RETURNS void AS $$ CREATE OR REPLACE FUNCTION myfuncs() RETURNS void AS $$
$_SHARED{myquote} = sub { $_SHARED{myquote} = sub {

View File

@ -3,21 +3,29 @@
/* /*
* convert from utf8 to database encoding * convert from utf8 to database encoding
*
* Returns a palloc'ed copy of the original string
*/ */
static inline char * static inline char *
utf_u2e(const char *utf8_str, size_t len) utf_u2e(char *utf8_str, size_t len)
{ {
int enc = GetDatabaseEncoding(); int enc = GetDatabaseEncoding();
char *ret;
char *ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str, len, PG_UTF8, enc);
/* /*
* when we are a PG_UTF8 or SQL_ASCII database * When we are in a PG_UTF8 or SQL_ASCII database
* pg_do_encoding_conversion() will not do any conversion or * pg_do_encoding_conversion() will not do any conversion (which is good)
* verification. we need to do it manually instead. * or verification (not so much), so we need to run the verification step
* separately.
*/ */
if (enc == PG_UTF8 || enc == PG_SQL_ASCII) if (enc == PG_UTF8 || enc == PG_SQL_ASCII)
pg_verify_mbstr_len(PG_UTF8, utf8_str, len, false); {
pg_verify_mbstr_len(enc, utf8_str, len, false);
ret = utf8_str;
}
else
ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str,
len, PG_UTF8, enc);
if (ret == utf8_str) if (ret == utf8_str)
ret = pstrdup(ret); ret = pstrdup(ret);
@ -27,11 +35,15 @@ utf_u2e(const char *utf8_str, size_t len)
/* /*
* convert from database encoding to utf8 * convert from database encoding to utf8
*
* Returns a palloc'ed copy of the original string
*/ */
static inline char * static inline char *
utf_e2u(const char *str) utf_e2u(const char *str)
{ {
char *ret = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), GetDatabaseEncoding(), PG_UTF8); char *ret =
(char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
GetDatabaseEncoding(), PG_UTF8);
if (ret == str) if (ret == str)
ret = pstrdup(ret); ret = pstrdup(ret);
@ -41,6 +53,8 @@ utf_e2u(const char *str)
/* /*
* Convert an SV to a char * in the current database encoding * Convert an SV to a char * in the current database encoding
*
* Returns a palloc'ed copy of the original string
*/ */
static inline char * static inline char *
sv2cstr(SV *sv) sv2cstr(SV *sv)
@ -50,7 +64,9 @@ sv2cstr(SV *sv)
/* /*
* get a utf8 encoded char * out of perl. *note* it may not be valid utf8! * get a utf8 encoded char * out of perl. *note* it may not be valid utf8!
* */
/*
* SvPVutf8() croaks nastily on certain things, like typeglobs and * SvPVutf8() croaks nastily on certain things, like typeglobs and
* readonly objects such as $^V. That's a perl bug - it's not supposed to * readonly objects such as $^V. That's a perl bug - it's not supposed to
* happen. To avoid crashing the backend, we make a copy of the sv before * happen. To avoid crashing the backend, we make a copy of the sv before
@ -62,15 +78,27 @@ sv2cstr(SV *sv)
(SvTYPE(sv) > SVt_PVLV && SvTYPE(sv) != SVt_PVFM)) (SvTYPE(sv) > SVt_PVLV && SvTYPE(sv) != SVt_PVFM))
sv = newSVsv(sv); sv = newSVsv(sv);
else else
/* increase the reference count so we cant just SvREFCNT_dec() it when {
* we are done */ /*
* increase the reference count so we can just SvREFCNT_dec() it when
* we are done
*/
SvREFCNT_inc_simple_void(sv); SvREFCNT_inc_simple_void(sv);
}
/*
* Request the string from Perl, in UTF-8 encoding; but if we're in a
* SQL_ASCII database, just request the byte soup without trying to make it
* UTF8, because that might fail.
*/
if (GetDatabaseEncoding() == PG_SQL_ASCII)
val = SvPV(sv, len);
else
val = SvPVutf8(sv, len); val = SvPVutf8(sv, len);
/* /*
* we use perl's length in the event we had an embedded null byte to ensure * Now convert to database encoding. We use perl's length in the event we
* we error out properly * had an embedded null byte to ensure we error out properly.
*/ */
res = utf_u2e(val, len); res = utf_u2e(val, len);
@ -84,16 +112,20 @@ sv2cstr(SV *sv)
* Create a new SV from a string assumed to be in the current database's * Create a new SV from a string assumed to be in the current database's
* encoding. * encoding.
*/ */
static inline SV * static inline SV *
cstr2sv(const char *str) cstr2sv(const char *str)
{ {
SV *sv; SV *sv;
char *utf8_str = utf_e2u(str); char *utf8_str;
/* no conversion when SQL_ASCII */
if (GetDatabaseEncoding() == PG_SQL_ASCII)
return newSVpv(str, 0);
utf8_str = utf_e2u(str);
sv = newSVpv(utf8_str, 0); sv = newSVpv(utf8_str, 0);
SvUTF8_on(sv); SvUTF8_on(sv);
pfree(utf8_str); pfree(utf8_str);
return sv; return sv;

View File

@ -423,15 +423,6 @@ DO $do$ use strict; my $name = "foo"; my $ref = $$name; $do$ LANGUAGE plperl;
-- yields "ERROR: Useless use of sort in scalar context." -- yields "ERROR: Useless use of sort in scalar context."
DO $do$ use warnings FATAL => qw(void) ; my @y; my $x = sort @y; 1; $do$ LANGUAGE plperl; DO $do$ use warnings FATAL => qw(void) ; my @y; my $x = sort @y; 1; $do$ LANGUAGE plperl;
--
-- Make sure strings are validated
-- Should fail for all encodings, as nul bytes are never permitted.
--
CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$
return "abcd\0efg";
$$ LANGUAGE plperl;
SELECT perl_zerob();
-- make sure functions marked as VOID without an explicit return work -- make sure functions marked as VOID without an explicit return work
CREATE OR REPLACE FUNCTION myfuncs() RETURNS void AS $$ CREATE OR REPLACE FUNCTION myfuncs() RETURNS void AS $$
$_SHARED{myquote} = sub { $_SHARED{myquote} = sub {