mirror of
https://github.com/postgres/postgres.git
synced 2025-06-01 14:21:49 +03:00
Add some tests for encoding conversion in COPY TO/FROM
This adds a couple of tests that trigger encoding conversion in COPY FROM/TO when the input and server encodings do not match, or when need_transcoding is set to true in the COPY state data. These tests rely on UTF8 <-> LATIN1 for the valid cases, as LATIN1 accepts any bytes, and on UTF8 <-> EUC_JP for some of the invalid cases where a character cannot be understood, causing a conversion failure. Both the ENCODING option and client_encoding are covered. Test suggested by Andres Freund. Author: Sutou Kouhei. Discussion: https://postgr.es/m/20240206222445.hzq22pb2nye7rm67@awork3.anarazel.de
This commit is contained in:
parent
bf9165bb0c
commit
3ad8b840ce
46
src/test/regress/expected/copyencoding.out
Normal file
46
src/test/regress/expected/copyencoding.out
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
--
|
||||||
|
-- Test cases for encoding with COPY commands
|
||||||
|
--
|
||||||
|
-- skip test if not UTF8 server encoding
|
||||||
|
SELECT getdatabaseencoding() <> 'UTF8'
|
||||||
|
AS skip_test \gset
|
||||||
|
\if :skip_test
|
||||||
|
\quit
|
||||||
|
\endif
|
||||||
|
-- directory paths are passed to us in environment variables
|
||||||
|
\getenv abs_builddir PG_ABS_BUILDDIR
|
||||||
|
\set utf8_csv :abs_builddir '/results/copyencoding_utf8.csv'
|
||||||
|
CREATE TABLE copy_encoding_tab (t text);
|
||||||
|
-- Valid cases
|
||||||
|
-- Use ENCODING option
|
||||||
|
-- U+3042 HIRAGANA LETTER A
|
||||||
|
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
|
||||||
|
-- Read UTF8 data as LATIN1: no error
|
||||||
|
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'LATIN1');
|
||||||
|
-- Use client_encoding
|
||||||
|
SET client_encoding TO UTF8;
|
||||||
|
-- U+3042 HIRAGANA LETTER A
|
||||||
|
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
|
||||||
|
-- Read UTF8 data as LATIN1: no error
|
||||||
|
SET client_encoding TO LATIN1;
|
||||||
|
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
|
||||||
|
RESET client_encoding;
|
||||||
|
-- Invalid cases
|
||||||
|
-- Use ENCODING explicitly
|
||||||
|
-- U+3042 HIRAGANA LETTER A
|
||||||
|
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
|
||||||
|
-- Read UTF8 data as EUC_JP: error
|
||||||
|
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'EUC_JP');
|
||||||
|
ERROR: invalid byte sequence for encoding "EUC_JP": 0xe3 0x81
|
||||||
|
CONTEXT: COPY copy_encoding_tab, line 1
|
||||||
|
-- Use client_encoding
|
||||||
|
SET client_encoding TO UTF8;
|
||||||
|
-- U+3042 HIRAGANA LETTER A
|
||||||
|
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
|
||||||
|
-- Read UTF8 data as EUC_JP: error
|
||||||
|
SET client_encoding TO EUC_JP;
|
||||||
|
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
|
||||||
|
ERROR: invalid byte sequence for encoding "EUC_JP": 0xe3 0x81
|
||||||
|
CONTEXT: COPY copy_encoding_tab, line 1
|
||||||
|
RESET client_encoding;
|
||||||
|
DROP TABLE copy_encoding_tab;
|
8
src/test/regress/expected/copyencoding_1.out
Normal file
8
src/test/regress/expected/copyencoding_1.out
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
--
|
||||||
|
-- Test cases for encoding with COPY commands
|
||||||
|
--
|
||||||
|
-- skip test if not UTF8 server encoding
|
||||||
|
SELECT getdatabaseencoding() <> 'UTF8'
|
||||||
|
AS skip_test \gset
|
||||||
|
\if :skip_test
|
||||||
|
\quit
|
@ -36,7 +36,7 @@ test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comment
|
|||||||
# execute two copy tests in parallel, to check that copy itself
|
# execute two copy tests in parallel, to check that copy itself
|
||||||
# is concurrent safe.
|
# is concurrent safe.
|
||||||
# ----------
|
# ----------
|
||||||
test: copy copyselect copydml insert insert_conflict
|
test: copy copyselect copydml copyencoding insert insert_conflict
|
||||||
|
|
||||||
# ----------
|
# ----------
|
||||||
# More groups of parallel tests
|
# More groups of parallel tests
|
||||||
|
53
src/test/regress/sql/copyencoding.sql
Normal file
53
src/test/regress/sql/copyencoding.sql
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
--
|
||||||
|
-- Test cases for encoding with COPY commands
|
||||||
|
--
|
||||||
|
|
||||||
|
-- skip test if not UTF8 server encoding
|
||||||
|
SELECT getdatabaseencoding() <> 'UTF8'
|
||||||
|
AS skip_test \gset
|
||||||
|
\if :skip_test
|
||||||
|
\quit
|
||||||
|
\endif
|
||||||
|
|
||||||
|
-- directory paths are passed to us in environment variables
|
||||||
|
\getenv abs_builddir PG_ABS_BUILDDIR
|
||||||
|
|
||||||
|
\set utf8_csv :abs_builddir '/results/copyencoding_utf8.csv'
|
||||||
|
|
||||||
|
CREATE TABLE copy_encoding_tab (t text);
|
||||||
|
|
||||||
|
-- Valid cases
|
||||||
|
|
||||||
|
-- Use ENCODING option
|
||||||
|
-- U+3042 HIRAGANA LETTER A
|
||||||
|
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
|
||||||
|
-- Read UTF8 data as LATIN1: no error
|
||||||
|
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'LATIN1');
|
||||||
|
|
||||||
|
-- Use client_encoding
|
||||||
|
SET client_encoding TO UTF8;
|
||||||
|
-- U+3042 HIRAGANA LETTER A
|
||||||
|
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
|
||||||
|
-- Read UTF8 data as LATIN1: no error
|
||||||
|
SET client_encoding TO LATIN1;
|
||||||
|
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
|
||||||
|
RESET client_encoding;
|
||||||
|
|
||||||
|
-- Invalid cases
|
||||||
|
|
||||||
|
-- Use ENCODING explicitly
|
||||||
|
-- U+3042 HIRAGANA LETTER A
|
||||||
|
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
|
||||||
|
-- Read UTF8 data as EUC_JP: error
|
||||||
|
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'EUC_JP');
|
||||||
|
|
||||||
|
-- Use client_encoding
|
||||||
|
SET client_encoding TO UTF8;
|
||||||
|
-- U+3042 HIRAGANA LETTER A
|
||||||
|
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
|
||||||
|
-- Read UTF8 data as EUC_JP: error
|
||||||
|
SET client_encoding TO EUC_JP;
|
||||||
|
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
|
||||||
|
RESET client_encoding;
|
||||||
|
|
||||||
|
DROP TABLE copy_encoding_tab;
|
Loading…
x
Reference in New Issue
Block a user