diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index b2a6dd797ce..6d34c319888 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -34,6 +34,7 @@ COPY { table_name [ ( format_name OIDS [ boolean ] + FREEZE [ boolean ] DELIMITER 'delimiter_character' NULL 'null_string' HEADER [ boolean ] @@ -181,6 +182,28 @@ COPY { table_name [ ( + + FREEZE + + + Specifies copying the data with rows already frozen, just as they + would be after running the VACUUM FREEZE command. + This is intended as a performance option for initial data loading. + Rows will be frozen only if the table being loaded has been created + in the current subtransaction, there are no cursors open and there + are no older snapshots held by this transaction. If those conditions + are not met, the command will continue without error, though it will not + freeze rows. + + + Note that all sessions will immediately be able to see the data + once it has been successfully loaded. This violates the normal rules + of MVCC visibility, and by specifying this option the user acknowledges + explicitly that this is understood. + + + + DELIMITER diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 4abbdb68468..b66e26bebff 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1875,6 +1875,14 @@ FreeBulkInsertState(BulkInsertState bistate) * The HEAP_INSERT_SKIP_FSM option is passed directly to * RelationGetBufferForTuple, which see for more info. * + * HEAP_INSERT_COMMITTED should only be specified for inserts into + * relfilenodes created during the current subtransaction and when + * there are no prior snapshots or pre-existing portals open. + * + * HEAP_INSERT_FROZEN only has meaning when HEAP_INSERT_COMMITTED is + * also set. This causes rows to be frozen, which is an MVCC violation + * and requires an explicit option chosen by the user. 
+ * * Note that these options will be applied when inserting into the heap's * TOAST table, too, if the tuple requires any out-of-line data. * @@ -2078,7 +2086,17 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, tup->t_data->t_infomask &= ~(HEAP_XACT_MASK); tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); tup->t_data->t_infomask |= HEAP_XMAX_INVALID; - HeapTupleHeaderSetXmin(tup->t_data, xid); + /* Always store xmin; HEAP_INSERT_COMMITTED only adds HEAP_XMIN_COMMITTED. */ + if (options & HEAP_INSERT_COMMITTED) + { + tup->t_data->t_infomask |= HEAP_XMIN_COMMITTED; + if (options & HEAP_INSERT_FROZEN) + HeapTupleHeaderSetXmin(tup->t_data, FrozenTransactionId); + else + HeapTupleHeaderSetXmin(tup->t_data, xid); + } + else + HeapTupleHeaderSetXmin(tup->t_data, xid); HeapTupleHeaderSetCmin(tup->t_data, cid); HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */ tup->t_tableOid = RelationGetRelid(relation); diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 10c89c79b91..479c4cb17d6 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -44,6 +44,7 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/portal.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -109,6 +110,7 @@ typedef struct CopyStateData char *filename; /* filename, or NULL for STDIN/STDOUT */ bool binary; /* binary format? */ bool oids; /* include OIDs? */ + bool freeze; /* freeze rows on loading? */ bool csv_mode; /* Comma Separated Value format? */ bool header_line; /* CSV header line? */ char *null_print; /* NULL marker string (server encoding!) 
*/ @@ -895,6 +897,14 @@ ProcessCopyOptions(CopyState cstate, errmsg("conflicting or redundant options"))); cstate->oids = defGetBoolean(defel); } + else if (strcmp(defel->defname, "freeze") == 0) + { + if (cstate->freeze) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + cstate->freeze = defGetBoolean(defel); + } else if (strcmp(defel->defname, "delimiter") == 0) { if (cstate->delim) @@ -1974,8 +1984,31 @@ CopyFrom(CopyState cstate) hi_options |= HEAP_INSERT_SKIP_FSM; if (!XLogIsNeeded()) hi_options |= HEAP_INSERT_SKIP_WAL; + + /* + * Optimize if new relfilenode was created in this subxact or + * one of its committed children and we won't see those rows later + * as part of an earlier scan or command. This ensures that if this + * subtransaction aborts then the frozen rows won't be visible + * after xact cleanup. Note that the stronger test of exactly + * which subtransaction created it is crucial for correctness + * of this optimisation. + */ + if (ThereAreNoPriorRegisteredSnapshots() && + ThereAreNoReadyPortals() && + cstate->rel->rd_newRelfilenodeSubid == GetCurrentSubTransactionId()) + { + hi_options |= HEAP_INSERT_COMMITTED; + if (cstate->freeze) + hi_options |= HEAP_INSERT_FROZEN; + } } + if (cstate->freeze && (hi_options & HEAP_INSERT_FROZEN) == 0) + ereport(NOTICE, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("FREEZE option specified but pre-conditions not met"))); + /* * We need a ResultRelInfo so we can use the regular executor's * index-entry-making machinery. 
(There used to be a huge amount of code diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index e4ff76e66e0..ad98b364f13 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -2383,6 +2383,10 @@ copy_opt_item: { $$ = makeDefElem("oids", (Node *)makeInteger(TRUE)); } + | FREEZE + { + $$ = makeDefElem("freeze", (Node *)makeInteger(TRUE)); + } | DELIMITER opt_as Sconst { $$ = makeDefElem("delimiter", (Node *)makeString($3)); diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index 5713bbe12ce..b981f975af5 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -1055,3 +1055,22 @@ pg_cursor(PG_FUNCTION_ARGS) return (Datum) 0; } + +bool +ThereAreNoReadyPortals(void) +{ + HASH_SEQ_STATUS status; + PortalHashEnt *hentry; + + hash_seq_init(&status, PortalHashTable); + + while ((hentry = (PortalHashEnt *) hash_seq_search(&status)) != NULL) + { + Portal portal = hentry->portal; + + if (portal->status == PORTAL_READY) + return false; + } + + return true; +} diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index fa514f6b48f..5705a2d75e9 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -1184,3 +1184,12 @@ DeleteAllExportedSnapshotFiles(void) FreeDir(s_dir); } + +bool +ThereAreNoPriorRegisteredSnapshots(void) +{ + if (RegisteredSnapshots <= 1) + return true; + + return false; +} diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 3be6b3a39f1..92627bfc25c 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -26,6 +26,8 @@ /* "options" flag bits for heap_insert */ #define HEAP_INSERT_SKIP_WAL 0x0001 #define HEAP_INSERT_SKIP_FSM 0x0002 +#define HEAP_INSERT_COMMITTED 0x0004 +#define HEAP_INSERT_FROZEN 0x0008 typedef struct BulkInsertStateData *BulkInsertState; diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h index daafd0e15e3..bfb03b88907 
100644 --- a/src/include/utils/portal.h +++ b/src/include/utils/portal.h @@ -220,5 +220,6 @@ extern void PortalDefineQuery(Portal portal, extern Node *PortalListGetPrimaryStmt(List *stmts); extern void PortalCreateHoldStore(Portal portal); extern void PortalHashTableDeleteAll(void); +extern bool ThereAreNoReadyPortals(void); #endif /* PORTAL_H */ diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index 6b2ef80d067..da47e79eda3 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -48,5 +48,6 @@ extern Datum pg_export_snapshot(PG_FUNCTION_ARGS); extern void ImportSnapshot(const char *idstr); extern bool XactHasExportedSnapshots(void); extern void DeleteAllExportedSnapshotFiles(void); +extern bool ThereAreNoPriorRegisteredSnapshots(void); #endif /* SNAPMGR_H */ diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index b1d07b3e1f0..d08f8cfdd47 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -254,6 +254,112 @@ SELECT * FROM testnull; | (4 rows) +CREATE TABLE vistest (LIKE testeoc); +BEGIN; +TRUNCATE vistest; +COPY vistest FROM stdin CSV; +SELECT * FROM vistest; + a +--- + a + b +(2 rows) + +SAVEPOINT s1; +TRUNCATE vistest; +COPY vistest FROM stdin CSV; +SELECT * FROM vistest; + a +--- + d + e +(2 rows) + +COMMIT; +BEGIN; +TRUNCATE vistest; +COPY vistest FROM stdin CSV FREEZE; +SELECT * FROM vistest; + a +--- + a + b +(2 rows) + +SAVEPOINT s1; +TRUNCATE vistest; +COPY vistest FROM stdin CSV FREEZE; +SELECT * FROM vistest; + a +--- + d + e +(2 rows) + +COMMIT; +TRUNCATE vistest; +COPY vistest FROM stdin CSV FREEZE; +NOTICE: FREEZE option specified but pre-conditions not met +SELECT * FROM vistest; + a +--- + a + b +(2 rows) + +BEGIN; +INSERT INTO vistest VALUES ('z'); +SAVEPOINT s1; +TRUNCATE vistest; +ROLLBACK TO SAVEPOINT s1; +-- FREEZE should be silently ignored here +COPY vistest FROM stdin CSV FREEZE; +NOTICE: FREEZE option specified but 
pre-conditions not met +SELECT * FROM vistest; + a +--- + a + b + z + d + e +(5 rows) + +COMMIT; +CREATE FUNCTION truncate_in_subxact() RETURNS VOID AS +$$ +BEGIN + SELECT * FROM nonexistent; +EXCEPTION + WHEN OTHERS THEN + TRUNCATE vistest; +END; +$$ language plpgsql; +BEGIN; +INSERT INTO vistest VALUES ('z'); +SELECT truncate_in_subxact(); + truncate_in_subxact +--------------------- + +(1 row) + +COPY vistest FROM stdin CSV FREEZE; +SELECT * FROM vistest; + a +--- + d + e +(2 rows) + +COMMIT; +SELECT * FROM vistest; + a +--- + d + e +(2 rows) + +DROP TABLE vistest; DROP TABLE x, y; DROP FUNCTION fn_x_before(); DROP FUNCTION fn_x_after(); diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index 1961446fdb1..3b6da45439d 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -179,6 +179,84 @@ COPY testnull FROM stdin WITH NULL AS E'\\0'; SELECT * FROM testnull; +CREATE TABLE vistest (LIKE testeoc); +BEGIN; +TRUNCATE vistest; +COPY vistest FROM stdin CSV; +a +b +\. +SELECT * FROM vistest; +SAVEPOINT s1; +TRUNCATE vistest; +COPY vistest FROM stdin CSV; +d +e +\. +SELECT * FROM vistest; +COMMIT; + +BEGIN; +TRUNCATE vistest; +COPY vistest FROM stdin CSV FREEZE; +a +b +\. +SELECT * FROM vistest; +SAVEPOINT s1; +TRUNCATE vistest; +COPY vistest FROM stdin CSV FREEZE; +d +e +\. +SELECT * FROM vistest; +COMMIT; +BEGIN; +TRUNCATE vistest; +COPY vistest FROM stdin CSV FREEZE; +x +y +\. +SELECT * FROM vistest; +COMMIT; +TRUNCATE vistest; +COPY vistest FROM stdin CSV FREEZE; +p +g +\. +BEGIN; +INSERT INTO vistest VALUES ('z'); +SAVEPOINT s1; +TRUNCATE vistest; +ROLLBACK TO SAVEPOINT s1; +-- FREEZE should be silently ignored here +COPY vistest FROM stdin CSV FREEZE; +d +e +\. 
+SELECT * FROM vistest; +COMMIT; +CREATE FUNCTION truncate_in_subxact() RETURNS VOID AS +$$ +BEGIN + SELECT * FROM nonexistent; +EXCEPTION + WHEN OTHERS THEN + TRUNCATE vistest; +END; +$$ language plpgsql; +BEGIN; +INSERT INTO vistest VALUES ('z'); +SELECT truncate_in_subxact(); +COPY vistest FROM stdin CSV FREEZE; +d +e +\. +SELECT * FROM vistest; +COMMIT; +SELECT * FROM vistest; +DROP TABLE vistest; +DROP FUNCTION truncate_in_subxact(); DROP TABLE x, y; DROP FUNCTION fn_x_before(); DROP FUNCTION fn_x_after();