mirror of
https://github.com/postgres/postgres.git
synced 2025-04-25 21:42:33 +03:00
New file format for COPY BINARY, in accordance with pghackers discussions
of early December 2000. COPY BINARY is now TOAST-safe.
This commit is contained in:
parent
8fd2e269f7
commit
676cf18c5b
@ -1,5 +1,5 @@
|
||||
<!--
|
||||
$Header: /cvsroot/pgsql/doc/src/sgml/ref/copy.sgml,v 1.18 2000/10/05 19:48:17 momjian Exp $
|
||||
$Header: /cvsroot/pgsql/doc/src/sgml/ref/copy.sgml,v 1.19 2001/01/03 20:04:09 tgl Exp $
|
||||
Postgres documentation
|
||||
-->
|
||||
|
||||
@ -49,6 +49,7 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
|
||||
<para>
|
||||
Changes the behavior of field formatting, forcing all data to be
|
||||
stored or read in binary format rather than as text.
|
||||
The DELIMITERS and WITH NULL options are irrelevant for binary format.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -66,7 +67,7 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
|
||||
<term>WITH OIDS</term>
|
||||
<listitem>
|
||||
<para>
|
||||
Copies the internal unique object id (OID) for each row.
|
||||
Specifies copying the internal unique object id (OID) for each row.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -84,7 +85,7 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
|
||||
<term><filename>stdin</filename></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Specifies that input comes from a pipe or terminal.
|
||||
Specifies that input comes from the client application.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -93,7 +94,7 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
|
||||
<term><filename>stdout</filename></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Specifies that output goes to a pipe or terminal.
|
||||
Specifies that output goes to the client application.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -102,16 +103,16 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
|
||||
<term><replaceable class="parameter">delimiter</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
A character that delimits the input or output fields.
|
||||
The character that separates fields within each row (line) of the file.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><replaceable class="parameter">null print</replaceable></term>
|
||||
<term><replaceable class="parameter">null string</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
A string to represent NULL values. The default is
|
||||
The string that represents a NULL value. The default is
|
||||
<quote><literal>\N</literal></quote> (backslash-N).
|
||||
You might prefer an empty string, for example.
|
||||
</para>
|
||||
@ -166,7 +167,7 @@ ERROR: <replaceable>reason</replaceable>
|
||||
|
||||
<refsect1 id="R1-SQL-COPY-1">
|
||||
<refsect1info>
|
||||
<date>1998-09-08</date>
|
||||
<date>2001-01-02</date>
|
||||
</refsect1info>
|
||||
<title>
|
||||
Description
|
||||
@ -176,17 +177,36 @@ ERROR: <replaceable>reason</replaceable>
|
||||
<productname>Postgres</productname> tables and
|
||||
standard file-system files.
|
||||
|
||||
<command>COPY TO</command> copies the entire contents of a table to
|
||||
a file, while <command>COPY FROM</command> copies data from a file to a
|
||||
table (appending the data to whatever is in the table already).
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<command>COPY</command> instructs
|
||||
the <productname>Postgres</productname> backend
|
||||
to directly read from or write to a file. The file must be directly visible to
|
||||
the backend and the name must be specified from the viewpoint of the backend.
|
||||
If <filename>stdin</filename> or <filename>stdout</filename> are
|
||||
to directly read from or write to a file. If a file name is specified,
|
||||
the file must be accessible to the backend and the name must be specified
|
||||
from the viewpoint of the backend.
|
||||
If <filename>stdin</filename> or <filename>stdout</filename> is
|
||||
specified, data flows through the client frontend to the backend.
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<para>
|
||||
Do not confuse <command>COPY</command> with the
|
||||
<application>psql</application> instruction <command>\copy</command>.
|
||||
<command>\copy</command> invokes <command>COPY FROM stdin</command>
|
||||
or <command>COPY TO stdout</command>, and then fetches/stores the data
|
||||
in a file accessible to the <application>psql</application> client.
|
||||
Thus, file accessibility and access rights depend on the client
|
||||
rather than the backend when <command>\copy</command> is used.
|
||||
</para>
|
||||
</tip>
|
||||
</para>
|
||||
|
||||
<refsect2 id="R2-SQL-COPY-3">
|
||||
<refsect2info>
|
||||
<date>1998-09-08</date>
|
||||
<date>2001-01-02</date>
|
||||
</refsect2info>
|
||||
<title>
|
||||
Notes
|
||||
@ -194,16 +214,19 @@ ERROR: <replaceable>reason</replaceable>
|
||||
<para>
|
||||
The BINARY keyword will force all data to be
|
||||
stored/read as binary format rather than as text. It is
|
||||
somewhat faster than the normal copy command, but is not
|
||||
generally portable, and the files generated are somewhat larger,
|
||||
although this factor is highly dependent on the data itself.
|
||||
somewhat faster than the normal copy command, but a binary copy
|
||||
file is not portable across machine architectures.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
By default, a text copy uses a tab ("\t") character as a delimiter.
|
||||
The delimiter may also be changed to any other single character
|
||||
with the keyword phrase USING DELIMITERS. Characters
|
||||
By default, a text copy uses a tab ("\t") character as a delimiter
|
||||
between fields. The field delimiter may be changed to any other single
|
||||
character with the keyword phrase USING DELIMITERS. Characters
|
||||
in data fields which happen to match the delimiter character will
|
||||
be backslash quoted.
|
||||
Note that the delimiter is always a single character.
|
||||
If multiple characters are specified in the delimiter string,
|
||||
only the first character is used.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
@ -217,67 +240,63 @@ ERROR: <replaceable>reason</replaceable>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The keyword phrase USING DELIMITERS specifies a single character
|
||||
to be used for all delimiters between columns. If multiple characters
|
||||
are specified in the delimiter string, only the first character is
|
||||
used.
|
||||
<command>COPY FROM</command> neither invokes rules nor acts on column
|
||||
defaults. It does invoke triggers and check constraints.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<command>COPY</command> stops operation at the first error. This
|
||||
should not lead to problems in the event of
|
||||
a <command>COPY TO</command>, but the
|
||||
target relation will already have received earlier rows in a
|
||||
<command>COPY FROM</command>. These rows will not be visible or
|
||||
accessible, but they still occupy disk space. This may add up to a
|
||||
considerable amount
|
||||
of wasted disk space if the failure happened well into a large copy
|
||||
operation. You may wish to invoke <command>VACUUM</command> to recover
|
||||
the wasted space.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Files named in a <command>COPY</command> command are read or written
|
||||
directly by the backend, not by the client application. Therefore,
|
||||
they must reside on or be accessible to the database server machine,
|
||||
not the client. They must be accessible to and readable or writable
|
||||
by the Postgres user (the userid the backend runs as), not the client.
|
||||
<command>COPY</command> naming a file is only allowed to database
|
||||
superusers, since it allows reading or writing any file that the backend has
|
||||
privileges to access.
|
||||
|
||||
<tip>
|
||||
<para>
|
||||
Do not confuse <command>COPY</command> with the
|
||||
<application>psql</application> instruction <command>\copy</command>.
|
||||
The
|
||||
<application>psql</application> instruction <command>\copy</command>
|
||||
reads or writes files on the client machine with the client's
|
||||
permissions, so it is not restricted to superusers.
|
||||
</para>
|
||||
</tip>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<command>COPY</command> neither invokes rules nor acts on column defaults.
|
||||
It does invoke triggers, however.
|
||||
</para>
|
||||
<para>
|
||||
<command>COPY</command> stops operation at the first error. This
|
||||
should not lead to problems in the event of
|
||||
a <command>COPY FROM</command>, but the
|
||||
target relation will, of course, be partially modified in a
|
||||
<command>COPY TO</command>.
|
||||
<command>VACUUM</command> should be used to clean up
|
||||
after a failed copy.
|
||||
</para>
|
||||
<para>
|
||||
Because the Postgres backend's current working directory
|
||||
is not usually the same as the user's
|
||||
working directory, the result of copying to a file
|
||||
"<filename>foo</filename>" (without
|
||||
additional path information) may yield unexpected results for the
|
||||
naive user. In this case, <filename>foo</filename>
|
||||
will wind up in <filename>$PGDATA/foo</filename>. In
|
||||
general, the full pathname as it would appear to the backend server machine
|
||||
should be used when specifying files to
|
||||
be copied.
|
||||
</para>
|
||||
<para>
|
||||
Files used as arguments to <command>COPY</command>
|
||||
must reside on or be
|
||||
accessible to the database server machine by being either on
|
||||
local disks or on a networked file system.
|
||||
</para>
|
||||
<para>
|
||||
When a TCP/IP connection from one machine to another is used, and a
|
||||
target file is specified, the target file will be written on the
|
||||
machine where the backend is running rather than the user's
|
||||
machine.
|
||||
It is recommended that the filename used in <command>COPY</command>
|
||||
always be specified as an absolute path. This is enforced by the backend
|
||||
in the case of <command>COPY TO</command>, but for <command>COPY
|
||||
FROM</command> you do have the option of reading from a file specified
|
||||
by a relative path. The path will be interpreted relative to the
|
||||
backend's working directory (somewhere below
|
||||
<filename>$PGDATA</filename>), not the client's working directory.
|
||||
</para>
|
||||
</refsect2>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="R1-SQL-COPY-2">
|
||||
<refsect1info>
|
||||
<date>1998-05-04</date>
|
||||
<date>2001-01-02</date>
|
||||
</refsect1info>
|
||||
<title>File Formats</title>
|
||||
<refsect2>
|
||||
<refsect2info>
|
||||
<date>1998-05-04</date>
|
||||
<date>2001-01-02</date>
|
||||
</refsect2info>
|
||||
<title>Text Format</title>
|
||||
<para>
|
||||
@ -293,27 +312,34 @@ ERROR: <replaceable>reason</replaceable>
|
||||
<para>
|
||||
The actual format for each instance is
|
||||
<programlisting>
|
||||
<attr1><<replaceable class=parameter>separator</replaceable>><attr2><<replaceable class=parameter>separator</replaceable>>...<<replaceable class=parameter>separator</replaceable>><attr<replaceable class="parameter">n</replaceable>><newline>.
|
||||
<attr1><<replaceable class=parameter>separator</replaceable>><attr2><<replaceable class=parameter>separator</replaceable>>...<<replaceable class=parameter>separator</replaceable>><attr<replaceable class="parameter">n</replaceable>><newline>
|
||||
</programlisting>
|
||||
The oid is placed on the beginning of the line
|
||||
if WITH OIDS is specified.
|
||||
Note that the end of each row is marked by a Unix-style newline
|
||||
("\n"). <command>COPY FROM</command> will not behave as desired
|
||||
if given a file containing DOS- or Mac-style newlines.
|
||||
</para>
|
||||
<para>
|
||||
If <command>COPY</command> is sending its output to standard
|
||||
output instead of a file, it will send a backslash("\") and a period
|
||||
(".") followed immediately by a newline, on a separate line,
|
||||
when it is done. Similarly, if <command>COPY</command> is reading
|
||||
The OID is emitted as the first column if WITH OIDS is specified.
|
||||
</para>
|
||||
<para>
|
||||
If <command>COPY TO</command> is sending its output to standard
|
||||
output instead of a file, after the last row it will send a backslash ("\")
|
||||
and a period (".") followed by a newline.
|
||||
Similarly, if <command>COPY FROM</command> is reading
|
||||
from standard input, it will expect a backslash ("\") and a period
|
||||
(".") followed by a newline, as the first three characters on a
|
||||
line to denote end-of-file. However, <command>COPY</command>
|
||||
will terminate (followed by the backend itself) if a true EOF is
|
||||
encountered before this special end-of-file pattern is found.
|
||||
line to denote end-of-file. However, <command>COPY FROM</command>
|
||||
will terminate correctly (followed by the backend itself) if the
|
||||
input connection is closed before this special end-of-file pattern is
|
||||
found.
|
||||
</para>
|
||||
<para>
|
||||
The backslash character has other special meanings. A literal backslash
|
||||
character is represented as two
|
||||
consecutive backslashes ("\\"). A literal tab character is represented
|
||||
as a backslash and a tab. A literal newline character is
|
||||
as a backslash and a tab. (If you are using something other than tab
|
||||
as the column delimiter, backslash that delimiter character to include
|
||||
it in data.) A literal newline character is
|
||||
represented as a backslash and a newline. When loading text data
|
||||
not generated by <acronym>Postgres</acronym>,
|
||||
you will need to convert backslash
|
||||
@ -324,82 +350,207 @@ ERROR: <replaceable>reason</replaceable>
|
||||
|
||||
<refsect2>
|
||||
<refsect2info>
|
||||
<date>1998-05-04</date>
|
||||
<date>2001-01-02</date>
|
||||
</refsect2info>
|
||||
<title>Binary Format</title>
|
||||
<para>
|
||||
In the case of <command>COPY BINARY</command>, the first four
|
||||
bytes in the file will be the number of instances in the file. If
|
||||
this number is zero, the <command>COPY BINARY</command> command
|
||||
will read until end-of-file is encountered. Otherwise, it will
|
||||
stop reading when this number of instances has been read.
|
||||
Remaining data in the file will be ignored.
|
||||
</para>
|
||||
<para>
|
||||
The format for each instance in the file is as follows. Note that
|
||||
this format must be followed <emphasis>exactly</emphasis>.
|
||||
Unsigned four-byte integer quantities are called uint32 in the
|
||||
table below.
|
||||
</para>
|
||||
<table frame="all">
|
||||
<title>Contents of a binary copy file</title>
|
||||
<tgroup cols="2" colsep="1" rowsep="1" align="center">
|
||||
<colspec colname="col1">
|
||||
<colspec colname="col2">
|
||||
<spanspec namest="col1" nameend="col2" spanname="subhead">
|
||||
<tbody>
|
||||
<row>
|
||||
<entry spanname="subhead" align="center">At the start of the file</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>uint32</entry>
|
||||
<entry>number of tuples</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry spanname="subhead" align="center">For each tuple</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>uint32</entry>
|
||||
<entry>total length of tuple data</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>uint32</entry>
|
||||
<entry>oid (if specified)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>uint32</entry>
|
||||
<entry>number of null attributes</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>[uint32,...,uint32]</entry>
|
||||
<entry>attribute numbers of attributes, counting from 0</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>-</entry>
|
||||
<entry><tuple data></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<refsect2info>
|
||||
<date>1998-05-04</date>
|
||||
</refsect2info>
|
||||
<title>Alignment of Binary Data</title>
|
||||
<para>
|
||||
On Sun-3s, 2-byte attributes are aligned on two-byte boundaries,
|
||||
and all larger attributes are aligned on four-byte boundaries.
|
||||
Character attributes are aligned on single-byte boundaries. On
|
||||
most other machines, all attributes larger than 1 byte are aligned on
|
||||
four-byte boundaries. Note that variable length attributes are
|
||||
preceded by the attribute's length; arrays are simply contiguous
|
||||
streams of the array element type.
|
||||
The file format used for <command>COPY BINARY</command> changed in
|
||||
Postgres v7.1. The new format consists of a file header, zero or more
|
||||
tuples, and a file trailer.
|
||||
</para>
|
||||
|
||||
<refsect3>
|
||||
<refsect3info>
|
||||
<date>2001-01-02</date>
|
||||
</refsect3info>
|
||||
<title>
|
||||
File Header
|
||||
</title>
|
||||
<para>
|
||||
The file header consists of 24 bytes of fixed fields, followed
|
||||
by a variable-length header extension area. The fixed fields are:
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term>Signature</term>
|
||||
<listitem>
|
||||
<para>
|
||||
12-byte sequence "PGBCOPY\n\377\r\n\0" --- note that the null
|
||||
is a required part of the signature. (The signature is designed to allow
|
||||
easy identification of files that have been munged by a non-8-bit-clean
|
||||
transfer. This signature will be changed by newline-translation
|
||||
filters, dropped nulls, dropped high bits, or parity changes.)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>Integer layout field</term>
|
||||
<listitem>
|
||||
<para>
|
||||
int32 constant 0x01020304 in source's byte order.
|
||||
Potentially, a reader could engage in byte-flipping of subsequent fields
|
||||
if the wrong byte order is detected here.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>Flags field</term>
|
||||
<listitem>
|
||||
<para>
|
||||
int32 bit mask to denote important aspects of the file
|
||||
format. Bits are numbered from 0 (LSB) to 31 (MSB) --- note that this
|
||||
field is stored with source's endianness, as are all subsequent integer
|
||||
fields. Bits 16-31 are reserved to denote critical file format issues;
|
||||
a reader should abort if it finds an unexpected bit set in this range.
|
||||
Bits 0-15 are reserved to signal backwards-compatible format issues;
|
||||
a reader should simply ignore any unexpected bits set in this range.
|
||||
Currently only one flag bit is defined, and the rest must be zero:
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term>Bit 16</term>
|
||||
<listitem>
|
||||
<para>
|
||||
if 1, OIDs are included in the dump; if 0, not
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>Header extension area length</term>
|
||||
<listitem>
|
||||
<para>
|
||||
int32 length in bytes of remainder of header, not including self. In
|
||||
the initial version this will be zero, and the first tuple follows
|
||||
immediately. Future changes to the format might allow additional data
|
||||
to be present in the header. A reader should silently skip over any header
|
||||
extension data it does not know what to do with.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The header extension area is envisioned to contain a sequence of
|
||||
self-identifying chunks. The flags field is not intended to tell readers
|
||||
what is in the extension area. Specific design of header extension contents
|
||||
is left for a later release.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
This design allows for both backwards-compatible header additions (add
|
||||
header extension chunks, or set low-order flag bits) and
|
||||
non-backwards-compatible changes (set high-order flag bits to signal such
|
||||
changes, and add supporting data to the extension area if needed).
|
||||
</para>
|
||||
</refsect3>
|
||||
|
||||
<refsect3>
|
||||
<refsect3info>
|
||||
<date>2001-01-02</date>
|
||||
</refsect3info>
|
||||
<title>
|
||||
Tuples
|
||||
</title>
|
||||
<para>
|
||||
Each tuple begins with an int16 count of the number of fields in the
|
||||
tuple. (Presently, all tuples in a table will have the same count, but
|
||||
that might not always be true.) Then, repeated for each field in the
|
||||
tuple, there is an int16 typlen word possibly followed by field data.
|
||||
The typlen field is interpreted thus:
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term>Zero</term>
|
||||
<listitem>
|
||||
<para>
|
||||
Field is NULL. No data follows.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>> 0</term>
|
||||
<listitem>
|
||||
<para>
|
||||
Field is a fixed-length datatype. Exactly N
|
||||
bytes of data follow the typlen word, where N is the typlen value.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>-1</term>
|
||||
<listitem>
|
||||
<para>
|
||||
Field is a varlena datatype. The next four
|
||||
bytes are the varlena header, which contains
|
||||
the total value length including itself.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>< -1</term>
|
||||
<listitem>
|
||||
<para>
|
||||
Reserved for future use.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For non-NULL fields, the reader can check that the typlen matches the
|
||||
expected typlen for the destination column. This provides a simple
|
||||
but very useful check that the data is as expected.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
There is no alignment padding or any other extra data between fields.
|
||||
Note also that the format does not distinguish whether a datatype is
|
||||
pass-by-reference or pass-by-value. Both of these provisions are
|
||||
deliberate: they might help improve portability of the files (although
|
||||
of course endianness and floating-point-format issues can still keep
|
||||
you from moving a binary file across machines).
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If OIDs are included in the dump, the OID field immediately follows the
|
||||
field-count word. It is a normal field except that it's not included
|
||||
in the field-count. In particular it has a typlen --- this will allow
|
||||
handling of 4-byte vs 8-byte OIDs without too much pain, and will allow
|
||||
OIDs to be shown as NULL if we someday allow OIDs to be optional.
|
||||
</para>
|
||||
</refsect3>
|
||||
|
||||
<refsect3>
|
||||
<refsect3info>
|
||||
<date>2001-01-02</date>
|
||||
</refsect3info>
|
||||
<title>
|
||||
File Trailer
|
||||
</title>
|
||||
<para>
|
||||
The file trailer consists of an int16 word containing -1. This is
|
||||
easily distinguished from a tuple's field-count word.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
A reader should report an error if a field-count word is neither -1
|
||||
nor the expected number of columns. This provides an extra
|
||||
check against somehow getting out of sync with the data.
|
||||
</para>
|
||||
</refsect3>
|
||||
</refsect2>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1 id="R1-SQL-COPY-3">
|
||||
<title>
|
||||
@ -407,7 +558,7 @@ ERROR: <replaceable>reason</replaceable>
|
||||
</title>
|
||||
<para>
|
||||
The following example copies a table to standard output,
|
||||
using a pipe (|) as the field
|
||||
using a vertical bar (|) as the field
|
||||
delimiter:
|
||||
</para>
|
||||
<programlisting>
|
||||
@ -425,36 +576,36 @@ COPY country FROM '/usr1/proj/bray/sql/country_data';
|
||||
has the termination sequence on the last line):
|
||||
</para>
|
||||
<programlisting>
|
||||
AF AFGHANISTAN
|
||||
AL ALBANIA
|
||||
DZ ALGERIA
|
||||
...
|
||||
ZM ZAMBIA
|
||||
ZW ZIMBABWE
|
||||
\.
|
||||
AF AFGHANISTAN
|
||||
AL ALBANIA
|
||||
DZ ALGERIA
|
||||
ZM ZAMBIA
|
||||
ZW ZIMBABWE
|
||||
\.
|
||||
</programlisting>
|
||||
<para>
|
||||
The following is the same data, output in binary format on a Linux/i586 machine.
|
||||
The data is shown after filtering through
|
||||
the Unix utility <command>od -c</command>. The table has
|
||||
three fields; the first is <classname>char(2)</classname>
|
||||
and the second is <classname>text</classname>. All the
|
||||
Note that the white space on each line is actually a TAB.
|
||||
</para>
|
||||
<para>
|
||||
The following is the same data, output in binary format on a Linux/i586
|
||||
machine. The data is shown after filtering through
|
||||
the Unix utility <command>od -c</command>. The table has
|
||||
three fields; the first is <classname>char(2)</classname>,
|
||||
the second is <classname>text</classname>, and the third is
|
||||
<classname>int4</classname>. All the
|
||||
rows have a null value in the third field.
|
||||
Notice how the <classname>char(2)</classname>
|
||||
and text fields are each preceded by a typlen word of -1 and a four-byte
|
||||
length, with no padding between fields:
|
||||
</para>
|
||||
<programlisting>
|
||||
355 \0 \0 \0 027 \0 \0 \0 001 \0 \0 \0 002 \0 \0 \0
|
||||
006 \0 \0 \0 A F \0 \0 017 \0 \0 \0 A F G H
|
||||
A N I S T A N 023 \0 \0 \0 001 \0 \0 \0 002
|
||||
\0 \0 \0 006 \0 \0 \0 A L \0 \0 \v \0 \0 \0 A
|
||||
L B A N I A 023 \0 \0 \0 001 \0 \0 \0 002 \0
|
||||
\0 \0 006 \0 \0 \0 D Z \0 \0 \v \0 \0 \0 A L
|
||||
G E R I A
|
||||
... \n \0 \0 \0 Z A M B I A 024 \0
|
||||
\0 \0 001 \0 \0 \0 002 \0 \0 \0 006 \0 \0 \0 Z W
|
||||
\0 \0 \f \0 \0 \0 Z I M B A B W E
|
||||
0000000 P G B C O P Y \n 377 \r \n \0 004 003 002 001
|
||||
0000020 \0 \0 \0 \0 \0 \0 \0 \0 003 \0 377 377 006 \0 \0 \0
|
||||
0000040 A F 377 377 017 \0 \0 \0 A F G H A N I S
|
||||
0000060 T A N \0 \0 003 \0 377 377 006 \0 \0 \0 A L 377
|
||||
0000100 377 \v \0 \0 \0 A L B A N I A \0 \0 003 \0
|
||||
0000120 377 377 006 \0 \0 \0 D Z 377 377 \v \0 \0 \0 A L
|
||||
0000140 G E R I A \0 \0 003 \0 377 377 006 \0 \0 \0 Z
|
||||
0000160 M 377 377 \n \0 \0 \0 Z A M B I A \0 \0 003
|
||||
0000200 \0 377 377 006 \0 \0 \0 Z W 377 377 \f \0 \0 \0 Z
|
||||
0000220 I M B A B W E \0 \0 377 377
|
||||
</programlisting>
|
||||
</refsect1>
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.126 2000/12/27 23:59:14 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.127 2001/01/03 20:04:10 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -52,7 +52,8 @@ static Oid GetTypeElement(Oid type);
|
||||
static void CopyReadNewline(FILE *fp, int *newline);
|
||||
static char *CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_print);
|
||||
static void CopyAttributeOut(FILE *fp, char *string, char *delim);
|
||||
static int CountTuples(Relation relation);
|
||||
|
||||
/*
 * Signature that opens a binary COPY file; CopyTo sends these 12 bytes
 * first when generating the binary header.
 *
 * The array is sized to exactly 12 so the string literal's implicit
 * terminator is not stored: the explicit \0 written in the literal is the
 * 12th and last signature byte.
 */
static const char BinarySignature[12] = "PGBCOPY\n\377\r\n\0";
|
||||
|
||||
/*
|
||||
* Static communication variables ... pretty grotty, but COPY has
|
||||
@ -387,7 +388,8 @@ DoCopy(char *relname, bool binary, bool oids, bool from, bool pipe,
|
||||
* Copy from relation TO file.
|
||||
*/
|
||||
static void
|
||||
CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_print)
|
||||
CopyTo(Relation rel, bool binary, bool oids, FILE *fp,
|
||||
char *delim, char *null_print)
|
||||
{
|
||||
HeapTuple tuple;
|
||||
TupleDesc tupDesc;
|
||||
@ -398,20 +400,9 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_p
|
||||
FmgrInfo *out_functions;
|
||||
Oid *elements;
|
||||
bool *isvarlena;
|
||||
int32 *typmod;
|
||||
char *nulls;
|
||||
|
||||
/*
|
||||
* <nulls> is a (dynamically allocated) array with one character per
|
||||
* attribute in the instance being copied. nulls[I-1] is 'n' if
|
||||
* Attribute Number I is null, and ' ' otherwise.
|
||||
*
|
||||
* <nulls> is meaningful only if we are doing a binary copy.
|
||||
*/
|
||||
int16 fld_size;
|
||||
char *string;
|
||||
|
||||
scandesc = heap_beginscan(rel, 0, QuerySnapshot, 0, NULL);
|
||||
|
||||
tupDesc = rel->rd_att;
|
||||
attr_count = rel->rd_att->natts;
|
||||
attr = rel->rd_att->attrs;
|
||||
@ -420,7 +411,6 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_p
|
||||
out_functions = (FmgrInfo *) palloc(attr_count * sizeof(FmgrInfo));
|
||||
elements = (Oid *) palloc(attr_count * sizeof(Oid));
|
||||
isvarlena = (bool *) palloc(attr_count * sizeof(bool));
|
||||
typmod = (int32 *) palloc(attr_count * sizeof(int32));
|
||||
for (i = 0; i < attr_count; i++)
|
||||
{
|
||||
Oid out_func_oid;
|
||||
@ -430,40 +420,62 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_p
|
||||
elog(ERROR, "COPY: couldn't lookup info for type %u",
|
||||
attr[i]->atttypid);
|
||||
fmgr_info(out_func_oid, &out_functions[i]);
|
||||
typmod[i] = attr[i]->atttypmod;
|
||||
}
|
||||
|
||||
if (!binary)
|
||||
if (binary)
|
||||
{
|
||||
nulls = NULL; /* meaningless, but compiler doesn't know
|
||||
* that */
|
||||
/* Generate header for a binary copy */
|
||||
int32 tmp;
|
||||
|
||||
/* Signature */
|
||||
CopySendData((char *) BinarySignature, 12, fp);
|
||||
/* Integer layout field */
|
||||
tmp = 0x01020304;
|
||||
CopySendData(&tmp, sizeof(int32), fp);
|
||||
/* Flags field */
|
||||
tmp = 0;
|
||||
if (oids)
|
||||
tmp |= (1 << 16);
|
||||
CopySendData(&tmp, sizeof(int32), fp);
|
||||
/* No header extension */
|
||||
tmp = 0;
|
||||
CopySendData(&tmp, sizeof(int32), fp);
|
||||
}
|
||||
else
|
||||
{
|
||||
int32 ntuples;
|
||||
|
||||
nulls = (char *) palloc(attr_count);
|
||||
for (i = 0; i < attr_count; i++)
|
||||
nulls[i] = ' ';
|
||||
|
||||
/* XXX expensive */
|
||||
|
||||
ntuples = CountTuples(rel);
|
||||
CopySendData(&ntuples, sizeof(int32), fp);
|
||||
}
|
||||
scandesc = heap_beginscan(rel, 0, QuerySnapshot, 0, NULL);
|
||||
|
||||
while (HeapTupleIsValid(tuple = heap_getnext(scandesc, 0)))
|
||||
{
|
||||
bool need_delim = false;
|
||||
|
||||
if (QueryCancel)
|
||||
CancelQuery();
|
||||
|
||||
if (oids && !binary)
|
||||
if (binary)
|
||||
{
|
||||
string = DatumGetCString(DirectFunctionCall1(oidout,
|
||||
ObjectIdGetDatum(tuple->t_data->t_oid)));
|
||||
CopySendString(string, fp);
|
||||
CopySendChar(delim[0], fp);
|
||||
pfree(string);
|
||||
/* Binary per-tuple header */
|
||||
int16 fld_count = attr_count;
|
||||
|
||||
CopySendData(&fld_count, sizeof(int16), fp);
|
||||
/* Send OID if wanted --- note fld_count doesn't include it */
|
||||
if (oids)
|
||||
{
|
||||
fld_size = sizeof(Oid);
|
||||
CopySendData(&fld_size, sizeof(int16), fp);
|
||||
CopySendData(&tuple->t_data->t_oid, sizeof(Oid), fp);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Text format has no per-tuple header, but send OID if wanted */
|
||||
if (oids)
|
||||
{
|
||||
string = DatumGetCString(DirectFunctionCall1(oidout,
|
||||
ObjectIdGetDatum(tuple->t_data->t_oid)));
|
||||
CopySendString(string, fp);
|
||||
pfree(string);
|
||||
need_delim = true;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < attr_count; i++)
|
||||
@ -474,18 +486,31 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_p
|
||||
|
||||
origvalue = heap_getattr(tuple, i + 1, tupDesc, &isnull);
|
||||
|
||||
if (!binary)
|
||||
{
|
||||
if (need_delim)
|
||||
CopySendChar(delim[0], fp);
|
||||
need_delim = true;
|
||||
}
|
||||
|
||||
if (isnull)
|
||||
{
|
||||
if (!binary)
|
||||
{
|
||||
CopySendString(null_print, fp); /* null indicator */
|
||||
}
|
||||
else
|
||||
nulls[i] = 'n';
|
||||
{
|
||||
fld_size = 0; /* null marker */
|
||||
CopySendData(&fld_size, sizeof(int16), fp);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* If we have a toasted datum, forcibly detoast it to avoid
|
||||
* memory leakage inside the type's output routine.
|
||||
* memory leakage inside the type's output routine (or
|
||||
* for binary case, because we must output untoasted value).
|
||||
*/
|
||||
if (isvarlena[i])
|
||||
value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
|
||||
@ -495,75 +520,71 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_p
|
||||
if (!binary)
|
||||
{
|
||||
string = DatumGetCString(FunctionCall3(&out_functions[i],
|
||||
value,
|
||||
ObjectIdGetDatum(elements[i]),
|
||||
Int32GetDatum(typmod[i])));
|
||||
value,
|
||||
ObjectIdGetDatum(elements[i]),
|
||||
Int32GetDatum(attr[i]->atttypmod)));
|
||||
CopyAttributeOut(fp, string, delim);
|
||||
pfree(string);
|
||||
}
|
||||
else
|
||||
{
|
||||
fld_size = attr[i]->attlen;
|
||||
CopySendData(&fld_size, sizeof(int16), fp);
|
||||
if (isvarlena[i])
|
||||
{
|
||||
/* varlena */
|
||||
Assert(fld_size == -1);
|
||||
CopySendData(DatumGetPointer(value),
|
||||
VARSIZE(value),
|
||||
fp);
|
||||
}
|
||||
else if (!attr[i]->attbyval)
|
||||
{
|
||||
/* fixed-length pass-by-reference */
|
||||
Assert(fld_size > 0);
|
||||
CopySendData(DatumGetPointer(value),
|
||||
fld_size,
|
||||
fp);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* pass-by-value */
|
||||
Datum datumBuf;
|
||||
|
||||
/*
|
||||
* We need this horsing around because we don't know
|
||||
* how shorter data values are aligned within a Datum.
|
||||
*/
|
||||
store_att_byval(&datumBuf, value, fld_size);
|
||||
CopySendData(&datumBuf,
|
||||
fld_size,
|
||||
fp);
|
||||
}
|
||||
}
|
||||
|
||||
/* Clean up detoasted copy, if any */
|
||||
if (value != origvalue)
|
||||
pfree(DatumGetPointer(value));
|
||||
}
|
||||
|
||||
if (!binary)
|
||||
{
|
||||
if (i == attr_count - 1)
|
||||
CopySendChar('\n', fp);
|
||||
else
|
||||
{
|
||||
|
||||
/*
|
||||
* when copying out, only use the first char of the
|
||||
* delim string
|
||||
*/
|
||||
CopySendChar(delim[0], fp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (binary)
|
||||
{
|
||||
int32 null_ct = 0,
|
||||
length;
|
||||
|
||||
for (i = 0; i < attr_count; i++)
|
||||
{
|
||||
if (nulls[i] == 'n')
|
||||
null_ct++;
|
||||
}
|
||||
|
||||
length = tuple->t_len - tuple->t_data->t_hoff;
|
||||
CopySendData(&length, sizeof(int32), fp);
|
||||
if (oids)
|
||||
CopySendData((char *) &tuple->t_data->t_oid, sizeof(int32), fp);
|
||||
|
||||
CopySendData(&null_ct, sizeof(int32), fp);
|
||||
if (null_ct > 0)
|
||||
{
|
||||
for (i = 0; i < attr_count; i++)
|
||||
{
|
||||
if (nulls[i] == 'n')
|
||||
{
|
||||
CopySendData(&i, sizeof(int32), fp);
|
||||
nulls[i] = ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
CopySendData((char *) tuple->t_data + tuple->t_data->t_hoff,
|
||||
length, fp);
|
||||
}
|
||||
if (!binary)
|
||||
CopySendChar('\n', fp);
|
||||
}
|
||||
|
||||
heap_endscan(scandesc);
|
||||
|
||||
if (binary)
|
||||
{
|
||||
/* Generate trailer for a binary copy */
|
||||
int16 fld_count = -1;
|
||||
|
||||
CopySendData(&fld_count, sizeof(int16), fp);
|
||||
}
|
||||
|
||||
pfree(out_functions);
|
||||
pfree(elements);
|
||||
pfree(isvarlena);
|
||||
pfree(typmod);
|
||||
if (binary)
|
||||
pfree(nulls);
|
||||
}
|
||||
|
||||
|
||||
@ -580,27 +601,20 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
||||
AttrNumber attr_count;
|
||||
FmgrInfo *in_functions;
|
||||
Oid *elements;
|
||||
int32 *typmod;
|
||||
int i;
|
||||
Oid in_func_oid;
|
||||
Datum *values;
|
||||
char *nulls;
|
||||
bool isnull;
|
||||
int done = 0;
|
||||
char *string = NULL,
|
||||
*ptr;
|
||||
int32 len,
|
||||
null_ct,
|
||||
null_id;
|
||||
int32 ntuples,
|
||||
tuples_read = 0;
|
||||
bool reading_to_eof = true;
|
||||
char *string;
|
||||
ResultRelInfo *resultRelInfo;
|
||||
EState *estate = CreateExecutorState(); /* for ExecConstraints() */
|
||||
TupleTable tupleTable;
|
||||
TupleTableSlot *slot;
|
||||
Oid loaded_oid = InvalidOid;
|
||||
bool skip_tuple = false;
|
||||
bool file_has_oids;
|
||||
|
||||
tupDesc = RelationGetDescr(rel);
|
||||
attr = tupDesc->attrs;
|
||||
@ -630,31 +644,58 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
||||
{
|
||||
in_functions = (FmgrInfo *) palloc(attr_count * sizeof(FmgrInfo));
|
||||
elements = (Oid *) palloc(attr_count * sizeof(Oid));
|
||||
typmod = (int32 *) palloc(attr_count * sizeof(int32));
|
||||
for (i = 0; i < attr_count; i++)
|
||||
{
|
||||
in_func_oid = (Oid) GetInputFunction(attr[i]->atttypid);
|
||||
fmgr_info(in_func_oid, &in_functions[i]);
|
||||
elements[i] = GetTypeElement(attr[i]->atttypid);
|
||||
typmod[i] = attr[i]->atttypmod;
|
||||
}
|
||||
file_has_oids = oids; /* must rely on user to tell us this... */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Read and verify binary header */
|
||||
char readSig[12];
|
||||
int32 tmp;
|
||||
|
||||
/* Signature */
|
||||
CopyGetData(readSig, 12, fp);
|
||||
if (CopyGetEof(fp) ||
|
||||
memcmp(readSig, BinarySignature, 12) != 0)
|
||||
elog(ERROR, "COPY BINARY: file signature not recognized");
|
||||
/* Integer layout field */
|
||||
CopyGetData(&tmp, sizeof(int32), fp);
|
||||
if (CopyGetEof(fp) ||
|
||||
tmp != 0x01020304)
|
||||
elog(ERROR, "COPY BINARY: incompatible integer layout");
|
||||
/* Flags field */
|
||||
CopyGetData(&tmp, sizeof(int32), fp);
|
||||
if (CopyGetEof(fp))
|
||||
elog(ERROR, "COPY BINARY: bogus file header (missing flags)");
|
||||
file_has_oids = (tmp & (1 << 16)) != 0;
|
||||
tmp &= ~ (1 << 16);
|
||||
if ((tmp >> 16) != 0)
|
||||
elog(ERROR, "COPY BINARY: unrecognized critical flags in header");
|
||||
/* Header extension length */
|
||||
CopyGetData(&tmp, sizeof(int32), fp);
|
||||
if (CopyGetEof(fp) ||
|
||||
tmp < 0)
|
||||
elog(ERROR, "COPY BINARY: bogus file header (missing length)");
|
||||
/* Skip extension header, if present */
|
||||
while (tmp-- > 0)
|
||||
{
|
||||
CopyGetData(readSig, 1, fp);
|
||||
if (CopyGetEof(fp))
|
||||
elog(ERROR, "COPY BINARY: bogus file header (wrong length)");
|
||||
}
|
||||
|
||||
in_functions = NULL;
|
||||
elements = NULL;
|
||||
typmod = NULL;
|
||||
CopyGetData(&ntuples, sizeof(int32), fp);
|
||||
if (ntuples != 0)
|
||||
reading_to_eof = false;
|
||||
}
|
||||
|
||||
values = (Datum *) palloc(attr_count * sizeof(Datum));
|
||||
nulls = (char *) palloc(attr_count * sizeof(char));
|
||||
|
||||
for (i = 0; i < attr_count; i++)
|
||||
nulls[i] = ' ';
|
||||
|
||||
lineno = 0;
|
||||
fe_eof = false;
|
||||
|
||||
@ -668,15 +709,22 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
||||
|
||||
lineno++;
|
||||
|
||||
/* Initialize all values for row to NULL */
|
||||
MemSet(values, 0, attr_count * sizeof(Datum));
|
||||
MemSet(nulls, 'n', attr_count * sizeof(char));
|
||||
|
||||
if (!binary)
|
||||
{
|
||||
int newline = 0;
|
||||
|
||||
if (oids)
|
||||
if (file_has_oids)
|
||||
{
|
||||
string = CopyReadAttribute(fp, &isnull, delim, &newline, null_print);
|
||||
if (string == NULL)
|
||||
done = 1;
|
||||
string = CopyReadAttribute(fp, &isnull, delim,
|
||||
&newline, null_print);
|
||||
if (isnull)
|
||||
elog(ERROR, "COPY TEXT: NULL Oid");
|
||||
else if (string == NULL)
|
||||
done = 1; /* end of file */
|
||||
else
|
||||
{
|
||||
loaded_oid = DatumGetObjectId(DirectFunctionCall1(oidin,
|
||||
@ -685,22 +733,24 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
||||
elog(ERROR, "COPY TEXT: Invalid Oid");
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < attr_count && !done; i++)
|
||||
{
|
||||
string = CopyReadAttribute(fp, &isnull, delim, &newline, null_print);
|
||||
string = CopyReadAttribute(fp, &isnull, delim,
|
||||
&newline, null_print);
|
||||
if (isnull)
|
||||
{
|
||||
values[i] = PointerGetDatum(NULL);
|
||||
nulls[i] = 'n';
|
||||
/* already set values[i] and nulls[i] */
|
||||
}
|
||||
else if (string == NULL)
|
||||
done = 1;
|
||||
done = 1; /* end of file */
|
||||
else
|
||||
{
|
||||
values[i] = FunctionCall3(&in_functions[i],
|
||||
CStringGetDatum(string),
|
||||
ObjectIdGetDatum(elements[i]),
|
||||
Int32GetDatum(typmod[i]));
|
||||
Int32GetDatum(attr[i]->atttypmod));
|
||||
nulls[i] = ' ';
|
||||
}
|
||||
}
|
||||
if (!done)
|
||||
@ -708,47 +758,103 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
||||
}
|
||||
else
|
||||
{ /* binary */
|
||||
CopyGetData(&len, sizeof(int32), fp);
|
||||
if (CopyGetEof(fp))
|
||||
int16 fld_count,
|
||||
fld_size;
|
||||
|
||||
CopyGetData(&fld_count, sizeof(int16), fp);
|
||||
if (CopyGetEof(fp) ||
|
||||
fld_count == -1)
|
||||
done = 1;
|
||||
else
|
||||
{
|
||||
if (oids)
|
||||
if (fld_count <= 0 || fld_count > attr_count)
|
||||
elog(ERROR, "COPY BINARY: tuple field count is %d, expected %d",
|
||||
(int) fld_count, attr_count);
|
||||
|
||||
if (file_has_oids)
|
||||
{
|
||||
CopyGetData(&loaded_oid, sizeof(int32), fp);
|
||||
CopyGetData(&fld_size, sizeof(int16), fp);
|
||||
if (CopyGetEof(fp))
|
||||
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||
if (fld_size != (int16) sizeof(Oid))
|
||||
elog(ERROR, "COPY BINARY: sizeof(Oid) is %d, expected %d",
|
||||
(int) fld_size, (int) sizeof(Oid));
|
||||
CopyGetData(&loaded_oid, sizeof(Oid), fp);
|
||||
if (CopyGetEof(fp))
|
||||
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||
if (loaded_oid == InvalidOid)
|
||||
elog(ERROR, "COPY BINARY: Invalid Oid");
|
||||
}
|
||||
CopyGetData(&null_ct, sizeof(int32), fp);
|
||||
if (null_ct > 0)
|
||||
|
||||
for (i = 0; i < (int) fld_count; i++)
|
||||
{
|
||||
for (i = 0; i < null_ct; i++)
|
||||
CopyGetData(&fld_size, sizeof(int16), fp);
|
||||
if (CopyGetEof(fp))
|
||||
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||
if (fld_size == 0)
|
||||
continue; /* it's NULL; nulls[i] already set */
|
||||
if (fld_size != attr[i]->attlen)
|
||||
elog(ERROR, "COPY BINARY: sizeof(field %d) is %d, expected %d",
|
||||
i+1, (int) fld_size, (int) attr[i]->attlen);
|
||||
if (fld_size == -1)
|
||||
{
|
||||
CopyGetData(&null_id, sizeof(int32), fp);
|
||||
nulls[null_id] = 'n';
|
||||
/* varlena field */
|
||||
int32 varlena_size;
|
||||
Pointer varlena_ptr;
|
||||
|
||||
CopyGetData(&varlena_size, sizeof(int32), fp);
|
||||
if (CopyGetEof(fp))
|
||||
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||
if (varlena_size < (int32) sizeof(int32))
|
||||
elog(ERROR, "COPY BINARY: bogus varlena length");
|
||||
varlena_ptr = (Pointer) palloc(varlena_size);
|
||||
VARATT_SIZEP(varlena_ptr) = varlena_size;
|
||||
CopyGetData(VARDATA(varlena_ptr),
|
||||
varlena_size - sizeof(int32),
|
||||
fp);
|
||||
if (CopyGetEof(fp))
|
||||
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||
values[i] = PointerGetDatum(varlena_ptr);
|
||||
}
|
||||
}
|
||||
else if (!attr[i]->attbyval)
|
||||
{
|
||||
/* fixed-length pass-by-reference */
|
||||
Pointer refval_ptr;
|
||||
|
||||
string = (char *) palloc(len);
|
||||
CopyGetData(string, len, fp);
|
||||
Assert(fld_size > 0);
|
||||
refval_ptr = (Pointer) palloc(fld_size);
|
||||
CopyGetData(refval_ptr, fld_size, fp);
|
||||
if (CopyGetEof(fp))
|
||||
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||
values[i] = PointerGetDatum(refval_ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* pass-by-value */
|
||||
Datum datumBuf;
|
||||
|
||||
ptr = string;
|
||||
/*
|
||||
* We need this horsing around because we don't know
|
||||
* how shorter data values are aligned within a Datum.
|
||||
*/
|
||||
Assert(fld_size > 0 && fld_size <= sizeof(Datum));
|
||||
CopyGetData(&datumBuf, fld_size, fp);
|
||||
if (CopyGetEof(fp))
|
||||
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||
values[i] = fetch_att(&datumBuf, true, fld_size);
|
||||
}
|
||||
|
||||
for (i = 0; i < attr_count; i++)
|
||||
{
|
||||
if (nulls[i] == 'n')
|
||||
continue;
|
||||
ptr = (char *) att_align((long) ptr, attr[i]->attlen, attr[i]->attalign);
|
||||
values[i] = fetchatt(attr[i], ptr);
|
||||
ptr = att_addlength(ptr, attr[i]->attlen, ptr);
|
||||
nulls[i] = ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (done)
|
||||
continue;
|
||||
break;
|
||||
|
||||
tuple = heap_formtuple(tupDesc, values, nulls);
|
||||
if (oids)
|
||||
|
||||
if (oids && file_has_oids)
|
||||
tuple->t_data->t_oid = loaded_oid;
|
||||
|
||||
skip_tuple = false;
|
||||
@ -796,25 +902,13 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
||||
ExecARInsertTriggers(rel, tuple);
|
||||
}
|
||||
|
||||
if (binary)
|
||||
pfree(string);
|
||||
|
||||
for (i = 0; i < attr_count; i++)
|
||||
{
|
||||
if (!attr[i]->attbyval && nulls[i] != 'n')
|
||||
{
|
||||
if (!binary)
|
||||
pfree((void *) values[i]);
|
||||
}
|
||||
/* reset nulls[] array for next time */
|
||||
nulls[i] = ' ';
|
||||
pfree(DatumGetPointer(values[i]));
|
||||
}
|
||||
|
||||
heap_freetuple(tuple);
|
||||
tuples_read++;
|
||||
|
||||
if (!reading_to_eof && ntuples == tuples_read)
|
||||
done = true;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -829,7 +923,6 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
||||
{
|
||||
pfree(in_functions);
|
||||
pfree(elements);
|
||||
pfree(typmod);
|
||||
}
|
||||
|
||||
ExecDropTupleTable(tupleTable, true);
|
||||
@ -1099,26 +1192,3 @@ CopyAttributeOut(FILE *fp, char *server_string, char *delim)
|
||||
pfree(string_start); /* pfree pg_server_to_client result */
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the number of tuples in a relation. Unfortunately, currently
|
||||
* must do a scan of the entire relation to determine this.
|
||||
*
|
||||
* relation is expected to be an open relation descriptor.
|
||||
*/
|
||||
/*
 * CountTuples: count the tuples returned by a full scan of `relation`.
 * The count is accumulated in an int and returned to the caller.
 */
static int
|
||||
CountTuples(Relation relation)
|
||||
{
|
||||
HeapScanDesc scandesc;
|
||||
HeapTuple tuple;
|
||||
|
|
||||
int i;
|
||||
|
|
||||
/*
 * Start a scan on the relation using QuerySnapshot and no scan keys
 * (key count 0, key array NULL).
 * NOTE(review): presumably only tuples visible under QuerySnapshot are
 * counted -- confirm against heap_beginscan's contract.
 */
scandesc = heap_beginscan(relation, 0, QuerySnapshot, 0, NULL);
|
||||
|
|
||||
i = 0;
|
||||
/* Step through the scan until heap_getnext yields an invalid tuple. */
while (HeapTupleIsValid(tuple = heap_getnext(scandesc, 0)))
|
||||
i++;
|
||||
/* Close the scan before handing back the count. */
heap_endscan(scandesc);
|
||||
return i;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user