mirror of
https://github.com/postgres/postgres.git
synced 2025-06-29 10:41:53 +03:00
Add pg_read_binary_file() and whole-file-at-once versions of pg_read_file().
One of the usages of the binary version is to read files in a different encoding from the server encoding. Dimitri Fontaine and Itagaki Takahiro.
This commit is contained in:
@ -14449,11 +14449,18 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
|
|||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry>
|
<entry>
|
||||||
<literal><function>pg_read_file(<parameter>filename</> <type>text</>, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>)</function></literal>
|
<literal><function>pg_read_file(<parameter>filename</> <type>text</> [, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>])</function></literal>
|
||||||
</entry>
|
</entry>
|
||||||
<entry><type>text</type></entry>
|
<entry><type>text</type></entry>
|
||||||
<entry>Return the contents of a text file</entry>
|
<entry>Return the contents of a text file</entry>
|
||||||
</row>
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
<literal><function>pg_read_binary_file(<parameter>filename</> <type>text</> [, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>])</function></literal>
|
||||||
|
</entry>
|
||||||
|
<entry><type>bytea</type></entry>
|
||||||
|
<entry>Return the contents of a file</entry>
|
||||||
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry>
|
<entry>
|
||||||
<literal><function>pg_stat_file(<parameter>filename</> <type>text</>)</function></literal>
|
<literal><function>pg_stat_file(<parameter>filename</> <type>text</>)</function></literal>
|
||||||
@ -14482,6 +14489,22 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
|
|||||||
at the given <parameter>offset</>, returning at most <parameter>length</>
|
at the given <parameter>offset</>, returning at most <parameter>length</>
|
||||||
bytes (less if the end of file is reached first). If <parameter>offset</>
|
bytes (less if the end of file is reached first). If <parameter>offset</>
|
||||||
is negative, it is relative to the end of the file.
|
is negative, it is relative to the end of the file.
|
||||||
|
When <parameter>offset</> and <parameter>length</> parameters are omitted,
|
||||||
|
it returns the whole of the file.
|
||||||
|
The portion of the file that is read must be valid text in the server encoding.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>pg_read_binary_file</primary>
|
||||||
|
</indexterm>
|
||||||
|
<para>
|
||||||
|
<function>pg_read_binary_file</> returns part of a file in the same way as
|
||||||
|
<function>pg_read_file</>, but the result is a bytea value.
|
||||||
|
One use is to read a file in a specified encoding, in combination with the
|
||||||
|
<function>convert_from</> function:
|
||||||
|
<programlisting>
|
||||||
|
SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8');
|
||||||
|
</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<indexterm>
|
<indexterm>
|
||||||
|
@ -80,15 +80,14 @@ convert_and_check_filename(text *arg)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read a section of a file, returning it as text
|
* Read a section of a file, returning it as bytea
|
||||||
|
*
|
||||||
|
* We read the whole of the file when bytes_to_read is negative.
|
||||||
*/
|
*/
|
||||||
Datum
|
static bytea *
|
||||||
pg_read_file(PG_FUNCTION_ARGS)
|
read_binary_file(text *filename_t, int64 seek_offset, int64 bytes_to_read)
|
||||||
{
|
{
|
||||||
text *filename_t = PG_GETARG_TEXT_P(0);
|
bytea *buf;
|
||||||
int64 seek_offset = PG_GETARG_INT64(1);
|
|
||||||
int64 bytes_to_read = PG_GETARG_INT64(2);
|
|
||||||
char *buf;
|
|
||||||
size_t nbytes;
|
size_t nbytes;
|
||||||
FILE *file;
|
FILE *file;
|
||||||
char *filename;
|
char *filename;
|
||||||
@ -100,6 +99,29 @@ pg_read_file(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
filename = convert_and_check_filename(filename_t);
|
filename = convert_and_check_filename(filename_t);
|
||||||
|
|
||||||
|
if (bytes_to_read < 0)
|
||||||
|
{
|
||||||
|
if (seek_offset < 0)
|
||||||
|
bytes_to_read = -seek_offset;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
struct stat fst;
|
||||||
|
|
||||||
|
if (stat(filename, &fst) < 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not stat file \"%s\": %m", filename)));
|
||||||
|
|
||||||
|
bytes_to_read = fst.st_size - seek_offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* not sure why anyone thought that int64 length was a good idea */
|
||||||
|
if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("requested length too large")));
|
||||||
|
|
||||||
if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
|
if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
@ -112,18 +134,7 @@ pg_read_file(PG_FUNCTION_ARGS)
|
|||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
errmsg("could not seek in file \"%s\": %m", filename)));
|
errmsg("could not seek in file \"%s\": %m", filename)));
|
||||||
|
|
||||||
if (bytes_to_read < 0)
|
buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
||||||
errmsg("requested length cannot be negative")));
|
|
||||||
|
|
||||||
/* not sure why anyone thought that int64 length was a good idea */
|
|
||||||
if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
||||||
errmsg("requested length too large")));
|
|
||||||
|
|
||||||
buf = palloc((Size) bytes_to_read + VARHDRSZ);
|
|
||||||
|
|
||||||
nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
|
nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
|
||||||
|
|
||||||
@ -132,15 +143,86 @@ pg_read_file(PG_FUNCTION_ARGS)
|
|||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
errmsg("could not read file \"%s\": %m", filename)));
|
errmsg("could not read file \"%s\": %m", filename)));
|
||||||
|
|
||||||
/* Make sure the input is valid */
|
|
||||||
pg_verifymbstr(VARDATA(buf), nbytes, false);
|
|
||||||
|
|
||||||
SET_VARSIZE(buf, nbytes + VARHDRSZ);
|
SET_VARSIZE(buf, nbytes + VARHDRSZ);
|
||||||
|
|
||||||
FreeFile(file);
|
FreeFile(file);
|
||||||
pfree(filename);
|
pfree(filename);
|
||||||
|
|
||||||
PG_RETURN_TEXT_P(buf);
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Same as read_binary_file, but also verify that the contents are validly encoded
|
||||||
|
* in the database encoding.
|
||||||
|
*/
|
||||||
|
static text *
|
||||||
|
read_text_file(text *filename, int64 seek_offset, int64 bytes_to_read)
|
||||||
|
{
|
||||||
|
bytea *buf = read_binary_file(filename, seek_offset, bytes_to_read);
|
||||||
|
|
||||||
|
/* Make sure the input is valid */
|
||||||
|
pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
|
||||||
|
|
||||||
|
/* OK, we can cast it as text safely */
|
||||||
|
return (text *) buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read a section of a file, returning it as text
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
pg_read_file(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
text *filename_t = PG_GETARG_TEXT_P(0);
|
||||||
|
int64 seek_offset = PG_GETARG_INT64(1);
|
||||||
|
int64 bytes_to_read = PG_GETARG_INT64(2);
|
||||||
|
|
||||||
|
if (bytes_to_read < 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("requested length cannot be negative")));
|
||||||
|
|
||||||
|
PG_RETURN_TEXT_P(read_text_file(filename_t, seek_offset, bytes_to_read));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read the whole of a file, returning it as text
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
pg_read_file_all(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
text *filename_t = PG_GETARG_TEXT_P(0);
|
||||||
|
|
||||||
|
PG_RETURN_TEXT_P(read_text_file(filename_t, 0, -1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read a section of a file, returning it as bytea
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
pg_read_binary_file(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
text *filename_t = PG_GETARG_TEXT_P(0);
|
||||||
|
int64 seek_offset = PG_GETARG_INT64(1);
|
||||||
|
int64 bytes_to_read = PG_GETARG_INT64(2);
|
||||||
|
|
||||||
|
if (bytes_to_read < 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("requested length cannot be negative")));
|
||||||
|
|
||||||
|
PG_RETURN_BYTEA_P(read_binary_file(filename_t, seek_offset, bytes_to_read));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read the whole of a file, returning it as bytea
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
pg_read_binary_file_all(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
text *filename_t = PG_GETARG_TEXT_P(0);
|
||||||
|
|
||||||
|
PG_RETURN_BYTEA_P(read_binary_file(filename_t, 0, -1));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -53,6 +53,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* yyyymmddN */
|
/* yyyymmddN */
|
||||||
#define CATALOG_VERSION_NO 201012131
|
#define CATALOG_VERSION_NO 201012161
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -3403,6 +3403,12 @@ DATA(insert OID = 2623 ( pg_stat_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2249
|
|||||||
DESCR("return file information");
|
DESCR("return file information");
|
||||||
DATA(insert OID = 2624 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 25 "25 20 20" _null_ _null_ _null_ _null_ pg_read_file _null_ _null_ _null_ ));
|
DATA(insert OID = 2624 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 25 "25 20 20" _null_ _null_ _null_ _null_ pg_read_file _null_ _null_ _null_ ));
|
||||||
DESCR("read text from a file");
|
DESCR("read text from a file");
|
||||||
|
DATA(insert OID = 3826 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_read_file_all _null_ _null_ _null_ ));
|
||||||
|
DESCR("read text from a file");
|
||||||
|
DATA(insert OID = 3827 ( pg_read_binary_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 17 "25 20 20" _null_ _null_ _null_ _null_ pg_read_binary_file _null_ _null_ _null_ ));
|
||||||
|
DESCR("read bytea from a file");
|
||||||
|
DATA(insert OID = 3828 ( pg_read_binary_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 17 "25" _null_ _null_ _null_ _null_ pg_read_binary_file_all _null_ _null_ _null_ ));
|
||||||
|
DESCR("read bytea from a file");
|
||||||
DATA(insert OID = 2625 ( pg_ls_dir PGNSP PGUID 12 1 1000 0 f f f t t v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_ls_dir _null_ _null_ _null_ ));
|
DATA(insert OID = 2625 ( pg_ls_dir PGNSP PGUID 12 1 1000 0 f f f t t v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_ls_dir _null_ _null_ _null_ ));
|
||||||
DESCR("list all files in a directory");
|
DESCR("list all files in a directory");
|
||||||
DATA(insert OID = 2626 ( pg_sleep PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "701" _null_ _null_ _null_ _null_ pg_sleep _null_ _null_ _null_ ));
|
DATA(insert OID = 2626 ( pg_sleep PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "701" _null_ _null_ _null_ _null_ pg_sleep _null_ _null_ _null_ ));
|
||||||
|
@ -442,6 +442,9 @@ extern Datum pg_relation_filepath(PG_FUNCTION_ARGS);
|
|||||||
/* genfile.c */
|
/* genfile.c */
|
||||||
extern Datum pg_stat_file(PG_FUNCTION_ARGS);
|
extern Datum pg_stat_file(PG_FUNCTION_ARGS);
|
||||||
extern Datum pg_read_file(PG_FUNCTION_ARGS);
|
extern Datum pg_read_file(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum pg_read_file_all(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum pg_read_binary_file(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum pg_read_binary_file_all(PG_FUNCTION_ARGS);
|
||||||
extern Datum pg_ls_dir(PG_FUNCTION_ARGS);
|
extern Datum pg_ls_dir(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
/* misc.c */
|
/* misc.c */
|
||||||
|
Reference in New Issue
Block a user