mirror of
https://github.com/postgres/postgres.git
synced 2025-04-22 23:02:54 +03:00
Fix psql's \copy to accept table names containing schemas, as well as
a column list. Bring its parsing of quoted names and quoted strings somewhat up to speed --- I believe it now handles all non-error cases the same way the backend would, but weird boundary conditions are not necessarily done the same way.
This commit is contained in:
parent
4cff161703
commit
44dc9c1faa
@ -1,5 +1,5 @@
|
|||||||
<!--
|
<!--
|
||||||
$Header: /cvsroot/pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.78 2002/10/11 23:03:48 petere Exp $
|
$Header: /cvsroot/pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.79 2002/10/19 00:22:14 tgl Exp $
|
||||||
PostgreSQL documentation
|
PostgreSQL documentation
|
||||||
-->
|
-->
|
||||||
|
|
||||||
@ -692,6 +692,7 @@ testdb=>
|
|||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><literal>\copy <replaceable class="parameter">table</replaceable>
|
<term><literal>\copy <replaceable class="parameter">table</replaceable>
|
||||||
|
[ ( <replaceable class="parameter">column_list</replaceable> ) ]
|
||||||
{ <literal>from</literal> | <literal>to</literal> }
|
{ <literal>from</literal> | <literal>to</literal> }
|
||||||
<replaceable class="parameter">filename</replaceable> | stdin | stdout
|
<replaceable class="parameter">filename</replaceable> | stdin | stdout
|
||||||
[ <literal>with</literal> ]
|
[ <literal>with</literal> ]
|
||||||
@ -705,11 +706,12 @@ testdb=>
|
|||||||
Performs a frontend (client) copy. This is an operation that
|
Performs a frontend (client) copy. This is an operation that
|
||||||
runs an <acronym>SQL</acronym> <xref linkend="SQL-COPY"
|
runs an <acronym>SQL</acronym> <xref linkend="SQL-COPY"
|
||||||
endterm="SQL-COPY-title"> command, but instead of the backend's
|
endterm="SQL-COPY-title"> command, but instead of the backend's
|
||||||
reading or writing the specified file, and consequently
|
reading or writing the specified file,
|
||||||
requiring backend access and special user privilege, as well as
|
|
||||||
being bound to the file system accessible by the backend,
|
|
||||||
<application>psql</application> reads or writes the file and
|
<application>psql</application> reads or writes the file and
|
||||||
routes the data between the backend and the local file system.
|
routes the data between the backend and the local file system.
|
||||||
|
This means that file accessibility and privileges are those
|
||||||
|
of the local user, not the server, and no SQL superuser
|
||||||
|
privileges are required.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright 2000 by PostgreSQL Global Development Group
|
* Copyright 2000 by PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $Header: /cvsroot/pgsql/src/bin/psql/copy.c,v 1.27 2002/10/15 02:24:16 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/bin/psql/copy.c,v 1.28 2002/10/19 00:22:14 tgl Exp $
|
||||||
*/
|
*/
|
||||||
#include "postgres_fe.h"
|
#include "postgres_fe.h"
|
||||||
#include "copy.h"
|
#include "copy.h"
|
||||||
@ -38,11 +38,15 @@ bool copy_in_state;
|
|||||||
* parse_slash_copy
|
* parse_slash_copy
|
||||||
* -- parses \copy command line
|
* -- parses \copy command line
|
||||||
*
|
*
|
||||||
* Accepted syntax: \copy table|"table" [with oids] from|to filename|'filename' [with ] [ oids ] [ delimiter '<char>'] [ null as 'string' ]
|
* Accepted syntax: \copy table [(columnlist)] [with oids] from|to filename [with ] [ oids ] [ delimiter char] [ null as string ]
|
||||||
* (binary is not here yet)
|
* (binary is not here yet)
|
||||||
*
|
*
|
||||||
* Old syntax for backward compatibility: (2002-06-19):
|
* Old syntax for backward compatibility: (2002-06-19):
|
||||||
* \copy table|"table" [with oids] from|to filename|'filename' [ using delimiters '<char>'] [ with null as 'string' ]
|
* \copy table [(columnlist)] [with oids] from|to filename [ using delimiters char] [ with null as string ]
|
||||||
|
*
|
||||||
|
* table name can be double-quoted and can have a schema part.
|
||||||
|
* column names can be double-quoted.
|
||||||
|
* filename, char, and string can be single-quoted like SQL literals.
|
||||||
*
|
*
|
||||||
* returns a malloc'ed structure with the options, or NULL on parsing error
|
* returns a malloc'ed structure with the options, or NULL on parsing error
|
||||||
*/
|
*/
|
||||||
@ -50,6 +54,7 @@ bool copy_in_state;
|
|||||||
struct copy_options
|
struct copy_options
|
||||||
{
|
{
|
||||||
char *table;
|
char *table;
|
||||||
|
char *column_list;
|
||||||
char *file; /* NULL = stdin/stdout */
|
char *file; /* NULL = stdin/stdout */
|
||||||
bool from;
|
bool from;
|
||||||
bool binary;
|
bool binary;
|
||||||
@ -65,6 +70,7 @@ free_copy_options(struct copy_options * ptr)
|
|||||||
if (!ptr)
|
if (!ptr)
|
||||||
return;
|
return;
|
||||||
free(ptr->table);
|
free(ptr->table);
|
||||||
|
free(ptr->column_list);
|
||||||
free(ptr->file);
|
free(ptr->file);
|
||||||
free(ptr->delim);
|
free(ptr->delim);
|
||||||
free(ptr->null);
|
free(ptr->null);
|
||||||
@ -72,14 +78,32 @@ free_copy_options(struct copy_options * ptr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* catenate "more" onto "var", freeing the original value of *var */
|
||||||
|
static void
|
||||||
|
xstrcat(char **var, const char *more)
|
||||||
|
{
|
||||||
|
char *newvar;
|
||||||
|
|
||||||
|
newvar = (char *) malloc(strlen(*var) + strlen(more) + 1);
|
||||||
|
if (!newvar)
|
||||||
|
{
|
||||||
|
psql_error("out of memory\n");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
strcpy(newvar, *var);
|
||||||
|
strcat(newvar, more);
|
||||||
|
free(*var);
|
||||||
|
*var = newvar;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static struct copy_options *
|
static struct copy_options *
|
||||||
parse_slash_copy(const char *args)
|
parse_slash_copy(const char *args)
|
||||||
{
|
{
|
||||||
struct copy_options *result;
|
struct copy_options *result;
|
||||||
char *line;
|
char *line;
|
||||||
char *token;
|
char *token;
|
||||||
bool error = false;
|
const char *whitespace = " \t\n\r";
|
||||||
char quote;
|
|
||||||
|
|
||||||
if (args)
|
if (args)
|
||||||
line = xstrdup(args);
|
line = xstrdup(args);
|
||||||
@ -95,152 +119,183 @@ parse_slash_copy(const char *args)
|
|||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
token = strtokx(line, " \t\n\r", "\"", '\\', "e, NULL, pset.encoding);
|
token = strtokx(line, whitespace, ".,()", "\"",
|
||||||
|
0, false, pset.encoding);
|
||||||
if (!token)
|
if (!token)
|
||||||
error = true;
|
goto error;
|
||||||
else
|
|
||||||
{
|
|
||||||
#ifdef NOT_USED
|
#ifdef NOT_USED
|
||||||
/* this is not implemented yet */
|
/* this is not implemented yet */
|
||||||
if (!quote && strcasecmp(token, "binary") == 0)
|
if (strcasecmp(token, "binary") == 0)
|
||||||
|
{
|
||||||
|
result->binary = true;
|
||||||
|
token = strtokx(NULL, whitespace, ".,()", "\"",
|
||||||
|
0, false, pset.encoding);
|
||||||
|
if (!token)
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
result->table = xstrdup(token);
|
||||||
|
|
||||||
|
token = strtokx(NULL, whitespace, ".,()", "\"",
|
||||||
|
0, false, pset.encoding);
|
||||||
|
if (!token)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* strtokx() will not have returned a multi-character token starting with
|
||||||
|
* '.', so we don't need strcmp() here. Likewise for '(', etc, below.
|
||||||
|
*/
|
||||||
|
if (token[0] == '.')
|
||||||
|
{
|
||||||
|
/* handle schema . table */
|
||||||
|
xstrcat(&result->table, token);
|
||||||
|
token = strtokx(NULL, whitespace, ".,()", "\"",
|
||||||
|
0, false, pset.encoding);
|
||||||
|
if (!token)
|
||||||
|
goto error;
|
||||||
|
xstrcat(&result->table, token);
|
||||||
|
token = strtokx(NULL, whitespace, ".,()", "\"",
|
||||||
|
0, false, pset.encoding);
|
||||||
|
if (!token)
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (token[0] == '(')
|
||||||
|
{
|
||||||
|
/* handle parenthesized column list */
|
||||||
|
result->column_list = xstrdup(token);
|
||||||
|
for (;;)
|
||||||
{
|
{
|
||||||
result->binary = true;
|
token = strtokx(NULL, whitespace, ".,()", "\"",
|
||||||
token = strtokx(NULL, " \t\n\r", "\"", '\\', "e, NULL, pset.encoding);
|
0, false, pset.encoding);
|
||||||
|
if (!token || strchr(".,()", token[0]))
|
||||||
|
goto error;
|
||||||
|
xstrcat(&result->column_list, token);
|
||||||
|
token = strtokx(NULL, whitespace, ".,()", "\"",
|
||||||
|
0, false, pset.encoding);
|
||||||
if (!token)
|
if (!token)
|
||||||
error = true;
|
goto error;
|
||||||
|
xstrcat(&result->column_list, token);
|
||||||
|
if (token[0] == ')')
|
||||||
|
break;
|
||||||
|
if (token[0] != ',')
|
||||||
|
goto error;
|
||||||
}
|
}
|
||||||
if (token)
|
token = strtokx(NULL, whitespace, ".,()", "\"",
|
||||||
#endif
|
0, false, pset.encoding);
|
||||||
result->table = xstrdup(token);
|
if (!token)
|
||||||
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USE_ASSERT_CHECKING
|
/*
|
||||||
assert(error || result->table);
|
* Allows old COPY syntax for backward compatibility
|
||||||
#endif
|
* 2002-06-19
|
||||||
|
*/
|
||||||
if (!error)
|
if (strcasecmp(token, "with") == 0)
|
||||||
{
|
{
|
||||||
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
|
token = strtokx(NULL, whitespace, NULL, NULL,
|
||||||
|
0, false, pset.encoding);
|
||||||
|
if (!token || strcasecmp(token, "oids") != 0)
|
||||||
|
goto error;
|
||||||
|
result->oids = true;
|
||||||
|
|
||||||
|
token = strtokx(NULL, whitespace, NULL, NULL,
|
||||||
|
0, false, pset.encoding);
|
||||||
if (!token)
|
if (!token)
|
||||||
error = true;
|
goto error;
|
||||||
else
|
}
|
||||||
|
|
||||||
|
if (strcasecmp(token, "from") == 0)
|
||||||
|
result->from = true;
|
||||||
|
else if (strcasecmp(token, "to") == 0)
|
||||||
|
result->from = false;
|
||||||
|
else
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
token = strtokx(NULL, whitespace, NULL, "'",
|
||||||
|
'\\', true, pset.encoding);
|
||||||
|
if (!token)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
if (strcasecmp(token, "stdin") == 0 ||
|
||||||
|
strcasecmp(token, "stdout") == 0)
|
||||||
|
result->file = NULL;
|
||||||
|
else
|
||||||
|
result->file = xstrdup(token);
|
||||||
|
|
||||||
|
token = strtokx(NULL, whitespace, NULL, NULL,
|
||||||
|
0, false, pset.encoding);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allows old COPY syntax for backward compatibility
|
||||||
|
* 2002-06-19
|
||||||
|
*/
|
||||||
|
if (token && strcasecmp(token, "using") == 0)
|
||||||
|
{
|
||||||
|
token = strtokx(NULL, whitespace, NULL, NULL,
|
||||||
|
0, false, pset.encoding);
|
||||||
|
if (!(token && strcasecmp(token, "delimiters") == 0))
|
||||||
|
goto error;
|
||||||
|
token = strtokx(NULL, whitespace, NULL, "'",
|
||||||
|
'\\', false, pset.encoding);
|
||||||
|
if (!token)
|
||||||
|
goto error;
|
||||||
|
result->delim = xstrdup(token);
|
||||||
|
token = strtokx(NULL, whitespace, NULL, NULL,
|
||||||
|
0, false, pset.encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (token)
|
||||||
|
{
|
||||||
|
if (strcasecmp(token, "with") != 0)
|
||||||
|
goto error;
|
||||||
|
while ((token = strtokx(NULL, whitespace, NULL, NULL,
|
||||||
|
0, false, pset.encoding)) != NULL)
|
||||||
{
|
{
|
||||||
/*
|
if (strcasecmp(token, "delimiter") == 0)
|
||||||
* Allows old COPY syntax for backward compatibility
|
|
||||||
* 2002-06-19
|
|
||||||
*/
|
|
||||||
if (strcasecmp(token, "with") == 0)
|
|
||||||
{
|
{
|
||||||
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
|
token = strtokx(NULL, whitespace, NULL, "'",
|
||||||
if (!token || strcasecmp(token, "oids") != 0)
|
'\\', false, pset.encoding);
|
||||||
error = true;
|
if (token && strcasecmp(token, "as") == 0)
|
||||||
|
token = strtokx(NULL, whitespace, NULL, "'",
|
||||||
|
'\\', false, pset.encoding);
|
||||||
|
if (token)
|
||||||
|
result->delim = xstrdup(token);
|
||||||
else
|
else
|
||||||
result->oids = true;
|
goto error;
|
||||||
|
}
|
||||||
if (!error)
|
else if (strcasecmp(token, "null") == 0)
|
||||||
{
|
{
|
||||||
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
|
token = strtokx(NULL, whitespace, NULL, "'",
|
||||||
if (!token)
|
'\\', false, pset.encoding);
|
||||||
error = true;
|
if (token && strcasecmp(token, "as") == 0)
|
||||||
}
|
token = strtokx(NULL, whitespace, NULL, "'",
|
||||||
|
'\\', false, pset.encoding);
|
||||||
|
if (token)
|
||||||
|
result->null = xstrdup(token);
|
||||||
|
else
|
||||||
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!error && strcasecmp(token, "from") == 0)
|
|
||||||
result->from = true;
|
|
||||||
else if (!error && strcasecmp(token, "to") == 0)
|
|
||||||
result->from = false;
|
|
||||||
else
|
else
|
||||||
error = true;
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!error)
|
|
||||||
{
|
|
||||||
token = strtokx(NULL, " \t\n\r", "'", '\\', "e, NULL, pset.encoding);
|
|
||||||
if (!token)
|
|
||||||
error = true;
|
|
||||||
else if (!quote && (strcasecmp(token, "stdin") == 0 || strcasecmp(token, "stdout") == 0))
|
|
||||||
result->file = NULL;
|
|
||||||
else
|
|
||||||
result->file = xstrdup(token);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!error)
|
|
||||||
{
|
|
||||||
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
|
|
||||||
if (token)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Allows old COPY syntax for backward compatibility
|
|
||||||
* 2002-06-19
|
|
||||||
*/
|
|
||||||
if (strcasecmp(token, "using") == 0)
|
|
||||||
{
|
|
||||||
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
|
|
||||||
if (token && strcasecmp(token, "delimiters") == 0)
|
|
||||||
{
|
|
||||||
token = strtokx(NULL, " \t\n\r", "'", '\\', NULL, NULL, pset.encoding);
|
|
||||||
if (token)
|
|
||||||
{
|
|
||||||
result->delim = xstrdup(token);
|
|
||||||
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
error = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
error = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!error && token)
|
|
||||||
{
|
|
||||||
if (strcasecmp(token, "with") == 0)
|
|
||||||
{
|
|
||||||
while (!error && (token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding)))
|
|
||||||
{
|
|
||||||
if (strcasecmp(token, "delimiter") == 0)
|
|
||||||
{
|
|
||||||
token = strtokx(NULL, " \t\n\r", "'", '\\', NULL, NULL, pset.encoding);
|
|
||||||
if (token && strcasecmp(token, "as") == 0)
|
|
||||||
token = strtokx(NULL, " \t\n\r", "'", '\\', NULL, NULL, pset.encoding);
|
|
||||||
if (token)
|
|
||||||
result->delim = xstrdup(token);
|
|
||||||
else
|
|
||||||
error = true;
|
|
||||||
}
|
|
||||||
else if (strcasecmp(token, "null") == 0)
|
|
||||||
{
|
|
||||||
token = strtokx(NULL, " \t\n\r", "'", '\\', NULL, NULL, pset.encoding);
|
|
||||||
if (token && strcasecmp(token, "as") == 0)
|
|
||||||
token = strtokx(NULL, " \t\n\r", "'", '\\', NULL, NULL, pset.encoding);
|
|
||||||
if (token)
|
|
||||||
result->null = xstrdup(token);
|
|
||||||
else
|
|
||||||
error = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
error = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
error = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
free(line);
|
free(line);
|
||||||
|
|
||||||
if (error)
|
return result;
|
||||||
{
|
|
||||||
if (token)
|
error:
|
||||||
psql_error("\\copy: parse error at '%s'\n", token);
|
if (token)
|
||||||
else
|
psql_error("\\copy: parse error at '%s'\n", token);
|
||||||
psql_error("\\copy: parse error at end of line\n");
|
|
||||||
free_copy_options(result);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
return result;
|
psql_error("\\copy: parse error at end of line\n");
|
||||||
|
free_copy_options(result);
|
||||||
|
free(line);
|
||||||
|
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -272,7 +327,11 @@ do_copy(const char *args)
|
|||||||
if (options->binary)
|
if (options->binary)
|
||||||
appendPQExpBuffer(&query, "BINARY ");
|
appendPQExpBuffer(&query, "BINARY ");
|
||||||
|
|
||||||
appendPQExpBuffer(&query, "\"%s\" ", options->table);
|
appendPQExpBuffer(&query, "%s ", options->table);
|
||||||
|
|
||||||
|
if (options->column_list)
|
||||||
|
appendPQExpBuffer(&query, "%s ", options->column_list);
|
||||||
|
|
||||||
/* Uses old COPY syntax for backward compatibility 2002-06-19 */
|
/* Uses old COPY syntax for backward compatibility 2002-06-19 */
|
||||||
if (options->oids)
|
if (options->oids)
|
||||||
appendPQExpBuffer(&query, "WITH OIDS ");
|
appendPQExpBuffer(&query, "WITH OIDS ");
|
||||||
@ -285,10 +344,22 @@ do_copy(const char *args)
|
|||||||
|
|
||||||
/* Uses old COPY syntax for backward compatibility 2002-06-19 */
|
/* Uses old COPY syntax for backward compatibility 2002-06-19 */
|
||||||
if (options->delim)
|
if (options->delim)
|
||||||
appendPQExpBuffer(&query, " USING DELIMITERS '%s'", options->delim);
|
{
|
||||||
|
if (options->delim[0] == '\'')
|
||||||
|
appendPQExpBuffer(&query, " USING DELIMITERS %s",
|
||||||
|
options->delim);
|
||||||
|
else
|
||||||
|
appendPQExpBuffer(&query, " USING DELIMITERS '%s'",
|
||||||
|
options->delim);
|
||||||
|
}
|
||||||
|
|
||||||
if (options->null)
|
if (options->null)
|
||||||
appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null);
|
{
|
||||||
|
if (options->null[0] == '\'')
|
||||||
|
appendPQExpBuffer(&query, " WITH NULL AS %s", options->null);
|
||||||
|
else
|
||||||
|
appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null);
|
||||||
|
}
|
||||||
|
|
||||||
if (options->from)
|
if (options->from)
|
||||||
{
|
{
|
||||||
|
@ -1,45 +1,61 @@
|
|||||||
/*
|
/*
|
||||||
* psql - the PostgreSQL interactive terminal
|
* psql - the PostgreSQL interactive terminal
|
||||||
*
|
*
|
||||||
* Copyright 2000 by PostgreSQL Global Development Group
|
* Copyright 2000-2002 by PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $Header: /cvsroot/pgsql/src/bin/psql/stringutils.c,v 1.30 2002/08/27 20:16:49 petere Exp $
|
* $Header: /cvsroot/pgsql/src/bin/psql/stringutils.c,v 1.31 2002/10/19 00:22:14 tgl Exp $
|
||||||
*/
|
*/
|
||||||
#include "postgres_fe.h"
|
#include "postgres_fe.h"
|
||||||
#include "stringutils.h"
|
|
||||||
#include "settings.h"
|
|
||||||
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
#include "libpq-fe.h"
|
#include "libpq-fe.h"
|
||||||
|
#include "settings.h"
|
||||||
|
#include "stringutils.h"
|
||||||
|
|
||||||
|
|
||||||
|
static void strip_quotes(char *source, char quote, char escape, int encoding);
|
||||||
static void unescape_quotes(char *source, int quote, int escape);
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Replacement for strtok() (a.k.a. poor man's flex)
|
* Replacement for strtok() (a.k.a. poor man's flex)
|
||||||
*
|
*
|
||||||
* The calling convention is similar to that of strtok.
|
* Splits a string into tokens, returning one token per call, then NULL
|
||||||
|
* when no more tokens exist in the given string.
|
||||||
|
*
|
||||||
|
* The calling convention is similar to that of strtok, but with more
|
||||||
|
* frammishes.
|
||||||
|
*
|
||||||
* s - string to parse, if NULL continue parsing the last string
|
* s - string to parse, if NULL continue parsing the last string
|
||||||
* delim - set of characters that delimit tokens (usually whitespace)
|
* whitespace - set of whitespace characters that separate tokens
|
||||||
* quote - set of characters that quote stuff, they're not part of the token
|
* delim - set of non-whitespace separator characters (or NULL)
|
||||||
* escape - character than can quote quotes
|
* quote - set of characters that can quote a token (NULL if none)
|
||||||
* was_quoted - if not NULL, stores the quoting character if any was encountered
|
* escape - character that can quote quotes (0 if none)
|
||||||
* token_pos - if not NULL, receives a count to the start of the token in the
|
* del_quotes - if TRUE, strip quotes from the returned token, else return
|
||||||
* parsed string
|
* it exactly as found in the string
|
||||||
|
* encoding - the active character-set encoding
|
||||||
|
*
|
||||||
|
* Characters in 'delim', if any, will be returned as single-character
|
||||||
|
* tokens unless part of a quoted token.
|
||||||
|
*
|
||||||
|
* Double occurrences of the quoting character are always taken to represent
|
||||||
|
* a single quote character in the data. If escape isn't 0, then escape
|
||||||
|
* followed by anything (except \0) is a data character too.
|
||||||
*
|
*
|
||||||
* Note that the string s is _not_ overwritten in this implementation.
|
* Note that the string s is _not_ overwritten in this implementation.
|
||||||
|
*
|
||||||
|
* NB: it's okay to vary delim, quote, and escape from one call to the
|
||||||
|
* next on a single source string, but changing whitespace is a bad idea
|
||||||
|
* since you might lose data.
|
||||||
*/
|
*/
|
||||||
char *
|
char *
|
||||||
strtokx(const char *s,
|
strtokx(const char *s,
|
||||||
|
const char *whitespace,
|
||||||
const char *delim,
|
const char *delim,
|
||||||
const char *quote,
|
const char *quote,
|
||||||
int escape,
|
char escape,
|
||||||
char *was_quoted,
|
bool del_quotes,
|
||||||
unsigned int *token_pos,
|
|
||||||
int encoding)
|
int encoding)
|
||||||
{
|
{
|
||||||
static char *storage = NULL;/* store the local copy of the users
|
static char *storage = NULL;/* store the local copy of the users
|
||||||
@ -50,23 +66,32 @@ strtokx(const char *s,
|
|||||||
/* variously abused variables: */
|
/* variously abused variables: */
|
||||||
unsigned int offset;
|
unsigned int offset;
|
||||||
char *start;
|
char *start;
|
||||||
char *cp = NULL;
|
char *p;
|
||||||
|
|
||||||
if (s)
|
if (s)
|
||||||
{
|
{
|
||||||
free(storage);
|
free(storage);
|
||||||
storage = strdup(s);
|
/*
|
||||||
|
* We may need extra space to insert delimiter nulls for adjacent
|
||||||
|
* tokens. 2X the space is a gross overestimate, but it's
|
||||||
|
* unlikely that this code will be used on huge strings anyway.
|
||||||
|
*/
|
||||||
|
storage = (char *) malloc(2 * strlen(s) + 1);
|
||||||
|
if (!storage)
|
||||||
|
return NULL; /* really "out of memory" */
|
||||||
|
strcpy(storage, s);
|
||||||
string = storage;
|
string = storage;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!storage)
|
if (!storage)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
/* skip leading "whitespace" */
|
/* skip leading whitespace */
|
||||||
offset = strspn(string, delim);
|
offset = strspn(string, whitespace);
|
||||||
|
start = &string[offset];
|
||||||
|
|
||||||
/* end of string reached */
|
/* end of string reached? */
|
||||||
if (string[offset] == '\0')
|
if (*start == '\0')
|
||||||
{
|
{
|
||||||
/* technically we don't need to free here, but we're nice */
|
/* technically we don't need to free here, but we're nice */
|
||||||
free(storage);
|
free(storage);
|
||||||
@ -75,118 +100,165 @@ strtokx(const char *s,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* test if quoting character */
|
/* test if delimiter character */
|
||||||
if (quote)
|
if (delim && strchr(delim, *start))
|
||||||
cp = strchr(quote, string[offset]);
|
|
||||||
|
|
||||||
if (cp)
|
|
||||||
{
|
{
|
||||||
/* okay, we have a quoting character, now scan for the closer */
|
/*
|
||||||
char *p;
|
* If not at end of string, we need to insert a null to terminate
|
||||||
|
* the returned token. We can just overwrite the next character
|
||||||
start = &string[offset + 1];
|
* if it happens to be in the whitespace set ... otherwise move over
|
||||||
|
* the rest of the string to make room. (This is why we allocated
|
||||||
if (token_pos)
|
* extra space above).
|
||||||
*token_pos = start - storage;
|
*/
|
||||||
|
p = start + 1;
|
||||||
for (p = start;
|
|
||||||
*p && (*p != *cp || *(p - 1) == escape);
|
|
||||||
p += PQmblen(p, encoding)
|
|
||||||
);
|
|
||||||
|
|
||||||
/* not yet end of string? */
|
|
||||||
if (*p != '\0')
|
if (*p != '\0')
|
||||||
{
|
{
|
||||||
|
if (!strchr(whitespace, *p))
|
||||||
|
memmove(p + 1, p, strlen(p) + 1);
|
||||||
*p = '\0';
|
*p = '\0';
|
||||||
string = p + 1;
|
string = p + 1;
|
||||||
if (was_quoted)
|
|
||||||
*was_quoted = *cp;
|
|
||||||
unescape_quotes(start, *cp, escape);
|
|
||||||
return start;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (was_quoted)
|
/* at end of string, so no extra work */
|
||||||
*was_quoted = *cp;
|
|
||||||
string = p;
|
string = p;
|
||||||
|
|
||||||
unescape_quotes(start, *cp, escape);
|
|
||||||
return start;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* otherwise no quoting character. scan till next delimiter */
|
|
||||||
start = &string[offset];
|
|
||||||
|
|
||||||
if (token_pos)
|
|
||||||
*token_pos = start - storage;
|
|
||||||
|
|
||||||
offset = strcspn(start, delim);
|
|
||||||
if (was_quoted)
|
|
||||||
*was_quoted = 0;
|
|
||||||
|
|
||||||
if (start[offset] != '\0')
|
|
||||||
{
|
|
||||||
start[offset] = '\0';
|
|
||||||
string = &start[offset] + 1;
|
|
||||||
|
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* test if quoting character */
|
||||||
|
if (quote && strchr(quote, *start))
|
||||||
|
{
|
||||||
|
/* okay, we have a quoted token, now scan for the closer */
|
||||||
|
char thisquote = *start;
|
||||||
|
|
||||||
|
for (p = start + 1; *p; p += PQmblen(p, encoding))
|
||||||
|
{
|
||||||
|
if (*p == escape && p[1] != '\0')
|
||||||
|
p++; /* process escaped anything */
|
||||||
|
else if (*p == thisquote && p[1] == thisquote)
|
||||||
|
p++; /* process doubled quote */
|
||||||
|
else if (*p == thisquote)
|
||||||
|
{
|
||||||
|
p++; /* skip trailing quote */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If not at end of string, we need to insert a null to terminate
|
||||||
|
* the returned token. See notes above.
|
||||||
|
*/
|
||||||
|
if (*p != '\0')
|
||||||
|
{
|
||||||
|
if (!strchr(whitespace, *p))
|
||||||
|
memmove(p + 1, p, strlen(p) + 1);
|
||||||
|
*p = '\0';
|
||||||
|
string = p + 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* at end of string, so no extra work */
|
||||||
|
string = p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Clean up the token if caller wants that */
|
||||||
|
if (del_quotes)
|
||||||
|
strip_quotes(start, thisquote, escape, encoding);
|
||||||
|
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Otherwise no quoting character. Scan till next whitespace,
|
||||||
|
* delimiter or quote. NB: at this point, *start is known not to be
|
||||||
|
* '\0', whitespace, delim, or quote, so we will consume at least
|
||||||
|
* one character.
|
||||||
|
*/
|
||||||
|
offset = strcspn(start, whitespace);
|
||||||
|
|
||||||
|
if (delim)
|
||||||
|
{
|
||||||
|
unsigned int offset2 = strcspn(start, delim);
|
||||||
|
|
||||||
|
if (offset > offset2)
|
||||||
|
offset = offset2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (quote)
|
||||||
|
{
|
||||||
|
unsigned int offset2 = strcspn(start, quote);
|
||||||
|
|
||||||
|
if (offset > offset2)
|
||||||
|
offset = offset2;
|
||||||
|
}
|
||||||
|
|
||||||
|
p = start + offset;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If not at end of string, we need to insert a null to terminate
|
||||||
|
* the returned token. See notes above.
|
||||||
|
*/
|
||||||
|
if (*p != '\0')
|
||||||
|
{
|
||||||
|
if (!strchr(whitespace, *p))
|
||||||
|
memmove(p + 1, p, strlen(p) + 1);
|
||||||
|
*p = '\0';
|
||||||
|
string = p + 1;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
string = &start[offset];
|
/* at end of string, so no extra work */
|
||||||
return start;
|
string = p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* unescape_quotes
|
* strip_quotes
|
||||||
*
|
*
|
||||||
* Resolves escaped quotes. Used by strtokx above.
|
* Remove quotes from the string at *source. Leading and trailing occurrences
|
||||||
|
* of 'quote' are removed; embedded double occurrences of 'quote' are reduced
|
||||||
|
* to single occurrences; if 'escape' is not 0 then 'escape' removes special
|
||||||
|
* significance of next character.
|
||||||
|
*
|
||||||
|
* Note that the source string is overwritten in-place.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
unescape_quotes(char *source, int quote, int escape)
|
strip_quotes(char *source, char quote, char escape, int encoding)
|
||||||
{
|
{
|
||||||
char *p;
|
char *src;
|
||||||
char *destination,
|
char *dst;
|
||||||
*tmp;
|
|
||||||
|
|
||||||
#ifdef USE_ASSERT_CHECKING
|
#ifdef USE_ASSERT_CHECKING
|
||||||
assert(source);
|
assert(source);
|
||||||
|
assert(quote);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
destination = calloc(1, strlen(source) + 1);
|
src = dst = source;
|
||||||
if (!destination)
|
|
||||||
|
if (*src && *src == quote)
|
||||||
|
src++; /* skip leading quote */
|
||||||
|
|
||||||
|
while (*src)
|
||||||
{
|
{
|
||||||
perror("calloc");
|
char c = *src;
|
||||||
exit(EXIT_FAILURE);
|
int i;
|
||||||
|
|
||||||
|
if (c == quote && src[1] == '\0')
|
||||||
|
break; /* skip trailing quote */
|
||||||
|
else if (c == quote && src[1] == quote)
|
||||||
|
src++; /* process doubled quote */
|
||||||
|
else if (c == escape && src[1] != '\0')
|
||||||
|
src++; /* process escaped character */
|
||||||
|
|
||||||
|
i = PQmblen(src, encoding);
|
||||||
|
while (i--)
|
||||||
|
*dst++ = *src++;
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp = destination;
|
*dst = '\0';
|
||||||
|
|
||||||
for (p = source; *p; p++)
|
|
||||||
{
|
|
||||||
char c;
|
|
||||||
|
|
||||||
if (*p == escape && *(p + 1) && quote == *(p + 1))
|
|
||||||
{
|
|
||||||
c = *(p + 1);
|
|
||||||
p++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
c = *p;
|
|
||||||
|
|
||||||
*tmp = c;
|
|
||||||
tmp++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Terminating null character */
|
|
||||||
*tmp = '\0';
|
|
||||||
|
|
||||||
strcpy(source, destination);
|
|
||||||
|
|
||||||
free(destination);
|
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
/*
|
/*
|
||||||
* psql - the PostgreSQL interactive terminal
|
* psql - the PostgreSQL interactive terminal
|
||||||
*
|
*
|
||||||
* Copyright 2000 by PostgreSQL Global Development Group
|
* Copyright 2000-2002 by PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $Header: /cvsroot/pgsql/src/bin/psql/stringutils.h,v 1.17 2001/11/05 17:46:31 momjian Exp $
|
* $Id: stringutils.h,v 1.18 2002/10/19 00:22:14 tgl Exp $
|
||||||
*/
|
*/
|
||||||
#ifndef STRINGUTILS_H
|
#ifndef STRINGUTILS_H
|
||||||
#define STRINGUTILS_H
|
#define STRINGUTILS_H
|
||||||
@ -11,11 +11,11 @@
|
|||||||
/* The cooler version of strtok() which knows about quotes and doesn't
|
/* The cooler version of strtok() which knows about quotes and doesn't
|
||||||
* overwrite your input */
|
* overwrite your input */
|
||||||
extern char *strtokx(const char *s,
|
extern char *strtokx(const char *s,
|
||||||
|
const char *whitespace,
|
||||||
const char *delim,
|
const char *delim,
|
||||||
const char *quote,
|
const char *quote,
|
||||||
int escape,
|
char escape,
|
||||||
char *was_quoted,
|
bool del_quotes,
|
||||||
unsigned int *token_pos,
|
|
||||||
int encoding);
|
int encoding);
|
||||||
|
|
||||||
#endif /* STRINGUTILS_H */
|
#endif /* STRINGUTILS_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user