1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-19 13:42:17 +03:00

Fix psql's \copy to accept table names containing schemas, as well as
a column list.  Bring its parsing of quoted names and quoted strings
somewhat up to speed --- I believe it now handles all non-error cases
the same way the backend would, but weird boundary conditions are not
necessarily done the same way.
This commit is contained in:
Tom Lane
2002-10-19 00:22:14 +00:00
parent 4cff161703
commit 44dc9c1faa
4 changed files with 393 additions and 248 deletions

View File

@@ -3,7 +3,7 @@
*
* Copyright 2000 by PostgreSQL Global Development Group
*
* $Header: /cvsroot/pgsql/src/bin/psql/copy.c,v 1.27 2002/10/15 02:24:16 tgl Exp $
* $Header: /cvsroot/pgsql/src/bin/psql/copy.c,v 1.28 2002/10/19 00:22:14 tgl Exp $
*/
#include "postgres_fe.h"
#include "copy.h"
@@ -38,11 +38,15 @@ bool copy_in_state;
* parse_slash_copy
* -- parses \copy command line
*
* Accepted syntax: \copy table|"table" [with oids] from|to filename|'filename' [with ] [ oids ] [ delimiter '<char>'] [ null as 'string' ]
* Accepted syntax: \copy table [(columnlist)] [with oids] from|to filename [with ] [ oids ] [ delimiter char] [ null as string ]
* (binary is not here yet)
*
* Old syntax for backward compatibility: (2002-06-19):
* \copy table|"table" [with oids] from|to filename|'filename' [ using delimiters '<char>'] [ with null as 'string' ]
* \copy table [(columnlist)] [with oids] from|to filename [ using delimiters char] [ with null as string ]
*
* table name can be double-quoted and can have a schema part.
* column names can be double-quoted.
* filename, char, and string can be single-quoted like SQL literals.
*
* returns a malloc'ed structure with the options, or NULL on parsing error
*/
@@ -50,6 +54,7 @@ bool copy_in_state;
struct copy_options
{
char *table;
char *column_list;
char *file; /* NULL = stdin/stdout */
bool from;
bool binary;
@@ -65,6 +70,7 @@ free_copy_options(struct copy_options * ptr)
if (!ptr)
return;
free(ptr->table);
free(ptr->column_list);
free(ptr->file);
free(ptr->delim);
free(ptr->null);
@@ -72,14 +78,32 @@ free_copy_options(struct copy_options * ptr)
}
/*
 * xstrcat
 *
 * Append "more" to the string currently stored in *var.
 *
 * A fresh buffer big enough for both pieces is allocated, the two strings
 * are copied into it back to back, the previous *var buffer is released
 * with free(), and *var is redirected to the new buffer.  If the
 * allocation fails, an error is reported and the program exits.
 */
static void
xstrcat(char **var, const char *more)
{
	size_t		head_len = strlen(*var);
	size_t		tail_len = strlen(more);
	char	   *joined = (char *) malloc(head_len + tail_len + 1);

	if (joined == NULL)
	{
		psql_error("out of memory\n");
		exit(EXIT_FAILURE);
	}

	/* lay down the old value, then the addition plus its terminating null */
	memcpy(joined, *var, head_len);
	memcpy(joined + head_len, more, tail_len + 1);

	/* only now is it safe to drop the old buffer */
	free(*var);
	*var = joined;
}
static struct copy_options *
parse_slash_copy(const char *args)
{
struct copy_options *result;
char *line;
char *token;
bool error = false;
char quote;
const char *whitespace = " \t\n\r";
if (args)
line = xstrdup(args);
@@ -95,152 +119,183 @@ parse_slash_copy(const char *args)
exit(EXIT_FAILURE);
}
token = strtokx(line, " \t\n\r", "\"", '\\', &quote, NULL, pset.encoding);
token = strtokx(line, whitespace, ".,()", "\"",
0, false, pset.encoding);
if (!token)
error = true;
else
{
goto error;
#ifdef NOT_USED
/* this is not implemented yet */
if (!quote && strcasecmp(token, "binary") == 0)
/* this is not implemented yet */
if (strcasecmp(token, "binary") == 0)
{
result->binary = true;
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, pset.encoding);
if (!token)
goto error;
}
#endif
result->table = xstrdup(token);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, pset.encoding);
if (!token)
goto error;
/*
* strtokx() will not have returned a multi-character token starting with
* '.', so we don't need strcmp() here. Likewise for '(', etc, below.
*/
if (token[0] == '.')
{
/* handle schema . table */
xstrcat(&result->table, token);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, pset.encoding);
if (!token)
goto error;
xstrcat(&result->table, token);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, pset.encoding);
if (!token)
goto error;
}
if (token[0] == '(')
{
/* handle parenthesized column list */
result->column_list = xstrdup(token);
for (;;)
{
result->binary = true;
token = strtokx(NULL, " \t\n\r", "\"", '\\', &quote, NULL, pset.encoding);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, pset.encoding);
if (!token || strchr(".,()", token[0]))
goto error;
xstrcat(&result->column_list, token);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, pset.encoding);
if (!token)
error = true;
goto error;
xstrcat(&result->column_list, token);
if (token[0] == ')')
break;
if (token[0] != ',')
goto error;
}
if (token)
#endif
result->table = xstrdup(token);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, pset.encoding);
if (!token)
goto error;
}
#ifdef USE_ASSERT_CHECKING
assert(error || result->table);
#endif
if (!error)
/*
* Allows old COPY syntax for backward compatibility
* 2002-06-19
*/
if (strcasecmp(token, "with") == 0)
{
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
token = strtokx(NULL, whitespace, NULL, NULL,
0, false, pset.encoding);
if (!token || strcasecmp(token, "oids") != 0)
goto error;
result->oids = true;
token = strtokx(NULL, whitespace, NULL, NULL,
0, false, pset.encoding);
if (!token)
error = true;
else
goto error;
}
if (strcasecmp(token, "from") == 0)
result->from = true;
else if (strcasecmp(token, "to") == 0)
result->from = false;
else
goto error;
token = strtokx(NULL, whitespace, NULL, "'",
'\\', true, pset.encoding);
if (!token)
goto error;
if (strcasecmp(token, "stdin") == 0 ||
strcasecmp(token, "stdout") == 0)
result->file = NULL;
else
result->file = xstrdup(token);
token = strtokx(NULL, whitespace, NULL, NULL,
0, false, pset.encoding);
/*
* Allows old COPY syntax for backward compatibility
* 2002-06-19
*/
if (token && strcasecmp(token, "using") == 0)
{
token = strtokx(NULL, whitespace, NULL, NULL,
0, false, pset.encoding);
if (!(token && strcasecmp(token, "delimiters") == 0))
goto error;
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (!token)
goto error;
result->delim = xstrdup(token);
token = strtokx(NULL, whitespace, NULL, NULL,
0, false, pset.encoding);
}
if (token)
{
if (strcasecmp(token, "with") != 0)
goto error;
while ((token = strtokx(NULL, whitespace, NULL, NULL,
0, false, pset.encoding)) != NULL)
{
/*
* Allows old COPY syntax for backward compatibility
* 2002-06-19
*/
if (strcasecmp(token, "with") == 0)
if (strcasecmp(token, "delimiter") == 0)
{
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
if (!token || strcasecmp(token, "oids") != 0)
error = true;
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token && strcasecmp(token, "as") == 0)
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token)
result->delim = xstrdup(token);
else
result->oids = true;
if (!error)
{
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
if (!token)
error = true;
}
goto error;
}
else if (strcasecmp(token, "null") == 0)
{
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token && strcasecmp(token, "as") == 0)
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token)
result->null = xstrdup(token);
else
goto error;
}
if (!error && strcasecmp(token, "from") == 0)
result->from = true;
else if (!error && strcasecmp(token, "to") == 0)
result->from = false;
else
error = true;
goto error;
}
}
if (!error)
{
token = strtokx(NULL, " \t\n\r", "'", '\\', &quote, NULL, pset.encoding);
if (!token)
error = true;
else if (!quote && (strcasecmp(token, "stdin") == 0 || strcasecmp(token, "stdout") == 0))
result->file = NULL;
else
result->file = xstrdup(token);
}
if (!error)
{
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
if (token)
{
/*
* Allows old COPY syntax for backward compatibility
* 2002-06-19
*/
if (strcasecmp(token, "using") == 0)
{
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
if (token && strcasecmp(token, "delimiters") == 0)
{
token = strtokx(NULL, " \t\n\r", "'", '\\', NULL, NULL, pset.encoding);
if (token)
{
result->delim = xstrdup(token);
token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding);
}
else
error = true;
}
else
error = true;
}
}
}
if (!error && token)
{
if (strcasecmp(token, "with") == 0)
{
while (!error && (token = strtokx(NULL, " \t\n\r", NULL, '\\', NULL, NULL, pset.encoding)))
{
if (strcasecmp(token, "delimiter") == 0)
{
token = strtokx(NULL, " \t\n\r", "'", '\\', NULL, NULL, pset.encoding);
if (token && strcasecmp(token, "as") == 0)
token = strtokx(NULL, " \t\n\r", "'", '\\', NULL, NULL, pset.encoding);
if (token)
result->delim = xstrdup(token);
else
error = true;
}
else if (strcasecmp(token, "null") == 0)
{
token = strtokx(NULL, " \t\n\r", "'", '\\', NULL, NULL, pset.encoding);
if (token && strcasecmp(token, "as") == 0)
token = strtokx(NULL, " \t\n\r", "'", '\\', NULL, NULL, pset.encoding);
if (token)
result->null = xstrdup(token);
else
error = true;
}
else
error = true;
}
}
else
error = true;
}
free(line);
if (error)
{
if (token)
psql_error("\\copy: parse error at '%s'\n", token);
else
psql_error("\\copy: parse error at end of line\n");
free_copy_options(result);
return NULL;
}
return result;
error:
if (token)
psql_error("\\copy: parse error at '%s'\n", token);
else
return result;
psql_error("\\copy: parse error at end of line\n");
free_copy_options(result);
free(line);
return NULL;
}
@@ -272,7 +327,11 @@ do_copy(const char *args)
if (options->binary)
appendPQExpBuffer(&query, "BINARY ");
appendPQExpBuffer(&query, "\"%s\" ", options->table);
appendPQExpBuffer(&query, "%s ", options->table);
if (options->column_list)
appendPQExpBuffer(&query, "%s ", options->column_list);
/* Uses old COPY syntax for backward compatibility 2002-06-19 */
if (options->oids)
appendPQExpBuffer(&query, "WITH OIDS ");
@@ -285,10 +344,22 @@ do_copy(const char *args)
/* Uses old COPY syntax for backward compatibility 2002-06-19 */
if (options->delim)
appendPQExpBuffer(&query, " USING DELIMITERS '%s'", options->delim);
{
if (options->delim[0] == '\'')
appendPQExpBuffer(&query, " USING DELIMITERS %s",
options->delim);
else
appendPQExpBuffer(&query, " USING DELIMITERS '%s'",
options->delim);
}
if (options->null)
appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null);
{
if (options->null[0] == '\'')
appendPQExpBuffer(&query, " WITH NULL AS %s", options->null);
else
appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null);
}
if (options->from)
{

View File

@@ -1,45 +1,61 @@
/*
* psql - the PostgreSQL interactive terminal
*
* Copyright 2000 by PostgreSQL Global Development Group
* Copyright 2000-2002 by PostgreSQL Global Development Group
*
* $Header: /cvsroot/pgsql/src/bin/psql/stringutils.c,v 1.30 2002/08/27 20:16:49 petere Exp $
* $Header: /cvsroot/pgsql/src/bin/psql/stringutils.c,v 1.31 2002/10/19 00:22:14 tgl Exp $
*/
#include "postgres_fe.h"
#include "stringutils.h"
#include "settings.h"
#include <ctype.h>
#include <assert.h>
#include <ctype.h>
#include "libpq-fe.h"
#include "settings.h"
#include "stringutils.h"
static void unescape_quotes(char *source, int quote, int escape);
static void strip_quotes(char *source, char quote, char escape, int encoding);
/*
* Replacement for strtok() (a.k.a. poor man's flex)
*
* The calling convention is similar to that of strtok.
* Splits a string into tokens, returning one token per call, then NULL
* when no more tokens exist in the given string.
*
* The calling convention is similar to that of strtok, but with more
* frammishes.
*
* s - string to parse, if NULL continue parsing the last string
* delim - set of characters that delimit tokens (usually whitespace)
* quote - set of characters that quote stuff, they're not part of the token
* escape - character than can quote quotes
* was_quoted - if not NULL, stores the quoting character if any was encountered
* token_pos - if not NULL, receives a count to the start of the token in the
* parsed string
* whitespace - set of whitespace characters that separate tokens
* delim - set of non-whitespace separator characters (or NULL)
* quote - set of characters that can quote a token (NULL if none)
* escape - character that can quote quotes (0 if none)
* del_quotes - if TRUE, strip quotes from the returned token, else return
* it exactly as found in the string
* encoding - the active character-set encoding
*
* Characters in 'delim', if any, will be returned as single-character
* tokens unless part of a quoted token.
*
* Double occurrences of the quoting character are always taken to represent
* a single quote character in the data. If escape isn't 0, then escape
* followed by anything (except \0) is a data character too.
*
* Note that the string s is _not_ overwritten in this implementation.
*
* NB: it's okay to vary delim, quote, and escape from one call to the
* next on a single source string, but changing whitespace is a bad idea
* since you might lose data.
*/
char *
strtokx(const char *s,
const char *whitespace,
const char *delim,
const char *quote,
int escape,
char *was_quoted,
unsigned int *token_pos,
char escape,
bool del_quotes,
int encoding)
{
static char *storage = NULL;/* store the local copy of the users
@@ -50,23 +66,32 @@ strtokx(const char *s,
/* variously abused variables: */
unsigned int offset;
char *start;
char *cp = NULL;
char *p;
if (s)
{
free(storage);
storage = strdup(s);
/*
* We may need extra space to insert delimiter nulls for adjacent
* tokens. 2X the space is a gross overestimate, but it's
* unlikely that this code will be used on huge strings anyway.
*/
storage = (char *) malloc(2 * strlen(s) + 1);
if (!storage)
return NULL; /* really "out of memory" */
strcpy(storage, s);
string = storage;
}
if (!storage)
return NULL;
/* skip leading "whitespace" */
offset = strspn(string, delim);
/* skip leading whitespace */
offset = strspn(string, whitespace);
start = &string[offset];
/* end of string reached */
if (string[offset] == '\0')
/* end of string reached? */
if (*start == '\0')
{
/* technically we don't need to free here, but we're nice */
free(storage);
@@ -75,118 +100,165 @@ strtokx(const char *s,
return NULL;
}
/* test if quoting character */
if (quote)
cp = strchr(quote, string[offset]);
if (cp)
/* test if delimiter character */
if (delim && strchr(delim, *start))
{
/* okay, we have a quoting character, now scan for the closer */
char *p;
start = &string[offset + 1];
if (token_pos)
*token_pos = start - storage;
for (p = start;
*p && (*p != *cp || *(p - 1) == escape);
p += PQmblen(p, encoding)
);
/* not yet end of string? */
/*
* If not at end of string, we need to insert a null to terminate
* the returned token. We can just overwrite the next character
* if it happens to be in the whitespace set ... otherwise move over
* the rest of the string to make room. (This is why we allocated
* extra space above).
*/
p = start + 1;
if (*p != '\0')
{
if (!strchr(whitespace, *p))
memmove(p + 1, p, strlen(p) + 1);
*p = '\0';
string = p + 1;
if (was_quoted)
*was_quoted = *cp;
unescape_quotes(start, *cp, escape);
return start;
}
else
{
if (was_quoted)
*was_quoted = *cp;
/* at end of string, so no extra work */
string = p;
unescape_quotes(start, *cp, escape);
return start;
}
}
/* otherwise no quoting character. scan till next delimiter */
start = &string[offset];
if (token_pos)
*token_pos = start - storage;
offset = strcspn(start, delim);
if (was_quoted)
*was_quoted = 0;
if (start[offset] != '\0')
{
start[offset] = '\0';
string = &start[offset] + 1;
return start;
}
/* test if quoting character */
if (quote && strchr(quote, *start))
{
/* okay, we have a quoted token, now scan for the closer */
char thisquote = *start;
for (p = start + 1; *p; p += PQmblen(p, encoding))
{
if (*p == escape && p[1] != '\0')
p++; /* process escaped anything */
else if (*p == thisquote && p[1] == thisquote)
p++; /* process doubled quote */
else if (*p == thisquote)
{
p++; /* skip trailing quote */
break;
}
}
/*
* If not at end of string, we need to insert a null to terminate
* the returned token. See notes above.
*/
if (*p != '\0')
{
if (!strchr(whitespace, *p))
memmove(p + 1, p, strlen(p) + 1);
*p = '\0';
string = p + 1;
}
else
{
/* at end of string, so no extra work */
string = p;
}
/* Clean up the token if caller wants that */
if (del_quotes)
strip_quotes(start, thisquote, escape, encoding);
return start;
}
/*
* Otherwise no quoting character. Scan till next whitespace,
* delimiter or quote. NB: at this point, *start is known not to be
* '\0', whitespace, delim, or quote, so we will consume at least
* one character.
*/
offset = strcspn(start, whitespace);
if (delim)
{
unsigned int offset2 = strcspn(start, delim);
if (offset > offset2)
offset = offset2;
}
if (quote)
{
unsigned int offset2 = strcspn(start, quote);
if (offset > offset2)
offset = offset2;
}
p = start + offset;
/*
* If not at end of string, we need to insert a null to terminate
* the returned token. See notes above.
*/
if (*p != '\0')
{
if (!strchr(whitespace, *p))
memmove(p + 1, p, strlen(p) + 1);
*p = '\0';
string = p + 1;
}
else
{
string = &start[offset];
return start;
/* at end of string, so no extra work */
string = p;
}
return start;
}
/*
* unescape_quotes
* strip_quotes
*
* Resolves escaped quotes. Used by strtokx above.
* Remove quotes from the string at *source. Leading and trailing occurrences
* of 'quote' are removed; embedded double occurrences of 'quote' are reduced
* to single occurrences; if 'escape' is not 0 then 'escape' removes special
* significance of next character.
*
* Note that the source string is overwritten in-place.
*/
static void
unescape_quotes(char *source, int quote, int escape)
strip_quotes(char *source, char quote, char escape, int encoding)
{
char *p;
char *destination,
*tmp;
char *src;
char *dst;
#ifdef USE_ASSERT_CHECKING
assert(source);
assert(quote);
#endif
destination = calloc(1, strlen(source) + 1);
if (!destination)
src = dst = source;
if (*src && *src == quote)
src++; /* skip leading quote */
while (*src)
{
perror("calloc");
exit(EXIT_FAILURE);
char c = *src;
int i;
if (c == quote && src[1] == '\0')
break; /* skip trailing quote */
else if (c == quote && src[1] == quote)
src++; /* process doubled quote */
else if (c == escape && src[1] != '\0')
src++; /* process escaped character */
i = PQmblen(src, encoding);
while (i--)
*dst++ = *src++;
}
tmp = destination;
for (p = source; *p; p++)
{
char c;
if (*p == escape && *(p + 1) && quote == *(p + 1))
{
c = *(p + 1);
p++;
}
else
c = *p;
*tmp = c;
tmp++;
}
/* Terminating null character */
*tmp = '\0';
strcpy(source, destination);
free(destination);
*dst = '\0';
}

View File

@@ -1,9 +1,9 @@
/*
* psql - the PostgreSQL interactive terminal
*
* Copyright 2000 by PostgreSQL Global Development Group
* Copyright 2000-2002 by PostgreSQL Global Development Group
*
* $Header: /cvsroot/pgsql/src/bin/psql/stringutils.h,v 1.17 2001/11/05 17:46:31 momjian Exp $
* $Id: stringutils.h,v 1.18 2002/10/19 00:22:14 tgl Exp $
*/
#ifndef STRINGUTILS_H
#define STRINGUTILS_H
@@ -11,11 +11,11 @@
/* The cooler version of strtok() which knows about quotes and doesn't
* overwrite your input */
extern char *strtokx(const char *s,
const char *whitespace,
const char *delim,
const char *quote,
int escape,
char *was_quoted,
unsigned int *token_pos,
char escape,
bool del_quotes,
int encoding);
#endif /* STRINGUTILS_H */