mirror of
https://github.com/postgres/postgres.git
synced 2025-05-28 05:21:27 +03:00
Avoid breaking SJIS encoding while de-backslashing Windows paths.
When running on Windows, canonicalize_path() converts '\' to '/' to prevent confusing the Windows command processor. It was doing that in a non-encoding-aware fashion; but in SJIS there are valid two-byte characters whose second byte matches '\'. So encoding corruption ensues if such a character is used in the path.

We can fairly easily fix this if we know which encoding is in use, but a lot of our utilities don't have much of a clue about that. After some discussion we decided we'd settle for fixing this only in psql, and assuming that its value of client_encoding matches what the user is typing.

It seems hopeless to get the server to deal with the problematic characters in database path names, so we'll just declare that case to be unsupported. That means nothing need be done in the server, nor in utility programs whose only contact with file path names is for database paths. But psql frequently deals with client-side file paths, so it'd be good if it didn't mess those up.

Bug: #18735
Reported-by: Koichi Suzuki <koichi.suzuki@enterprisedb.com>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Koichi Suzuki <koichi.suzuki@enterprisedb.com>
Discussion: https://postgr.es/m/18735-4acdb3998bb9f2b1@postgresql.org
Backpatch-through: 13
This commit is contained in:
parent
6555fe1979
commit
0b713b94b3
@ -1110,7 +1110,7 @@ exec_command_edit(PsqlScanState scan_state, bool active_branch,
|
|||||||
expand_tilde(&fname);
|
expand_tilde(&fname);
|
||||||
if (fname)
|
if (fname)
|
||||||
{
|
{
|
||||||
canonicalize_path(fname);
|
canonicalize_path_enc(fname, pset.encoding);
|
||||||
/* Always clear buffer if the file isn't modified */
|
/* Always clear buffer if the file isn't modified */
|
||||||
discard_on_quit = true;
|
discard_on_quit = true;
|
||||||
}
|
}
|
||||||
@ -2694,7 +2694,7 @@ exec_command_write(PsqlScanState scan_state, bool active_branch,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
canonicalize_path(fname);
|
canonicalize_path_enc(fname, pset.encoding);
|
||||||
fd = fopen(fname, "w");
|
fd = fopen(fname, "w");
|
||||||
}
|
}
|
||||||
if (!fd)
|
if (!fd)
|
||||||
@ -4298,7 +4298,7 @@ process_file(char *filename, bool use_relative_path)
|
|||||||
}
|
}
|
||||||
else if (strcmp(filename, "-") != 0)
|
else if (strcmp(filename, "-") != 0)
|
||||||
{
|
{
|
||||||
canonicalize_path(filename);
|
canonicalize_path_enc(filename, pset.encoding);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we were asked to resolve the pathname relative to the location
|
* If we were asked to resolve the pathname relative to the location
|
||||||
@ -4312,7 +4312,7 @@ process_file(char *filename, bool use_relative_path)
|
|||||||
strlcpy(relpath, pset.inputfile, sizeof(relpath));
|
strlcpy(relpath, pset.inputfile, sizeof(relpath));
|
||||||
get_parent_directory(relpath);
|
get_parent_directory(relpath);
|
||||||
join_path_components(relpath, relpath, filename);
|
join_path_components(relpath, relpath, filename);
|
||||||
canonicalize_path(relpath);
|
canonicalize_path_enc(relpath, pset.encoding);
|
||||||
|
|
||||||
filename = relpath;
|
filename = relpath;
|
||||||
}
|
}
|
||||||
|
@ -280,7 +280,7 @@ do_copy(const char *args)
|
|||||||
|
|
||||||
/* prepare to read or write the target file */
|
/* prepare to read or write the target file */
|
||||||
if (options->file && !options->program)
|
if (options->file && !options->program)
|
||||||
canonicalize_path(options->file);
|
canonicalize_path_enc(options->file, pset.encoding);
|
||||||
|
|
||||||
if (options->from)
|
if (options->from)
|
||||||
{
|
{
|
||||||
|
@ -53,6 +53,7 @@ extern char *first_path_var_separator(const char *pathlist);
|
|||||||
extern void join_path_components(char *ret_path,
|
extern void join_path_components(char *ret_path,
|
||||||
const char *head, const char *tail);
|
const char *head, const char *tail);
|
||||||
extern void canonicalize_path(char *path);
|
extern void canonicalize_path(char *path);
|
||||||
|
extern void canonicalize_path_enc(char *path, int encoding);
|
||||||
extern void make_native_path(char *filename);
|
extern void make_native_path(char *filename);
|
||||||
extern void cleanup_path(char *path);
|
extern void cleanup_path(char *path);
|
||||||
extern bool path_contains_parent_reference(const char *path);
|
extern bool path_contains_parent_reference(const char *path);
|
||||||
|
105
src/port/path.c
105
src/port/path.c
@ -35,6 +35,7 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "mb/pg_wchar.h"
|
||||||
#include "pg_config_paths.h"
|
#include "pg_config_paths.h"
|
||||||
|
|
||||||
|
|
||||||
@ -44,6 +45,10 @@
|
|||||||
#define IS_PATH_VAR_SEP(ch) ((ch) == ';')
|
#define IS_PATH_VAR_SEP(ch) ((ch) == ';')
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
static void debackslash_path(char *path, int encoding);
|
||||||
|
static int pg_sjis_mblen(const unsigned char *s);
|
||||||
|
#endif
|
||||||
static void make_relative_path(char *ret_path, const char *target_path,
|
static void make_relative_path(char *ret_path, const char *target_path,
|
||||||
const char *bin_path, const char *my_exec_path);
|
const char *bin_path, const char *my_exec_path);
|
||||||
static char *trim_directory(char *path);
|
static char *trim_directory(char *path);
|
||||||
@ -148,10 +153,73 @@ last_dir_separator(const char *filename)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* make_native_path - on WIN32, change / to \ in the path
|
* Convert '\' to '/' within the given path, assuming the path
|
||||||
|
* is in the specified encoding.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
debackslash_path(char *path, int encoding)
|
||||||
|
{
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Of the supported encodings, only Shift-JIS has multibyte characters
|
||||||
|
* that can include a byte equal to '\' (0x5C). So rather than implement
|
||||||
|
* a fully encoding-aware conversion, we special-case SJIS. (Invoking the
|
||||||
|
* general encoding-aware logic in wchar.c is impractical here for
|
||||||
|
* assorted reasons.)
|
||||||
|
*/
|
||||||
|
if (encoding == PG_SJIS)
|
||||||
|
{
|
||||||
|
for (p = path; *p; p += pg_sjis_mblen((const unsigned char *) p))
|
||||||
|
{
|
||||||
|
if (*p == '\\') /* p advances a whole character at a time, so a 0x5C second byte is never tested here */
|
||||||
|
*p = '/';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (p = path; *p; p++)
|
||||||
|
{
|
||||||
|
if (*p == '\\') /* plain bytewise scan: every 0x5C byte is taken to be a backslash */
|
||||||
|
*p = '/';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SJIS character length
|
||||||
*
|
*
|
||||||
* This effectively undoes canonicalize_path.
|
* This must match the behavior of
|
||||||
|
* pg_encoding_mblen_bounded(PG_SJIS, s)
|
||||||
|
* In particular, unlike the version of pg_sjis_mblen in src/common/wchar.c,
|
||||||
|
* do not allow caller to accidentally step past end-of-string.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
pg_sjis_mblen(const unsigned char *s)
|
||||||
|
{
|
||||||
|
int len;
|
||||||
|
|
||||||
|
if (*s >= 0xa1 && *s <= 0xdf)
|
||||||
|
len = 1; /* byte in 0xA1..0xDF: treated as a one-byte kana */
|
||||||
|
else if (IS_HIGHBIT_SET(*s) && s[1] != '\0')
|
||||||
|
len = 2; /* other high-bit byte with a non-NUL byte after it: two-byte character (kanji?) */
|
||||||
|
else
|
||||||
|
len = 1; /* ASCII, or a high-bit byte directly before the terminating NUL */
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* WIN32 */
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* make_native_path - on WIN32, change '/' to '\' in the path
|
||||||
|
*
|
||||||
|
* This reverses the '\'-to-'/' transformation of debackslash_path.
|
||||||
|
* We need not worry about encodings here, since '/' does not appear
|
||||||
|
* as a byte of a multibyte character in any supported encoding.
|
||||||
*
|
*
|
||||||
* This is required because WIN32 COPY is an internal CMD.EXE
|
* This is required because WIN32 COPY is an internal CMD.EXE
|
||||||
* command and doesn't process forward slashes in the same way
|
* command and doesn't process forward slashes in the same way
|
||||||
@ -181,13 +249,14 @@ make_native_path(char *filename)
|
|||||||
* on Windows. We need them to use filenames without spaces, for which a
|
* on Windows. We need them to use filenames without spaces, for which a
|
||||||
* short filename is the safest equivalent, eg:
|
* short filename is the safest equivalent, eg:
|
||||||
* C:/Progra~1/
|
* C:/Progra~1/
|
||||||
|
*
|
||||||
|
* Presently, this is only used on paths that we can assume are in a
|
||||||
|
* server-safe encoding, so there's no need for an encoding-aware variant.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
cleanup_path(char *path)
|
cleanup_path(char *path)
|
||||||
{
|
{
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
char *ptr;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GetShortPathName() will fail if the path does not exist, or short names
|
* GetShortPathName() will fail if the path does not exist, or short names
|
||||||
* are disabled on this file system. In both cases, we just return the
|
* are disabled on this file system. In both cases, we just return the
|
||||||
@ -197,11 +266,8 @@ cleanup_path(char *path)
|
|||||||
GetShortPathName(path, path, MAXPGPATH - 1);
|
GetShortPathName(path, path, MAXPGPATH - 1);
|
||||||
|
|
||||||
/* Replace '\' with '/' */
|
/* Replace '\' with '/' */
|
||||||
for (ptr = path; *ptr; ptr++)
|
/* All server-safe encodings are alike here, so just use PG_SQL_ASCII */
|
||||||
{
|
debackslash_path(path, PG_SQL_ASCII);
|
||||||
if (*ptr == '\\')
|
|
||||||
*ptr = '/';
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -252,6 +318,8 @@ typedef enum
|
|||||||
} canonicalize_state;
|
} canonicalize_state;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
* canonicalize_path()
|
||||||
|
*
|
||||||
* Clean up path by:
|
* Clean up path by:
|
||||||
* o make Win32 path use Unix slashes
|
* o make Win32 path use Unix slashes
|
||||||
* o remove trailing quote on Win32
|
* o remove trailing quote on Win32
|
||||||
@ -259,9 +327,20 @@ typedef enum
|
|||||||
* o remove duplicate (adjacent) separators
|
* o remove duplicate (adjacent) separators
|
||||||
* o remove '.' (unless path reduces to only '.')
|
* o remove '.' (unless path reduces to only '.')
|
||||||
* o process '..' ourselves, removing it if possible
|
* o process '..' ourselves, removing it if possible
|
||||||
|
* Modifies path in-place.
|
||||||
|
*
|
||||||
|
* This comes in two variants: encoding-aware and not. The non-aware version
|
||||||
|
* is only safe to use on strings that are in a server-safe encoding.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
canonicalize_path(char *path)
|
canonicalize_path(char *path)
|
||||||
|
{
|
||||||
|
/* All server-safe encodings are alike here, so just use PG_SQL_ASCII */
|
||||||
|
canonicalize_path_enc(path, PG_SQL_ASCII);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
canonicalize_path_enc(char *path, int encoding)
|
||||||
{
|
{
|
||||||
char *p,
|
char *p,
|
||||||
*to_p;
|
*to_p;
|
||||||
@ -277,17 +356,15 @@ canonicalize_path(char *path)
|
|||||||
/*
|
/*
|
||||||
* The Windows command processor will accept suitably quoted paths with
|
* The Windows command processor will accept suitably quoted paths with
|
||||||
* forward slashes, but barfs badly with mixed forward and back slashes.
|
* forward slashes, but barfs badly with mixed forward and back slashes.
|
||||||
|
* Hence, start by converting all back slashes to forward slashes.
|
||||||
*/
|
*/
|
||||||
for (p = path; *p; p++)
|
debackslash_path(path, encoding);
|
||||||
{
|
|
||||||
if (*p == '\\')
|
|
||||||
*p = '/';
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In Win32, if you do: prog.exe "a b" "\c\d\" the system will pass \c\d"
|
* In Win32, if you do: prog.exe "a b" "\c\d\" the system will pass \c\d"
|
||||||
* as argv[2], so trim off trailing quote.
|
* as argv[2], so trim off trailing quote.
|
||||||
*/
|
*/
|
||||||
|
p = path + strlen(path);
|
||||||
if (p > path && *(p - 1) == '"')
|
if (p > path && *(p - 1) == '"')
|
||||||
*(p - 1) = '/';
|
*(p - 1) = '/';
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user