From 93e8ff8701a65a70ea8826bdde7fdbbd9c285477 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 21 Dec 2020 09:37:11 +0900 Subject: [PATCH] Refactor logic to check for ASCII-only characters in string The same logic was present for collation commands, SASLprep and pgcrypto, so this removes some code. Author: Michael Paquier Reviewed-by: Stephen Frost, Heikki Linnakangas Discussion: https://postgr.es/m/X9womIn6rne6Gud2@paquier.xyz --- contrib/pgcrypto/pgp-pgsql.c | 18 +++--------------- src/backend/commands/collationcmds.c | 24 ++++-------------------- src/common/saslprep.c | 19 ++----------------- src/common/string.c | 16 ++++++++++++++++ src/include/common/string.h | 1 + 5 files changed, 26 insertions(+), 52 deletions(-) diff --git a/contrib/pgcrypto/pgp-pgsql.c b/contrib/pgcrypto/pgp-pgsql.c index 62a2f351e43..0536bfb8921 100644 --- a/contrib/pgcrypto/pgp-pgsql.c +++ b/contrib/pgcrypto/pgp-pgsql.c @@ -32,6 +32,7 @@ #include "postgres.h" #include "catalog/pg_type.h" +#include "common/string.h" #include "funcapi.h" #include "lib/stringinfo.h" #include "mb/pg_wchar.h" @@ -92,19 +93,6 @@ convert_to_utf8(text *src) return convert_charset(src, GetDatabaseEncoding(), PG_UTF8); } -static bool -string_is_ascii(const char *str) -{ - const char *p; - - for (p = str; *p; p++) - { - if (IS_HIGHBIT_SET(*p)) - return false; - } - return true; -} - static void clear_and_pfree(text *p) { @@ -814,7 +802,7 @@ parse_key_value_arrays(ArrayType *key_array, ArrayType *val_array, v = TextDatumGetCString(key_datums[i]); - if (!string_is_ascii(v)) + if (!pg_is_ascii(v)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("header key must not contain non-ASCII characters"))); @@ -836,7 +824,7 @@ parse_key_value_arrays(ArrayType *key_array, ArrayType *val_array, v = TextDatumGetCString(val_datums[i]); - if (!string_is_ascii(v)) + if (!pg_is_ascii(v)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("header value must not contain non-ASCII characters"))); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 32b2183598d..d62c8defbab 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -27,6 +27,7 @@ #include "commands/comment.h" #include "commands/dbcommands.h" #include "commands/defrem.h" +#include "common/string.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/acl.h" @@ -286,23 +287,6 @@ pg_collation_actual_version(PG_FUNCTION_ARGS) #define READ_LOCALE_A_OUTPUT #endif -#if defined(READ_LOCALE_A_OUTPUT) || defined(USE_ICU) -/* - * Check a string to see if it is pure ASCII - */ -static bool -is_all_ascii(const char *str) -{ - while (*str) - { - if (IS_HIGHBIT_SET(*str)) - return false; - str++; - } - return true; -} -#endif /* READ_LOCALE_A_OUTPUT || USE_ICU */ - #ifdef READ_LOCALE_A_OUTPUT /* * "Normalize" a libc locale name, stripping off encoding tags such as @@ -396,7 +380,7 @@ get_icu_locale_comment(const char *localename) if (U_FAILURE(status)) return NULL; /* no good reason to raise an error */ - /* Check for non-ASCII comment (can't use is_all_ascii for this) */ + /* Check for non-ASCII comment (can't use pg_is_ascii for this) */ for (i = 0; i < len_uchar; i++) { if (displayname[i] > 127) @@ -477,7 +461,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) * interpret the non-ASCII characters. We can't do much with * those, so we filter them out. */ - if (!is_all_ascii(localebuf)) + if (!pg_is_ascii(localebuf)) { elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); continue; @@ -623,7 +607,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) * Be paranoid about not allowing any non-ASCII strings into * pg_collation */ - if (!is_all_ascii(langtag) || !is_all_ascii(collcollate)) + if (!pg_is_ascii(langtag) || !pg_is_ascii(collcollate)) continue; collid = CollationCreate(psprintf("%s-x-icu", langtag), diff --git a/src/common/saslprep.c b/src/common/saslprep.c index d60452f75f2..48eb1ee9bb9 100644 --- a/src/common/saslprep.c +++ b/src/common/saslprep.c @@ -26,6 +26,7 @@ #endif #include "common/saslprep.h" +#include "common/string.h" #include "common/unicode_norm.h" #include "mb/pg_wchar.h" @@ -47,7 +48,6 @@ static int codepoint_range_cmp(const void *a, const void *b); static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize); static int pg_utf8_string_len(const char *source); -static bool pg_is_ascii_string(const char *p); /* * Stringprep Mapping Tables. @@ -1019,21 +1019,6 @@ pg_utf8_string_len(const char *source) return num_chars; } -/* - * Returns true if the input string is pure ASCII. - */ -static bool -pg_is_ascii_string(const char *p) -{ - while (*p) - { - if (IS_HIGHBIT_SET(*p)) - return false; - p++; - } - return true; -} - /* * pg_saslprep - Normalize a password with SASLprep. @@ -1076,7 +1061,7 @@ pg_saslprep(const char *input, char **output) * Quick check if the input is pure ASCII. An ASCII string requires no * further processing. */ - if (pg_is_ascii_string(input)) + if (pg_is_ascii(input)) { *output = STRDUP(input); if (!(*output)) diff --git a/src/common/string.c b/src/common/string.c index bcbbfb813db..3e76e2c59fb 100644 --- a/src/common/string.c +++ b/src/common/string.c @@ -92,6 +92,22 @@ pg_clean_ascii(char *str) } +/* + * pg_is_ascii -- Check if string is made only of ASCII characters + */ +bool +pg_is_ascii(const char *str) +{ + while (*str) + { + if (IS_HIGHBIT_SET(*str)) + return false; + str++; + } + return true; +} + + /* * pg_strip_crlf -- Remove any trailing newline and carriage return * diff --git a/src/include/common/string.h b/src/include/common/string.h index 6a4baa6f359..655ccc05708 100644 --- a/src/include/common/string.h +++ b/src/include/common/string.h @@ -18,6 +18,7 @@ extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base); extern void pg_clean_ascii(char *str); extern int pg_strip_crlf(char *str); +extern bool pg_is_ascii(const char *str); /* functions in src/common/pg_get_line.c */ extern char *pg_get_line(FILE *stream);