1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-21978 Add %sQ, %sB, %uE & %sT to my_vsnprintf

This is the first part of
MDEV-21978 make my_vsnprintf to use gcc-compatible format extensions,
which adds these alternatives to the MySQL extensions.
There are also the escapes `%sS` & `%uU` for any hippies needing them.
These suffixes are compatible with the C standard, and therefore
also with `printf` tooling such as GCC’s format checks.

The old extension formats (e.g., `%M`) are now effectively deprecated,
although they’re left intact for now. To keep the MDEV-21978 work
sequential, a separate commit will delete them after we migrate
all `my_vsnprintf` usages to the new preferred syntax. The service’s
major version is bumped nonetheless, because suffixes now carry new significance.

[Breaking] This commit may cause failures
* in places whose format strings need the aforementioned escapes
* because of the major version bump

Reviewed-by: Andrew Hutchings <andrew@mariadb.org>
Reviewed-by: Brandon Nesterenko <brandon.nesterenko@mariadb.com>
This commit is contained in:
ParadoxV5
2024-07-25 21:57:06 -06:00
committed by Sergei Golubchik
parent 06851e7f77
commit b668a960cd
4 changed files with 143 additions and 39 deletions

View File

@@ -39,40 +39,41 @@
@post @post
The syntax of a format string is generally the same: The syntax of a format string is generally the same:
% <flag> <width> <precision> <length modifier> <format> % <flag> <width> <.precision> <length modifier> <format> <format extension>
where everything but the format is optional. where everything but the format is optional.
Three one-character flags are recognized: Two one-character flags are recognized:
'0' has the standard zero-padding semantics; '0' has the standard zero-padding semantics;
'-' is parsed, but silently ignored; '-' is parsed, but silently ignored;
'`' (backtick) is only supported for strings (%s) and means that the
string will be quoted according to MySQL identifier quoting rules.
Both <width> and <precision> can be specified as numbers or '*'. Both <width> and <precision> can be specified as numbers or '*'.
If an asterisk is used, an argument of type int is consumed. If an asterisk is used, an argument of type int is consumed.
<length modifier> can be 'l', 'll', or 'z'. <length modifier> can be 'l', 'll', or 'z'.
Supported formats are 's' (null pointer is accepted, printed as Supported formats are 's' (null pointer is accepted, printed as "(null)"),
"(null)"), 'b' (extension, see below), 'c', 'd', 'i', 'u', 'x', 'o', 'c', 'd', 'i', 'u', 'x', 'X', 'o', 'p' (works as 0x%x), 'f', and 'g'.
'X', 'p' (works as 0x%x), 'f', 'g', 'M' (extension, see below),
'T' (extension, see below).
Standard syntax for positional arguments $n is supported. Standard syntax for positional arguments $n is supported.
Extensions: Format extensions:
Flag '`' (backtick): see above. Format 'sQ': quotes the string according to MySQL identifier quoting rules.
Format 'b': binary buffer, prints exactly <precision> bytes from the Format 'sB': binary buffer, prints exactly <precision> bytes from the
argument, without stopping at '\0'. argument, without stopping at '\0'. The behavior for unspecified <precision>
is not yet defined.
Format 'M': takes one integer, prints this integer, space, double quote Format 'uE': takes one integer, prints this integer, space, double quote,
error message, double quote. In other words error message corresponding to this integer errno, double quote. In other words:
printf("%M", n) === printf("%d \"%s\"", n, strerror(n)) printf("%uE", n) === printf("%d \"%sT\"", n, strerror(n))
Format 'T': takes string and print it like s but if the strints should be Format 'sT': takes string and print it like s but if the strings should be
truncated puts "..." at the end. truncated puts "..." at the end.
Format 'sS' and 'uU': prints synonymously as s and u respectively. These two
escape simple s and u from consuming the following plain text as one of the
above extension suffixes; for example, "Data size: %uUEiB".
*/ */
#ifdef __cplusplus #ifdef __cplusplus

View File

@@ -32,7 +32,7 @@
#define VERSION_my_print_error 0x0100 #define VERSION_my_print_error 0x0100
#define VERSION_my_sha1 0x0101 #define VERSION_my_sha1 0x0101
#define VERSION_my_sha2 0x0100 #define VERSION_my_sha2 0x0100
#define VERSION_my_snprintf 0x0100 #define VERSION_my_snprintf 0x0200
#define VERSION_progress_report 0x0100 #define VERSION_progress_report 0x0100
#define VERSION_thd_alloc 0x0200 #define VERSION_thd_alloc 0x0200
#define VERSION_thd_autoinc 0x0100 #define VERSION_thd_autoinc 0x0100

View File

@@ -234,7 +234,7 @@ static char *process_str_arg(CHARSET_INFO *cs, char *to, const char *end,
size_t length; size_t length;
/* /*
The sign of the length argument specific the string should be right The sign of the length argument specify whether the string should be right
or left adjusted or left adjusted
*/ */
if (length_arg < 0) if (length_arg < 0)
@@ -477,12 +477,13 @@ start:
if (!*fmt) /* End of format string */ if (!*fmt) /* End of format string */
{ {
char arg_type;
uint i; uint i;
print_arr[idx].end= fmt; print_arr[idx].end= fmt;
/* Obtain parameters from the list */ /* Obtain parameters from the list */
for (i= 0 ; i < arg_count; i++) for (i= 0 ; i < arg_count; i++)
{ {
switch (args_arr[i].arg_type) { switch (arg_type= args_arr[i].arg_type) {
case 's': case 's':
case 'b': case 'b':
case 'T': case 'T':
@@ -501,7 +502,7 @@ start:
case 'p': case 'p':
if (args_arr[i].have_longlong) if (args_arr[i].have_longlong)
args_arr[i].longlong_arg= va_arg(ap,longlong); args_arr[i].longlong_arg= va_arg(ap,longlong);
else if (args_arr[i].arg_type == 'd' || args_arr[i].arg_type == 'i') else if (arg_type == 'd' || arg_type == 'i')
args_arr[i].longlong_arg= va_arg(ap, int); args_arr[i].longlong_arg= va_arg(ap, int);
else else
args_arr[i].longlong_arg= va_arg(ap, uint); args_arr[i].longlong_arg= va_arg(ap, uint);
@@ -518,14 +519,30 @@ start:
for (i= 0; i <= idx; i++) for (i= 0; i <= idx; i++)
{ {
size_t width= 0, length= 0; size_t width= 0, length= 0;
switch (print_arr[i].arg_type) { switch (arg_type= print_arr[i].arg_type) {
case 's': case 's':
case 'T': case 'T':
case 'b': case 'b':
{ {
char *par= args_arr[print_arr[i].arg_idx].str_arg; char *par= args_arr[print_arr[i].arg_idx].str_arg;
my_bool suffix_b= print_arr[i].arg_type == 'b'; my_bool suffix_b= arg_type == 'b', suffix_t= arg_type == 'T';
my_bool suffix_t= print_arr[i].arg_type == 'T'; switch (*print_arr[i].begin) // look at the start of the next chunk
{
case 'Q':
print_arr[i].flags|= ESCAPED_ARG;
++print_arr[i].begin;
break;
case 'B':
suffix_b= TRUE;
++print_arr[i].begin;
break;
case 'T':
suffix_t= TRUE;
// fall-through
case 'S': // escape
++print_arr[i].begin; // roll forward to consume the char
break;
}
width= (print_arr[i].flags & WIDTH_ARG) width= (print_arr[i].flags & WIDTH_ARG)
? (size_t)args_arr[print_arr[i].width].longlong_arg ? (size_t)args_arr[print_arr[i].width].longlong_arg
: print_arr[i].width; : print_arr[i].width;
@@ -554,7 +571,7 @@ start:
double d= args_arr[print_arr[i].arg_idx].double_arg; double d= args_arr[print_arr[i].arg_idx].double_arg;
width= (print_arr[i].flags & WIDTH_ARG) ? width= (print_arr[i].flags & WIDTH_ARG) ?
(uint)args_arr[print_arr[i].width].longlong_arg : print_arr[i].width; (uint)args_arr[print_arr[i].width].longlong_arg : print_arr[i].width;
to= process_dbl_arg(to, end, width, d, print_arr[i].arg_type); to= process_dbl_arg(to, end, width, d, arg_type);
break; break;
} }
case 'd': case 'd':
@@ -568,7 +585,17 @@ start:
{ {
/* Integer parameter */ /* Integer parameter */
longlong larg= args_arr[print_arr[i].arg_idx].longlong_arg; longlong larg= args_arr[print_arr[i].arg_idx].longlong_arg;
my_bool suffix_e= print_arr[i].arg_type == 'M'; my_bool suffix_e= arg_type == 'M';
/*
  A 'u' conversion may be followed by a one-letter suffix located at the
  start of the text chunk that follows THIS specifier:
    'E' - print the integer followed by its quoted error message (%uE);
    'U' - escape: plain %u, the letter is consumed and not printed (%uU).
  The chunk belonging to the specifier handled in this iteration is
  print_arr[i]; print_arr[idx] is the last chunk of the format string, so
  using it would miss the suffix of every specifier but the last one and
  could eat a character from the final chunk instead.
*/
if (arg_type == 'u')
  switch (*print_arr[i].begin) // look at the start of the next chunk
  {
  case 'E':
    suffix_e= TRUE;
    // fall-through
  case 'U': // escape
    ++print_arr[i].begin; // roll forward to consume the char
    break;
  }
if (suffix_e) if (suffix_e)
{ {
const char *real_end; const char *real_end;
@@ -593,7 +620,7 @@ start:
length= (print_arr[i].flags & LENGTH_ARG) length= (print_arr[i].flags & LENGTH_ARG)
? (size_t)args_arr[print_arr[i].length].longlong_arg ? (size_t)args_arr[print_arr[i].length].longlong_arg
: print_arr[i].length; : print_arr[i].length;
to= process_int_arg(to, end, length, larg, print_arr[i].arg_type, to= process_int_arg(to, end, length, larg, arg_type,
print_arr[i].flags); print_arr[i].flags);
} }
break; break;
@@ -654,7 +681,7 @@ start:
size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n, size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
const char* fmt, va_list ap) const char* fmt, va_list ap)
{ {
char *start=to, *end=to+n-1; char *start=to, *end=to+n-1, arg_type;
size_t length, width; size_t length, width;
uint print_type, have_longlong; uint print_type, have_longlong;
@@ -717,14 +744,31 @@ size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
fmt= check_longlong(fmt, &have_longlong); fmt= check_longlong(fmt, &have_longlong);
switch(*fmt) { switch (arg_type= *fmt) {
case 's': case 's':
case 'T': case 'T':
case 'b': case 'b':
{ {
/* String parameter */ /* String parameter */
reg2 char *par= va_arg(ap, char *); reg2 char *par= va_arg(ap, char *);
my_bool suffix_b= *fmt == 'b', suffix_t= *fmt == 'T'; my_bool suffix_b= arg_type == 'b', suffix_t= arg_type == 'T';
switch (fmt[1]) // look-ahead (will at most land on the terminating `\0`)
{
case 'Q':
print_type|= ESCAPED_ARG;
++fmt;
break;
case 'B':
suffix_b= TRUE;
++fmt;
break;
case 'T':
suffix_t= TRUE;
// fall-through
case 'S': // escape
++fmt;
break;
}
to= (suffix_b) to= (suffix_b)
? process_bin_arg(to, end, width, par) ? process_bin_arg(to, end, width, par)
: process_str_arg(cs, to, end, (longlong) length, width, par, : process_str_arg(cs, to, end, (longlong) length, width, par,
@@ -742,7 +786,7 @@ size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
#if __has_feature(memory_sanitizer) /* QQ: MSAN has double trouble? */ #if __has_feature(memory_sanitizer) /* QQ: MSAN has double trouble? */
__msan_unpoison(&d, sizeof(double)); __msan_unpoison(&d, sizeof(double));
#endif #endif
to= process_dbl_arg(to, end, width, d, *fmt); to= process_dbl_arg(to, end, width, d, arg_type);
continue; continue;
} }
case 'd': case 'd':
@@ -756,13 +800,23 @@ size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
{ {
/* Integer parameter */ /* Integer parameter */
longlong larg; longlong larg;
my_bool suffix_e= *fmt == 'M'; my_bool suffix_e= arg_type == 'M';
if (have_longlong) if (have_longlong)
larg = va_arg(ap,longlong); larg= va_arg(ap,longlong);
else if (*fmt == 'd' || *fmt == 'i' || suffix_e) else if (arg_type == 'd' || arg_type == 'i' || suffix_e)
larg = va_arg(ap, int); larg= va_arg(ap, int);
else else
larg= va_arg(ap, uint); larg= va_arg(ap, uint);
if (arg_type == 'u')
switch (fmt[1]) // look-ahead
{
case 'E':
suffix_e= TRUE;
// fall-through
case 'U': // escape
++fmt;
break;
}
if (suffix_e) if (suffix_e)
{ {
const char *real_end= MY_MIN(to + width, end); const char *real_end= MY_MIN(to + width, end);
@@ -780,7 +834,7 @@ size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
} }
} }
else else
to= process_int_arg(to, end, length, larg, *fmt, print_type); to= process_int_arg(to, end, length, larg, arg_type, print_type);
continue; continue;
} }
case 'c': /* Character parameter */ case 'c': /* Character parameter */

View File

@@ -61,10 +61,12 @@ static void test_many(const char **res, const char *fmt, ...)
int main(void) int main(void)
{ {
plan(48); plan(62);
test1("Constant string", test1("Constant string",
"Constant string"); "Constant string");
test_w_len("small buf",
10, "small buffer");
test1("Format specifier s works", test1("Format specifier s works",
"Format specifier s %s", "works"); "Format specifier s %s", "works");
@@ -107,6 +109,49 @@ int main(void)
test1("Precision works for strings <ab...>", test1("Precision works for strings <ab...>",
"Precision works for strings <%.5T>", "abcdef!"); "Precision works for strings <%.5T>", "abcdef!");
// MDEV-21978, tests based on those for their previous incarnations
test1("MariaDB extension escape sS works",
"MariaDB extension escape sS %sS", "works");
test1("MariaDB extension escape uU 2",
"MariaDB extension escape uU %uU", 2);
test1("MariaDB extension sQ works: `abcd` `op``q`",
"MariaDB extension sQ works: %sQ %.4sQ", "abcd", "op`qrst");
{
// Copied from ::test1
const char *res= "12034";
size_t len= my_snprintf(buf, sizeof(buf)-1, "\1%.3sB\4", "\2\0\3");
// Shift the chars so they (namely `\0`) are printable
for (unsigned int i= 0; i < len; ++i)
buf[i]+= '0';
ok(strlen(res) == len && strcmp(buf, res) == 0,
"\"MariaDB extension sB works: %s\"", buf);
}
{
// Test that %uE works
const char *results[]=
{
"MariaDB extension uE works: 1 \"Operation not permitted\"", // Linux
"MariaDB extension uE works: 1 \"Not owner\"", // Solaris
NullS
};
test_many(results, "MariaDB extension uE works: %uE", 1);
}
test1("uE with 0 errno: 0 \"Internal error/check (Not system error)\"",
"uE with 0 errno: %uE", 0);
test1("uE with width: <0 \"Internal error...>",
"uE with width: <%.20uE>", 0);
test_w_len("uE with small buf: 0 \"..",
25, "uE with small buf: %uE", 0);
test1("MariaDB extension sT works: <abcd> <op...>",
"MariaDB extension sT %sT: <%.5sT> <%.5sT>", "works", "abcd", "opqrst");
test1("sT with small width: <.> <...>",
"sT with small width: <%.1sT> <%.3sT>", "abcd", "opqrst");
test1("Flag '`' (backtick) works: `abcd` `op``q` (mysql extension)", test1("Flag '`' (backtick) works: `abcd` `op``q` (mysql extension)",
"Flag '`' (backtick) works: %`s %`.4s (mysql extension)", "Flag '`' (backtick) works: %`s %`.4s (mysql extension)",
"abcd", "op`qrst"); "abcd", "op`qrst");
@@ -146,13 +191,17 @@ int main(void)
test1("Positional arguments work: on the dark side they are", test1("Positional arguments work: on the dark side they are",
"Positional arguments work: %3$s %1$s %2$s", "Positional arguments work: %3$s %1$s %2$s",
"they", "are", "on the dark side"); "they", "are", "on the dark side");
test1("Positional arguments work with sS: on the dark side they are",
"Positional arguments work with sS: %3$sS %1$sS %2$sS",
"they", "are", "on the dark side");
test1("Asterisk '*' as a width works: < 4>", test1("Asterisk '*' as a width works: < 4>",
"Asterisk '*' as a width works: <%*d>", 5, 4); "Asterisk '*' as a width works: <%*d>", 5, 4);
test1("Asterisk '*' as a precision works: <qwerty>", test1("Asterisk '*' as a precision works: <qwerty>",
"Asterisk '*' as a precision works: <%.*s>", 6, "qwertyuiop"); "Asterisk '*' as a precision works: <%.*s>", 6, "qwertyuiop");
test1("Asterisk '*' as a precision works: <qwe...>",
"Asterisk '*' as a precision works: <%.*sT>", 6, "qwertyuiop");
test1("Asterisk '*' as a precision works: <qwe...>", test1("Asterisk '*' as a precision works: <qwe...>",
"Asterisk '*' as a precision works: <%.*T>", 6, "qwertyuiop"); "Asterisk '*' as a precision works: <%.*T>", 6, "qwertyuiop");
@@ -161,7 +210,8 @@ int main(void)
test1("Positional arguments for a precision: <qwerty>", test1("Positional arguments for a precision: <qwerty>",
"Positional arguments for a precision: <%1$.*2$s>", "qwertyuiop", 6); "Positional arguments for a precision: <%1$.*2$s>", "qwertyuiop", 6);
test1("Positional arguments for a precision: <qwe...>",
"Positional arguments for a precision: <%1$.*2$sT>", "qwertyuiop", 6);
test1("Positional arguments for a precision: <qwe...>", test1("Positional arguments for a precision: <qwe...>",
"Positional arguments for a precision: <%1$.*2$T>", "qwertyuiop", 6); "Positional arguments for a precision: <%1$.*2$T>", "qwertyuiop", 6);
@@ -172,7 +222,6 @@ int main(void)
"Positional arguments octal: <%1$o>", 07777); "Positional arguments octal: <%1$o>", 07777);
/* Can't use int arguments, as they may be different size from pointers */ /* Can't use int arguments, as they may be different size from pointers */
test1("Padding and %p <0x12> <0x034> <0x0000ab> < 0xcd>", test1("Padding and %p <0x12> <0x034> <0x0000ab> < 0xcd>",
"Padding and %%p <%04p> <%05p> <%08p> <%8p>", "Padding and %%p <%04p> <%05p> <%08p> <%8p>",
(void*) 0x12, (void*) 0x34, (void*) 0xab, (void*) 0xcd); (void*) 0x12, (void*) 0x34, (void*) 0xab, (void*) 0xcd);