1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-21978 Add %sQ, %sB, %uE & %sT to my_vsnprintf

This is the first part of
MDEV-21978 make my_vsnprintf to use gcc-compatible format extensions,
which adds these alternatives to the MySQL extensions.
There’s also the escapes `%sS` & `%uU` for any hippies needing them.
These suffixes are compatible with the C standard and
therefore also with `printf` tooling such as GCC's format checks.

The old extension formats (e.g., `%M`) are now effectively deprecated,
although they’re left intact for now. For a more sequential
MDEV-21978 process, a separate commit will delete them after we migrate
all `my_vsnprintf` usages to the new preferred syntax. The service’s
major version is bumped nonetheless, since suffix characters are now significant.

[Breaking] This commit may fail
* on places needing the aforementioned escapes
* because of the major version bump

Reviewed-by: Andrew Hutchings <andrew@mariadb.org>
Reviewed-by: Brandon Nesterenko <brandon.nesterenko@mariadb.com>
This commit is contained in:
ParadoxV5
2024-07-25 21:57:06 -06:00
committed by Sergei Golubchik
parent 06851e7f77
commit b668a960cd
4 changed files with 143 additions and 39 deletions

View File

@@ -39,40 +39,41 @@
@post
The syntax of a format string is generally the same:
% <flag> <width> <precision> <length modifier> <format>
% <flag> <width> <.precision> <length modifier> <format> <format extension>
where everything but the format is optional.
Three one-character flags are recognized:
Two one-character flags are recognized:
'0' has the standard zero-padding semantics;
'-' is parsed, but silently ignored;
'`' (backtick) is only supported for strings (%s) and means that the
string will be quoted according to MySQL identifier quoting rules.
Both <width> and <precision> can be specified as numbers or '*'.
If an asterisk is used, an argument of type int is consumed.
<length modifier> can be 'l', 'll', or 'z'.
Supported formats are 's' (null pointer is accepted, printed as
"(null)"), 'b' (extension, see below), 'c', 'd', 'i', 'u', 'x', 'o',
'X', 'p' (works as 0x%x), 'f', 'g', 'M' (extension, see below),
'T' (extension, see below).
Supported formats are 's' (null pointer is accepted, printed as "(null)"),
'c', 'd', 'i', 'u', 'x', 'X', 'o', 'p' (works as 0x%x), 'f', and 'g'.
Standard syntax for positional arguments $n is supported.
Extensions:
Format extensions:
Flag '`' (backtick): see above.
Format 'sQ': quotes the string according to MySQL identifier quoting rules.
Format 'b': binary buffer, prints exactly <precision> bytes from the
argument, without stopping at '\0'.
Format 'sB': binary buffer, prints exactly <precision> bytes from the
argument, without stopping at '\0'. The behavior for unspecified <precision>
is not yet defined.
Format 'M': takes one integer, prints this integer, space, double quote
error message, double quote. In other words
printf("%M", n) === printf("%d \"%s\"", n, strerror(n))
Format 'uE': takes one integer, prints this integer, space, double quote,
error message corresponding to this integer errno, double quote. In other words:
printf("%uE", n) === printf("%d \"%sT\"", n, strerror(n))
Format 'T': takes string and print it like s but if the strints should be
Format 'sT': takes a string and prints it like s, but if the string has to be
truncated, puts "..." at the end.
Format 'sS' and 'uU': print identically to s and u respectively. These two
escapes keep a plain s or u from consuming the following literal text as one of
the above extension suffixes; for example, "Data size: %uUEiB".
*/
#ifdef __cplusplus

View File

@@ -32,7 +32,7 @@
#define VERSION_my_print_error 0x0100
#define VERSION_my_sha1 0x0101
#define VERSION_my_sha2 0x0100
#define VERSION_my_snprintf 0x0100
#define VERSION_my_snprintf 0x0200
#define VERSION_progress_report 0x0100
#define VERSION_thd_alloc 0x0200
#define VERSION_thd_autoinc 0x0100

View File

@@ -234,7 +234,7 @@ static char *process_str_arg(CHARSET_INFO *cs, char *to, const char *end,
size_t length;
/*
The sign of the length argument specific the string should be right
The sign of the length argument specifies whether the string should be right
or left adjusted
*/
if (length_arg < 0)
@@ -477,12 +477,13 @@ start:
if (!*fmt) /* End of format string */
{
char arg_type;
uint i;
print_arr[idx].end= fmt;
/* Obtain parameters from the list */
for (i= 0 ; i < arg_count; i++)
{
switch (args_arr[i].arg_type) {
switch (arg_type= args_arr[i].arg_type) {
case 's':
case 'b':
case 'T':
@@ -501,7 +502,7 @@ start:
case 'p':
if (args_arr[i].have_longlong)
args_arr[i].longlong_arg= va_arg(ap,longlong);
else if (args_arr[i].arg_type == 'd' || args_arr[i].arg_type == 'i')
else if (arg_type == 'd' || arg_type == 'i')
args_arr[i].longlong_arg= va_arg(ap, int);
else
args_arr[i].longlong_arg= va_arg(ap, uint);
@@ -518,14 +519,30 @@ start:
for (i= 0; i <= idx; i++)
{
size_t width= 0, length= 0;
switch (print_arr[i].arg_type) {
switch (arg_type= print_arr[i].arg_type) {
case 's':
case 'T':
case 'b':
{
char *par= args_arr[print_arr[i].arg_idx].str_arg;
my_bool suffix_b= print_arr[i].arg_type == 'b';
my_bool suffix_t= print_arr[i].arg_type == 'T';
my_bool suffix_b= arg_type == 'b', suffix_t= arg_type == 'T';
switch (*print_arr[i].begin) // look at the start of the next chunk
{
case 'Q':
print_arr[i].flags|= ESCAPED_ARG;
++print_arr[i].begin;
break;
case 'B':
suffix_b= TRUE;
++print_arr[i].begin;
break;
case 'T':
suffix_t= TRUE;
// fall-through
case 'S': // escape
++print_arr[i].begin; // roll forward to consume the char
break;
}
width= (print_arr[i].flags & WIDTH_ARG)
? (size_t)args_arr[print_arr[i].width].longlong_arg
: print_arr[i].width;
@@ -554,7 +571,7 @@ start:
double d= args_arr[print_arr[i].arg_idx].double_arg;
width= (print_arr[i].flags & WIDTH_ARG) ?
(uint)args_arr[print_arr[i].width].longlong_arg : print_arr[i].width;
to= process_dbl_arg(to, end, width, d, print_arr[i].arg_type);
to= process_dbl_arg(to, end, width, d, arg_type);
break;
}
case 'd':
@@ -568,7 +585,17 @@ start:
{
/* Integer parameter */
longlong larg= args_arr[print_arr[i].arg_idx].longlong_arg;
my_bool suffix_e= print_arr[i].arg_type == 'M';
my_bool suffix_e= arg_type == 'M';
if (arg_type == 'u')
switch (*print_arr[idx].begin) // look at the start of the next chunk
{
case 'E':
suffix_e= TRUE;
// fall-through
case 'U': // escape
++print_arr[idx].begin; // roll forward to consume the char
break;
}
if (suffix_e)
{
const char *real_end;
@@ -593,7 +620,7 @@ start:
length= (print_arr[i].flags & LENGTH_ARG)
? (size_t)args_arr[print_arr[i].length].longlong_arg
: print_arr[i].length;
to= process_int_arg(to, end, length, larg, print_arr[i].arg_type,
to= process_int_arg(to, end, length, larg, arg_type,
print_arr[i].flags);
}
break;
@@ -654,7 +681,7 @@ start:
size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
const char* fmt, va_list ap)
{
char *start=to, *end=to+n-1;
char *start=to, *end=to+n-1, arg_type;
size_t length, width;
uint print_type, have_longlong;
@@ -717,14 +744,31 @@ size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
fmt= check_longlong(fmt, &have_longlong);
switch(*fmt) {
switch (arg_type= *fmt) {
case 's':
case 'T':
case 'b':
{
/* String parameter */
reg2 char *par= va_arg(ap, char *);
my_bool suffix_b= *fmt == 'b', suffix_t= *fmt == 'T';
my_bool suffix_b= arg_type == 'b', suffix_t= arg_type == 'T';
switch (fmt[1]) // look-ahead (will at most land on the terminating `\0`)
{
case 'Q':
print_type|= ESCAPED_ARG;
++fmt;
break;
case 'B':
suffix_b= TRUE;
++fmt;
break;
case 'T':
suffix_t= TRUE;
// fall-through
case 'S': // escape
++fmt;
break;
}
to= (suffix_b)
? process_bin_arg(to, end, width, par)
: process_str_arg(cs, to, end, (longlong) length, width, par,
@@ -742,7 +786,7 @@ size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
#if __has_feature(memory_sanitizer) /* QQ: MSAN has double trouble? */
__msan_unpoison(&d, sizeof(double));
#endif
to= process_dbl_arg(to, end, width, d, *fmt);
to= process_dbl_arg(to, end, width, d, arg_type);
continue;
}
case 'd':
@@ -756,13 +800,23 @@ size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
{
/* Integer parameter */
longlong larg;
my_bool suffix_e= *fmt == 'M';
my_bool suffix_e= arg_type == 'M';
if (have_longlong)
larg = va_arg(ap,longlong);
else if (*fmt == 'd' || *fmt == 'i' || suffix_e)
larg = va_arg(ap, int);
larg= va_arg(ap,longlong);
else if (arg_type == 'd' || arg_type == 'i' || suffix_e)
larg= va_arg(ap, int);
else
larg= va_arg(ap, uint);
if (arg_type == 'u')
switch (fmt[1]) // look-ahead
{
case 'E':
suffix_e= TRUE;
// fall-through
case 'U': // escape
++fmt;
break;
}
if (suffix_e)
{
const char *real_end= MY_MIN(to + width, end);
@@ -780,7 +834,7 @@ size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
}
}
else
to= process_int_arg(to, end, length, larg, *fmt, print_type);
to= process_int_arg(to, end, length, larg, arg_type, print_type);
continue;
}
case 'c': /* Character parameter */

View File

@@ -61,10 +61,12 @@ static void test_many(const char **res, const char *fmt, ...)
int main(void)
{
plan(48);
plan(62);
test1("Constant string",
"Constant string");
test_w_len("small buf",
10, "small buffer");
test1("Format specifier s works",
"Format specifier s %s", "works");
@@ -107,6 +109,49 @@ int main(void)
test1("Precision works for strings <ab...>",
"Precision works for strings <%.5T>", "abcdef!");
// MDEV-21978, tests based on those for their previous incarnations
test1("MariaDB extension escape sS works",
"MariaDB extension escape sS %sS", "works");
test1("MariaDB extension escape uU 2",
"MariaDB extension escape uU %uU", 2);
test1("MariaDB extension sQ works: `abcd` `op``q`",
"MariaDB extension sQ works: %sQ %.4sQ", "abcd", "op`qrst");
{
// Copied from ::test1
const char *res= "12034";
size_t len= my_snprintf(buf, sizeof(buf)-1, "\1%.3sB\4", "\2\0\3");
// Shift the chars so they (namely `\0`) are printable
for (unsigned int i= 0; i < len; ++i)
buf[i]+= '0';
ok(strlen(res) == len && strcmp(buf, res) == 0,
"\"MariaDB extension sB works: %s\"", buf);
}
{
// Test that %uE works
const char *results[]=
{
"MariaDB extension uE works: 1 \"Operation not permitted\"", // Linux
"MariaDB extension uE works: 1 \"Not owner\"", // Solaris
NullS
};
test_many(results, "MariaDB extension uE works: %uE", 1);
}
test1("uE with 0 errno: 0 \"Internal error/check (Not system error)\"",
"uE with 0 errno: %uE", 0);
test1("uE with width: <0 \"Internal error...>",
"uE with width: <%.20uE>", 0);
test_w_len("uE with small buf: 0 \"..",
25, "uE with small buf: %uE", 0);
test1("MariaDB extension sT works: <abcd> <op...>",
"MariaDB extension sT %sT: <%.5sT> <%.5sT>", "works", "abcd", "opqrst");
test1("sT with small width: <.> <...>",
"sT with small width: <%.1sT> <%.3sT>", "abcd", "opqrst");
test1("Flag '`' (backtick) works: `abcd` `op``q` (mysql extension)",
"Flag '`' (backtick) works: %`s %`.4s (mysql extension)",
"abcd", "op`qrst");
@@ -146,13 +191,17 @@ int main(void)
test1("Positional arguments work: on the dark side they are",
"Positional arguments work: %3$s %1$s %2$s",
"they", "are", "on the dark side");
test1("Positional arguments work with sS: on the dark side they are",
"Positional arguments work with sS: %3$sS %1$sS %2$sS",
"they", "are", "on the dark side");
test1("Asterisk '*' as a width works: < 4>",
"Asterisk '*' as a width works: <%*d>", 5, 4);
test1("Asterisk '*' as a precision works: <qwerty>",
"Asterisk '*' as a precision works: <%.*s>", 6, "qwertyuiop");
test1("Asterisk '*' as a precision works: <qwe...>",
"Asterisk '*' as a precision works: <%.*sT>", 6, "qwertyuiop");
test1("Asterisk '*' as a precision works: <qwe...>",
"Asterisk '*' as a precision works: <%.*T>", 6, "qwertyuiop");
@@ -161,7 +210,8 @@ int main(void)
test1("Positional arguments for a precision: <qwerty>",
"Positional arguments for a precision: <%1$.*2$s>", "qwertyuiop", 6);
test1("Positional arguments for a precision: <qwe...>",
"Positional arguments for a precision: <%1$.*2$sT>", "qwertyuiop", 6);
test1("Positional arguments for a precision: <qwe...>",
"Positional arguments for a precision: <%1$.*2$T>", "qwertyuiop", 6);
@@ -172,7 +222,6 @@ int main(void)
"Positional arguments octal: <%1$o>", 07777);
/* Can't use int arguments, as they may be different size from pointers */
test1("Padding and %p <0x12> <0x034> <0x0000ab> < 0xcd>",
"Padding and %%p <%04p> <%05p> <%08p> <%8p>",
(void*) 0x12, (void*) 0x34, (void*) 0xab, (void*) 0xcd);