1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

fix(ubsan): MCOL-5844 - iron out UBSAN reports

The most important fix here addresses a possible buffer overrun in the
DATE_FORMAT() function. A "%W" format specifier, repeated enough times,
would overflow the 256-byte result buffer. We now build the result with
an ostringstream, so the output can no longer overflow a fixed buffer.

Changes to the date/time projection functions led me to fix differences
between our behavior and the server's. The new, better behavior is
reflected in the updated test results.

Also, the TRUNCATE() and ROUND() functions used incorrect logic when
computing the decimal "shift."
This commit is contained in:
Serguey Zefirov
2024-11-21 14:52:01 +03:00
committed by Leonid Fedorov
parent 3bcc2e2fda
commit 39a976c39a
26 changed files with 207 additions and 133 deletions

View File

@ -38,20 +38,21 @@ namespace funcexp
{
namespace helpers
{
const string IDB_date_format(const DateTime& dt, const string& format)
const string IDB_date_format(const DateTime& dt, const string& format, bool& isNull)
{
// assume 256 is enough. assume not allowing incomplete date
// XXX: imagine %W gets repeated 60 times and day of week is "wednesday"..
std::ostringstream oss;
char buf[256];
char* ptr = buf;
uint32_t weekday = 0;
uint32_t dayval = 0;
uint32_t weekval = 0;
uint32_t weekyear = 0;
for (uint32_t i = 0; i < format.length(); i++)
for (uint32_t i = 0; !isNull && i < format.length(); i++)
{
if (format[i] != '%')
*ptr++ = format[i];
oss << format[i];
else
{
i++;
@ -59,176 +60,170 @@ const string IDB_date_format(const DateTime& dt, const string& format)
switch (format[i])
{
case 'M':
sprintf(ptr, "%s", helpers::monthFullNames[dt.month].c_str());
ptr += helpers::monthFullNames[dt.month].length();
oss << helpers::monthFullNames[dt.month];
break;
case 'b':
sprintf(ptr, "%s", helpers::monthAbNames[dt.month].c_str());
ptr += helpers::monthAbNames[dt.month].length();
oss << helpers::monthAbNames[dt.month].c_str();
break;
case 'W':
weekday = helpers::calc_mysql_weekday(dt.year, dt.month, dt.day, false);
sprintf(ptr, "%s", helpers::weekdayFullNames[weekday].c_str());
ptr += helpers::weekdayFullNames[weekday].length();
weekday = helpers::calc_mysql_weekday(dt.year, dt.month, dt.day, false, isNull);
oss << helpers::weekdayFullNames[weekday];
break;
case 'w':
weekday = helpers::calc_mysql_weekday(dt.year, dt.month, dt.day, true);
sprintf(ptr, "%01d", weekday);
ptr += 1;
weekday = helpers::calc_mysql_weekday(dt.year, dt.month, dt.day, true, isNull);
sprintf(buf, "%01d", weekday);
oss << buf;
break;
case 'a':
weekday = helpers::calc_mysql_weekday(dt.year, dt.month, dt.day, false);
sprintf(ptr, "%s", helpers::weekdayAbNames[weekday].c_str());
ptr += helpers::weekdayAbNames[weekday].length();
weekday = helpers::calc_mysql_weekday(dt.year, dt.month, dt.day, false, isNull);
oss << helpers::weekdayAbNames[weekday];
break;
case 'D':
sprintf(ptr, "%s", helpers::dayOfMonth[dt.day].c_str());
ptr += helpers::dayOfMonth[dt.day].length();
oss << helpers::dayOfMonth[dt.day].c_str();
break;
case 'Y':
sprintf(ptr, "%04d", dt.year);
ptr += 4;
sprintf(buf, "%04d", dt.year);
oss << buf;
break;
case 'y':
sprintf(ptr, "%02d", dt.year % 100);
ptr += 2;
sprintf(buf, "%02d", dt.year % 100);
oss << buf;
break;
case 'm':
sprintf(ptr, "%02d", dt.month);
ptr += 2;
sprintf(buf, "%02d", dt.month);
oss << buf;
break;
case 'c':
sprintf(ptr, "%d", dt.month);
ptr = ptr + (dt.month >= 10 ? 2 : 1);
sprintf(buf, "%d", dt.month);
oss << buf;
break;
case 'd':
sprintf(ptr, "%02d", dt.day);
ptr += 2;
sprintf(buf, "%02d", dt.day);
oss << buf;
break;
case 'e':
sprintf(ptr, "%d", dt.day);
ptr = ptr + (dt.day >= 10 ? 2 : 1);
sprintf(buf, "%d", dt.day);
oss << buf;
break;
case 'f':
sprintf(ptr, "%06d", dt.msecond);
ptr += 6;
sprintf(buf, "%06d", dt.msecond);
oss << buf;
break;
case 'H':
sprintf(ptr, "%02d", dt.hour);
ptr += 2;
sprintf(buf, "%02d", dt.hour);
oss << buf;
break;
case 'h':
case 'I':
sprintf(ptr, "%02d", (dt.hour % 24 + 11) % 12 + 1);
ptr += 2;
sprintf(buf, "%02d", (dt.hour % 24 + 11) % 12 + 1);
oss << buf;
break;
case 'i': /* minutes */
sprintf(ptr, "%02d", dt.minute);
ptr += 2;
sprintf(buf, "%02d", dt.minute);
oss << buf;
break;
case 'j':
dayval = helpers::calc_mysql_daynr(dt.year, dt.month, dt.day) -
helpers::calc_mysql_daynr(dt.year, 1, 1) + 1;
sprintf(ptr, "%03d", dayval);
ptr += 3;
sprintf(buf, "%03d", dayval);
oss << buf;
break;
case 'k':
sprintf(ptr, "%d", dt.hour);
ptr += (dt.hour >= 10 ? 2 : 1);
sprintf(buf, "%d", dt.hour);
oss << buf;
break;
case 'l':
sprintf(ptr, "%d", (dt.hour % 24 + 11) % 12 + 1);
ptr += ((dt.hour % 24 + 11) % 12 + 1 >= 10 ? 2 : 1);
sprintf(buf, "%d", (dt.hour % 24 + 11) % 12 + 1);
oss << buf;
break;
case 'p':
sprintf(ptr, "%s", (dt.hour % 24 < 12 ? "AM" : "PM"));
ptr += 2;
sprintf(buf, "%s", (dt.hour % 24 < 12 ? "AM" : "PM"));
oss << buf;
break;
case 'r':
sprintf(ptr, (dt.hour % 24 < 12 ? "%02d:%02d:%02d AM" : "%02d:%02d:%02d PM"),
sprintf(buf, (dt.hour % 24 < 12 ? "%02d:%02d:%02d AM" : "%02d:%02d:%02d PM"),
(dt.hour + 11) % 12 + 1, dt.minute, dt.second);
ptr += 11;
oss << buf;
break;
case 'S':
case 's':
sprintf(ptr, "%02d", dt.second);
ptr += 2;
sprintf(buf, "%02d", dt.second);
oss << buf;
break;
case 'T':
sprintf(ptr, "%02d:%02d:%02d", dt.hour, dt.minute, dt.second);
ptr += 8;
sprintf(buf, "%02d:%02d:%02d", dt.hour, dt.minute, dt.second);
oss << buf;
break;
case 'U':
weekval = helpers::calc_mysql_week(dt.year, dt.month, dt.day, 0);
sprintf(ptr, "%02d", weekval);
ptr += 2;
sprintf(buf, "%02d", weekval);
oss << buf;
break;
case 'V':
weekval = helpers::calc_mysql_week(dt.year, dt.month, dt.day, helpers::WEEK_NO_ZERO);
sprintf(ptr, "%02d", weekval);
ptr += 2;
sprintf(buf, "%02d", weekval);
oss << buf;
break;
case 'u':
weekval = helpers::calc_mysql_week(dt.year, dt.month, dt.day,
helpers::WEEK_MONDAY_FIRST | helpers::WEEK_GT_THREE_DAYS);
sprintf(ptr, "%02d", weekval);
ptr += 2;
sprintf(buf, "%02d", weekval);
oss << buf;
break;
case 'v':
weekval = helpers::calc_mysql_week(
dt.year, dt.month, dt.day,
helpers::WEEK_NO_ZERO | helpers::WEEK_MONDAY_FIRST | helpers::WEEK_GT_THREE_DAYS);
sprintf(ptr, "%02d", weekval);
ptr += 2;
sprintf(buf, "%02d", weekval);
oss << buf;
break;
case 'x':
helpers::calc_mysql_week(
dt.year, dt.month, dt.day,
helpers::WEEK_NO_ZERO | helpers::WEEK_MONDAY_FIRST | helpers::WEEK_GT_THREE_DAYS, &weekyear);
sprintf(ptr, "%04d", weekyear);
ptr += 4;
sprintf(buf, "%04d", weekyear);
oss << buf;
break;
case 'X':
helpers::calc_mysql_week(dt.year, dt.month, dt.day, helpers::WEEK_NO_ZERO, &weekyear);
sprintf(ptr, "%04d", weekyear);
ptr += 4;
sprintf(buf, "%04d", weekyear);
oss << buf;
break;
default: *ptr++ = format[i];
default: oss << format[i];
}
}
}
*ptr = 0;
return string(buf);
return oss.str();
}
} // namespace helpers
@ -394,7 +389,7 @@ string Func_date_format::getStrVal(rowgroup::Row& row, FunctionParm& parm, bool&
const string& format = parm[1]->data()->getStrVal(row, isNull).safeString("");
return helpers::IDB_date_format(dt, format);
return helpers::IDB_date_format(dt, format, isNull);
}
int32_t Func_date_format::getDateIntVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,

View File

@ -163,7 +163,7 @@ int64_t Func_dayname::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool& is
default: isNull = true; return -1;
}
dayofweek = helpers::calc_mysql_weekday(year, month, day, false);
dayofweek = helpers::calc_mysql_weekday(year, month, day, false, isNull);
return dayofweek;
}

View File

@ -170,7 +170,7 @@ int64_t Func_dayofweek::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool&
default: isNull = true; return -1;
}
return helpers::calc_mysql_weekday(year, month, day, true) + 1;
return helpers::calc_mysql_weekday(year, month, day, true, isNull) + 1;
}
} // namespace funcexp

View File

@ -156,6 +156,11 @@ int64_t Func_dayofyear::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool&
default: isNull = true; return -1;
}
if (year == 0 && month == 0 && day == 0)
{
isNull = true;
return 0;
}
return helpers::calc_mysql_daynr(year, month, day) - helpers::calc_mysql_daynr(year, 1, 1) + 1;
}

View File

@ -109,11 +109,11 @@ long long dateGet(uint64_t time, IntervalColumn::interval_type unit, bool dateTy
long long timeGet(uint64_t time, IntervalColumn::interval_type unit)
{
int32_t hour = 0, min = 0, sec = 0, msec = 0, day = 0;
int64_t hour = 0, min = 0, sec = 0, msec = 0, day = 0;
min = (int32_t)((time >> 32) & 0xff);
sec = (int32_t)((time >> 24) & 0xff);
msec = (int32_t)((time & 0xfffff));
min = (int64_t)((time >> 32) & 0xff);
sec = (int64_t)((time >> 24) & 0xff);
msec = (int64_t)((time & 0xfffff));
// If negative, mask so it doesn't turn positive
int64_t mask = 0;

View File

@ -125,7 +125,7 @@ string Func_from_unixtime::getStrVal(rowgroup::Row& row, FunctionParm& parm, boo
if (parm.size() == 2)
{
const auto& format = parm[1]->data()->getStrVal(row, isNull);
return helpers::IDB_date_format(dt, format.safeString(""));
return helpers::IDB_date_format(dt, format.safeString(""), isNull);
}
char buf[256] = {0};

View File

@ -389,19 +389,36 @@ IDB_Decimal Func_round::getDecimalVal(Row& row, FunctionParm& parm, bool& isNull
//@Bug 3101 - GCC 4.5.1 optimizes too aggressively here. Mark as volatile.
volatile int128_t p = 1;
if (isNull)
break;
if (!isNull && parm.size() > 1) // round(X, D)
{
int128_t nvp = p;
d = parm[1]->data()->getIntVal(row, isNull);
if (!isNull)
helpers::decimalPlaceDec(d, nvp, decimal.scale);
if (isNull)
break;
int64_t expectedScale = decimal.scale - d;
// prevent overflow.
if (expectedScale > datatypes::INT128MAXPRECISION)
{
decimal.s128Value = 0;
break;
}
// also do not allow for incorrect behavior due to underflow.
if (expectedScale < 0)
{
d += expectedScale;
}
helpers::decimalPlaceDec(d, nvp, decimal.scale);
p = nvp;
}
if (isNull)
break;
if (d < -datatypes::INT128MAXPRECISION)
{

View File

@ -137,7 +137,8 @@ string Func_timediff::getStrVal(rowgroup::Row& row, FunctionParm& parm, bool& is
isNull = true;
break;
}
val1 = parm[0]->data()->getDatetimeIntVal(row, isNull);
val1 = isTime1 ? parm[0]->data()->getTimeIntVal(row, isNull)
: parm[0]->data()->getDatetimeIntVal(row, isNull);
break;
case execplan::CalpontSystemCatalog::TIMESTAMP:
@ -225,7 +226,8 @@ string Func_timediff::getStrVal(rowgroup::Row& row, FunctionParm& parm, bool& is
isTime2 = true;
/* fall through */
case execplan::CalpontSystemCatalog::DATETIME:
val2 = parm[1]->data()->getDatetimeIntVal(row, isNull);
val2 = isTime2 ? parm[1]->data()->getTimeIntVal(row, isNull)
: parm[1]->data()->getDatetimeIntVal(row, isNull);
break;
case execplan::CalpontSystemCatalog::TIMESTAMP:

View File

@ -315,25 +315,25 @@ IDB_Decimal Func_truncate::getDecimalVal(Row& row, FunctionParm& parm, bool& isN
int64_t d = 0;
decimal = parm[0]->data()->getDecimalVal(row, isNull);
if (isNull)
{
break;
}
if (!op_ct.isWideDecimalType())
{
//@Bug 3101 - GCC 4.5.1 optimizes too aggressively here. Mark as volatile.
volatile int64_t p = 1;
if (!isNull)
{
int64_t nvp = p;
d = parm[1]->data()->getIntVal(row, isNull);
if (!isNull)
helpers::decimalPlaceDec(d, nvp, decimal.scale);
p = nvp;
}
int64_t nvp = p;
d = parm[1]->data()->getIntVal(row, isNull);
if (isNull)
break;
helpers::decimalPlaceDec(d, nvp, decimal.scale);
p = nvp;
int64_t x = decimal.value;
if (d > 0)
@ -371,20 +371,33 @@ IDB_Decimal Func_truncate::getDecimalVal(Row& row, FunctionParm& parm, bool& isN
//@Bug 3101 - GCC 4.5.1 optimizes too aggressively here. Mark as volatile.
volatile int128_t p = 1;
if (isNull)
break;
if (!isNull)
{
int128_t nvp = p;
d = parm[1]->data()->getIntVal(row, isNull);
if (!isNull)
helpers::decimalPlaceDec(d, nvp, decimal.scale);
int64_t expectedScale = decimal.scale - d;
// prevent overflow.
if (expectedScale > datatypes::INT128MAXPRECISION)
{
decimal.s128Value = 0;
break;
}
// also do not allow for incorrect behavior due to underflow.
if (expectedScale < 0)
{
d += expectedScale;
}
helpers::decimalPlaceDec(d, nvp, decimal.scale);
p = nvp;
}
if (isNull)
break;
if (d < -datatypes::INT128MAXPRECISION)
{
decimal.s128Value = 0;

View File

@ -160,7 +160,7 @@ int64_t Func_weekday::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool& is
default: isNull = true; return -1;
}
return helpers::calc_mysql_weekday(year, month, day, false);
return helpers::calc_mysql_weekday(year, month, day, false, isNull);
}
} // namespace funcexp

View File

@ -125,7 +125,7 @@ inline uint32_t calc_mysql_daynr(uint32_t year, uint32_t month, uint32_t day)
int y = year;
long delsum;
if (!dataconvert::isDateValid(day, month, year))
if (!dataconvert::isDateValid(day, month, year) || (day == 0 && month == 0 && year == 0))
return 0;
delsum = (long)(365 * y + 31 * ((int)month - 1) + (int)day);
@ -204,10 +204,13 @@ inline void get_date_from_mysql_daynr(long daynr, dataconvert::DateTime& dateTim
// else:
// 0 = Monday, 1 = Tuesday, ..., 6 = Sunday
// This is a mirror of calc_weekday, at a later date we should use sql_time.h
inline uint32_t calc_mysql_weekday(uint32_t year, uint32_t month, uint32_t day, bool sundayFirst)
inline uint32_t calc_mysql_weekday(uint32_t year, uint32_t month, uint32_t day, bool sundayFirst, bool& isNull)
{
if (!dataconvert::isDateValid(day, month, year))
if (!dataconvert::isDateValid(day, month, year) || (day == 0 && month == 0 && year == 0))
{
isNull = true;
return 0;
}
uint32_t daynr = calc_mysql_daynr(year, month, day);
return ((int)((daynr + 5L + (sundayFirst ? 1L : 0L)) % 7));
@ -252,7 +255,8 @@ inline uint32_t calc_mysql_week(uint32_t year, uint32_t month, uint32_t day, int
bool week_year = modeflags & WEEK_NO_ZERO;
bool first_weekday = modeflags & WEEK_GT_THREE_DAYS;
uint32_t weekday = calc_mysql_weekday(year, 1, 1, !monday_first);
bool isNullDummy = false;
uint32_t weekday = calc_mysql_weekday(year, 1, 1, !monday_first, isNullDummy);
if (weekyear)
{
@ -351,7 +355,7 @@ inline bool calc_time_diff(int64_t time1, int64_t time2, int l_sign, long long*
days -= l_sign * calc_mysql_daynr(year2, month2, day2);
microseconds = ((long long)days * (long)(86400) + (long long)(hour1 * 3600L + min1 * 60L + sec1) -
microseconds = (int128_t(days) * (86400) + (long long)(hour1 * 3600L + min1 * 60L + sec1) -
l_sign * (long long)(hour2 * 3600L + min2 * 60L + sec2)) *
(long long)(1000000) +
(long long)msec1 - l_sign * (long long)msec2;
@ -683,7 +687,7 @@ inline string longDoubleToString(long double ld)
uint64_t dateAdd(uint64_t time, const std::string& expr, execplan::IntervalColumn::interval_type unit,
bool dateType, execplan::OpType funcType);
const std::string IDB_date_format(const dataconvert::DateTime&, const std::string&);
const std::string IDB_date_format(const dataconvert::DateTime&, const std::string&, bool& isNull);
const std::string timediff(int64_t, int64_t, bool isDateTime = true);
const char* convNumToStr(int64_t, char*, int);

View File

@ -275,8 +275,9 @@ class FuncExpTest : public CppUnit::TestFixture
for (unsigned i = 0; i < sizeof(date_tests) / sizeof(DateCheck); i++)
{
boost::gregorian::date d(date_tests[i].date.year, date_tests[i].date.month, date_tests[i].date.day);
bool isNullDummy = false;
uint32_t dayofweek = helpers::calc_mysql_weekday(date_tests[i].date.year, date_tests[i].date.month,
date_tests[i].date.day, false);
date_tests[i].date.day, false, isNullDummy);
bool check = (strcmp(helpers::weekdayFullNames[dayofweek].c_str(), date_tests[i].dayname) == 0);

View File

@ -69,7 +69,8 @@ class TimeExtractor
uint32_t yearfirst = helpers::calc_mysql_daynr(dateTime.year, 1, 1);
// figure out which day of week Jan-01 is
uint32_t firstweekday = helpers::calc_mysql_weekday(dateTime.year, 1, 1, sundayFirst);
bool isNullDummy = false;
uint32_t firstweekday = helpers::calc_mysql_weekday(dateTime.year, 1, 1, sundayFirst, isNullDummy);
// calculate the offset to the first week starting day
uint32_t firstoffset = firstweekday ? (7 - firstweekday) : 0;

View File

@ -42,6 +42,8 @@ using namespace config;
#include "installdir.h"
#include "format.h"
#include "mcs_int128.h"
namespace
{
boost::mutex mx;
@ -127,6 +129,11 @@ void Message::Args::add(uint64_t u64)
fArgs.push_back(u64);
}
// Adds a 128-bit integer argument to the message. The value is wrapped in
// datatypes::TSInt128 and the result of its toString() is what gets appended
// to fArgs, rather than the raw integer itself.
void Message::Args::add(int128_t i128)
{
fArgs.push_back(datatypes::TSInt128(i128).toString());
}
void Message::Args::add(const string& s)
{
fArgs.push_back(s);

View File

@ -65,6 +65,14 @@ class Message
*/
void add(uint64_t i);
/* define the 128-bit types here to avoid including mcs_numeric_limits.h */
using int128_t = __int128;
using uint128_t = unsigned __int128;
/** @brief add a 128-bit int arg to the message
*/
void add(int128_t i128);
/** @brief add a float arg to the message
*/
void add(double d);