1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-23 07:05:36 +03:00
Sergey Zefirov b53c231ca6 MCOL-271 empty strings should not be NULLs (#2794)
This patch improves handling of NULLs in textual fields in ColumnStore.
Previously empty strings were considered NULLs and it could be a problem
if data scheme allows for empty strings. It was also one of major
reasons of behavior difference between ColumnStore and other engines in
MariaDB family.

Also, this patch fixes some other bugs and incorrect behavior, for
example, incorrect comparison for "column <= ''" which evaluates to
constant True for all purposes before this patch.
2023-03-30 21:18:29 +03:00

652 lines
14 KiB
C++

/* Copyright (C) 2014 InfiniDB, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/****************************************************************************
* $Id$
*
*
****************************************************************************/
#include <cstdlib>
#include <string>
using namespace std;
#include "functor_str.h"
#include "funchelpers.h"
#include "functioncolumn.h"
using namespace execplan;
#include "sha.h"
namespace funcexp
{
/*
* class SHA1 definition
*
* Copyright (C) 1998, 2009
* Paul E. Jones <paulej@packetizer.com>
* All Rights Reserved.
*
* Description:
* This class implements the Secure Hashing Standard as defined
* in FIPS PUB 180-1 published April 17, 1995.
*
* The Secure Hashing Standard, which uses the Secure Hashing
* Algorithm (SHA), produces a 160-bit message digest for a
* given data stream. In theory, it is highly improbable that
* two messages will produce the same message digest. Therefore,
* this algorithm can serve as a means of providing a "fingerprint"
* for a message.
*
* Portability Issues:
* SHA-1 is defined in terms of 32-bit "words". This code was
* written with the expectation that the processor has at least
* a 32-bit machine word size. If the machine word size is larger,
* the code should still function properly. One caveat to that
* is that the input functions taking characters and character arrays
* assume that only 8 bits of information are stored in each character.
*
* Caveats:
* SHA-1 is designed to work with messages less than 2^64 bits long.
* Although SHA-1 allows a message digest to be generated for
* messages of any number of bits less than 2^64, this implementation
* only works with messages with a length that is a multiple of 8
* bits.
*
*/
/*
* SHA1
*
* Description:
* This is the constructor for the sha1 class.
*
* Parameters:
* None.
*
* Returns:
* Nothing.
*
* Comments:
*
*/
SHA1::SHA1()
{
Reset();
}
/*
* ~SHA1
*
* Description:
* This is the destructor for the sha1 class
*
* Parameters:
* None.
*
* Returns:
* Nothing.
*
* Comments:
*
*/
SHA1::~SHA1()
{
// The destructor does nothing
}
/*
* Reset
*
* Description:
* This function will initialize the sha1 class member variables
* in preparation for computing a new message digest.
*
* Parameters:
* None.
*
* Returns:
* Nothing.
*
* Comments:
*
*/
void SHA1::Reset()
{
Length_Low = 0;
Length_High = 0;
Message_Block_Index = 0;
H[0] = 0x67452301;
H[1] = 0xEFCDAB89;
H[2] = 0x98BADCFE;
H[3] = 0x10325476;
H[4] = 0xC3D2E1F0;
Computed = false;
Corrupted = false;
}
/*
* Result
*
* Description:
* This function will return the 160-bit message digest into the
* array provided.
*
* Parameters:
* message_digest_array: [out]
* This is an array of five unsigned integers which will be filled
* with the message digest that has been computed.
*
* Returns:
* True if successful, false if it failed.
*
* Comments:
*
*/
bool SHA1::Result(unsigned* message_digest_array)
{
int i; // Counter
if (Corrupted)
{
return false;
}
if (!Computed)
{
PadMessage();
Computed = true;
}
for (i = 0; i < 5; i++)
{
message_digest_array[i] = H[i];
}
return true;
}
/*
* Input
*
* Description:
* This function accepts an array of octets as the next portion of
* the message.
*
* Parameters:
* message_array: [in]
* An array of characters representing the next portion of the
* message.
*
* Returns:
* Nothing.
*
* Comments:
*
*/
void SHA1::Input(const unsigned char* message_array, unsigned length)
{
if (!length)
{
return;
}
if (Computed || Corrupted)
{
Corrupted = true;
return;
}
while (length-- && !Corrupted)
{
Message_Block[Message_Block_Index++] = (*message_array & 0xFF);
Length_Low += 8;
Length_Low &= 0xFFFFFFFF; // Force it to 32 bits
if (Length_Low == 0)
{
Length_High++;
Length_High &= 0xFFFFFFFF; // Force it to 32 bits
if (Length_High == 0)
{
Corrupted = true; // Message is too long
}
}
if (Message_Block_Index == 64)
{
ProcessMessageBlock();
}
message_array++;
}
}
/*
* Input
*
* Description:
* This function accepts an array of octets as the next portion of
* the message.
*
* Parameters:
* message_array: [in]
* An array of characters representing the next portion of the
* message.
* length: [in]
* The length of the message_array
*
* Returns:
* Nothing.
*
* Comments:
*
*/
void SHA1::Input(const char* message_array, unsigned length)
{
Input((unsigned char*)message_array, length);
}
/*
* Input
*
* Description:
* This function accepts a single octets as the next message element.
*
* Parameters:
* message_element: [in]
* The next octet in the message.
*
* Returns:
* Nothing.
*
* Comments:
*
*/
void SHA1::Input(unsigned char message_element)
{
Input(&message_element, 1);
}
/*
* Input
*
* Description:
* This function accepts a single octet as the next message element.
*
* Parameters:
* message_element: [in]
* The next octet in the message.
*
* Returns:
* Nothing.
*
* Comments:
*
*/
void SHA1::Input(char message_element)
{
Input((unsigned char*)&message_element, 1);
}
/*
* operator<<
*
* Description:
* This operator makes it convenient to provide character strings to
* the SHA1 object for processing.
*
* Parameters:
* message_array: [in]
* The character array to take as input.
*
* Returns:
* A reference to the SHA1 object.
*
* Comments:
* Each character is assumed to hold 8 bits of information.
*
*/
SHA1& SHA1::operator<<(const char* message_array)
{
const char* p = message_array;
while (*p)
{
Input(*p);
p++;
}
return *this;
}
/*
* operator<<
*
* Description:
* This operator makes it convenient to provide character strings to
* the SHA1 object for processing.
*
* Parameters:
* message_array: [in]
* The character array to take as input.
*
* Returns:
* A reference to the SHA1 object.
*
* Comments:
* Each character is assumed to hold 8 bits of information.
*
*/
SHA1& SHA1::operator<<(const unsigned char* message_array)
{
const unsigned char* p = message_array;
while (*p)
{
Input(*p);
p++;
}
return *this;
}
/*
* operator<<
*
* Description:
* This function provides the next octet in the message.
*
* Parameters:
* message_element: [in]
* The next octet in the message
*
* Returns:
* A reference to the SHA1 object.
*
* Comments:
* The character is assumed to hold 8 bits of information.
*
*/
SHA1& SHA1::operator<<(const char message_element)
{
Input((unsigned char*)&message_element, 1);
return *this;
}
/*
* operator<<
*
* Description:
* This function provides the next octet in the message.
*
* Parameters:
* message_element: [in]
* The next octet in the message
*
* Returns:
* A reference to the SHA1 object.
*
* Comments:
* The character is assumed to hold 8 bits of information.
*
*/
SHA1& SHA1::operator<<(const unsigned char message_element)
{
Input(&message_element, 1);
return *this;
}
/*
* ProcessMessageBlock
*
* Description:
* This function will process the next 512 bits of the message
* stored in the Message_Block array.
*
* Parameters:
* None.
*
* Returns:
* Nothing.
*
* Comments:
* Many of the variable names in this function, especially the single
* character names, were used because those were the names used
* in the publication.
*
*/
void SHA1::ProcessMessageBlock()
{
const unsigned K[] = // Constants defined for SHA-1
{0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6};
int t; // Loop counter
unsigned temp; // Temporary word value
unsigned W[80]; // Word sequence
unsigned A, B, C, D, E; // Word buffers
/*
* Initialize the first 16 words in the array W
*/
for (t = 0; t < 16; t++)
{
W[t] = ((unsigned)Message_Block[t * 4]) << 24;
W[t] |= ((unsigned)Message_Block[t * 4 + 1]) << 16;
W[t] |= ((unsigned)Message_Block[t * 4 + 2]) << 8;
W[t] |= ((unsigned)Message_Block[t * 4 + 3]);
}
for (t = 16; t < 80; t++)
{
W[t] = CircularShift(1, W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16]);
}
A = H[0];
B = H[1];
C = H[2];
D = H[3];
E = H[4];
for (t = 0; t < 20; t++)
{
temp = CircularShift(5, A) + ((B & C) | ((~B) & D)) + E + W[t] + K[0];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30, B);
B = A;
A = temp;
}
for (t = 20; t < 40; t++)
{
temp = CircularShift(5, A) + (B ^ C ^ D) + E + W[t] + K[1];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30, B);
B = A;
A = temp;
}
for (t = 40; t < 60; t++)
{
temp = CircularShift(5, A) + ((B & C) | (B & D) | (C & D)) + E + W[t] + K[2];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30, B);
B = A;
A = temp;
}
for (t = 60; t < 80; t++)
{
temp = CircularShift(5, A) + (B ^ C ^ D) + E + W[t] + K[3];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30, B);
B = A;
A = temp;
}
H[0] = (H[0] + A) & 0xFFFFFFFF;
H[1] = (H[1] + B) & 0xFFFFFFFF;
H[2] = (H[2] + C) & 0xFFFFFFFF;
H[3] = (H[3] + D) & 0xFFFFFFFF;
H[4] = (H[4] + E) & 0xFFFFFFFF;
Message_Block_Index = 0;
}
/*
* PadMessage
*
* Description:
* According to the standard, the message must be padded to an even
* 512 bits. The first padding bit must be a '1'. The last 64 bits
* represent the length of the original message. All bits in between
* should be 0. This function will pad the message according to those
* rules by filling the message_block array accordingly. It will also
* call ProcessMessageBlock() appropriately. When it returns, it
* can be assumed that the message digest has been computed.
*
* Parameters:
* None.
*
* Returns:
* Nothing.
*
* Comments:
*
*/
void SHA1::PadMessage()
{
/*
* Check to see if the current message block is too small to hold
* the initial padding bits and length. If so, we will pad the
* block, process it, and then continue padding into a second block.
*/
if (Message_Block_Index > 55)
{
Message_Block[Message_Block_Index++] = 0x80;
while (Message_Block_Index < 64)
{
Message_Block[Message_Block_Index++] = 0;
}
ProcessMessageBlock();
while (Message_Block_Index < 56)
{
Message_Block[Message_Block_Index++] = 0;
}
}
else
{
Message_Block[Message_Block_Index++] = 0x80;
while (Message_Block_Index < 56)
{
Message_Block[Message_Block_Index++] = 0;
}
}
/*
* Store the message length as the last 8 octets
*/
Message_Block[56] = (Length_High >> 24) & 0xFF;
Message_Block[57] = (Length_High >> 16) & 0xFF;
Message_Block[58] = (Length_High >> 8) & 0xFF;
Message_Block[59] = (Length_High)&0xFF;
Message_Block[60] = (Length_Low >> 24) & 0xFF;
Message_Block[61] = (Length_Low >> 16) & 0xFF;
Message_Block[62] = (Length_Low >> 8) & 0xFF;
Message_Block[63] = (Length_Low)&0xFF;
ProcessMessageBlock();
}
/*
* CircularShift
*
* Description:
* This member function will perform a circular shifting operation.
*
* Parameters:
* bits: [in]
* The number of bits to shift (1-31)
* word: [in]
* The value to shift (assumes a 32-bit integer)
*
* Returns:
* The shifted value.
*
* Comments:
*
*/
unsigned SHA1::CircularShift(int bits, unsigned word)
{
return ((word << bits) & 0xFFFFFFFF) | ((word & 0xFFFFFFFF) >> (32 - bits));
}
/** Definition of class Func_sha */
CalpontSystemCatalog::ColType Func_sha::operationType(FunctionParm& fp,
CalpontSystemCatalog::ColType& resultType)
{
return resultType;
}
string Func_sha::getStrVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
CalpontSystemCatalog::ColType&)
{
SHA1 sha;
uint32_t message_digest[5];
// Input is always treated as sring
sha.Reset();
sha << parm[0]->data()->getStrVal(row, isNull).safeString("").c_str();
// can not compute
if (!sha.Result(message_digest))
{
isNull = true;
return "";
}
// result length is always 40+1
char result[41];
snprintf(result, 41, "%08x", message_digest[0]);
snprintf(result + 8, 41 - 8, "%08x", message_digest[1]);
snprintf(result + 16, 41 - 16, "%08x", message_digest[2]);
snprintf(result + 24, 41 - 24, "%08x", message_digest[3]);
snprintf(result + 32, 41 - 32, "%08x", message_digest[4]);
result[40] = 0;
return result;
}
} // namespace funcexp