
MCOL-3514 Add support for S3 to cpimport

cpimport can now use libmarias3 to read an object from an
S3 bucket instead of a file on local disk.

This also moves libmarias3 to utils/libmarias3.
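
For reference, the read path follows the standard libmarias3 pattern: initialise a client, fetch the whole object into memory, parse it, free it. A minimal sketch of that pattern follows; the credentials, region, bucket, and key are placeholders, not values from this commit (cpimport supplies them through its own options):

#include <libmarias3/marias3.h>
#include <cstdio>

int main()
{
    // Placeholder credentials/region; error handling trimmed for brevity.
    ms3_st* ms3 = ms3_init("s3key", "s3secret", "us-east-1", NULL);

    if (ms3 == NULL)
        return 1;

    uint8_t* data = NULL;
    size_t length = 0;

    // Download the entire object into one contiguous in-memory buffer.
    uint8_t res = ms3_get(ms3, "mybucket", "import.tbl", &data, &length);

    if (res != 0)
    {
        fprintf(stderr, "ms3_get: %s\n", ms3_error(res));
        ms3_deinit(ms3);
        return 1;
    }

    // ... hand (data, length) to the bulk loader here ...

    ms3_free(data);
    ms3_deinit(ms3);
    return 0;
}

Because the object arrives as one contiguous buffer rather than a stream, the bulk loader needs a buffer-fill entry point that tracks a parse offset instead of calling fread(); that is what the new fillFromMemory below provides.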
Andrew Hutchings
2019-09-24 10:31:22 +01:00
parent 0852e316f8
commit 8633859dd4
16 changed files with 511 additions and 131 deletions


@@ -2039,6 +2039,106 @@ int BulkLoadBuffer::parseDictSection(ColumnInfo& columnInfo,
    return rc;
}

int BulkLoadBuffer::fillFromMemory(
    const BulkLoadBuffer& overFlowBufIn,
    const char* input, size_t length, size_t *parse_length, RID& totalReadRows,
    RID& correctTotalRows, const boost::ptr_vector<ColumnInfo>& columnsInfo,
    unsigned int allowedErrCntThisCall )
{
    boost::mutex::scoped_lock lock(fSyncUpdatesBLB);
    reset();
    copyOverflow( overFlowBufIn );

    size_t readSize = 0;

    // Copy the overflow data from the last buffer that did not get written
    if (fOverflowSize != 0)
    {
        memcpy( fData, fOverflowBuf, fOverflowSize );

        if (fOverflowBuf != NULL)
        {
            delete [] fOverflowBuf;
            fOverflowBuf = NULL;
        }
    }

    readSize = fBufferSize - fOverflowSize;

    if (readSize > (length - *parse_length))
    {
        readSize = length - *parse_length;
    }

    memcpy(fData + fOverflowSize, input + *parse_length, readSize);
    *parse_length += readSize;

    bool bEndOfData = false;

    if (length == *parse_length)
    {
        bEndOfData = true;
    }
    if ( bEndOfData && // @bug 3516: Add '\n' if missing from last record
         (fImportDataMode == IMPORT_DATA_TEXT) ) // Only applies to ascii mode
    {
        if ( (fOverflowSize > 0) || (readSize > 0) )
        {
            if ( fData[ fOverflowSize + readSize - 1 ] != '\n' )
            {
                // Should be safe to add a byte to fData w/o risk of
                // overflowing: we reached the end of the in-memory input,
                // so readSize was clamped to the bytes remaining rather
                // than filling the whole buffer, leaving room for a byte.
                fData[ fOverflowSize + readSize ] = '\n';
                readSize++;
            }
        }
    }
    // Lazy allocation of fToken memory as needed
    if (fTokens == 0)
    {
        resizeTokenArray();
    }

    if ((readSize > 0) || (fOverflowSize > 0))
    {
        if (fOverflowBuf != NULL)
        {
            delete [] fOverflowBuf;
            fOverflowBuf = NULL;
        }

        fReadSize = readSize + fOverflowSize;
        fStartRow = correctTotalRows;
        fStartRowForLogging = totalReadRows;

        if (fImportDataMode == IMPORT_DATA_TEXT)
        {
            tokenize( columnsInfo, allowedErrCntThisCall );
        }
        else
        {
            int rc = tokenizeBinary( columnsInfo, allowedErrCntThisCall,
                                     bEndOfData );

            if (rc != NO_ERROR)
                return rc;
        }

        // If we read a full buffer without hitting any new lines, then
        // terminate import because row size is greater than read buffer size.
        if ((fTotalReadRowsForLog == 0) && (fReadSize == fBufferSize))
        {
            return ERR_BULK_ROW_FILL_BUFFER;
        }

        totalReadRows += fTotalReadRowsForLog;
        correctTotalRows += fTotalReadRows;
    }

    return NO_ERROR;
}
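
For context, a hypothetical driver loop over the downloaded object could look like the sketch below. The names buf, prevBuf, data, len, columnsInfo, and allowedErrCnt are illustrative, not identifiers from this commit:

// Hypothetical caller: feed one in-memory object through successive buffers.
size_t parsed = 0;               // advanced by each fillFromMemory() call
RID totalReadRows = 0;
RID correctTotalRows = 0;

while (parsed < len)
{
    int rc = buf.fillFromMemory(prevBuf, data, len, &parsed,
                                totalReadRows, correctTotalRows,
                                columnsInfo, allowedErrCnt);

    if (rc != NO_ERROR)
        return rc;               // e.g. ERR_BULK_ROW_FILL_BUFFER

    // ... dispatch the tokenized rows to the parser, then swap buf and
    // prevBuf so any partial last row is carried over as overflow ...
}

The parse_length in/out parameter is what lets each call resume where the previous buffer stopped, mirroring the offset that the existing fread()-based fill path tracks implicitly through the file handle.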
//------------------------------------------------------------------------------
// Read the next set of rows from the input import file (for the specified
// table), into "this" BulkLoadBuffer.