1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-07 03:22:57 +03:00

MCOL-5746 Cpimport: convert blob data from ascii hex when reads from STDIN

This commit is contained in:
Denis Khalikov
2024-05-16 15:17:50 +00:00
committed by Leonid Fedorov
parent 0a118292eb
commit 180e1f793f
3 changed files with 27 additions and 19 deletions

View File

@@ -1661,7 +1661,8 @@ int ColumnInfo::closeDctnryStore(bool bAbort)
// Update dictionary store file with string column parquet data, and return the assigned // Update dictionary store file with string column parquet data, and return the assigned
// tokens (tokenbuf) to be stored in the corresponding column token file. // tokens (tokenbuf) to be stored in the corresponding column token file.
//-------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------
int ColumnInfo::updateDctnryStoreParquet(std::shared_ptr<arrow::Array> columnData, int tokenPos, const int totalRow, char* tokenBuf) int ColumnInfo::updateDctnryStoreParquet(std::shared_ptr<arrow::Array> columnData, int tokenPos,
const int totalRow, char* tokenBuf)
{ {
long long truncCount = 0; long long truncCount = 0;
@@ -1688,7 +1689,6 @@ int ColumnInfo::updateDctnryStoreParquet(std::shared_ptr<arrow::Array> columnDat
} }
incSaturatedCnt(truncCount); incSaturatedCnt(truncCount);
return NO_ERROR; return NO_ERROR;
} }
@@ -1707,7 +1707,7 @@ int ColumnInfo::updateDctnryStore(char* buf, ColPosPair** pos, const int totalRo
// column. // column.
// This only applies to default text mode. This step is bypassed for // This only applies to default text mode. This step is bypassed for
// binary imports, because in that case, the data is already true binary. // binary imports, because in that case, the data is already true binary.
if (((curCol.colType == WR_VARBINARY) || (curCol.colType == WR_BLOB)) && if (((curCol.colType == WR_VARBINARY) || (curCol.colType == WR_BLOB && fpTableInfo->readFromSTDIN())) &&
(fpTableInfo->getImportDataMode() == IMPORT_DATA_TEXT)) (fpTableInfo->getImportDataMode() == IMPORT_DATA_TEXT))
{ {
#ifdef PROFILE #ifdef PROFILE

View File

@@ -2463,5 +2463,10 @@ int TableInfo::allocateBRMColumnExtent(OID columnOID, uint16_t dbRoot, uint32_t&
return rc; return rc;
} }
bool TableInfo::readFromSTDIN()
{
return fReadFromStdin;
}
} // namespace WriteEngine } // namespace WriteEngine
// end of namespace // end of namespace

View File

@@ -80,7 +80,7 @@ class TableInfo : public WeUIDGID
FILE* fHandle; // Handle to the input load file FILE* fHandle; // Handle to the input load file
int fCurrentReadBuffer; // Id of current buffer being popu- int fCurrentReadBuffer; // Id of current buffer being popu-
// lated by the read thread // lated by the read thread
RID fTotalReadRows; // Total number of rows read RID fTotalReadRows; // Total number of rows read
unsigned fTotalErrRows; // Total error rows among all input unsigned fTotalErrRows; // Total error rows among all input
// for this table. Is volatile to // for this table. Is volatile to
// insure parser & reader threads // insure parser & reader threads
@@ -174,24 +174,25 @@ class TableInfo : public WeUIDGID
boost::uuids::uuid fJobUUID; // Job UUID boost::uuids::uuid fJobUUID; // Job UUID
std::vector<BRM::LBID_t> fDictFlushBlks; // dict blks to be flushed from cache std::vector<BRM::LBID_t> fDictFlushBlks; // dict blks to be flushed from cache
std::shared_ptr<arrow::RecordBatchReader> fParquetReader; // Batch reader to read batches of data std::shared_ptr<arrow::RecordBatchReader> fParquetReader; // Batch reader to read batches of data
std::unique_ptr<parquet::arrow::FileReader> fReader; // Reader to read parquet file std::unique_ptr<parquet::arrow::FileReader> fReader; // Reader to read parquet file
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
// Private Functions // Private Functions
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
int changeTableLockState(); // Change state of table lock to cleanup int changeTableLockState(); // Change state of table lock to cleanup
void closeTableFile(); // Close current tbl file; free buffer void closeTableFile(); // Close current tbl file; free buffer
void closeOpenDbFiles(); // Close DB files left open at job's end void closeOpenDbFiles(); // Close DB files left open at job's end
int confirmDBFileChanges(); // Confirm DB file changes (on HDFS) int confirmDBFileChanges(); // Confirm DB file changes (on HDFS)
void deleteTempDBFileChanges(); // Delete DB temp swap files (on HDFS) void deleteTempDBFileChanges(); // Delete DB temp swap files (on HDFS)
int finishBRM(); // Finish reporting updates for BRM int finishBRM(); // Finish reporting updates for BRM
void freeProcessingBuffers(); // Free up Processing Buffers void freeProcessingBuffers(); // Free up Processing Buffers
bool isBufferAvailable(bool report); // Is tbl buffer available for reading bool isBufferAvailable(bool report); // Is tbl buffer available for reading
int openTableFileParquet(int64_t &totalRowsParquet); // Open parquet data file and set batch reader for each buffer int openTableFileParquet(
int openTableFile(); // Open data file and set the buffer int64_t& totalRowsParquet); // Open parquet data file and set batch reader for each buffer
void reportTotals(double elapsedSec); // Report summary totals int openTableFile(); // Open data file and set the buffer
void sleepMS(long int ms); // Sleep method void reportTotals(double elapsedSec); // Report summary totals
void sleepMS(long int ms); // Sleep method
// Compare column HWM with the examplar HWM. // Compare column HWM with the examplar HWM.
int compareHWMs(const int smallestColumnId, const int widerColumnId, const uint32_t smallerColumnWidth, int compareHWMs(const int smallestColumnId, const int widerColumnId, const uint32_t smallerColumnWidth,
const uint32_t widerColumnWidth, const std::vector<DBRootExtentInfo>& segFileInfo, const uint32_t widerColumnWidth, const std::vector<DBRootExtentInfo>& segFileInfo,
@@ -465,6 +466,8 @@ class TableInfo : public WeUIDGID
void setJobUUID(const boost::uuids::uuid& jobUUID); void setJobUUID(const boost::uuids::uuid& jobUUID);
bool readFromSTDIN();
public: public:
friend class BulkLoad; friend class BulkLoad;
friend class ColumnInfo; friend class ColumnInfo;