You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-5505 add parquet support for cpimport and add mcs_parquet_ddl and mcs_parquet_gen tools
This commit is contained in:
@ -30,7 +30,7 @@
|
||||
#include "we_columninfo.h"
|
||||
#include "calpontsystemcatalog.h"
|
||||
#include "dataconvert.h"
|
||||
|
||||
#include <arrow/api.h>
|
||||
namespace WriteEngine
|
||||
{
|
||||
class Log;
|
||||
@ -84,6 +84,9 @@ class BulkLoadBuffer
|
||||
char* fOverflowBuf; // Overflow data held for next buffer
|
||||
unsigned fOverflowSize; // Current size of fOverflowBuf
|
||||
|
||||
std::shared_ptr<arrow::RecordBatch> fParquetBatch; // Batch of parquet file to be parsed
|
||||
std::shared_ptr<arrow::RecordBatch> fParquetBatchParser; // for temporary use by parser
|
||||
std::shared_ptr<::arrow::RecordBatchReader> fParquetReader; // Reader used to read batches of parquet data
|
||||
// Information about the locker and status for each column in this buffer.
|
||||
// Note that TableInfo::fSyncUpdatesTI mutex is used to synchronize
|
||||
// access to fColumnLocks and fParseComplete from both read and parse
|
||||
@ -174,6 +177,19 @@ class BulkLoadBuffer
|
||||
void convert(char* field, int fieldLength, bool nullFlag, unsigned char* output, const JobColumn& column,
|
||||
BLBufferStats& bufStats);
|
||||
|
||||
/** @brief Parse a batch of parquet data in the read buffer for a non-dictionary column
|
||||
*/
|
||||
int parseColParquet(ColumnInfo& columnInfo);
|
||||
|
||||
/** @brief Convert a batch of parquet data depending on the data type
|
||||
*/
|
||||
void convertParquet(std::shared_ptr<arrow::Array> columnData, unsigned char* buf, const JobColumn& column,
|
||||
BLBufferStats& bufStats, RID& lastInputRowInExtent, ColumnInfo& columnInfo,
|
||||
bool& updateCPInfoPendingFlag, ColumnBufferSection* section);
|
||||
|
||||
|
||||
inline void updateCPMinMax(ColumnInfo& columnInfo, RID& lastInputRowInExtent, BLBufferStats& bufStats,
|
||||
bool& updateCPInfoPendingFlag, ColumnBufferSection* section, uint32_t curRow);
|
||||
/** @brief Copy the overflow data
|
||||
*/
|
||||
void copyOverflow(const BulkLoadBuffer& buffer);
|
||||
@ -263,6 +279,11 @@ class BulkLoadBuffer
|
||||
fStatusBLB = status;
|
||||
}
|
||||
|
||||
/** @brief Set the parquet batch reader used by this buffer
 *
 * @param reader Shared handle to the arrow RecordBatchReader; it is stored in
 *               fParquetReader, replacing any reader that was set before.
 */
void setParquetReader(std::shared_ptr<::arrow::RecordBatchReader> reader)
{
  // The parameter is already a by-value copy, so exchange it with the member
  // rather than copying a second time (avoids an extra atomic ref-count
  // increment/decrement). The previously stored reader, if any, ends up in
  // the parameter and is released when the parameter is destroyed.
  fParquetReader.swap(reader);
}
|
||||
|
||||
/** @brief Try to lock a column for the buffer
|
||||
* TableInfo::fSyncUpdatesTI mutex should be locked when calling this
|
||||
* function (see fColumnLocks discussion).
|
||||
@ -273,6 +294,10 @@ class BulkLoadBuffer
|
||||
size_t* parse_length, RID& totalReadRows, RID& correctTotalRows,
|
||||
const boost::ptr_vector<ColumnInfo>& columnsInfo, unsigned int allowedErrCntThisCall);
|
||||
|
||||
/** @brief Read the batch data into the buffer
|
||||
*/
|
||||
int fillFromFileParquet(RID& totalReadRows, RID& correctTotalRows);
|
||||
|
||||
/** @brief Read the table data into the buffer
|
||||
*/
|
||||
int fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, RID& totalRows, RID& correctTotalRows,
|
||||
|
Reference in New Issue
Block a user