1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00
2023-03-02 15:59:42 +00:00

259 lines
6.0 KiB
C++

#include <iostream>
#include <inttypes.h>
#include <string>
#include <cstring>
#include <cstdlib>
//#define NDEBUG
#include <cassert>
using namespace std;
#include <boost/tokenizer.hpp>
#include <boost/static_assert.hpp>
using namespace boost;
// 1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular
// courts above the|
// Use 1-byte packing so the record structs below contain no padding and their
// in-memory layout is exactly the fixed-size record that main() writes out.
// No matching pack reset appears in this section of the file, so the setting
// also applies to the declarations that follow.
#pragma pack(1)
// Binary image of one lineitem row, with the key columns stored as signed
// integers. The trailing numbers on some members are the byte total of the
// group of members ending on that line: 12 + 40 + 2 + 12 + 79 = 145.
struct lineitem_image
{
int32_t l_orderkey;
int32_t l_partkey;
int32_t l_suppkey; // 12 bytes: three 32-bit keys
int64_t l_linenumber;
// The next four members are filled by idbbigdec2().
int64_t l_quantity;
int64_t l_extendedprice;
int64_t l_discount;
int64_t l_tax; // 40 bytes: five 64-bit values
char l_returnflag;
char l_linestatus; // 2 bytes: two single-character flags
// The three dates are filled by idbdate().
int32_t l_shipdate;
int32_t l_commitdate;
int32_t l_receiptdate; // 12 bytes: three 32-bit dates
// Text columns are raw byte arrays; the parser zero-fills the record and then
// copies the field text in, so a field that fills its array has no NUL.
char l_shipinstruct[25];
char l_shipmode[10];
char l_comment[44]; // 79 bytes: 25 + 10 + 44
};
// Guards the on-disk record size against accidental layout changes.
BOOST_STATIC_ASSERT(sizeof(struct lineitem_image) == 145);
// Variant of lineitem_image that stores the first four columns (l_orderkey,
// l_partkey, l_suppkey, l_linenumber) as unsigned integers.
// Decimal and date columns are still stored as signed integers.
// Member order and widths match lineitem_image, so the record is also
// 145 bytes; the trailing numbers are per-group byte totals.
struct unsigned_lineitem_image
{
uint32_t l_orderkey;
uint32_t l_partkey;
uint32_t l_suppkey; // 12 bytes: three 32-bit keys
uint64_t l_linenumber;
// The next four members are filled by idbbigdec2().
int64_t l_quantity;
int64_t l_extendedprice;
int64_t l_discount;
int64_t l_tax; // 40 bytes: five 64-bit values
char l_returnflag;
char l_linestatus; // 2 bytes: two single-character flags
// The three dates are filled by idbdate().
int32_t l_shipdate;
int32_t l_commitdate;
int32_t l_receiptdate; // 12 bytes: three 32-bit dates
// Text columns are raw byte arrays; a field that fills its array has no NUL.
char l_shipinstruct[25];
char l_shipmode[10];
char l_comment[44]; // 79 bytes: 25 + 10 + 44
};
// Guards the on-disk record size against accidental layout changes.
BOOST_STATIC_ASSERT(sizeof(struct unsigned_lineitem_image) == 145);
// A calendar date packed into bit-fields totalling 6 + 6 + 4 + 16 = 32 bits.
// NOTE(review): how bit-fields are placed inside the 32-bit word is
// implementation-defined, so the resulting integer value depends on the
// compiler/ABI -- confirm against the consumer of these records.
struct Date
{
unsigned spare : 6;  // idbdate() sets this to 0x3E for every parsed date
unsigned day : 6;
unsigned month : 4;
unsigned year : 16;
};
// The bit-fields must fit exactly into one 32-bit word (see date_image).
BOOST_STATIC_ASSERT(sizeof(struct Date) == 4);
// Overlays the Date bit-fields on a 32-bit integer. idbdate() fills in `d`
// and returns `i`, i.e. it reads the packed date back as a plain int32_t.
// NOTE(review): reading a union member other than the one last written is
// formally undefined in C++, although common compilers support it.
union date_image
{
struct Date d;  // field-wise view
int32_t i;      // whole-word view stored in the record
};
namespace
{
// Tokenizer used by the parsers below to split a string on a set of
// single-character delimiters ('|' for rows, '-' for dates).
typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
// Converts a decimal literal into a 64-bit integer scaled by 100, i.e. with
// two implied fractional digits: "21168.23" -> 2116823, "17" -> 1700.
//
// The fractional part is normalised to exactly two digits before conversion.
// A short fraction is right-padded with zeros ("1.5" -> 150, "7." -> 700) and
// extra digits are truncated, not rounded ("1.239" -> 123). Previously the
// '.' was simply erased, so any input without exactly two fractional digits
// came out with the wrong scale.
//
// @param str  textual decimal value; a leading sign is handled by atoll().
// @return     the value multiplied by 100, or 0 when atoll() finds no number.
int64_t idbbigdec2(const std::string& str)
{
  const std::string::size_type dot = str.find('.');

  // No fractional part: scaling by 100 is just appending two zero digits.
  if (dot == std::string::npos)
    return std::atoll((str + "00").c_str());

  // Integer part followed by exactly two fractional digits.
  std::string digits(str, 0, dot);
  std::string::size_type pos = dot + 1;

  for (int kept = 0; kept < 2; ++kept)
  {
    if (pos < str.size() && str[pos] >= '0' && str[pos] <= '9')
      digits += str[pos++];
    else
      digits += '0';  // fraction ended early: pad to keep the scale at 100
  }

  return std::atoll(digits.c_str());
}
int32_t idbdate(const string& str)
{
date_image di;
di.i = 0;
boost::char_separator<char> sep("-");
tokenizer tokens(str, sep);
tokenizer::iterator tok_iter = tokens.begin();
// Note that the spare bits must be set, instead of leaving them
// initialized to 0; to be compatible with internal date format.
if (distance(tok_iter, tokens.end()) == 3)
{
di.d.spare = 0x3E;
di.d.year = atoi(tok_iter->c_str());
++tok_iter;
di.d.month = atoi(tok_iter->c_str());
++tok_iter;
di.d.day = atoi(tok_iter->c_str());
++tok_iter;
}
return di.i;
}
// Parses one '|'-delimited lineitem text row into a packed binary record
// with signed key columns.
//
// The record is zero-filled first, so unused tail bytes of the text columns
// are 0. The row is split on '|'; char_separator's default policy drops empty
// tokens, so an empty field does not produce a token and the following
// fields shift left. Rows with fewer than 16 tokens are rejected.
//
// Text columns are truncated to the size of their destination array. The
// copy length was previously unbounded, so an over-long field overflowed the
// array and overwrote neighbouring members or memory past the record. A
// field that fills its array exactly is stored without a terminating NUL.
//
// @param img  record to fill; always overwritten, even on failure.
// @param buf  one input line without the trailing newline.
// @return     0 on success, -1 if the row has fewer than 16 fields.
int parseinto(lineitem_image& img, const string& buf)
{
  memset(&img, 0, sizeof(img));

  boost::char_separator<char> sep("|");
  tokenizer tokens(buf, sep);
  tokenizer::iterator tok_iter = tokens.begin();

  if (distance(tok_iter, tokens.end()) < 16)
    return -1;

  // Integer key columns.
  img.l_orderkey = atoi(tok_iter->c_str());
  ++tok_iter;
  img.l_partkey = atoi(tok_iter->c_str());
  ++tok_iter;
  img.l_suppkey = atoi(tok_iter->c_str());
  ++tok_iter;
  img.l_linenumber = atoll(tok_iter->c_str());
  ++tok_iter;

  // Decimal columns, converted by idbbigdec2().
  img.l_quantity = idbbigdec2(tok_iter->c_str());
  ++tok_iter;
  img.l_extendedprice = idbbigdec2(tok_iter->c_str());
  ++tok_iter;
  img.l_discount = idbbigdec2(tok_iter->c_str());
  ++tok_iter;
  img.l_tax = idbbigdec2(tok_iter->c_str());
  ++tok_iter;

  // Single-character flags; tokens are never empty, so at(0) cannot throw.
  img.l_returnflag = tok_iter->at(0);
  ++tok_iter;
  img.l_linestatus = tok_iter->at(0);
  ++tok_iter;

  // Date columns, packed by idbdate().
  img.l_shipdate = idbdate(tok_iter->c_str());
  ++tok_iter;
  img.l_commitdate = idbdate(tok_iter->c_str());
  ++tok_iter;
  img.l_receiptdate = idbdate(tok_iter->c_str());
  ++tok_iter;

  // Fixed-width text columns: never copy more than the array can hold.
  size_t len = tok_iter->size();
  if (len > sizeof(img.l_shipinstruct))
    len = sizeof(img.l_shipinstruct);
  memcpy(&img.l_shipinstruct[0], tok_iter->c_str(), len);
  ++tok_iter;

  len = tok_iter->size();
  if (len > sizeof(img.l_shipmode))
    len = sizeof(img.l_shipmode);
  memcpy(&img.l_shipmode[0], tok_iter->c_str(), len);
  ++tok_iter;

  len = tok_iter->size();
  if (len > sizeof(img.l_comment))
    len = sizeof(img.l_comment);
  memcpy(&img.l_comment[0], tok_iter->c_str(), len);

  return 0;
}
// Parses one '|'-delimited lineitem text row into a packed binary record
// whose first four columns are unsigned.
//
// The record is zero-filled first, so unused tail bytes of the text columns
// are 0. The row is split on '|'; char_separator's default policy drops empty
// tokens, so an empty field does not produce a token and the following
// fields shift left. Rows with fewer than 16 tokens are rejected.
//
// Text columns are truncated to the size of their destination array. The
// copy length was previously unbounded, so an over-long field overflowed the
// array and overwrote neighbouring members or memory past the record. A
// field that fills its array exactly is stored without a terminating NUL.
//
// @param img  record to fill; always overwritten, even on failure.
// @param buf  one input line without the trailing newline.
// @return     0 on success, -1 if the row has fewer than 16 fields.
int unsigned_parseinto(unsigned_lineitem_image& img, const string& buf)
{
  memset(&img, 0, sizeof(img));

  boost::char_separator<char> sep("|");
  tokenizer tokens(buf, sep);
  tokenizer::iterator tok_iter = tokens.begin();

  if (distance(tok_iter, tokens.end()) < 16)
    return -1;

  // Unsigned key columns, parsed as base-10.
  img.l_orderkey = strtoul(tok_iter->c_str(), 0, 10);
  ++tok_iter;
  img.l_partkey = strtoul(tok_iter->c_str(), 0, 10);
  ++tok_iter;
  img.l_suppkey = strtoul(tok_iter->c_str(), 0, 10);
  ++tok_iter;
  img.l_linenumber = strtoull(tok_iter->c_str(), 0, 10);
  ++tok_iter;

  // Decimal columns, converted by idbbigdec2().
  img.l_quantity = idbbigdec2(tok_iter->c_str());
  ++tok_iter;
  img.l_extendedprice = idbbigdec2(tok_iter->c_str());
  ++tok_iter;
  img.l_discount = idbbigdec2(tok_iter->c_str());
  ++tok_iter;
  img.l_tax = idbbigdec2(tok_iter->c_str());
  ++tok_iter;

  // Single-character flags; tokens are never empty, so at(0) cannot throw.
  img.l_returnflag = tok_iter->at(0);
  ++tok_iter;
  img.l_linestatus = tok_iter->at(0);
  ++tok_iter;

  // Date columns, packed by idbdate().
  img.l_shipdate = idbdate(tok_iter->c_str());
  ++tok_iter;
  img.l_commitdate = idbdate(tok_iter->c_str());
  ++tok_iter;
  img.l_receiptdate = idbdate(tok_iter->c_str());
  ++tok_iter;

  // Fixed-width text columns: never copy more than the array can hold.
  size_t len = tok_iter->size();
  if (len > sizeof(img.l_shipinstruct))
    len = sizeof(img.l_shipinstruct);
  memcpy(&img.l_shipinstruct[0], tok_iter->c_str(), len);
  ++tok_iter;

  len = tok_iter->size();
  if (len > sizeof(img.l_shipmode))
    len = sizeof(img.l_shipmode);
  memcpy(&img.l_shipmode[0], tok_iter->c_str(), len);
  ++tok_iter;

  len = tok_iter->size();
  if (len > sizeof(img.l_comment))
    len = sizeof(img.l_comment);
  memcpy(&img.l_comment[0], tok_iter->c_str(), len);

  return 0;
}
} // namespace
// li2bin: reads '|'-delimited lineitem rows from stdin and writes one packed
// 145-byte binary record per valid row to stdout.
//
//   -h  print usage to stderr and exit
//   -u  store the first four columns as unsigned integers
//
// Rows that fail to parse (fewer than 16 fields) are skipped silently.
//
// The read loop tests the result of getline() itself. The previous
// "getline; while (!cin.eof())" form discarded the last row when the input
// did not end with a newline, and looped forever if the stream failed
// without reaching end-of-file.
int main(int argc, char** argv)
{
  if ((argc > 1) && (strcmp(argv[1], "-h") == 0))
  {
    std::cerr << "li2bin [-u]" << std::endl;
    std::cerr << " -u Create first 4 fields as unsigned integers" << std::endl;
    return 0;
  }

  const bool useUnsigned = (argc > 1) && (strcmp(argv[1], "-u") == 0);
  string input;

  if (useUnsigned)
  {
    unsigned_lineitem_image i;

    while (getline(cin, input))
    {
      if (unsigned_parseinto(i, input) == 0)
        cout.write(reinterpret_cast<const char*>(&i), sizeof(i));
    }
  }
  else
  {
    lineitem_image i;

    while (getline(cin, input))
    {
      if (parseinto(i, input) == 0)
        cout.write(reinterpret_cast<const char*>(&i), sizeof(i));
    }
  }

  return 0;
}