1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00
2022-01-21 16:43:49 +00:00

264 lines
5.9 KiB
C++

/* Copyright (C) 2014 InfiniDB, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/******************************************************************************************
* $Id: cpimport.cpp 33 2006-08-24 14:36:17Z wzhou $
*
******************************************************************************************/
#include <iostream>
#include <we_bulkload.h>
#define ENV_BULK_DIR "CP_BULK_DIR"
#include <string>
#include <boost/progress.hpp>
using namespace std;
using namespace WriteEngine;
// Number of entries in Lines; also used as the iteration bound by build_data()
// and by each timed loop in main().
#define MAXSTRINGS 100000
// Sample input lines for the timing runs; every entry is assigned by build_data().
string Lines[MAXSTRINGS];
// Field-list type accepted by parseStr().
typedef std::vector<std::string> LineFldList;
/**
 * @brief Splits a '|'-delimited line into tokens with boost::tokenizer.
 *
 * Every token produced by the tokenizer is appended to @p fields; the list is
 * not cleared first, so callers that want only this line's tokens must clear
 * it themselves. The separator is built with only a dropped-delimiter set, so
 * whether empty tokens are emitted follows boost::char_separator's default
 * policy (drop empty tokens, per the Boost.Tokenizer documentation).
 *
 * @param instr  Line to tokenize.
 * @param fields Output list receiving the tokens. Taken by reference: when it
 *               was passed by value the tokens were pushed into a temporary
 *               copy and silently discarded on return.
 * @return Always EXIT_SUCCESS.
 */
const int parseStr(const std::string& instr, LineFldList& fields)
{
  typedef boost::tokenizer<boost::char_separator<char> > tokenizer;

  boost::char_separator<char> sep("|");
  tokenizer tokens(instr, sep);

  for (tokenizer::iterator tok_iter = tokens.begin(); tok_iter != tokens.end(); ++tok_iter)
  {
    // std::cout << "<" << *tok_iter << "> ";
    fields.push_back(*tok_iter);
  }

  // std::cout << "\n";
  return EXIT_SUCCESS;
}
/**
 * @brief Benchmark helper: walks a '|'-delimited line with strtok().
 *
 * strtok() overwrites delimiters in the buffer it scans and requires that
 * buffer to be NUL-terminated, so the input is copied into a private scratch
 * buffer with an explicit terminator. The tokens themselves are discarded;
 * only the cost of tokenizing is of interest.
 *
 * @param instr Line to tokenize; may be empty.
 * @return Always EXIT_SUCCESS.
 */
int strtok_test(const std::string& instr)
{
  // Owned scratch copy: released automatically, and never zero-sized because
  // the terminator is always appended.
  std::vector<char> scratch(instr.begin(), instr.end());
  scratch.push_back('\0');

  // Stop as soon as strtok() reports no more tokens, rather than calling it
  // once more after a NULL result.
  for (char* fragment = strtok(&scratch[0], "|"); fragment != NULL; fragment = strtok(NULL, "|"))
  {
    // printf("Token: %s\n", fragment);
  }

  return EXIT_SUCCESS;
}
/**
 * @brief Benchmark helper: hand-rolled '|' splitter built on strcspn().
 *
 * Scans the line left to right and collects every non-empty field into a
 * local list, which is discarded on return (only the parsing cost matters).
 * Empty fields (leading bar, trailing bar, consecutive bars) are skipped.
 *
 * The scan runs directly over instr.c_str(): strcspn() does not modify its
 * input, and c_str() is NUL-terminated, so no scratch copy is needed and no
 * read can run past the end of the string. The result list grows on demand,
 * so lines with any number of fields are safe, and a field that follows an
 * empty field is no longer skipped.
 *
 * @param instr Line to split; may be empty.
 * @return Always EXIT_SUCCESS.
 */
int handrolled_test(const std::string& instr)
{
  const char* const begin = instr.c_str();
  const char* const end = begin + instr.length();

  std::vector<std::string> results;

  const char* pos = begin;
  while (pos < end)
  {
    // Length of the run up to the next bar (or up to the terminator).
    const std::size_t span = strcspn(pos, "|");

    if (span != 0)
    {
      results.push_back(std::string(pos, span));
    }

    pos += span;

    // Step over the delimiter we stopped on; at the terminator pos == end
    // and the loop finishes without advancing out of bounds.
    if (pos < end)
    {
      ++pos;
    }
  }

  // printf("\n%i dips", count);
  return EXIT_SUCCESS;
}
/**
 * @brief Splits a '|'-delimited line into fields, keeping empty fields.
 *
 * Rules:
 *  - every '|' terminates one field, so a leading bar or consecutive bars
 *    produce empty fields;
 *  - a trailing '|' is a terminator only and does not add an extra empty
 *    field after it (the tpch-style "item|item|item|" form);
 *  - text after the last bar is a final field even without a closing bar,
 *    including when it is a single character;
 *  - an empty line yields no fields.
 *
 * Examples: "a|b|" -> {"a","b"}; "|a||b" -> {"","a","","b"}; "||" -> {"",""};
 * "a" -> {"a"}; "" -> {}.
 *
 * The scan uses std::string::find on the input itself, so it never reads past
 * the end of the line and needs no scratch allocation.
 *
 * @param instr  Line to split. Not modified.
 * @param Fields Caller-provided output array. Its size is not known here, so
 *               the caller must supply enough slots; the number of fields
 *               written never exceeds instr.length().
 * @return Number of fields written to Fields.
 */
int handrolled_test2(std::string& instr, std::string Fields[])
{
  const std::string::size_type length = instr.length();
  std::string::size_type pos = 0;  // start of the field currently being read
  int count = 0;                   // fields written so far

  while (pos < length)
  {
    // The field ends at the next bar, or at end of line if there is none.
    std::string::size_type bar = instr.find('|', pos);
    if (bar == std::string::npos)
    {
      bar = length;
    }

    Fields[count++].assign(instr, pos, bar - pos);

    // Continue after the bar. If that bar was the last character, pos becomes
    // length and the loop ends without emitting a trailing empty field.
    pos = bar + 1;
  }

  return count;
}
/**
 * @brief Placeholder that performs no parsing.
 * @return Always 1.
 */
int parseToken()
{
  const int fixedResult = 1;
  return fixedResult;
}
int build_data()
{
int idx;
for (idx = 0; idx < MAXSTRINGS; idx++)
{
// tpch data files are of the form
// item|item|item and the line may end with |
// even though this may wrongly suggest a blank value at the end
Lines[idx] = "12345|abcdef|banana|banana|"; // 'item item item item'
}
// std::cout << Lines[idx-1] << endl;
return 0;
}
/**
 * @brief Driver: checks handrolled_test2() on sample lines, then times the
 *        four splitting approaches over the MAXSTRINGS lines in Lines.
 *
 * Accuracy phase: prints each sample search string followed by the fields
 * handrolled_test2() extracted from it (each terminated by '$' so empty
 * fields are visible).
 *
 * Speed phase: after build_data() fills Lines, runs parseStr(), strtok_test(),
 * handrolled_test() and handrolled_test2() over every line and prints the
 * elapsed time reported by boost::timer for each.
 *
 * @param argc Unused.
 * @param argv Unused.
 * @return Always 0.
 */
int main(int argc, char** argv)
{
  (void)argc;
  (void)argv;

  int fcount;
  string Fields[1000];
  string searches[] = {"",
                       "|",
                       "|||||||||||||||",
                       "12345|abcdef|banana|",
                       "123456789012345678901234567890",
                       "|12345678901234567890|12345678901234567890|12345678901234567890|12345678901234567890|"
                       "12345678901234567890|12345678901234567890|12345678901234567890|12345678901234567890|"
                       "12345678901234567890|12345678901234567890|12345678901234567890",
                       "|12345|abcdef|banana|bank123",
                       "|123456789012345678901234567890",
                       "12345|abcdef|banana|bank123",
                       "12345||abcdef||banana|bank",
                       "|12345||abcdef|banana|bank",
                       "|12345|abcdef|banana|bank|",
                       "|12345|abcdef|banana||",
                       "|12345|abcdef|banana|||"};
  // Derived from the array so adding or removing a sample cannot leave the
  // loop bound out of date.
  const int numSearches = static_cast<int>(sizeof(searches) / sizeof(searches[0]));

  printf("\nAccuracy:");

  for (int test = 0; test < numSearches; test++)
  {
    printf("\n\nSearch string %i: %s", test, searches[test].c_str());
    fcount = handrolled_test2(searches[test], Fields);

    for (int idx = 0; idx < fcount; idx++)
    {
      printf("\nString %i: %s$", idx, Fields[idx].c_str());
    }
  }

  printf("\n\nSpeed:\n");
  build_data();

  boost::timer t;
  LineFldList parseFields;

  for (int idx = 0; idx < MAXSTRINGS; idx++)
  {
    // Hand the parser an empty list for every line so tokens can never pile
    // up across iterations; clear() keeps any capacity already allocated.
    parseFields.clear();
    parseStr(Lines[idx], parseFields);
  }

  printf("Boost Parse Timer: %lf\n", t.elapsed());

  t.restart();

  for (int idx = 0; idx < MAXSTRINGS; idx++)
  {
    strtok_test(Lines[idx]);
  }

  printf("Strtok Timer: %lf\n", t.elapsed());

  t.restart();

  for (int idx = 0; idx < MAXSTRINGS; idx++)
  {
    handrolled_test(Lines[idx]);
  }

  printf("Handrolled Timer: %lf\n", t.elapsed());

  t.restart();

  for (int idx = 0; idx < MAXSTRINGS; idx++)
  {
    fcount = handrolled_test2(Lines[idx], Fields);
  }

  printf("Handrolled2 Timer: %lf\n", t.elapsed());

  printf("\n");
  return 0;
}