You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-11-03 17:13:17 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			264 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			264 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/* Copyright (C) 2014 InfiniDB, Inc.
 | 
						|
 | 
						|
   This program is free software; you can redistribute it and/or
 | 
						|
   modify it under the terms of the GNU General Public License
 | 
						|
   as published by the Free Software Foundation; version 2 of
 | 
						|
   the License.
 | 
						|
 | 
						|
   This program is distributed in the hope that it will be useful,
 | 
						|
   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
   GNU General Public License for more details.
 | 
						|
 | 
						|
   You should have received a copy of the GNU General Public License
 | 
						|
   along with this program; if not, write to the Free Software
 | 
						|
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
						|
   MA 02110-1301, USA. */
 | 
						|
 | 
						|
/******************************************************************************************
 | 
						|
 * $Id: cpimport.cpp 33 2006-08-24 14:36:17Z wzhou $
 | 
						|
 *
 | 
						|
 ******************************************************************************************/
 | 
						|
#include <iostream>
 | 
						|
#include <we_bulkload.h>
 | 
						|
 | 
						|
#define ENV_BULK_DIR "CP_BULK_DIR"
 | 
						|
#include <string>
 | 
						|
#include <boost/progress.hpp>
 | 
						|
 | 
						|
using namespace std;
 | 
						|
using namespace WriteEngine;
 | 
						|
 | 
						|
// Number of sample lines held in Lines; also the iteration count of every
// timing loop in main().
#define MAXSTRINGS 100000
// Shared input for the timing loops; filled by build_data().
std::string Lines[MAXSTRINGS];
// List type that parseStr() takes for the tokens of one line.
typedef std::vector<std::string> LineFldList;
 | 
						|
 | 
						|
const int parseStr(const string& instr, LineFldList fields)
 | 
						|
{
 | 
						|
  typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
 | 
						|
  boost::char_separator<char> sep("|");
 | 
						|
  tokenizer tokens(instr, sep);
 | 
						|
 | 
						|
  for (tokenizer::iterator tok_iter = tokens.begin(); tok_iter != tokens.end(); ++tok_iter)
 | 
						|
  {
 | 
						|
    //        std::cout << "<" << *tok_iter << "> ";
 | 
						|
    fields.push_back(*tok_iter);
 | 
						|
  }
 | 
						|
 | 
						|
  // std::cout << "\n";
 | 
						|
  return EXIT_SUCCESS;
 | 
						|
}
 | 
						|
/**
 * Walks the '|'-separated tokens of instr with strtok(). The tokens are not
 * stored anywhere; the function exists only so main() can time the approach.
 *
 * @param instr  line to tokenize; it is not modified
 * @return EXIT_SUCCESS, unconditionally
 */
int strtok_test(const std::string& instr)
{
  // strtok() writes NUL bytes into its input and requires a NUL-terminated
  // buffer, so tokenize an owned copy. std::string supplies the terminator
  // (the previous raw buffer had none) and frees the storage on every path.
  std::string search(instr);

  for (char* fragment = strtok(&search[0], "|"); fragment != NULL; fragment = strtok(NULL, "|"))
  {
    // Tokens are intentionally ignored.
  }

  return EXIT_SUCCESS;
}
 | 
						|
 | 
						|
/**
 * Hand-rolled tokenizer: scans instr for '|' with strcspn() and copies each
 * non-empty field into a small local array. The fields are discarded on
 * return; the function exists only so main() can time the approach.
 *
 * At most 10 fields are kept; any further fields are scanned but not stored.
 *
 * @param instr  line to tokenize; it is not modified
 * @return EXIT_SUCCESS, unconditionally
 */
int handrolled_test(const std::string& instr)
{
  const std::size_t kMaxResults = 10;
  std::string results[kMaxResults];
  std::size_t count = 0;

  // strcspn() only reads, so scan the caller's NUL-terminated storage in
  // place instead of copying it into an unterminated scratch buffer.
  const char* pos = instr.c_str();
  const char* const end = pos + instr.length();

  while (pos < end)
  {
    const std::size_t span = strcspn(pos, "|");

    // Empty fields (leading or consecutive bars) are skipped; the bound check
    // keeps long lines from writing past the results array.
    if (span > 0 && count < kMaxResults)
    {
      results[count++].assign(pos, span);
    }

    pos += span;

    // Step over the separator that ended this field. Never advance beyond
    // the terminator, so the next scan always starts inside the string.
    if (pos < end)
    {
      ++pos;
    }
  }

  return EXIT_SUCCESS;
}
 | 
						|
 | 
						|
/**
 * Splits instr into '|'-separated fields and stores them in Fields.
 *
 * Every bar terminates one field, so a leading bar or consecutive bars
 * produce empty fields, and a bar at the very end of the line does not add
 * an extra field after it. Text following the last bar, if any, becomes the
 * final field. An empty line produces no fields.
 *
 * @param instr   line to split; it is not modified and must not be one of
 *                the Fields elements
 * @param Fields  output array; the caller must provide room for every field,
 *                no bounds checking is possible here
 * @return number of fields written to Fields
 */
int handrolled_test2(std::string& instr, std::string Fields[])
{
  const std::string::size_type len = instr.length();
  std::string::size_type start = 0;  // first character of the current field
  int count = 0;                     // keeps track of fields found

  while (start < len)
  {
    std::string::size_type bar = instr.find('|', start);

    if (bar == std::string::npos)
    {
      bar = len;  // unterminated last field runs to the end of the line
    }

    Fields[count++].assign(instr, start, bar - start);

    // Resume after the terminating bar; when there was none this moves past
    // len and ends the loop without reading beyond the string.
    start = bar + 1;
  }

  return count;
}
 | 
						|
 | 
						|
// Placeholder that always returns 1; it is not called from the code visible
// in this file.
int parseToken()
{
  return 1;
}
 | 
						|
 | 
						|
int build_data()
 | 
						|
{
 | 
						|
  int idx;
 | 
						|
 | 
						|
  for (idx = 0; idx < MAXSTRINGS; idx++)
 | 
						|
  {
 | 
						|
    // tpch data files are of the form
 | 
						|
    // item|item|item and the line may end with |
 | 
						|
    // even though this may wrongly suggest a blank value at the end
 | 
						|
    Lines[idx] = "12345|abcdef|banana|banana|";  // 'item item item item'
 | 
						|
  }
 | 
						|
 | 
						|
  // std::cout  << Lines[idx-1] << endl;
 | 
						|
  return 0;
 | 
						|
}
 | 
						|
 | 
						|
int main(int argc, char** argv)
 | 
						|
{
 | 
						|
  string sJobIdStr, sBulkDir = "", sDbDir = "", sFileName, sTmp;
 | 
						|
  int fcount;
 | 
						|
  string Fields[1000];
 | 
						|
  string search;
 | 
						|
  string searches[] = {"",
 | 
						|
                       "|",
 | 
						|
                       "|||||||||||||||",
 | 
						|
                       "12345|abcdef|banana|",
 | 
						|
                       "123456789012345678901234567890",
 | 
						|
                       "|12345678901234567890|12345678901234567890|12345678901234567890|12345678901234567890|"
 | 
						|
                       "12345678901234567890|12345678901234567890|12345678901234567890|12345678901234567890|"
 | 
						|
                       "12345678901234567890|12345678901234567890|12345678901234567890",
 | 
						|
                       "|12345|abcdef|banana|bank123",
 | 
						|
                       "|123456789012345678901234567890",
 | 
						|
                       "12345|abcdef|banana|bank123",
 | 
						|
                       "12345||abcdef||banana|bank",
 | 
						|
                       "|12345||abcdef|banana|bank",
 | 
						|
                       "|12345|abcdef|banana|bank|",
 | 
						|
                       "|12345|abcdef|banana||",
 | 
						|
                       "|12345|abcdef|banana|||"};
 | 
						|
  // 14 elements
 | 
						|
  printf("\nAccuracy:");
 | 
						|
 | 
						|
  for (int test = 0; test < 14; test++)
 | 
						|
  {
 | 
						|
    printf("\n\nSearch string %i: %s", test, searches[test].c_str());
 | 
						|
    fcount = handrolled_test2(searches[test], Fields);
 | 
						|
 | 
						|
    for (int idx = 0; idx < fcount; idx++)
 | 
						|
    {
 | 
						|
      printf("\nString %i: %s$", idx, Fields[idx].c_str());
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  printf("\n\nSpeed:\n");
 | 
						|
 | 
						|
  build_data();
 | 
						|
  boost::timer t;
 | 
						|
 | 
						|
  LineFldList parseFields;
 | 
						|
 | 
						|
  for (int idx = 0; idx < MAXSTRINGS; idx++)
 | 
						|
  {
 | 
						|
    parseStr(Lines[idx], parseFields);
 | 
						|
  }
 | 
						|
 | 
						|
  printf("Boost Parse Timer: %lf\n", t.elapsed());
 | 
						|
  t.restart();
 | 
						|
 | 
						|
  for (int idx = 0; idx < MAXSTRINGS; idx++)
 | 
						|
  {
 | 
						|
    strtok_test(Lines[idx]);
 | 
						|
  }
 | 
						|
 | 
						|
  printf("Strtok Timer: %lf\n", t.elapsed());
 | 
						|
 | 
						|
  t.restart();
 | 
						|
 | 
						|
  for (int idx = 0; idx < MAXSTRINGS; idx++)
 | 
						|
  {
 | 
						|
    handrolled_test(Lines[idx]);
 | 
						|
  }
 | 
						|
 | 
						|
  printf("Handrolled Timer: %lf\n", t.elapsed());
 | 
						|
 | 
						|
  t.restart();
 | 
						|
 | 
						|
  for (int idx = 0; idx < MAXSTRINGS; idx++)
 | 
						|
  {
 | 
						|
    fcount = handrolled_test2(Lines[idx], Fields);
 | 
						|
  }
 | 
						|
 | 
						|
  printf("Handrolled2 Timer: %lf\n", t.elapsed());
 | 
						|
 | 
						|
  printf("\n");
 | 
						|
  return 0;
 | 
						|
}
 |