// Web-mirror page header (not part of the original source file):
//   1
//   0
//   mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00
//   2025-02-21 20:02:38 +04:00
//
//   1198 lines
//   34 KiB
//   C++
/* Copyright (C) 2014 InfiniDB, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/******************************************************************************
* $Id: largedatalist.h 9655 2013-06-25 23:08:13Z xlou $
*
*****************************************************************************/
/** @file
* class LargeDataList interface
*/
#pragma once
#include <iostream>
#include <fstream>
#include <vector>
#include <stdexcept>
#include <sstream>
#include <boost/thread.hpp>
#include <boost/thread/condition.hpp>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <boost/date_time/posix_time/posix_time.hpp>
#include "datalistimpl.h"
#include "configcpp.h"
#include "elementcompression.h"
#include "resourcemanager.h"
#include "exceptclasses.h"
#ifndef O_BINARY
#define O_BINARY 0
#endif
#ifndef O_DIRECT
#define O_DIRECT 0
#endif
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif
#ifndef O_NOATIME
#define O_NOATIME 0
#endif
namespace joblist
{
// Declaration only; the object itself lives in another translation unit.
// NOTE(review): presumably the fallback directory for temp spill files -- confirm
// against resourcemanager.cpp. It is not referenced elsewhere in this header.
extern const std::string defaultTempDiskPath; // defined in resourcemanager.cpp
// 0x2000000 == 33,554,432 (32 * 2^20). Not referenced elsewhere in this header.
const uint32_t defaultMaxElements = 0x2000000; // 32M
/** @brief Compression applied to elements when a set is saved to temporary disk.
 *
 * Each mode names the on-disk width of the RID and, where the element has one,
 * of the data value.
 */
enum CompressionModeType
{
  /// Neither the RID nor the data value is compressed.
  COMPRESS_NO_COMPRESS = 1,
  /// 64 bit RID, 32 bit value.
  COMPRESS_TO_64_32 = 2,
  /// 32 bit RID, 64 bit value.
  COMPRESS_TO_32_64 = 3,
  /// 32 bit RID, 32 bit value.
  COMPRESS_TO_32_32 = 4,
  /// RID-only element, RID stored in 32 bits.
  COMPRESS_TO_32 = 5,
  /// StringElementType with the RID stored in 32 bits.
  COMPRESS_TO_32_STR = 6
};
#ifndef _DISKIOINFO__
#define _DISKIOINFO__
/** @brief Record of a single temp-disk transfer: start/end time, size, direction.
 *
 * The guard macro allows the same definition to appear in more than one header.
 */
struct DiskIoInfo
{
  boost::posix_time::ptime fStart;  ///< time the transfer started
  boost::posix_time::ptime fEnd;    ///< time the transfer finished
  uint64_t fBytes;                  ///< number of bytes transferred
  bool fWrite;                      ///< true for a write, false for a read

  /// @param b true when the record describes a write operation (default: read).
  /// The byte count starts at zero; the timestamps are left default-constructed.
  explicit DiskIoInfo(bool b = false)
   : fBytes(0)
   , fWrite(b)
  {
  }
};
#endif  //_DISKIOINFO__
/* element_t has to implement ostream& operator<<() and istream& operator>>():
   the uncompressed save()/load() paths stream elements through
   std::ostream_iterator / std::istream_iterator. */
/* This is an abstract class, but doesn't look like it. */
/** @brief class LargeDataList
*
* A DataListImpl whose container can be written to, and read back from, a
* temporary file as a sequence of "sets". Each saved set is recorded by its
* starting file offset so it can be reloaded later. The list starts in the
* produce phase (phase == 0); endOfInput() switches it to the consume phase
* (phase == 1) and wakes any consumers blocked in waitForConsumePhase().
*
* None of the methods here take the list mutex themselves; the original notes
* say it must be held at a higher level.
*/
template <typename container_t, typename element_t>
class LargeDataList : public DataListImpl<container_t, element_t>
{
typedef DataListImpl<container_t, element_t> base;
public:
// numConsumers is forwarded to the base class; the two element save sizes are
// forwarded to setDiskElemSize(); rm supplies the temp-disk directory.
LargeDataList(uint32_t numConsumers, uint32_t elementSaveSize1, uint32_t elementSaveSize2,
const ResourceManager* rm);
// Removes the temp file (unless it is a reuse file).
virtual ~LargeDataList();
// Saves restore info if requested, ends input on the base, enters the consume
// phase and notifies waiting consumers.
virtual void endOfInput();
// Inserts into the base list and counts the element in totSize.
virtual void insert(const element_t& e);
// Blocks until the consume phase, then returns the number of inserted elements.
virtual uint64_t totalSize();
virtual void setMultipleProducers(bool);
// Returns 1 if at least one set was saved to a non-reuse file, otherwise 0.
virtual uint64_t numberOfTempFiles() const;
// Updates the save sizes in the base class and recomputes the compression mode.
virtual void setDiskElemSize(uint32_t size1st, uint32_t size2nd);
// Switches the list to a caller-named file; when !readonly the file is
// created/truncated and restore info is written to *info at endOfInput().
virtual void setReuseInfo(SetRestoreInfo* info, const std::string filename, bool readonly);
// Adopts set count, total size and set offsets from info, then loads set 0.
virtual void restoreSetForReuse(const struct SetRestoreInfo& info);
// Enables/disables recording of DiskIoInfo entries by save/load.
virtual void traceOn(bool b)
{
fTraceOn = b;
}
// Accessor for the recorded disk I/O entries (only filled while tracing is on).
std::list<DiskIoInfo>& diskIoList()
{
return fDiskIoList;
}
protected:
// @bug 721. add append option
// Reads set setNumber from the file element by element; with append the
// elements are added to the current container contents instead of replacing them.
virtual void load(uint64_t setNumber, bool append = false);
/* Warning!! The container must have enough memory reserved, it must be
contiguous, and element_t must be inline data!! */
// @bug 721. add append option
virtual void load_contiguous(uint64_t setNumber, bool append = false);
// these save methods return the number of bytes written by the save
// operation
virtual uint64_t save();
virtual uint64_t save(container_t* c);
virtual uint64_t save_contiguous();
virtual uint64_t save_contiguous(container_t* c);
// Replaces the container with a fresh one and advances loadedSet and setCount.
virtual void registerNewSet();
// Builds a new candidate temp-file name under `path`; each call yields a new name.
virtual std::string getFilename();
// next() waits for the consume phase first; next_nowait() does not.
virtual bool next(uint64_t it, element_t* e);
virtual bool next_nowait(uint64_t it, element_t* e);
// Blocks on consumePhase until phase != 0.
virtual void waitForConsumePhase();
// Point every consumer iterator at the start of the current container;
// resetIterators() waits for the consume phase first.
virtual void resetIterators();
virtual void resetIterators_nowait();
// Unlinks the temp file unless it is a reuse file.
void removeFile();
bool saveForReuse()
{
return fSaveForReuse;
}
std::string path; // directory in which temp files are created
uint64_t loadedSet, setCount; // set currently in memory; number of sets so far
int64_t phase; // 0 = produce phase, 1 = consume phase
uint64_t totSize; // number of elements passed to insert()
bool multipleProducers; // set by setMultipleProducers(); not read in this header
bool fTraceOn; // when true, save/load record DiskIoInfo entries
std::list<DiskIoInfo> fDiskIoList; // recorded disk I/O entries
private:
// Declare but don't define default and copy constructor, and assignment
// operator to disable their use.
explicit LargeDataList();
LargeDataList(const LargeDataList<container_t, element_t>&);
LargeDataList& operator=(const LargeDataList<container_t, element_t>&);
// Creates a uniquely named temp file and opens fFile on it.
void createTempFile();
// Helpers that write/read the element payload in the current compression mode.
void save_contiguousCompressed(container_t* c);
void save_noncontiguousCompressed(container_t* c);
template <typename saveElement_t>
void writeContiguousCompressed(container_t* c);
void load_contiguousCompressed(bool append, uint64_t count);
template <typename saveElement_t>
void readContiguousCompressed(uint64_t count, element_t* pElementData);
void setCompressionMode(); // set current compression mode
// Copies set count, total size and set offsets into *fRestoreInfo.
void saveRestoreInfo();
boost::condition consumePhase; // consumers block here until endOfInput()
uint64_t filenameCounter; // suffix used by getFilename(), incremented per call
std::string fFilename; // LDL file name
std::vector<std::fstream::pos_type> fSetStartPositions; // file offsets
std::fstream fFile; // stream used to store the LDL file
uint64_t fLoadedSetCount; // number of sets that've been loaded
CompressionModeType fCompressMode; // compression used when saved to disk
bool fReUse; // flag for reuse
bool fSaveForReuse; // flag to save the restore information
SetRestoreInfo* fRestoreInfo; // point to the restore data in control
};
/** @brief Construct an empty list in the produce phase.
 *
 * @param nc                  number of consumers, forwarded to the base class
 * @param elementSaveSize1st  on-disk size of the first element field (RID)
 * @param elementSaveSize2nd  on-disk size of the second element field (value)
 * @param rm                  resource manager that supplies the temp-disk path;
 *                            must not be null, and is only used during construction
 */
template <typename container_t, typename element_t>
LargeDataList<container_t, element_t>::LargeDataList(uint32_t nc, uint32_t elementSaveSize1st,
                                                     uint32_t elementSaveSize2nd, const ResourceManager* rm)
 : base(nc)
 , path(rm->getScTempDiskPath())  // rm is a pointer: must be dereferenced with ->
 , loadedSet(0)
 , setCount(1)
 , phase(0)
 , totSize(0)
 , multipleProducers(false)
 , fTraceOn(false)
 , filenameCounter(1)
 , fLoadedSetCount(0)
 , fCompressMode(COMPRESS_NO_COMPRESS)
 , fReUse(false)
 , fSaveForReuse(false)
 , fRestoreInfo(nullptr)
{
  // Stores the save sizes in the base class and derives fCompressMode from them.
  setDiskElemSize(elementSaveSize1st, elementSaveSize2nd);
}
/** @brief Destructor: deletes the temporary file, if one was created.
 *
 * removeFile() leaves reuse files in place. The stream itself is closed by the
 * std::fstream member's own destructor.
 */
template <typename container_t, typename element_t>
LargeDataList<container_t, element_t>::~LargeDataList()
{
  removeFile();
}
/** @brief Unlink the temp file and forget its name.
 *
 * Does nothing when no file name has been set or when the file is a reuse
 * file (fReUse). The result of unlink() is not checked.
 */
template <typename container_t, typename element_t>
inline void LargeDataList<container_t, element_t>::removeFile()
{
  if (fFilename.empty() || fReUse)
    return;

  unlink(fFilename.c_str());
  fFilename.clear();
}
/** @brief Record whether more than one producer feeds this list.
 *  @param b value stored in the multipleProducers flag
 */
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::setMultipleProducers(bool b)
{
  this->multipleProducers = b;
}
/** @brief Finish the produce phase and release waiting consumers.
 *
 * When the list is being saved for reuse, the restore information is written
 * first. Then the base class is told input has ended, the list enters the
 * consume phase, and every thread blocked on consumePhase is notified.
 */
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::endOfInput()
{
  if (fSaveForReuse)
  {
    saveRestoreInfo();
  }

  base::endOfInput();

  phase = 1;
  consumePhase.notify_all();  // pthread_cond_broadcast(&consumePhase);
}
/** @brief Produce the next candidate temp-file name.
 *
 * The name is "<path>/LDL-0x<address of this object in hex>-<counter>", and the
 * counter is advanced on every call so successive calls return different names.
 */
template <typename container_t, typename element_t>
std::string LargeDataList<container_t, element_t>::getFilename()
{
  const uint64_t sequence = filenameCounter++;

  std::stringstream name;
  name << path << "/LDL-0x";
  name << std::hex << (ptrdiff_t)this << std::dec;
  name << "-" << sequence;
  return name.str();
}
/** @brief Bind this list to a caller-named reuse file and open it.
 *
 * @param info      restore record that endOfInput() fills in when !readonly
 * @param filename  path of the reuse file
 * @param readonly  true: open the existing file for reading only;
 *                  false: create/truncate it for reading and writing
 * @throws logging::LargeDataListExcept if the file cannot be opened
 */
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::setReuseInfo(SetRestoreInfo* info, const std::string filename,
                                                         bool readonly)
{
  fReUse = true;
  fSaveForReuse = !readonly;
  fRestoreInfo = info;
  fFilename = filename;

  // Always readable and binary; writable (and truncated) only when we are the
  // one producing the file.
  const std::ios_base::openmode readMode = std::ios_base::in | std::ios_base::binary;
  const std::ios_base::openmode openMode =
      fSaveForReuse ? (readMode | std::ios_base::out | std::ios_base::trunc) : readMode;

  fFile.open(filename.c_str(), openMode);

  if (fFile.is_open())
    return;

  const std::string errMsg = "Error opening BucketReuse file " + filename;
  perror(errMsg.c_str());
  throw logging::LargeDataListExcept(errMsg);
}
/** @brief Copy the state needed to reload this list into *fRestoreInfo.
 *
 * Stores the set count, the total element count and the file offset of every
 * saved set. fRestoreInfo must point to a valid record (see setReuseInfo()).
 */
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::saveRestoreInfo()
{
  SetRestoreInfo& record = *fRestoreInfo;
  record.fSetCount = setCount;
  record.fTotalSize = totSize;
  record.fSetStartPositions = fSetStartPositions;
}
/** @brief Restore list state from a previously saved record and load set 0.
 *  @param info record holding the set count, total size and set file offsets
 */
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::restoreSetForReuse(const struct SetRestoreInfo& info)
{
  fSetStartPositions = info.fSetStartPositions;
  totSize = info.fTotalSize;
  setCount = info.fSetCount;

  // Bring the first set into memory.
  this->load(0);
}
//------------------------------------------------------------------------------
// Number of temporary files this datalist has created.
// The current implementation keeps every set in one file, so the answer is
// either 0 or 1; callers should not rely on that, in case a later
// implementation spreads the sets over several files.
// A reuse file is not counted as a temporary file.
//------------------------------------------------------------------------------
template <typename container_t, typename element_t>
uint64_t LargeDataList<container_t, element_t>::numberOfTempFiles() const
{
  if (fReUse || fSetStartPositions.empty())
    return 0;

  return 1;
}
//------------------------------------------------------------------------------
// Create and open the temporary LDL file we will be saving data to.  With
// the current implementation, we are creating a single file to hold all the
// sets for a LargeDataList.
//
// The file is first created exclusively with open(O_EXCL) so that an existing
// file is never reused; a new name is tried for as long as the name collides.
// The descriptor is then closed and the file reopened through fFile.
//
// exceptions: std::runtime_error if the file cannot be created;
//             logging::LargeDataListExcept if the created file cannot be opened
//------------------------------------------------------------------------------
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::createTempFile()
{
  int fd = -1;
  int openErrno = 0;

  /* Is there a good way to do this through an fstream? */
  do
  {
    fFilename = this->getFilename();
    fd = open(fFilename.c_str(), O_CREAT | O_RDWR | O_EXCL | O_BINARY, 0666);
    // Capture errno right away: later library calls (string/stream formatting)
    // are allowed to modify it.
    openErrno = (fd < 0) ? errno : 0;
  } while (fd < 0 && openErrno == EEXIST);

  if (fd < 0)
  {
    std::ostringstream errmsg;
    errmsg << "LargeDataList::createTempFile(): could not save to disk (" << openErrno << ") - "
           << std::strerror(openErrno);
    std::cerr << errmsg.str() << std::endl;
    throw std::runtime_error(errmsg.str());
  }

  close(fd);

  fFile.open(fFilename.c_str(), std::ios_base::in | std::ios_base::out | std::ios_base::binary);

  if (!(fFile.is_open()))
  {
    std::string errMsg("Error opening temp file ");
    errMsg += fFilename;
    perror(errMsg.c_str());
    throw logging::LargeDataListExcept(errMsg);
  }
}
// Saves the list's current container (base::c) as a new set.
// The mutex needs to be grabbed at a higher level; nothing is locked here.
/*
    File format:
    uint64_t: # of elements
    element_t[# of elements] stored according to ostream& element_t::operator<<
*/
// Returns the number of bytes written.
template <typename container_t, typename element_t>
uint64_t LargeDataList<container_t, element_t>::save()
{
  return save(base::c);
}
// Appends container c to the temp file as a new set, element by element.
// The mutex needs to be grabbed at a higher level; nothing is locked here.
/*
    File format:
    uint64_t: # of elements
    element_t[# of elements] stored according to ostream& element_t::operator<<
    (or with a 32 bit RID when a RID compression mode is active)
*/
// The temp file is created on first use.  The set's starting offset is
// appended to fSetStartPositions so load() can find it again.
// Returns the number of bytes written for this set.
// Throws logging::LargeDataListExcept on any failure, including a failed
// stream write (e.g. disk full); the file is closed before throwing.
template <typename container_t, typename element_t>
uint64_t LargeDataList<container_t, element_t>::save(container_t* c)
{
  uint64_t nBytesWritten = 0;

#ifdef PROFILE
  struct timespec ts1, ts2;
  clock_gettime(CLOCK_REALTIME, &ts1);
#endif

  /* XXXPAT: catch exceptions in save/load or at the higher level? */
  try
  {
    //...Create the temporary LDL file (if necessary)
    if (fFilename.empty())
      createTempFile();

    // Save our file offset for this set, to be used when we load the set.
    const std::fstream::pos_type firstByte = fFile.tellp();
    fSetStartPositions.push_back(firstByte);

    DiskIoInfo info(true);

    if (fTraceOn)
      info.fStart = boost::posix_time::microsec_clock::local_time();

    const uint64_t count = c->size();
    fFile.write(reinterpret_cast<const char*>(&count), sizeof(count));

    if (fCompressMode == COMPRESS_NO_COMPRESS)
    {
      std::copy(c->begin(), c->end(), std::ostream_iterator<element_t>(fFile));
    }
    else
    {
      save_noncontiguousCompressed(c);
    }

    // A failed write leaves the stream in a failed state and makes tellp()
    // return -1; report it instead of returning a meaningless byte count.
    if (!fFile)
      throw std::runtime_error("stream write failed");

    nBytesWritten = fFile.tellp() - firstByte;

    if (fTraceOn)
    {
      info.fEnd = boost::posix_time::microsec_clock::local_time();
      info.fBytes = nBytesWritten;
      fDiskIoList.push_back(info);
    }
  }
  catch (const std::runtime_error& e)
  {
    if (fFile.is_open())
      fFile.close();

    std::string msg("Error occurred saving non-contiguous container into file " + fFilename + " ");
    std::cerr << msg << std::endl;
    throw logging::LargeDataListExcept(msg + e.what());
  }
  catch (...)
  {
    if (fFile.is_open())
      fFile.close();

    std::string msg("Error occurred saving non-contiguous container into file " + fFilename + " ");
    std::cerr << msg << std::endl;
    throw logging::LargeDataListExcept(msg);
  }

#ifdef PROFILE
  clock_gettime(CLOCK_REALTIME, &ts2);
  /* What should we do with this profile info? */
#endif

  return nBytesWritten;
}
/** @brief Save the list's current container (base::c) as a new contiguous set.
 *
 * The container is passed through with its own type, exactly as save() does,
 * so this forwarding function compiles for every container_t. Whether the
 * container really is contiguous is the caller's responsibility (see the
 * warning on load_contiguous()).
 *
 * @return number of bytes written
 */
template <typename container_t, typename element_t>
uint64_t LargeDataList<container_t, element_t>::save_contiguous()
{
  return save_contiguous(base::c);
}
/** @brief Append container c to the temp file as one raw block.
 *
 * File format: a uint64_t element count followed by either the elements'
 * in-memory bytes (no compression) or the compressed representation chosen by
 * fCompressMode. The container must store its elements contiguously and
 * element_t must be inline data. The mutex must be held at a higher level.
 *
 * The temp file is created on first use and the set's starting offset is
 * appended to fSetStartPositions.
 *
 * @param c container to save
 * @return number of bytes written for this set
 * @throws logging::LargeDataListExcept on any failure, including a failed
 *         stream write (e.g. disk full); the file is closed before throwing
 */
template <typename container_t, typename element_t>
uint64_t LargeDataList<container_t, element_t>::save_contiguous(container_t* c)
{
  uint64_t nBytesWritten = 0;

#ifdef PROFILE
  struct timespec ts1, ts2;
  clock_gettime(CLOCK_REALTIME, &ts1);
#endif

  /* XXXPAT: catch exceptions in save/load or at the higher level? */
  try
  {
    //...Create the temporary LDL file (if necessary)
    if (fFilename.empty())
      createTempFile();

    // Save our file offset for this set, to be used when we load the set.
    const std::fstream::pos_type firstByte = fFile.tellp();
    fSetStartPositions.push_back(firstByte);

    DiskIoInfo info(true);

    if (fTraceOn)
      info.fStart = boost::posix_time::microsec_clock::local_time();

    const uint64_t count = c->size();
    fFile.write(reinterpret_cast<const char*>(&count), sizeof(count));

    // Perform compression of data, as it is saved, "if" applicable
    if (fCompressMode == COMPRESS_NO_COMPRESS)
    {
      fFile.write((char*)(c->begin().operator->()), sizeof(element_t) * count);
    }
    else
    {
      save_contiguousCompressed(c);
    }

    // A failed write leaves the stream in a failed state and makes tellp()
    // return -1; report it instead of returning a meaningless byte count.
    if (!fFile)
      throw std::runtime_error("stream write failed");

    nBytesWritten = fFile.tellp() - firstByte;

    if (fTraceOn)
    {
      info.fEnd = boost::posix_time::microsec_clock::local_time();
      info.fBytes = nBytesWritten;
      fDiskIoList.push_back(info);
    }
  }
  catch (const std::runtime_error& e)
  {
    if (fFile.is_open())
      fFile.close();

    std::string msg("Error occurred saving contiguous container into file " + fFilename + " ");
    std::cerr << msg << std::endl;
    throw logging::LargeDataListExcept(msg + e.what());
  }
  catch (...)
  {
    if (fFile.is_open())
      fFile.close();

    std::string msg("Error occurred saving contiguous container into file " + fFilename + " ");
    std::cerr << msg << std::endl;
    throw logging::LargeDataListExcept(msg);
  }

#ifdef PROFILE
  clock_gettime(CLOCK_REALTIME, &ts2);
  /* What should we do with this profile info? */
#endif

  return nBytesWritten;
}
//------------------------------------------------------------------------------
// Compress and save contiguous data to temp file.
// c - container to be saved
//
// Dispatches on fCompressMode to the writeContiguousCompressed<> instantiation
// for the matching on-disk element type.
// exceptions: std::logic_error if fCompressMode is not one of the four
//             contiguous compression modes
//------------------------------------------------------------------------------
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::save_contiguousCompressed(container_t* c)
{
  //...Compress and write out the elements based on the compression mode
  switch (fCompressMode)
  {
    case COMPRESS_TO_64_32:
    {
      writeContiguousCompressed<CompElement64Rid32Val>(c);
      break;
    }

    case COMPRESS_TO_32_64:
    {
      writeContiguousCompressed<CompElement32Rid64Val>(c);
      break;
    }

    case COMPRESS_TO_32_32:
    {
      writeContiguousCompressed<CompElement32Rid32Val>(c);
      break;
    }

    case COMPRESS_TO_32:
    {
      writeContiguousCompressed<CompElement32RidOnly>(c);
      break;
    }

    default:
    {
      std::ostringstream errmsg;
      errmsg << "save_contiguousCompressed() called "
                " without compression mode being set";
      // Print the message text; streaming the ostringstream object itself does
      // not compile under C++11 and printed an address before that.
      std::cerr << errmsg.str() << std::endl;
      throw std::logic_error(errmsg.str());
      break;
    }
  }
}
//------------------------------------------------------------------------------
// Compresses a contiguous collection of element_t into a temporary vector of
// saveElement_t and writes that vector's bytes to the temp file.
// c - container to be written; it is accessed as a std::vector<element_t>
//------------------------------------------------------------------------------
template <typename container_t, typename element_t>
template <typename saveElement_t>
void LargeDataList<container_t, element_t>::writeContiguousCompressed(container_t* c)
{
  const uint64_t elementCount = c->size();
  std::vector<element_t>* source = reinterpret_cast<std::vector<element_t>*>(c);

  //...staging buffer with one compressed slot per source element
  std::vector<saveElement_t> packed(elementCount);
  ElementCompression::compress(*source, packed);

  //...dump the staging buffer to the temp file in one write
  const char* rawBytes = reinterpret_cast<const char*>(packed.data());
  fFile.write(rawBytes, (sizeof(saveElement_t) * elementCount));
}
//------------------------------------------------------------------------------
// Compress and save noncontiguous data to temp file, one element at a time.
// c - container to be saved
//
// Only RID compression from 64 to 32 bits is supported on this path, i.e. the
// modes COMPRESS_TO_32_64, COMPRESS_TO_32 and COMPRESS_TO_32_STR.
// exceptions: std::logic_error for any other compression mode
//------------------------------------------------------------------------------
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::save_noncontiguousCompressed(container_t* c)
{
  const bool ridOnlyCompression = (fCompressMode == COMPRESS_TO_32_64) ||
                                  (fCompressMode == COMPRESS_TO_32) ||
                                  (fCompressMode == COMPRESS_TO_32_STR);

  if (!ridOnlyCompression)
  {
    std::ostringstream errmsg;
    errmsg << "save_noncontigousCompressed incorrectly called for "
              "compression mode "
           << fCompressMode;
    std::cerr << errmsg.str() << std::endl;
    throw std::logic_error(errmsg.str());
  }

  for (typename container_t::const_iterator pos = c->begin(); pos != c->end(); ++pos)
  {
    ElementCompression::writeWith32Rid(*pos, fFile);
  }
}
/** @brief Read set setNumber from the temp file, one element at a time.
 *
 * If the requested set is already in memory and the list is in the consume
 * phase, only the consumer iterators are reset. Otherwise the file is
 * positioned at the set's recorded offset, the uint64_t element count is read,
 * and that many elements are read into the container: with a 32 bit RID for
 * the RID compression modes, through element_t's operator>> otherwise.
 * std::vector and std::set containers are filled directly; any other
 * container goes through base::insert().
 *
 * After a successful load the consumer iterators are reset, loadedSet is
 * updated, and the file is closed once as many loads have been performed as
 * there are saved sets.
 *
 * @param setNumber index into fSetStartPositions of the set to load
 * @param append    false: replace the container contents;
 *                  true: add the set to what is already there (@bug 721)
 * @throws logging::LargeDataListExcept on any failure (bad set number, element
 *         count that cannot be read, ...); the file is closed before throwing
 */
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::load(uint64_t setNumber, bool append)
{
  // Initialised so that a failed header read can never leave garbage here.
  uint64_t count = 0;

#ifdef PROFILE
  struct timespec ts1, ts2;
  clock_gettime(CLOCK_REALTIME, &ts1);
#endif

  if (loadedSet == setNumber && phase != 0)
  {
    resetIterators();
    return;
  }

  /* XXXPAT: How to handle errors here?  Specifically, unless the entire load
  is successful, things will be left in a relatively undefined state.  Do we
  have to care about things like that here?  Initial guess: no. */
  try
  {
    DiskIoInfo info;

    if (fTraceOn)
      info.fStart = boost::posix_time::microsec_clock::local_time();

    // Position the file to the correct file offset for this set.
    fFile.seekg(fSetStartPositions.at(setNumber));
    std::streampos startPos = fFile.tellg();

    fFile.read(reinterpret_cast<char*>(&count), sizeof(count));

    // Without a valid count there is nothing sensible to load (the stream may
    // be closed, in a failed state, or the file truncated).
    if (!fFile)
      throw std::runtime_error("could not read the element count");

    // These three modes store each element with a 32 bit RID; every other
    // mode uses element_t's stream extraction operator.
    const bool ridCompressed = (fCompressMode == COMPRESS_TO_32_64) ||
                               (fCompressMode == COMPRESS_TO_32) ||
                               (fCompressMode == COMPRESS_TO_32_STR);

    // Specific logic to handle loading of a std::vector
    if (typeid(*base::c) == typeid(std::vector<element_t>))
    {
      std::vector<element_t>* v = reinterpret_cast<std::vector<element_t>*>(base::c);

      // @bug 721. merge all saving sets to current loaded set 0 and sort
      if (!append)
        v->resize(0);

      // Make room for the new elements on top of whatever is kept.
      v->reserve(v->size() + count);

      if (ridCompressed)
      {
        element_t e;

        for (uint64_t i = 0; i < count; ++i)
        {
          ElementCompression::readWith32Rid(e, fFile);
          v->push_back(e);
        }
      }
      else
      {
        std::istream_iterator<element_t> it(fFile);

        for (uint64_t i = 0; i < count; ++i)
        {
          v->push_back(*it);

          // Increment stream iterator except for last element.
          // We don't want to go past the end of this set.
          if ((i + 1) < count)
            ++it;  // advance to next element in file
        }
      }
    }
    // Specific logic to handle loading of a std::set
    else if (typeid(*base::c) == typeid(std::set<element_t>))
    {
      std::set<element_t>* s = reinterpret_cast<std::set<element_t>*>(base::c);

      if (!append)
        s->clear();

      if (ridCompressed)
      {
        element_t e;

        for (uint64_t i = 0; i < count; ++i)
        {
          ElementCompression::readWith32Rid(e, fFile);
          s->insert(e);
        }
      }
      else
      {
        std::istream_iterator<element_t> it(fFile);

        for (uint64_t i = 0; i < count; ++i)
        {
          s->insert(*it);

          // Increment stream iterator except for last element.
          // We don't want to go past the end of this set.
          if ((i + 1) < count)
            ++it;  // advance to next element in file
        }
      }
    }
    else
    {
      /* this is a slow fallback.  If we need it, we should write
          a specialization for whatever container c is */
      if (!append)
      {
        // Allocate before deleting so base::c never dangles if new throws.
        container_t* fresh = new container_t();
        delete base::c;
        base::c = fresh;
      }

      if (ridCompressed)
      {
        element_t e;

        for (uint64_t i = 0; i < count; ++i)
        {
          ElementCompression::readWith32Rid(e, fFile);
          base::insert(e);
        }
      }
      else
      {
        std::istream_iterator<element_t> it(fFile);

        for (uint64_t i = 0; i < count; ++i)
        {
          // we might want to use the derived class for inserting
          // instead but we will want to add to the base class
          // interface to support reentrancy.  Possibly an
          // "insert_nolock()" would be sufficient.
          base::insert(*it);

          // Increment stream iterator except for last element.
          // We don't want to go past the end of this set.
          if ((i + 1) < count)
            ++it;  // advance to next element in file
        }
      }
    }

    if (fTraceOn)
    {
      info.fEnd = boost::posix_time::microsec_clock::local_time();
      info.fBytes = fFile.tellg() - startPos;
      fDiskIoList.push_back(info);
    }
  }
  catch (const std::runtime_error& e)
  {
    if (fFile.is_open())
      fFile.close();

    std::string msg("Error occurred loading non-contiguous container from file " + fFilename + " ");
    std::cerr << msg << std::endl;
    throw logging::LargeDataListExcept(msg + e.what());
  }
  catch (...)
  {
    if (fFile.is_open())
      fFile.close();

    std::string msg("Error occurred loading non-contiguous container from file " + fFilename + " ");
    std::cerr << msg << std::endl;
    throw logging::LargeDataListExcept(msg);
  }

  resetIterators_nowait();
  loadedSet = setNumber;
  fLoadedSetCount++;

  //...Close the file once all the sets have been loaded.  We could compare
  //...setNumber to see if it is the last set, but we make no assumptions
  //...about the order in which the sets are loaded; so we instead track
  //...the number of sets loaded, and use that to know when we are done.
  if (fLoadedSetCount == fSetStartPositions.size())
  {
    fFile.close();
  }

#ifdef PROFILE
  clock_gettime(CLOCK_REALTIME, &ts2);
  /* What should we do with this profile info? */
#endif
}
/** @brief Read set setNumber from the temp file as one raw block.
 *
 * Warning: the container is accessed as a std::vector<element_t>; it must be
 * contiguous and element_t must be inline data.
 *
 * If the requested set is already in memory and the list is in the consume
 * phase, only the consumer iterators are reset. Otherwise the file is
 * positioned at the set's recorded offset, the uint64_t element count is read,
 * the container is resized, and the payload is read straight into it (or
 * expanded from its compressed form when a compression mode is active).
 *
 * After a successful load the consumer iterators are reset, loadedSet is
 * updated, and the file is closed once as many loads have been performed as
 * there are saved sets.
 *
 * @param setNumber index into fSetStartPositions of the set to load
 * @param append    false: replace the container contents;
 *                  true: place the set after the existing elements (@bug 721)
 * @throws logging::LargeDataListExcept on any failure (bad set number,
 *         unreadable count, short read, ...); the file is closed before throwing
 */
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::load_contiguous(uint64_t setNumber, bool append)
{
  // Initialised so that a failed header read can never leave garbage here.
  uint64_t count = 0;

#ifdef PROFILE
  struct timespec ts1, ts2;
  clock_gettime(CLOCK_REALTIME, &ts1);
#endif

  if (loadedSet == setNumber && phase != 0)
  {
    resetIterators();
    return;
  }

  /* XXXPAT: How to handle errors here?  Specifically, unless the entire load
  is successful, things will be left in a relatively undefined state.  Do we
  have to care about things like that here?  Initial guess: no. */
  try
  {
    DiskIoInfo info;

    if (fTraceOn)
      info.fStart = boost::posix_time::microsec_clock::local_time();

    std::vector<element_t>* v = reinterpret_cast<std::vector<element_t>*>(base::c);

    // Position the file to the correct file offset for this set.
    fFile.seekg(fSetStartPositions.at(setNumber));
    std::streampos startPos = fFile.tellg();

    fFile.read(reinterpret_cast<char*>(&count), sizeof(count));

    // Without a valid count the container must not be resized or filled.
    if (!fFile)
      throw std::runtime_error("could not read the element count");

    // Perform expansion of data, as it is loaded, "if" applicable
    if (fCompressMode == COMPRESS_NO_COMPRESS)
    {
      if (append)
      {
        // @bug 721. append to current set for sorting purpose
        uint64_t ctn = base::c->size();
        base::c->resize(ctn + count);
        fFile.read((char*)((v->begin() + ctn).operator->()), count * sizeof(element_t));
      }
      else
      {
        if (count != base::c->size())
          base::c->resize(count);

        fFile.read((char*)(v->begin().operator->()), count * sizeof(element_t));
      }
    }
    else
    {
      load_contiguousCompressed(append, count);
    }

    // A short read would leave part of the container holding stale or
    // default-constructed elements; report it rather than hand that out.
    if (!fFile)
      throw std::runtime_error("could not read the element data");

    if (fTraceOn)
    {
      info.fEnd = boost::posix_time::microsec_clock::local_time();
      info.fBytes = fFile.tellg() - startPos;
      fDiskIoList.push_back(info);
    }
  }
  catch (const std::runtime_error& e)
  {
    if (fFile.is_open())
      fFile.close();

    std::string msg("Error occurred loading contiguous container from file " + fFilename + " ");
    std::cerr << msg << std::endl;
    throw logging::LargeDataListExcept(msg + e.what());
  }
  catch (...)
  {
    if (fFile.is_open())
      fFile.close();

    std::string msg("Error occurred loading contiguous container from file " + fFilename + " ");
    std::cerr << msg << std::endl;
    throw logging::LargeDataListExcept(msg);
  }

  resetIterators_nowait();
  loadedSet = setNumber;
  fLoadedSetCount++;

  //...Close the file once all the sets have been loaded.  We could compare
  //...setNumber to see if it is the last set, but we make no assumptions
  //...about the order in which the sets are loaded; so we instead track
  //...the number of sets loaded, and use that to know when we are done.
  if (fLoadedSetCount == fSetStartPositions.size())
  {
    fFile.close();
  }

#ifdef PROFILE
  clock_gettime(CLOCK_REALTIME, &ts2);
  /* What should we do with this profile info? */
#endif
}
//------------------------------------------------------------------------------
// Load and expand contiguous data from temp file.
// append - true: the expanded elements go after the existing container
//          contents; false: they replace the contents
// count  - the number of elements to be read and expanded
//
// The container is resized to hold the result and is accessed as a
// std::vector<element_t>.
// exceptions: std::logic_error if fCompressMode is not one of the four
//             contiguous compression modes
//------------------------------------------------------------------------------
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::load_contiguousCompressed(bool append, uint64_t count)
{
  std::vector<element_t>* vec = reinterpret_cast<std::vector<element_t>*>(base::c);

  // Index of the first slot that receives expanded data.
  const uint64_t firstSlot = append ? static_cast<uint64_t>(base::c->size()) : 0;

  // Appending always resizes; replacing resizes only when the size differs.
  if (append || count != base::c->size())
    base::c->resize(firstSlot + count);

  element_t* pElementData = (vec->begin() + firstSlot).operator->();

  //...Read in and expand the elements based on the compression mode
  switch (fCompressMode)
  {
    case COMPRESS_TO_64_32: readContiguousCompressed<CompElement64Rid32Val>(count, pElementData); break;

    case COMPRESS_TO_32_64: readContiguousCompressed<CompElement32Rid64Val>(count, pElementData); break;

    case COMPRESS_TO_32_32: readContiguousCompressed<CompElement32Rid32Val>(count, pElementData); break;

    case COMPRESS_TO_32: readContiguousCompressed<CompElement32RidOnly>(count, pElementData); break;

    default:
    {
      std::ostringstream errmsg;
      errmsg << "load_contiguousCompressed() called "
                " without compression mode being set";
      std::cerr << errmsg.str() << std::endl;
      throw std::logic_error(errmsg.str());
    }
  }
}
//------------------------------------------------------------------------------
// Reads `count` elements of type saveElement_t from the temp file into a
// staging vector, then expands them into element_t storage.
// count        - (input)  number of elements to be read from temp file
// pElementData - (output) destination for the expanded element_t data; the
//                caller must have made room for `count` elements
//------------------------------------------------------------------------------
template <typename container_t, typename element_t>
template <typename saveElement_t>
void LargeDataList<container_t, element_t>::readContiguousCompressed(uint64_t count, element_t* pElementData)
{
  //...staging buffer sized for the whole compressed set
  std::vector<saveElement_t> packed(count);

  //...pull the compressed bytes in with a single read
  char* rawBytes = reinterpret_cast<char*>(packed.data());
  fFile.read(rawBytes, (sizeof(saveElement_t) * count));

  //...copy/expand data into element_t container
  ElementCompression::expand(packed, pElementData);
}
/** @brief Fetch the next element for consumer `it`, waiting for the consume phase first.
 *  @param it consumer iterator id, passed on to the base class
 *  @param e  receives the element
 *  @return whatever base::next() returns
 */
template <typename container_t, typename element_t>
inline bool LargeDataList<container_t, element_t>::next(uint64_t it, element_t* e)
{
  waitForConsumePhase();
  return base::next(it, e);
}
/** @brief Fetch the next element for consumer `it` without waiting for the consume phase.
 *  @param it consumer iterator id, passed on to the base class
 *  @param e  receives the element
 *  @return whatever base::next() returns
 */
template <typename container_t, typename element_t>
inline bool LargeDataList<container_t, element_t>::next_nowait(uint64_t it, element_t* e)
{
  return base::next(it, e);
}
/** @brief Block on the consumePhase condition until the list leaves the produce phase.
 *
 * The wait is performed with this->mutex; the loop re-checks `phase` after
 * every wake-up.
 */
template <typename container_t, typename element_t>
inline void LargeDataList<container_t, element_t>::waitForConsumePhase()
{
  while (phase == 0)
  {
    // pthread_cond_wait(&consumePhase, &(this->mutex));
    consumePhase.wait(this->mutex);
  }
}
/** @brief Start a new, empty set.
 *
 * Replaces the current container with a freshly constructed one and advances
 * both loadedSet and setCount. The replacement is allocated before the old
 * container is deleted, so base::c never points at freed memory if the
 * allocation throws.
 */
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::registerNewSet()
{
  container_t* fresh = new container_t();
  delete base::c;
  base::c = fresh;

  loadedSet++;
  setCount++;
}
/** @brief Total number of elements inserted into the list.
 *
 * Blocks until the consume phase, so the count is only reported once
 * endOfInput() has been called.
 */
template <typename container_t, typename element_t>
uint64_t LargeDataList<container_t, element_t>::totalSize()
{
  this->waitForConsumePhase();
  return this->totSize;
}
/** @brief Insert one element and count it towards totalSize().
 *  @param e element handed to base::insert()
 */
template <typename container_t, typename element_t>
inline void LargeDataList<container_t, element_t>::insert(const element_t& e)
{
  ++totSize;
  base::insert(e);
}
/** @brief Wait for the consume phase, then rewind every consumer iterator.
 *
 * Each of the base class's numConsumers iterators is set to the beginning of
 * the current container. The index uses the same unsigned 64 bit type as
 * resetIterators_nowait(), so the consumer count is never narrowed to int.
 */
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::resetIterators()
{
  waitForConsumePhase();

  for (uint64_t i = 0; i < base::numConsumers; ++i)
    base::cIterators[i] = base::c->begin();
}
/** @brief Rewind every consumer iterator without waiting for the consume phase.
 *
 * Each of the base class's numConsumers iterators is set to the beginning of
 * the current container.
 */
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::resetIterators_nowait()
{
  uint64_t consumer = 0;

  while (consumer < base::numConsumers)
  {
    base::cIterators[consumer] = base::c->begin();
    ++consumer;
  }
}
//------------------------------------------------------------------------------
// Stores the on-disk element sizes in the base class, then recomputes the
// compression mode so that it always matches the stored sizes.
// elementSaveSize1st - save size of the first element field (RID)
// elementSaveSize2nd - save size of the second element field (value)
//------------------------------------------------------------------------------
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::setDiskElemSize(uint32_t elementSaveSize1st,
                                                            uint32_t elementSaveSize2nd)
{
  base::setDiskElemSize(elementSaveSize1st, elementSaveSize2nd);

  // keep fCompressMode in step with the sizes just stored
  this->setCompressionMode();
}
//------------------------------------------------------------------------------
// Derives fCompressMode from the element type and the current save sizes
// (size1st for the RID, size2nd for the value).  A field counts as compressed
// when its save size is 4 bytes.
//   RIDElementType    : COMPRESS_TO_32 when the RID is 4 bytes
//   StringElementType : COMPRESS_TO_32_STR when the RID is 4 bytes
//   anything else     : chosen from the RID/value size combination
// In every other case the mode is COMPRESS_NO_COMPRESS.
//------------------------------------------------------------------------------
template <typename container_t, typename element_t>
void LargeDataList<container_t, element_t>::setCompressionMode()
{
  const uint32_t COMPRESS_4BYTE_LENGTH = 4;

  fCompressMode = COMPRESS_NO_COMPRESS;

  const bool rid32 = (base::getDiskElemSize1st() == COMPRESS_4BYTE_LENGTH);

  if (typeid(element_t) == typeid(RIDElementType))
  {
    if (rid32)
      fCompressMode = COMPRESS_TO_32;

    return;
  }

  if (typeid(element_t) == typeid(StringElementType))
  {
    if (rid32)
      fCompressMode = COMPRESS_TO_32_STR;

    return;
  }

  // Elements with both a RID and a value: the value size matters as well.
  const bool val32 = (base::getDiskElemSize2nd() == COMPRESS_4BYTE_LENGTH);

  if (rid32 && val32)
    fCompressMode = COMPRESS_TO_32_32;
  else if (rid32)
    fCompressMode = COMPRESS_TO_32_64;
  else if (val32)
    fCompressMode = COMPRESS_TO_64_32;
}
} // namespace joblist