1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00
Roman Nozdrin 4fe9cd64a3
Revert "No boost condition (#2822)" (#2828)
This reverts commit f916e64927cd81569327014f20c4cc0b8aca40ff.
2023-04-22 15:49:50 +03:00

1617 lines
41 KiB
C++

/* Copyright (C) 2014 InfiniDB, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <fcntl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <boost/scoped_array.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/thread.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include "IDBDataFile.h"
#include "IDBFileSystem.h"
#include "IDBPolicy.h"
using namespace std;
using namespace idbdatafile;
// Size in bytes of one test block; the block read/write helpers below transfer BLK_SIZE bytes at a time.
// NOTE(review): non-const, presumably overridden by the -b option — option parsing is not visible here.
size_t BLK_SIZE = 2048;
// Capacity of TestRunner::m_defbuf, i.e. the largest block the scratch buffer can hold.
const size_t MAX_BLK_SIZE = 1048576;
// Not referenced in the visible part of this file.
// NOTE(review): presumably an upper bound on the number of blocks — confirm against the option parsing.
size_t MAX_BLOCK = 1024;
// Computes *result = *x - *y (adapted from the glibc manual's elapsed-time example).
//
// result  receives the difference; tv_usec ends up in [0, 1000000] whenever the
//         carry logic below applies.
// x       minuend, left untouched.
// y       subtrahend; NOTE: modified in place while the carry is propagated, so
//         callers must not rely on its value afterwards.
//
// Returns 1 if the difference is negative, otherwise 0.
int timeval_subtract(struct timeval* result, struct timeval* x, struct timeval* y)
{
  /* Perform the carry for the later subtraction by updating y. */
  if (x->tv_usec < y->tv_usec)
  {
    int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
    y->tv_usec -= 1000000 * nsec;
    y->tv_sec += nsec;
  }

  if (x->tv_usec - y->tv_usec > 1000000)
  {
    // Whole seconds hidden in the microsecond difference. This must be (x - y):
    // the previous (y - x) produced a negative count and moved the carry the
    // wrong way, leaving tv_usec above one second.
    int nsec = (x->tv_usec - y->tv_usec) / 1000000;
    y->tv_usec += 1000000 * nsec;
    y->tv_sec -= nsec;
  }

  /* Compute the time remaining to wait.
     tv_usec is certainly positive. */
  result->tv_sec = x->tv_sec - y->tv_sec;
  result->tv_usec = x->tv_usec - y->tv_usec;

  /* Return 1 if result is negative. */
  return x->tv_sec < y->tv_sec;
}
// Prints the command-line help text to stderr, one line per option.
// NOTE(review): "-d" is listed twice (O_DIRECT and read delay); the option
// parser is not visible here, so the text is reproduced unchanged — confirm
// which letter the delay option really uses.
void usage()
{
  static const char* const kHelpLines[] = {
      "usage: fophdfs [adpsrh:b:t:cvuen:l:m:] <fname>",
      " -d use O_DIRECT",
      " -p flush before read",
      " -s sync on write",
      " -r reopen file for read",
      " -b <BLK_SIZE> block size in bytes",
      " -d <milliseconds> delay before read",
      " -c close file after each write",
      " -v use setvbuf to disable buffering",
      " -n <num blocks> number of blocks to write/read",
      " -t <num threads> number of concurrent test threads",
      " -l <filename> do an HDFS large read-write file test",
      " -h <plugin-file> load an HDFS plugin (and run HDFS tests)",
      " -m set max size for HDFS mem buffer, default is unlimited",
  };
  const size_t lineCount = sizeof(kHelpLines) / sizeof(kHelpLines[0]);

  for (size_t idx = 0; idx < lineCount; ++idx)
  {
    std::cerr << kHelpLines[idx] << std::endl;
  }
}
// Command-line options for one test run. Every field carries its default
// directly, so a default-constructed object describes a single-threaded,
// 64-block, unbuffered run with no HDFS plugin.
struct foptest_opts
{
  bool usedirect = false;
  bool preflush = false;
  bool synconwrite = false;
  bool reopen = false;
  bool closeonwrite = false;
  bool usevbuf = false;
  int numthreads = 1;   // number of concurrent test threads
  int numblocks = 64;   // number of blocks to write/read
  IDBDataFile::Types filetype = IDBDataFile::UNBUFFERED;
  std::string largeFile;   // empty unless a large-file test was requested
  std::string pluginFile;  // empty unless a plugin was named
  bool useHdfs = false;
  unsigned hdfsMaxMem = 0;

  foptest_opts() = default;
};
// Runs the IDBDataFile / IDBFileSystem test sequence for one file type.
// Each runner works on paths that embed the process id and its own id (m_id),
// and keeps at most one open file handle (m_file) that reset() deletes.
class TestRunner
{
public:
// id is embedded in the test paths; opts is copied into m_opts.
TestRunner(int id, const foptest_opts& opts);
~TestRunner();
// Runs the whole test sequence for filetype, opening files with open_opts.
// Returns true only when every executed sub-test passed.
bool runTest(IDBDataFile::Types filetype, unsigned open_opts);
// Currently a no-op that returns true.
bool fillDefault();
// NOTE(review): declared here but no definition is visible in this part of the file.
bool doBlock(unsigned int blocknum, unsigned char tag, unsigned int count);
// Stores tag in buf[0] and writes BLK_SIZE bytes from buf through m_file.
bool writeBlock(unsigned int blocknum, unsigned char* buf, unsigned char tag);
// Calls writeBlock for count consecutive blocks, incrementing the tag each time.
bool writeBlocks(unsigned int blocknum, unsigned char* buf, unsigned char tag, unsigned int count);
// Reads block blocknum into buf with pread; tag is the expected first byte
// (NOTE(review): the verification part of the definition is outside this view).
bool readBlock(unsigned int blocknum, unsigned char* buf, unsigned char tag);
bool readBlocks(unsigned int blocknum, unsigned char* buf, unsigned char tag, unsigned int count);
// Read-only access to the options this runner was built with.
const foptest_opts& runOpts() const
{
return m_opts;
}
// Severity passed to logMsg.
enum LogLevel
{
INFO,
ERROR
};
void logMsg(LogLevel level, const string& msg, bool bold = false);
private:
// Individual sub-tests; runTest calls them in a fixed order and most of them
// expect the file created by writeTest to exist.
bool readTest(IDBDataFile::Types filetype);
// thread_func2 needs access to the private randomReadTest.
friend void thread_func2(TestRunner& trun);
bool randomReadTest();
bool openByModeStrTest();
bool writeTest(IDBDataFile::Types filetype);
bool truncateTest(IDBDataFile::Types filetype);
bool renameTest(IDBDataFile::Types filetype);
bool copyTest(IDBDataFile::Types filetype);
bool rdwrTest(IDBDataFile::Types filetype);
bool hdfsRdwrExhaustTest();
bool concurrencyTest(IDBDataFile::Types filetype);
bool tellTest(IDBDataFile::Types filetype);
bool openWF(bool reopen = false);
bool openRF();
bool flushTest(IDBDataFile::Types);
bool seekTest(IDBDataFile::Types);
bool listDirTest(IDBDataFile::Types, const string&);
// Deletes m_file (closing it) and clears the pointer.
void reset();
// Path of the main test file; set by runTest.
string m_fname;
foptest_opts m_opts;
// Scratch block buffer; the constructor fills its first BLK_SIZE bytes with 0xfe.
unsigned char m_defbuf[MAX_BLK_SIZE];
// Currently open test file, or NULL; owned by this object and released via reset().
IDBDataFile* m_file;
// Open options handed to IDBDataFile::open by the sub-tests.
unsigned m_open_opts;
int m_id;
// NOTE(review): shared by all runners; presumably serialises logMsg output — logMsg is not visible here.
static boost::mutex m_guard;
};
// Definition of the static mutex declared in TestRunner.
boost::mutex TestRunner::m_guard;
// Thread entry point used by runTest: performs one random-read pass on the
// shared runner. The pass/fail result is deliberately dropped here.
void thread_func2(TestRunner& trun)
{
  (void)trun.randomReadTest();
}
// Copies the options, starts with no open file, and pre-fills the first
// BLK_SIZE bytes of the scratch buffer with the 0xfe filler pattern.
TestRunner::TestRunner(int id, const foptest_opts& opts)
 : m_opts(opts), m_file(NULL), m_open_opts(0), m_id(id)
{
  unsigned char* const fillEnd = m_defbuf + BLK_SIZE;

  for (unsigned char* cursor = m_defbuf; cursor != fillEnd; ++cursor)
  {
    *cursor = 0xfe;
  }
}
void TestRunner::reset()
{
delete m_file;
m_file = 0;
}
// Scope guard that removes the test file and then the test directory when it
// goes out of scope, asserting before and after each removal that the file
// system reports the expected state.
// NOTE(review): the destructor asserts that both paths exist, so leaving the
// owning scope before they were created trips the first assert.
struct TestCleanup
{
  TestCleanup(IDBDataFile::Types type, const string& dir, const string& file)
   : m_fs(IDBFileSystem::getFs(type)), m_dir(dir), mm_file(file)
  {
  }

  ~TestCleanup()
  {
    const char* const filePath = mm_file.c_str();
    const char* const dirPath = m_dir.c_str();

    // the file goes first ...
    assert(m_fs.exists(filePath));
    m_fs.remove(filePath);
    assert(!m_fs.exists(filePath));

    // ... then the directory that contained it
    assert(m_fs.exists(dirPath));
    assert(m_fs.isDir(dirPath));
    m_fs.remove(dirPath);
    assert(!m_fs.exists(dirPath));
  }

  IDBFileSystem& m_fs;
  string m_dir;
  string mm_file;
};
// Runs the complete test sequence for one file type.
//
// filetype   which IDBDataFile implementation to exercise; anything other than
//            UNBUFFERED/BUFFERED first installs the plugin named in the options.
// open_opts  options forwarded to every IDBDataFile::open call.
//
// Returns true when every executed sub-test passed, false on the first failure.
// Sub-tests run in a fixed order and later ones rely on the file written by
// writeTest, so the chain stops doing work as soon as one of them fails.
bool TestRunner::runTest(IDBDataFile::Types filetype, unsigned open_opts)
{
  m_open_opts = open_opts;

  ostringstream infostr;
  string typeString =
      (filetype == IDBDataFile::UNBUFFERED ? "Unbuffered"
                                           : (filetype == IDBDataFile::BUFFERED ? "Buffered" : "HDFS"));
  infostr << "Running test for file type " << typeString;
  logMsg(INFO, infostr.str());

  if (filetype != IDBDataFile::UNBUFFERED && filetype != IDBDataFile::BUFFERED)
  {
    if (!IDBPolicy::installPlugin(m_opts.pluginFile))
    {
      cout << "ERROR: unable to install HDFS plugin!" << endl;
      // this used to be "return -1", which converts to true in a bool
      // function and reported the failure as a pass
      return false;
    }
  }

  IDBFileSystem& fs = IDBFileSystem::getFs(filetype);

  // build the file name we are going to use
  ostringstream oss;
  // embed pid so that this is a new directory path
  oss << "/tmp/idbdf-dir-" << getpid() << "-" << m_id << "/Calpont/data";

  // we need to make sure this directory doesn't already exist
  // todo-this only works non-HDFS
  string cmd = "rm -rf " + oss.str();
  system(cmd.c_str());

  string dir = oss.str();
  m_fname = dir + "/foobar";

  // instantiate this here so that we always clean up files we created no matter
  // where we exit the function from
  boost::scoped_ptr<TestCleanup> cleanup(new TestCleanup(filetype, dir, m_fname));

  // HDFS will automatically create parent directories when opening a file so these
  // tests around mkdir are irrelevant.
  if (filetype != IDBDataFile::HDFS)
  {
    // we expect this to fail because the parent directory should not exist
    reset();
    m_file = IDBDataFile::open(filetype, m_fname.c_str(), "w", m_open_opts);

    if (m_file)
    {
      ostringstream errstr;
      errstr << "open for writing of path " << m_fname << " should not succeed!";
      logMsg(ERROR, errstr.str());
      return false;
    }

    // now create the path
    if (fs.mkdir(dir.c_str()))
    {
      ostringstream errstr;
      errstr << "open mkdir of " << dir << " failed!";
      logMsg(ERROR, errstr.str());
      return false;
    }
  }

  bool returnval = writeTest(filetype);

  // The size checks need the file that writeTest leaves open in m_file; when
  // writeTest failed m_file may be NULL, so they are skipped in that case.
  if (returnval)
  {
    // going to check the size two different ways - once through the file and
    // once through the file system.
    unsigned fsize = m_file->size();

    if (fsize != m_opts.numblocks * BLK_SIZE)
    {
      ostringstream errstr;
      errstr << "bad file size from file " << fsize << " != " << m_opts.numblocks * BLK_SIZE << "!";
      logMsg(ERROR, errstr.str());
      return false;
    }

    // this deletes and closes the file - required to get accurate file size with
    // buffered IO and in HDFS from the file system
    reset();
    fsize = fs.size(m_fname.c_str());

    if (fsize != m_opts.numblocks * BLK_SIZE)
    {
      ostringstream errstr;
      errstr << "bad file size from fs " << fsize << " != " << m_opts.numblocks * BLK_SIZE << "!";
      logMsg(ERROR, errstr.str());
      return false;
    }
  }

  if (returnval)
  {
    returnval = tellTest(filetype);
  }

  if (returnval)
  {
    returnval = readTest(filetype);
  }

  if (returnval && (filetype == IDBDataFile::UNBUFFERED || filetype == IDBDataFile::HDFS))
  {
    returnval = concurrencyTest(filetype);
  }

  if (returnval)
  {
    reset();
    m_file = IDBDataFile::open(filetype, m_fname.c_str(), "r", m_open_opts);

    if (!m_file)
    {
      ostringstream errstr;
      errstr << "Unable to open " << m_fname << " for reading";
      logMsg(ERROR, errstr.str());
      return false;
    }

    // with more than one thread configured, hammer the same open file from
    // several threads first; their individual results are not collected
    if (m_opts.numthreads > 1)
    {
      boost::thread_group thread_group;

      for (int i = 0; i < m_opts.numthreads; ++i)
      {
        thread_group.create_thread(boost::bind(thread_func2, boost::ref(*this)));
      }

      thread_group.join_all();
    }

    returnval = randomReadTest();
  }

  if (returnval)
  {
    returnval = rdwrTest(filetype);
  }

  if (returnval && filetype == IDBDataFile::BUFFERED)
  {
    returnval = openByModeStrTest();
  }

  if (returnval)
  {
    returnval = truncateTest(filetype);
  }

  if (returnval)
  {
    returnval = renameTest(filetype);
  }

  if (returnval)
  {
    returnval = copyTest(filetype);
  }

  if (returnval && filetype == IDBDataFile::HDFS)
  {
    returnval = hdfsRdwrExhaustTest();
  }

  if (returnval)
  {
    returnval = flushTest(filetype);
  }

  if (returnval)
  {
    returnval = seekTest(filetype);
  }

  if (returnval)
  {
    returnval = listDirTest(filetype, dir);
  }

  if (returnval)
    logMsg(INFO, typeString + " tests passed!", true);

  reset();
  return returnval;
}
// Re-opens the test file read-only, checks that its mtime is recent, reads
// every block back through readBlock and logs the achieved throughput.
// Returns false if the file cannot be opened or any block read fails.
bool TestRunner::readTest(IDBDataFile::Types filetype)
{
  logMsg(INFO, "readTest");
  reset();
  m_file = IDBDataFile::open(filetype, m_fname.c_str(), "r", m_open_opts);

  if (m_file == NULL)
  {
    ostringstream errstr;
    errstr << "Unable to open " << m_fname << " for reading";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // check the mtime method: the file was written moments ago
  const time_t now = time(0);
  const time_t mtime = m_file->mtime();
  assert((now - mtime) <= 3);

  struct timeval began, finished, elapsed;
  gettimeofday(&began, 0x0);

  int blk = 0;

  while (blk < m_opts.numblocks)
  {
    if (!readBlock(blk, m_defbuf, blk))
      return false;

    ++blk;
  }

  gettimeofday(&finished, 0x0);
  timeval_subtract(&elapsed, &finished, &began);

  const size_t totalBytes = m_opts.numblocks * BLK_SIZE;
  float secs = elapsed.tv_sec + (elapsed.tv_usec * 0.000001);

  ostringstream infostr;
  infostr << "Read " << totalBytes << " bytes in " << secs << " secs, ";
  infostr << "Throughput = " << setprecision(3) << (totalBytes / 1000000.0) / secs << "MB/sec";
  logMsg(INFO, infostr.str());
  return true;
}
// Reads the first bytes of 10 randomly chosen blocks from the already open
// m_file and checks that each block still starts with its own block number.
// May run concurrently from several threads (see thread_func2), so it keeps
// its random state local and seeds it with the calling thread's id.
// Always returns true; mismatches are reported through assert.
bool TestRunner::randomReadTest()
{
  logMsg(INFO, "randomReadTest");

  struct drand48_data d48data;
  srand48_r(pthread_self(), &d48data);

  for (int i = 0; i < 10; ++i)
  {
    long int blk_num;
    lrand48_r(&d48data, &blk_num);
    blk_num = blk_num % m_opts.numblocks;

    unsigned char readbuf[4] = {0};
    // The read is done outside assert(): an expression inside assert() is
    // dropped entirely when NDEBUG is defined, which would skip the I/O.
    const ssize_t readct = m_file->pread(readbuf, blk_num * BLK_SIZE, 4);
    assert(readct == 4);
    assert(readbuf[0] == (unsigned char)blk_num);
    (void)readct;  // keeps NDEBUG builds free of unused-variable warnings
  }

  return true;
}
// Checks the alternate mode string "r+b" that buffered I/O accepts: reads
// block 0, verifies its tag and the resulting file position, then overwrites
// block 1 with a block tagged 1.
// Only run for buffered files, after the write test has created the file.
// Returns false on any open/read/position/write mismatch.
bool TestRunner::openByModeStrTest()
{
  logMsg(INFO, "openByModeStrTest");
  // in this test we want to check the alternate open modes available for buffered i/o
  // this test is only run if we are doing buffered I/O and expects it is run after the
  // write test to guarantee the file is there
  reset();
  m_file = IDBDataFile::open(IDBDataFile::BUFFERED, m_fname.c_str(), "r+b", m_open_opts);

  if (!m_file)
  {
    ostringstream errstr;
    errstr << "Unable to open " << m_fname << " for read/write";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // keep this fairly simple - read a block then write a block
  ssize_t readct = m_file->read(m_defbuf, BLK_SIZE);

  if ((size_t)readct != BLK_SIZE)
  {
    ostringstream errstr;
    // report the size that was actually requested (the message used to claim 4)
    errstr << "Only read " << readct << " bytes, expected " << BLK_SIZE;
    logMsg(ERROR, errstr.str());
    return false;
  }

  if (m_defbuf[0] != (unsigned char)0)
  {
    ostringstream errstr;
    errstr << "Data error - expected " << 0 << ", read " << (int)m_defbuf[0];
    logMsg(ERROR, errstr.str());
    return false;
  }

  // we should be at block 1
  long filepos = m_file->tell();

  if (filepos != long(BLK_SIZE))
  {
    ostringstream errstr;
    errstr << "File position not at correct block, " << filepos << " != " << BLK_SIZE;
    logMsg(ERROR, errstr.str());
    return false;
  }

  // block 1 is rewritten with its own tag so the file content stays consistent
  m_defbuf[0] = 1;
  ssize_t bytes_written = m_file->write(m_defbuf, BLK_SIZE);

  if ((size_t)bytes_written != BLK_SIZE)
  {
    ostringstream errstr;
    errstr << "Only wrote " << bytes_written << " bytes, expected " << BLK_SIZE;
    logMsg(ERROR, errstr.str());
    return false;
  }

  return true;
}
// Creates the test file and writes numblocks blocks, each tagged with its
// block number in the first byte, then logs the achieved throughput.
// Leaves the file open in m_file. Returns false if the open or any block
// write fails.
bool TestRunner::writeTest(IDBDataFile::Types filetype)
{
  logMsg(INFO, "writeTest");
  reset();
  m_file = IDBDataFile::open(filetype, m_fname.c_str(), "w", m_open_opts);

  if (m_file == NULL)
  {
    ostringstream errstr;
    errstr << "Unable to open " << m_fname << " for writing";
    logMsg(ERROR, errstr.str());
    return false;
  }

  struct timeval began, finished, elapsed;
  gettimeofday(&began, 0x0);

  int blk = 0;

  while (blk < m_opts.numblocks)
  {
    if (!writeBlock(blk, m_defbuf, blk))
      return false;

    ++blk;
  }

  gettimeofday(&finished, 0x0);
  timeval_subtract(&elapsed, &finished, &began);

  const size_t totalBytes = m_opts.numblocks * BLK_SIZE;
  float secs = elapsed.tv_sec + (elapsed.tv_usec * 0.000001);

  ostringstream infostr;
  infostr << "Wrote " << totalBytes << " bytes in " << secs << " secs, ";
  infostr << "Throughput = " << setprecision(3) << (totalBytes / 1000000.0) / secs << "MB/sec";
  logMsg(INFO, infostr.str());
  return true;
}
// Opens the test file in append mode and truncates it at a pseudo-randomly
// chosen block boundary (fixed seed, never block 0).
// For HDFS the truncate call is expected to fail and the size must stay at
// numblocks blocks; for every other type it must succeed and the size must
// match the chosen boundary.
bool TestRunner::truncateTest(IDBDataFile::Types filetype)
{
  logMsg(INFO, "truncateTest");
  reset();
  m_file = IDBDataFile::open(filetype, m_fname.c_str(), "a", m_open_opts);

  if (m_file == NULL)
  {
    ostringstream errstr;
    errstr << "Unable to open " << m_fname << " for writing";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // choose a random block to truncate at
  struct drand48_data d48data;
  srand48_r(0xdeadbeef, &d48data);
  long int blk_num;
  lrand48_r(&d48data, &blk_num);
  blk_num = blk_num % m_opts.numblocks;

  // always leave at least one block
  if (blk_num == 0)
    blk_num = 1;

  const int rc = m_file->truncate(blk_num * BLK_SIZE);
  const bool onHdfs = (filetype == IDBDataFile::HDFS);

  if (onHdfs)
  {
    if (!rc)
    {
      logMsg(ERROR, "truncate is supposed to fail for HDFS files!");
      return false;
    }

    // this is the "success" case for HDFS we didn't expect to truncate so reset blk_num
    blk_num = m_opts.numblocks;
  }
  else if (rc)
  {
    logMsg(ERROR, "truncate failed!");
    return false;
  }

  const off64_t fsize = m_file->size();

  if (fsize == (off64_t)(blk_num * BLK_SIZE))
    return true;

  ostringstream errstr;
  errstr << "wrong file size after truncate, " << fsize << " != " << blk_num * BLK_SIZE;
  logMsg(ERROR, errstr.str());
  return false;
}
// Renames the test file to a per-process/per-runner path under /tmp, checks
// through the file system that the old name is gone and the new name has the
// original size, renames it back, and finally checks that renaming a
// non-existent file fails. Assumes the write test already created m_fname.
bool TestRunner::renameTest(IDBDataFile::Types type)
{
  logMsg(INFO, "renameTest");
  // the file must be closed before it is moved around
  reset();
  IDBFileSystem& fs = IDBFileSystem::getFs(type);

  // remember the size so the moved file can be compared against it
  const off64_t sizeBefore = fs.size(m_fname.c_str());

  // target lives in a directory known to exist; pid and runner id keep it unique
  ostringstream pathBuilder;
  pathBuilder << "/tmp/renametest-" << getpid() << "-" << m_id;
  const string movedPath = pathBuilder.str();

  // move it away
  if (fs.rename(m_fname.c_str(), movedPath.c_str()) != 0)
  {
    ostringstream errstr;
    errstr << "rename failed, " << strerror(errno);
    logMsg(ERROR, errstr.str());
    return false;
  }

  // the old path must no longer report a size
  off64_t sizeNow = fs.size(m_fname.c_str());

  if (sizeNow != -1)
  {
    ostringstream errstr;
    errstr << "old file still exists, size = " << sizeNow;
    logMsg(ERROR, errstr.str());
    return false;
  }

  // the new path must report the original size
  sizeNow = fs.size(movedPath.c_str());

  if (sizeNow != sizeBefore)
  {
    ostringstream errstr;
    errstr << "new file and old file sizes differ, " << sizeNow << "!=" << sizeBefore;
    logMsg(ERROR, errstr.str());
    return false;
  }

  // everything looks good - move it back where the other tests expect it
  if (fs.rename(movedPath.c_str(), m_fname.c_str()) != 0)
  {
    ostringstream errstr;
    errstr << "final rename failed, " << strerror(errno);
    logMsg(ERROR, errstr.str());
    return false;
  }

  // negative case: renaming a file that does not exist must fail
  assert(fs.rename("a-bogus-file-name", movedPath.c_str()) == -1);
  return true;
}
// Copies the test file to a per-process/per-runner path under /tmp, checks
// that the copy has the same size as the original, deletes the copy, and
// finally checks that copying a non-existent file fails.
// Assumes the write test already created m_fname.
bool TestRunner::copyTest(IDBDataFile::Types type)
{
  // announce the test like every other sub-test does
  logMsg(INFO, "copyTest");
  // assume this test is run after the write test so that the file m_fname exists
  reset();
  IDBFileSystem& fs = IDBFileSystem::getFs(type);

  // get the size before we copy for compare purposes.
  off64_t fsize_orig = fs.size(m_fname.c_str());

  // choose a path in a different directory that we know already exists
  // and make it specific to our thread...
  ostringstream oss;
  // embed pid so that this is a new path
  oss << "/tmp/copytest-" << getpid() << "-" << m_id;
  string newpath = oss.str();

  // copy it
  int rc = fs.copyFile(m_fname.c_str(), newpath.c_str());

  if (rc != 0)
  {
    ostringstream errstr;
    errstr << "copy failed, " << strerror(errno);
    logMsg(ERROR, errstr.str());
    return false;
  }

  // now check if newpath exists using size method
  off64_t fsize = fs.size(newpath.c_str());

  if (fsize != fsize_orig)
  {
    ostringstream errstr;
    errstr << "new file and old file sizes differ, " << fsize << "!=" << fsize_orig;
    logMsg(ERROR, errstr.str());
    return false;
  }

  // everything looks good - delete the copy
  rc = fs.remove(newpath.c_str());

  if (rc != 0)
  {
    ostringstream errstr;
    errstr << "file delete failed (after copy), " << strerror(errno);
    logMsg(ERROR, errstr.str());
    return false;
  }

  // now a negative test case. Try to copy a file that does not exist
  assert(fs.copyFile("a-bogus-file-name", newpath.c_str()) == -1);
  return true;
}
// Opens the test file read/write, overwrites the first byte of up to five
// distinct pseudo-randomly chosen blocks with 0xb0, reads each one back to
// verify the update, and then restores the original tag (the block number).
// Returns false on any seek/read/write failure or data mismatch.
bool TestRunner::rdwrTest(IDBDataFile::Types filetype)
{
  logMsg(INFO, "rdwrTest");
  reset();
  m_file = IDBDataFile::open(filetype, m_fname.c_str(), "r+", m_open_opts);

  if (!m_file)
  {
    ostringstream errstr;
    errstr << "Unable to open " << m_fname << " for reading";
    logMsg(ERROR, errstr.str());
    return false;
  }

  struct drand48_data d48data;
  srand48_r(0xdeadbeef, &d48data);

  // we will write to 5 random blocks and then come back and
  // verify the contents and undo them
  int blocks_to_touch = min(5, m_opts.numblocks);
  vector<int> touched;

  for (int i = 0; i < blocks_to_touch; ++i)
  {
    long int blk_num;
    // we need to make sure all the blocks we touch are unique or
    // the pattern used by this test won't work
    bool found = false;

    while (!found)
    {
      lrand48_r(&d48data, &blk_num);
      blk_num = blk_num % m_opts.numblocks;
      vector<int>::iterator pos = find(touched.begin(), touched.end(), blk_num);

      if (pos == touched.end())
        found = true;
    }

    if (m_file->seek(blk_num * BLK_SIZE, SEEK_SET))
    {
      ostringstream errstr;
      errstr << "failed to seek block " << blk_num;
      logMsg(ERROR, errstr.str());
      return false;
    }

    unsigned char writeval = 0xb0;
    ssize_t writect = m_file->write(&writeval, 1);

    if (writect != 1)
    {
      ostringstream errstr;
      errstr << "Only wrote " << writect << " bytes, expected 1";
      logMsg(ERROR, errstr.str());
      return false;
    }

    touched.push_back(blk_num);
  }

  m_file->flush();

  for (int i = 0; i < (int)touched.size(); ++i)
  {
    // initialised so the error message never prints an indeterminate value
    unsigned char readbuf = 0;
    ssize_t readct = m_file->pread(&readbuf, touched[i] * BLK_SIZE, 1);

    if (readct != 1 || readbuf != (unsigned char)0xb0)
    {
      ostringstream errstr;
      errstr << "Error reading expected value, readct=" << readct << " bytes, value" << (int)readbuf;
      logMsg(ERROR, errstr.str());
      return false;
    }

    readbuf = touched[i];

    // Position explicitly at the start of the block before restoring it. A
    // relative seek(-1, SEEK_CUR) is only correct if pread moved the file
    // offset, which a positional read is not required to do; otherwise every
    // restore would land on the last block written above.
    if (m_file->seek(touched[i] * BLK_SIZE, SEEK_SET))
    {
      ostringstream errstr;
      errstr << "failed to seek block " << touched[i];
      logMsg(ERROR, errstr.str());
      return false;
    }

    ssize_t writect = m_file->write(&readbuf, 1);

    if (writect != 1)
    {
      ostringstream errstr;
      errstr << "Only wrote " << writect << " bytes, expected 1";
      logMsg(ERROR, errstr.str());
      return false;
    }
  }

  return true;
}
// Self-contained test of the HDFS read/write file: works on its own scratch
// path, opened with a deliberately tiny buffer argument (8), and walks
// through empty-file behaviour, write, truncate, buffer growth and re-open
// of a file larger than that buffer. If a large file was named in the
// options it is additionally opened (and closed again) at the end.
//
// NOTE: every check is an assert() and most of them contain the operation
// under test, so this function only does real work when asserts are enabled
// (NDEBUG not defined). It always returns true; failures abort via assert.
bool TestRunner::hdfsRdwrExhaustTest()
{
  logMsg(INFO, "hdfsRdwrExhaustTest");
  // this is going to be a self-contained test that attempts to test
  // all logic inherent in HdfsRdwr

  // choose a new filename that is specific to our thread
  ostringstream oss;
  // embed pid so that this is a new path
  oss << "/tmp/hdfsrdwr-" << getpid() << "-" << m_id;
  string newpath = oss.str();

  // open a file with arbitrarily small buffer
  IDBDataFile* file = IDBDataFile::open(IDBDataFile::HDFS, newpath.c_str(), "r+", 0, 8);
  assert(file);

  // check various empty file conditions
  assert(file->size() == 0);
  assert(file->tell() == 0);
  assert(file->seek(-1, SEEK_CUR) == -1);
  assert(file->seek(0, SEEK_SET) == 0);
  unsigned char buf[4];
  assert(file->read(buf, 4) == 0);

  // write some data
  buf[0] = 0xde;
  buf[1] = 0xad;
  buf[2] = 0xbe;
  buf[3] = 0xef;
  assert(file->write(buf, 4) == 4);
  assert(file->size() == 4);
  assert(file->tell() == 4);
  assert(file->truncate(-1) == -1);

  // now make file empty again
  assert(file->truncate(0) == 0);
  assert(file->size() == 0);
  assert(file->seek(0, SEEK_SET) == 0);
  assert(file->tell() == 0);
  assert(file->read(buf, 4) == 0);

  // write data again, this time exactly up to allocated size
  assert(file->write(buf, 4) == 4);
  assert(file->write(buf, 4) == 4);
  assert(file->size() == 8);
  assert(file->tell() == 8);

  // truncate back to 4
  assert(file->truncate(4) == 0);
  assert(file->size() == 4);
  assert(file->seek(4, SEEK_SET) == 0);
  assert(file->tell() == 4);

  // now trigger a buffer reallocation
  assert(file->write(buf, 4) == 4);
  assert(file->write(buf, 4) == 4);
  assert(file->size() == 12);

  // now delete and close.
  delete file;

  // check the file size through the file system
  IDBFileSystem& fs = IDBFileSystem::getFs(IDBDataFile::HDFS);
  assert(fs.size(newpath.c_str()) == 12);

  // open again - the file is bigger than the default buffer so it triggers alternate
  // logic in the constructor
  file = IDBDataFile::open(IDBDataFile::HDFS, newpath.c_str(), "r+", 0, 8);
  assert(file);
  assert(file->size() == 12);
  unsigned char newbuf[4];
  assert(file->pread(newbuf, 4, 4) == 4);
  assert(newbuf[0] == 0xde && newbuf[1] == 0xad && newbuf[2] == 0xbe && newbuf[3] == 0xef);
  delete file;
  fs.remove(newpath.c_str());

  if (m_opts.largeFile.length())
  {
    file = IDBDataFile::open(IDBDataFile::HDFS, m_opts.largeFile.c_str(), "r+", 0);
    assert(file);
    // close the handle again; it used to be dropped without being deleted
    delete file;
  }

  return true;
}
// Checks that a reader sees an update made through a second handle:
// a reader opens the file and reads two blocks, a read/write handle rewrites
// block 0 with tag 95 and is closed, the reader re-reads block 0 expecting
// 95, and finally block 0 is put back to tag 0 through a fresh "r+" handle
// that is left open in m_file.
// Always returns true; failures abort via assert.
bool TestRunner::concurrencyTest(IDBDataFile::Types filetype)
{
  logMsg(INFO, "concurrencyTest");
  reset();

  // ok - scenario is a reader opens the file then a reader/writer opens
  // the same file and updates something, then the reader tries to read
  // again.
  // All I/O calls are made outside assert() so they still run when NDEBUG
  // removes the checks.
  m_file = IDBDataFile::open(filetype, m_fname.c_str(), "r", m_open_opts);
  assert(m_file);

  // read a few blocks
  bool ok = readBlock(0, m_defbuf, 0);
  assert(ok);
  ok = readBlock(1, m_defbuf, 1);
  assert(ok);

  // open the same file for read/write
  IDBDataFile* rdwrFile = IDBDataFile::open(filetype, m_fname.c_str(), "r+", m_open_opts);
  assert(rdwrFile);

  // read a block, write a block
  ssize_t ioct = rdwrFile->pread(m_defbuf, 0, BLK_SIZE);
  assert(ioct == (ssize_t)BLK_SIZE);
  assert(m_defbuf[0] == 0);
  m_defbuf[0] = 95;
  const int seekrc = rdwrFile->seek(0, 0);
  assert(seekrc == 0);
  // compare against the full block size: a bare truth test would also accept
  // a -1 error return or a short write
  ioct = rdwrFile->write(m_defbuf, BLK_SIZE);
  assert(ioct == (ssize_t)BLK_SIZE);

  // close file
  delete rdwrFile;

  // this 10 second pause is important in the HDFS case because it gives HDFS
  // time to age off (or whatever) the blocks for the original file that
  // have now been rewritten. The value was determined experimentally -
  // 3 secs works fine most times but not all. If HDFS hasn't aged out
  // the blocks then the read will return the old data
  if (filetype == IDBDataFile::HDFS)
    sleep(10);

  // go back to the reader and make sure he got the new value, then close
  ok = readBlock(0, m_defbuf, 95);
  assert(ok);
  delete m_file;

  // now put block 0 back the way it was
  m_file = IDBDataFile::open(filetype, m_fname.c_str(), "r+", m_open_opts);
  assert(m_file);
  ok = writeBlock(0, m_defbuf, 0);
  assert(ok);

  // only read by the asserts above; silence unused warnings under NDEBUG
  (void)ok;
  (void)ioct;
  (void)seekrc;
  return true;
}
// Opens the test file read-only, reads the first two blocks, seeks to the
// start of block 1 and checks that tell() reports exactly that offset.
// Returns false if the seek fails or the reported position is wrong.
bool TestRunner::tellTest(IDBDataFile::Types filetype)
{
  logMsg(INFO, "tellTest");
  reset();

  // scenario: reader opens file, seeks somewhere and tells where it is.
  m_file = IDBDataFile::open(filetype, m_fname.c_str(), "r", m_open_opts);
  assert(m_file);

  // read a few blocks
  assert(readBlock(0, m_defbuf, 0));
  assert(readBlock(1, m_defbuf, 1));

  if (m_file->seek(BLK_SIZE, SEEK_SET) != 0)
  {
    logMsg(ERROR, "tellTest: failed to seek block");
    return false;
  }

  const off64_t here = m_file->tell();

  if (here == off64_t(BLK_SIZE))
    return true;

  ostringstream errstr;
  errstr << "tellTest: File position not at correct block, " << here << " != " << BLK_SIZE;
  logMsg(ERROR, errstr.str());
  return false;
}
// Checks that flushed data is visible to an independent reader.
//
// Scenario: the writer re-creates the test file ("w+"), writes 8 bytes and
// flushes; a reader opens the file and verifies those 8 bytes; the writer
// appends 9 MiB and flushes; a second reader verifies size and content; the
// writer is closed. For HDFS the presence of the /tmp/rdwr_scratch file is
// checked as well.
// Returns false on the first failed step. The reader handle is held in a
// scoped_ptr so that no error path leaves it open.
bool TestRunner::flushTest(IDBDataFile::Types filetype)
{
  logMsg(INFO, "flushTest");
  reset();

  string scratch = "/tmp/rdwr_scratch" + m_fname;  // scratch file name if exists
  boost::filesystem::remove(scratch);
  IDBPolicy::remove(m_fname.c_str());

  // scenario: writer opens the file, writes 8 bytes, flush;
  //           reader opens the file, reads 8 bytes, verifies the data, then closes file;
  //           writer writes 9M bytes (force to buffered file) if -m option used correctly;
  //           reader opens the file, verifies the file size and content, then closes the file;
  //           writer closes the file.
  ostringstream errstr;
  m_file = IDBDataFile::open(filetype, m_fname.c_str(), "w+", m_open_opts, /*default:4*/ 1);

  if (!m_file)
  {
    errstr << "flushTest: open " << m_fname.c_str() << " for write failed";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // write 8 "1" through mem buff
  const char w1[] = "11111111";
  ssize_t bytes = 0;

  if ((bytes = m_file->write(w1, 8)) != 8)
  {
    errstr << "flushTest: write count = 8, return = " << bytes;
    logMsg(ERROR, errstr.str());
    return false;
  }

  // for HDFS, force writing out to disk.
  m_file->flush();

  if (!IDBPolicy::exists(m_fname.c_str()))
  {
    errstr << "flushTest: " << m_fname.c_str() << " does not exist";
    logMsg(ERROR, errstr.str());
    return false;
  }

  if (filetype == IDBDataFile::HDFS && boost::filesystem::exists(scratch))
  {
    errstr << "flushTest: " << scratch << " exists after 1st write";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // read from file in "r" mode
  boost::scoped_ptr<IDBDataFile> reader(IDBDataFile::open(filetype, m_fname.c_str(), "r", m_open_opts, 1));

  if (!reader)
  {
    errstr << "flushTest: 1st open " << m_fname.c_str() << " to read failed";
    logMsg(ERROR, errstr.str());
    return false;
  }

  char r1[9] = {0};
  ssize_t n1 = reader->pread(r1, 0, 8);

  if (n1 != 8 || strncmp(r1, w1, 8) != 0)
  {
    errstr << "flushTest: read " << n1 << " != 8 OR " << r1 << "!= 11111111";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // close the first reader
  reader.reset();

  // write 9M "2", switched to file buffer if max size for mem buffer is small.
  const ssize_t m9 = 9 * 1024 * 1024;  // must be larger than EXTENTSIZE = 8390656 to switch to file buffer
  boost::scoped_array<char> w2(new char[m9]);
  memset(w2.get(), '2', m9);

  // a short or failed write would otherwise only surface as a size mismatch
  if ((bytes = m_file->write(w2.get(), m9)) != m9)
  {
    errstr << "flushTest: write count = " << m9 << ", return = " << bytes;
    logMsg(ERROR, errstr.str());
    return false;
  }

  m_file->flush();

  // check file size
  if (IDBPolicy::size(m_fname.c_str()) != 8 + m9)
  {
    errstr << "flushTest: size of " << m_fname.c_str() << " is " << IDBPolicy::size(m_fname.c_str())
           << ", expecting " << (8 + m9);
    logMsg(ERROR, errstr.str());
    return false;
  }

  if (filetype == IDBDataFile::HDFS && !boost::filesystem::exists(scratch) && m_opts.hdfsMaxMem < m9)
  {
    errstr << "flushTest: " << scratch << " does not exist after 2nd write";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // 2nd read
  reader.reset(IDBDataFile::open(filetype, m_fname.c_str(), "r", m_open_opts, 1));

  if (!reader)
  {
    errstr << "flushTest: 2nd open " << m_fname.c_str() << " to read failed";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // one spare byte keeps the buffer NUL-terminated
  boost::scoped_array<char> r2(new char[m9 + 9]);
  memset(r2.get(), 0, m9 + 9);
  ssize_t n2 = reader->pread(r2.get(), 0, m9 + 8);

  if (n2 != (m9 + 8) || strncmp(r2.get(), w1, 8) != 0 || memcmp(r2.get() + 8, w2.get(), m9) != 0)
  {
    // show only the first bytes: the buffer is 9 MiB, and streaming the
    // scoped_array object itself never printed its content
    errstr << "flushTest: 2nd read " << m_fname.c_str() << " failed" << endl
           << " return value: " << n2 << " bytes -- " << string(r2.get(), 16);
    logMsg(ERROR, errstr.str());
    return false;
  }

  // close the reader, then the writer
  reader.reset();
  delete m_file;
  m_file = NULL;

  return true;
}
// Checks seeking inside and beyond the written data.
//
// Scenario: the writer re-creates the file ("w+"), writes 8 bytes, seeks to
// offset 4 and overwrites 4 bytes; a reader verifies size 8 and content
// "11112222"; the writer seeks 4 bytes past EOF, writes 4 more bytes and is
// closed; a second reader verifies size 16 and the content including the
// 4 zero bytes of padding.
// Returns false on the first failed step. The reader handle is held in a
// scoped_ptr so that neither the error paths nor the re-open leak it.
bool TestRunner::seekTest(IDBDataFile::Types filetype)
{
  logMsg(INFO, "seekTest");
  reset();

  // scenario: writer opens the file with w+, writes 8 bytes, seek to 4 from 0, write 4 bytes
  //           reader opens the file with r, verify size and contents,
  //           writer seeks 4 bytes beyond EOF, write 4 bytes, and close the file,
  //           reader re-opens, verify size and contents, and close the file.
  ostringstream errstr;
  m_file = IDBDataFile::open(filetype, m_fname.c_str(), "w+", m_open_opts);

  if (!m_file)
  {
    errstr << "seekTest: open " << m_fname.c_str() << " for write failed";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // write 8 "1" through mem buff
  const char w1[] = "11111111";
  ssize_t bytes = 0;

  if ((bytes = m_file->write(w1, 8)) != 8)
  {
    errstr << "seekTest: write1 count = 8, return = " << bytes;
    logMsg(ERROR, errstr.str());
    return false;
  }

  if (m_file->seek(4, SEEK_SET) != 0)
  {
    errstr << "seekTest: seek(4, SEEK_SET) failed";
    logMsg(ERROR, errstr.str());
    return false;
  }

  const char w2[] = "2222";

  if ((bytes = m_file->write(w2, 4)) != 4)
  {
    errstr << "seekTest: write2 count = 4, return = " << bytes;
    logMsg(ERROR, errstr.str());
    return false;
  }

  // for HDFS, force writing out to disk.
  m_file->flush();

  // read from file in "r" mode
  boost::scoped_ptr<IDBDataFile> reader(IDBDataFile::open(filetype, m_fname.c_str(), "r", m_open_opts));

  if (!reader)
  {
    errstr << "seekTest: 1st open " << m_fname.c_str() << " to read failed";
    logMsg(ERROR, errstr.str());
    return false;
  }

  char r1[9] = {0};
  ssize_t n1 = reader->pread(r1, 0, 8);

  if (IDBPolicy::size(m_fname.c_str()) != 8)
  {
    errstr << "seekTest: size of " << m_fname.c_str() << " is " << IDBPolicy::size(m_fname.c_str())
           << ", expecting 8";
    logMsg(ERROR, errstr.str());
    return false;
  }

  if (n1 != 8 || strncmp(r1, w1, 4) != 0 || strncmp(r1 + 4, w2, 4) != 0)
  {
    errstr << "seekTest: read " << n1 << " != 8 OR " << r1 << "!= 11112222";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // now seek beyond the eof, and write 4 bytes.
  const char w3[] = "3333";

  if (m_file->seek(4, SEEK_END) != 0)
  {
    errstr << "seekTest: seek(4, SEEK_END) failed";
    logMsg(ERROR, errstr.str());
    return false;
  }

  if ((bytes = m_file->write(w3, 4)) != 4)
  {
    errstr << "seekTest: write3 count = 4, return = " << bytes;
    logMsg(ERROR, errstr.str());
    return false;
  }

  m_file->flush();
  delete m_file;
  m_file = NULL;

  // check file size
  if (IDBPolicy::size(m_fname.c_str()) != 16)
  {
    errstr << "seekTest: size of " << m_fname.c_str() << " is " << IDBPolicy::size(m_fname.c_str())
           << ", expecting 16";
    logMsg(ERROR, errstr.str());
    return false;
  }

  // 2nd read: close the first reader (it used to be overwritten without being
  // deleted) and open a fresh one
  reader.reset();
  reader.reset(IDBDataFile::open(filetype, m_fname.c_str(), "r", m_open_opts));

  if (!reader)
  {
    errstr << "seekTest: 2nd open " << m_fname.c_str() << " to read failed";
    logMsg(ERROR, errstr.str());
    return false;
  }

  char r2[17] = {0};
  const char pd[4] = {0};  // padding
  ssize_t n2 = reader->pread(r2, 0, 16);

  // The padding is compared with memcmp: strncmp stops at the first NUL and
  // would therefore only look at one of the four padding bytes.
  if (n2 != 16 || strncmp(r2, w1, 4) != 0 || memcmp(r2 + 4, w2, 4) != 0 || memcmp(r2 + 8, pd, 4) != 0 ||
      memcmp(r2 + 12, w3, 4) != 0)
  {
    errstr << "seekTest: 2nd read " << m_fname.c_str() << " failed" << endl
           << " return value: " << n2 << " bytes -- " << r2;  // need hex dump?
    logMsg(ERROR, errstr.str());
    return false;
  }

  reader.reset();
  return true;
}
// Creates two extra files next to the test file ("foobar2", "foobar3"), lists
// the directory and expects exactly the three names foobar, foobar2 and
// foobar3. Afterwards checks that listing a bogus directory fails, and runs a
// few remove/isDir sanity asserts.
bool TestRunner::listDirTest(IDBDataFile::Types filetype, const string& dir)
{
  logMsg(INFO, "listDirTest");
  IDBFileSystem& fs = IDBFileSystem::getFs(filetype);

  // create the two siblings; each handle is closed again right away
  // (deleting a null pointer is a no-op, so a failed open is tolerated here)
  const string secondName = m_fname + "2";
  const string thirdName = m_fname + "3";
  delete IDBDataFile::open(filetype, secondName.c_str(), "w", m_open_opts);
  delete IDBDataFile::open(filetype, thirdName.c_str(), "w", m_open_opts);

  list<string> dircontents;

  if (fs.listDirectory(dir.c_str(), dircontents) != 0)
  {
    logMsg(ERROR, "Error calling listDirectory");
    return false;
  }

  // log what came back and note which of the expected names were seen
  ostringstream ldstr;
  ldstr << "Listed directory " << dir << ":";
  bool sawFirst = false;
  bool sawSecond = false;
  bool sawThird = false;

  for (list<string>::const_iterator entry = dircontents.begin(); entry != dircontents.end(); ++entry)
  {
    ldstr << *entry << ",";

    if (*entry == "foobar")
      sawFirst = true;
    else if (*entry == "foobar2")
      sawSecond = true;
    else if (*entry == "foobar3")
      sawThird = true;
  }

  logMsg(INFO, ldstr.str());

  if (dircontents.size() != 3)
  {
    logMsg(ERROR, "listDirectory not returning 3 file names");
    return false;
  }

  if (!(sawFirst && sawSecond && sawThird))
  {
    logMsg(ERROR, "listDirectory returning incorrect file names");
    return false;
  }

  // now check a bogus path and make sure it returns -1
  if (fs.listDirectory("/this-is-a-bogus-directory", dircontents) != -1)
  {
    logMsg(ERROR, "listDirectory not failing a call for a bogus directory");
    return false;
  }

  assert(fs.remove("/this-is-a-bogus-directory") == 0);
  assert(!fs.isDir("/this-is-a-bogus-directory"));
  assert(!fs.isDir(m_fname.c_str()));
  return true;
}
// Destructor. The body is empty: no explicit cleanup is performed here.
TestRunner::~TestRunner()
{
}
// Always returns true; the body performs no other work.
bool TestRunner::fillDefault()
{
return true;
}
bool TestRunner::writeBlock(unsigned int blocknum, unsigned char* buf, unsigned char tag)
{
buf[0] = tag;
ssize_t rc = m_file->write(buf, BLK_SIZE);
if ((size_t)rc != BLK_SIZE)
{
ostringstream errstr;
errstr << "writeBlock failed for block " << blocknum << ", wrote " << rc << " bytes, expecting "
<< BLK_SIZE;
logMsg(ERROR, errstr.str());
return false;
}
return true;
}
// Writes `count` consecutive blocks by calling writeBlock() once per block.
//
// blocknum  index of the first block (used by writeBlock() for reporting)
// buf       buffer handed to every writeBlock() call
// tag       tag of the first block; block i gets tag + i converted to
//           unsigned char
// count     number of blocks to write
//
// Returns true only if every writeBlock() call succeeded. A failure does not
// stop the loop, so every block is still attempted and reported.
bool TestRunner::writeBlocks(unsigned int blocknum, unsigned char* buf, unsigned char tag, unsigned int count)
{
  bool allOk = true;

  for (unsigned i = 0; i < count; ++i)
  {
    if (!writeBlock(blocknum + i, buf, static_cast<unsigned char>(tag + i)))
      allOk = false;
  }

  return allOk;
}
// Reads one block with m_file->pread() and verifies its tag byte.
//
// blocknum  block index; the read starts at byte offset blocknum * BLK_SIZE
// buf       destination buffer, receives BLK_SIZE bytes
// tag       value expected in the first byte of the block
//
// Returns true when pread() reports exactly BLK_SIZE bytes and buf[0] equals
// `tag`; otherwise logs an ERROR and returns false.
bool TestRunner::readBlock(unsigned int blocknum, unsigned char* buf, unsigned char tag)
{
  ssize_t rc = m_file->pread(buf, blocknum * BLK_SIZE, BLK_SIZE);

  if ((size_t)rc != BLK_SIZE)
  {
    ostringstream errstr;
    errstr << "readBlock failed for block " << blocknum << ", read " << rc << " bytes, expecting "
           << BLK_SIZE;
    logMsg(ERROR, errstr.str());
    return false;
  }

  if (tag != buf[0])
  {
    // hex/dec are sticky and setw applies to the next field only, so the base
    // and width are set again for the expected tag; otherwise it would be
    // printed in decimal right after the "0x" prefix.
    ostringstream errstr;
    errstr << setfill('0') << "read tag 0x" << hex << setw(2) << static_cast<int>(buf[0]) << " at block "
           << dec << blocknum << ", expected 0x" << hex << setw(2) << static_cast<int>(tag);
    logMsg(ERROR, errstr.str());
    return false;
  }

  return true;
}
// Reads and verifies `count` consecutive blocks by calling readBlock() once
// per block.
//
// blocknum  index of the first block to read
// buf       scratch buffer handed to every readBlock() call
// tag       tag expected in the first block; block i is expected to carry
//           tag + i converted to unsigned char
// count     number of blocks to read
//
// Returns true only if every readBlock() call succeeded. A failure does not
// stop the loop, so every block is still checked and reported.
bool TestRunner::readBlocks(unsigned int blocknum, unsigned char* buf, unsigned char tag, unsigned int count)
{
  bool allOk = true;

  for (unsigned i = 0; i < count; ++i)
  {
    if (!readBlock(blocknum + i, buf, static_cast<unsigned char>(tag + i)))
      allOk = false;
  }

  return allOk;
}
// Writes `count` blocks from m_defbuf, flushes, optionally closes and reopens
// the file according to m_opts, then reads the same blocks back and verifies
// their tags.
//
// blocknum  index of the first block
// tag       tag of the first block (later blocks use consecutive tags)
// count     number of blocks to write and read back
//
// Returns false if a reopen fails, or if writeBlocks() or readBlocks()
// reports failure; true otherwise.
bool TestRunner::doBlock(unsigned int blocknum, unsigned char tag, unsigned int count)
{
  const bool writeOk = writeBlocks(blocknum, m_defbuf, tag, count);
  m_file->flush();

  if (m_opts.closeonwrite)
  {
    delete m_file;
    // will have to cache type somewhere later
    m_file = IDBDataFile::open(IDBDataFile::HDFS, m_fname.c_str(), "a", m_open_opts);

    if (!m_file)
      return false;
  }

  if (m_opts.reopen)
  {
    delete m_file;
    // will have to cache type somewhere later
    m_file = IDBDataFile::open(IDBDataFile::HDFS, m_fname.c_str(), "r", m_open_opts);

    if (!m_file)
      return false;
  }

  // BLK_SIZE is a runtime value (up to MAX_BLK_SIZE), so the read buffer is
  // heap-allocated: a stack array sized by it would be a non-standard
  // variable-length array and could exhaust a worker thread's stack.
  unsigned char* buf = new unsigned char[BLK_SIZE];
  const bool readOk = readBlocks(blocknum, buf, tag, count);
  delete[] buf;

  return writeOk && readOk;
}
// Prints one log line of the form "Test-<id>:<LEVEL>:<msg>" to stdout while
// holding m_guard.
//
// level  INFO prints the "INFO:" label; any other value prints "ERROR:"
// msg    message text
// bold   when true, the line is wrapped in the terminal escape sequences
//        "\033[0;1m" and "\033[0;39m"
void TestRunner::logMsg(LogLevel level, const string& msg, bool bold)
{
  boost::mutex::scoped_lock guard(m_guard);

  const char* levelLabel = "ERROR:";
  if (level == INFO)
    levelLabel = "INFO:";

  if (bold)
  {
    cout << "\033[0;1m";
  }

  cout << "Test-" << m_id << ":" << levelLabel << msg << endl;

  if (bold)
  {
    cout << "\033[0;39m";
  }
}
// Thread entry point: runs the test suite for the UNBUFFERED file type, then
// BUFFERED, then HDFS when runOpts().useHdfs is set. Once one suite fails the
// remaining ones are skipped. The overall verdict is logged in bold.
void thread_func(TestRunner& trun)
{
  // todo-add some capability to use USE_ODIRECT or not
  bool passed = trun.runTest(IDBDataFile::UNBUFFERED, 0);

  // todo-add some capability to use USE_VBUF or not
  if (passed)
    passed = trun.runTest(IDBDataFile::BUFFERED, 0);

  if (trun.runOpts().useHdfs && passed)
    passed = trun.runTest(IDBDataFile::HDFS, 0);

  const string verdict = string(passed ? "A" : "NOT a") + "ll tests passed!\n";
  trun.logMsg(TestRunner::INFO, verdict, true);
}
int main(int argc, char** argv)
{
foptest_opts opts;
int c;
while ((c = getopt(argc, argv, "adpsrh:b:t:cvuen:l:m:")) != -1)
switch (c)
{
case 'b':
BLK_SIZE = atoi(optarg);
if (BLK_SIZE > MAX_BLK_SIZE)
{
BLK_SIZE = MAX_BLK_SIZE;
cout << "WARNING: block size exceeds max, using " << BLK_SIZE << endl;
}
else
{
cout << "INFO: using BLK_SIZE " << BLK_SIZE << endl;
}
break;
case 'c':
opts.closeonwrite = true;
cout << "INFO: will close on write" << endl;
break;
case 'd':
opts.usedirect = 1;
cout << "INFO: will open with O_DIRECT" << endl;
break;
case 'p':
opts.preflush = 1;
cout << "INFO: will use read pre-flush" << endl;
break;
case 's':
opts.synconwrite = 1;
cout << "INFO: will fsync after write" << endl;
break;
case 'r':
opts.reopen = 1;
cout << "INFO: will reopen before each read" << endl;
break;
case 't':
opts.numthreads = atoi(optarg);
cout << "INFO: will start " << opts.numthreads << " test threads" << endl;
break;
case 'v':
opts.usevbuf = true;
cout << "INFO: will use setvbuf to disable buffering" << endl;
break;
case 'n':
opts.numblocks = atoi(optarg);
cout << "INFO: will operate on " << opts.numblocks << " blocks" << endl;
break;
case 'l':
opts.largeFile = optarg;
cout << "INFO: will run a large HDFS RDWR test on " << opts.largeFile << endl;
break;
case 'h':
opts.pluginFile = optarg;
cout << "INFO: will load HDFS plugin " << optarg << endl;
break;
case 'm':
opts.hdfsMaxMem = atoi(optarg);
if (opts.hdfsMaxMem)
cout << "INFO: will set hdfsRdwrBufferMaxSize to " << opts.hdfsMaxMem << endl;
else
cout << "INFO: will set hdfsRdwrBufferMaxSize to unlimited" << endl;
break;
default: usage(); return 1;
}
// init the library with logging enabled
IDBPolicy::init(true, true, "/tmp/rdwr_scratch", opts.hdfsMaxMem);
if (opts.pluginFile.length())
opts.useHdfs = true;
// vector<TestRunner>
boost::thread_group thread_group;
for (int i = 0; i < opts.numthreads; ++i)
{
thread_group.create_thread(boost::bind(thread_func, boost::ref(*(new TestRunner(i, opts)))));
}
thread_group.join_all();
}