From d53471fc759ae3948cf9156b21ba8868b76b60fd Mon Sep 17 00:00:00 2001
From: Patrick LeBlanc
Date: Fri, 18 Jan 2019 10:19:14 -0600
Subject: [PATCH] Checkpointing some stuff. No way it'll build yet.

---
Reflowed from a whitespace-collapsed copy of this patch.  The blob hashes on the
"index" lines are the ones from the original commit and were not regenerated, and
the indentation of the context lines in the two idbdatafile hunks is a best guess
(use "git apply --ignore-whitespace" if they do not match).

 .gitmodules                       |   3 +
 utils/cloudio/SMComm.cpp          |   0
 utils/cloudio/SMComm.h            |  60 ++++++++++++++
 utils/cloudio/SMDataFile.cpp      | 130 ++++++++++++++++++++++++++++++
 utils/cloudio/SMDataFile.h        |  45 ++++++++++
 utils/cloudio/SMFileFactory.cpp   |  95 ++++++++++++++++++++++
 utils/cloudio/SMFileFactory.h     |  20 +++++
 utils/cloudio/SMFileSystem.cpp    | 101 +++++++++++++++++++++++
 utils/cloudio/SMFileSystem.h      |  35 ++++++++
 utils/cloudio/sm_exceptions.h     |  21 +++++
 utils/cloudio/storage-manager     |   1 +
 utils/idbdatafile/IDBDataFile.h   |   1 +
 utils/idbdatafile/IDBFileSystem.h |   3 +-
 13 files changed, 514 insertions(+), 1 deletion(-)
 create mode 100644 .gitmodules
 create mode 100644 utils/cloudio/SMComm.cpp
 create mode 100644 utils/cloudio/SMComm.h
 create mode 100644 utils/cloudio/SMDataFile.cpp
 create mode 100644 utils/cloudio/SMDataFile.h
 create mode 100644 utils/cloudio/SMFileFactory.cpp
 create mode 100644 utils/cloudio/SMFileFactory.h
 create mode 100644 utils/cloudio/SMFileSystem.cpp
 create mode 100644 utils/cloudio/SMFileSystem.h
 create mode 100644 utils/cloudio/sm_exceptions.h
 create mode 160000 utils/cloudio/storage-manager

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..f2f9d199d
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "utils/cloudio/storage-manager"]
+	path = utils/cloudio/storage-manager
+	url = git@github.com:pleblanc1976/storage-manager
diff --git a/utils/cloudio/SMComm.cpp b/utils/cloudio/SMComm.cpp
new file mode 100644
index 000000000..e69de29bb
diff --git a/utils/cloudio/SMComm.h b/utils/cloudio/SMComm.h
new file mode 100644
index 000000000..17b178ea3
--- /dev/null
+++ b/utils/cloudio/SMComm.h
@@ -0,0 +1,60 @@
+// copy some licensing stuff here
+
+#ifndef SMCOMM_H_
+#define SMCOMM_H_
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <list>
+#include <string>
+#include <boost/noncopyable.hpp>
+
+namespace idbdatafile {
+
+/* Interface the cloudio classes use to send file operations to the storage manager (SM).
+   Files are addressed by name on every call; there is no per-file handle at this level. */
+class SMComm : public boost::noncopyable
+{
+    public:
+        // static: every caller in cloudio obtains the instance through SMComm::get()
+        static SMComm *get();
+
+        /* Open currently returns a stat struct so SMDataFile can set its initial position, otherwise
+           behaves how you'd think. */
+        int open(const std::string &filename, int mode, struct stat *statbuf);
+
+        // buf receives the data that was read, so it is not const
+        ssize_t pread(const std::string &filename, void *buf, size_t count, off_t offset);
+
+        ssize_t pwrite(const std::string &filename, const void *buf, size_t count, off_t offset);
+
+        /* append exists for cases where the file is open in append mode.  A normal write won't work
+           because the file position may be out of date if there are multiple writers. */
+        ssize_t append(const std::string &filename, const void *buf, size_t count);
+
+        int unlink(const std::string &filename);
+
+        int stat(const std::string &filename, struct stat *statbuf);
+
+        // added this one because it should be trivial to implement in SM, and prevents a large
+        // operation in SMDataFile.
+        int truncate(const std::string &filename, off64_t length);
+
+        int listDirectory(const std::string &path, std::list<std::string> *entries);
+
+        // health indicator.  0 = processes are talking to each other and SM has read/write access to
+        // the specified S3 bucket.  Need to define specific error codes.
+        int ping();
+
+        virtual ~SMComm();
+
+    private:
+        SMComm();
+
+        // TODO: SocketPool is not declared by anything this header includes yet
+        SocketPool sockets;
+};
+
+}
+
+#endif
diff --git a/utils/cloudio/SMDataFile.cpp b/utils/cloudio/SMDataFile.cpp
new file mode 100644
index 000000000..1c8add2aa
--- /dev/null
+++ b/utils/cloudio/SMDataFile.cpp
@@ -0,0 +1,130 @@
+// copy licensing stuff here
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <cerrno>
+#include "SMDataFile.h"
+
+using namespace std;
+
+namespace idbdatafile
+{
+
+// Declared in SMDataFile.h.  comm comes from SMComm::get() and is not deleted here.
+SMDataFile::~SMDataFile()
+{
+}
+
+/* name      - file name; passed to IDBDataFile and sent to SMComm on every call
+   _openmode - POSIX open() flags (O_RDWR, O_APPEND, ...)
+   _stat     - stat of the file at open time; supplies the starting position for 'a' mode */
+SMDataFile::SMDataFile(const char *name, int _openmode, const struct stat &_stat) :
+    IDBDataFile(name)
+{
+    openmode = _openmode;
+    // the 'a' file open mode is the only one that starts at EOF
+    if ((openmode & O_APPEND) && !(openmode & O_RDWR))
+        position = _stat.st_size;
+    else
+        position = 0;
+    comm = SMComm::get();
+}
+
+// Positional read; does not use or move the tracked file position.
+ssize_t SMDataFile::pread(void *buf, off64_t offset, size_t count)
+{
+    return comm->pread(name(), buf, count, offset);
+}
+
+// Reads at the tracked position and advances it by the number of bytes read.
+ssize_t SMDataFile::read(void *buf, size_t count)
+{
+    ssize_t ret = comm->pread(name(), buf, count, position);
+    // a negative (error) return must not move the position backwards
+    if (ret > 0)
+        position += ret;
+    return ret;
+}
+
+// In append mode the write goes through SMComm::append() and the tracked position is left
+// alone; otherwise writes at the tracked position and advances it.
+ssize_t SMDataFile::write(const void *buf, size_t count)
+{
+    if (openmode & O_APPEND)
+        return comm->append(name(), buf, count);
+    ssize_t ret = comm->pwrite(name(), buf, count, position);
+    // a negative (error) return must not move the position backwards
+    if (ret > 0)
+        position += ret;
+    return ret;
+}
+
+// Returns 0 on success.  SEEK_END stats the file and returns the stat error code on
+// failure; an unknown whence sets errno to EINVAL and returns -1.
+int SMDataFile::seek(off64_t offset, int whence)
+{
+    switch (whence) {
+        case SEEK_SET:
+            position = offset;
+            break;
+        case SEEK_CUR:
+            position += offset;
+            break;
+        case SEEK_END:
+        {
+            // braces keep these locals out of scope at the default label below
+            struct stat _stat;
+            int err = comm->stat(name(), &_stat);
+            if (err)
+                return err;
+            position = _stat.st_size + offset;
+            break;
+        }
+        default:
+            errno = EINVAL;
+            return -1;
+    }
+    return 0;
+}
+
+int SMDataFile::truncate(off64_t length)
+{
+    return comm->truncate(name(), length);
+}
+
+// Returns the size reported by stat, or the stat error code if stat fails.
+off64_t SMDataFile::size()
+{
+    struct stat _stat;
+    int err = comm->stat(name(), &_stat);
+
+    if (err)
+        return err;
+    return _stat.st_size;
+}
+
+off64_t SMDataFile::tell()
+{
+    return position;
+}
+
+int SMDataFile::flush()
+{
+    return 0;     // writes are synchronous b/c of replication.  If we allow asynchronous replication,
+                  // then we need to implement a flush() cmd in SMComm.
+}
+
+// Returns the modification time reported by stat, or the stat error code cast to time_t.
+time_t SMDataFile::mtime()
+{
+    struct stat _stat;
+    int err = comm->stat(name(), &_stat);
+
+    if (err)
+        return (time_t) err;
+    return _stat.st_mtime;
+}
+
+}
diff --git a/utils/cloudio/SMDataFile.h b/utils/cloudio/SMDataFile.h
new file mode 100644
index 000000000..dab286978
--- /dev/null
+++ b/utils/cloudio/SMDataFile.h
@@ -0,0 +1,45 @@
+// copy some licensing stuff here
+
+#ifndef SMDATAFILE_H_
+#define SMDATAFILE_H_
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string>
+#include <boost/utility.hpp>
+#include "IDBDataFile.h"
+#include "SMComm.h"
+
+namespace idbdatafile
+{
+
+/* IDBDataFile implementation that forwards I/O to SMComm.  The file position is tracked in
+   this object.  Instances are created by SMFileFactory (the constructors are private). */
+class SMDataFile : public IDBDataFile
+{
+    public:
+        virtual ~SMDataFile();
+
+        ssize_t pread(void* ptr, off64_t offset, size_t count);
+        ssize_t read(void* ptr, size_t count);
+        ssize_t write(const void* ptr, size_t count);
+        int seek(off64_t offset, int whence);
+        int truncate(off64_t length);
+        off64_t size();
+        off64_t tell();
+        int flush();
+        time_t mtime();
+
+    private:
+        SMDataFile();   // not defined in this patch
+        SMDataFile(const char *fname, int openmode, const struct stat &);
+        off64_t position;   // offset used by read(), write(), seek(), tell()
+        int openmode;       // POSIX open() flags this file was opened with
+        SMComm *comm;
+
+        friend class SMFileFactory;
+};
+
+}
+
+#endif
diff --git a/utils/cloudio/SMFileFactory.cpp b/utils/cloudio/SMFileFactory.cpp
new file mode 100644
index 000000000..eda1eb007
--- /dev/null
+++ b/utils/cloudio/SMFileFactory.cpp
@@ -0,0 +1,95 @@
+// copy licensing stuff here
+
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string>
+#include "SMFileFactory.h"
+#include "SMDataFile.h"
+#include "SMComm.h"
+
+using namespace std;
+
+namespace idbdatafile {
+
+/* Translates a C stdio mode string into POSIX open() flags, asks SM to open the file, and
+   wraps the result in an SMDataFile.
+
+   filename - name of the file to open
+   mode     - "r", "r+", "w", "w+", "a" or "a+"; any 'b' characters are ignored
+   opts, colWidth - not used by this implementation
+
+   Returns a new SMDataFile the caller owns, or NULL if an argument is NULL, the mode string
+   is not recognized, or SM reports an error from open. */
+IDBDataFile* SMFileFactory::open(const char *filename, const char *mode, unsigned opts, unsigned colWidth)
+{
+    if (filename == NULL || mode == NULL)
+        return NULL;
+
+    // strip 'b' chars from mode
+    char newmode[8] = {'\0'};  // there'd better not be 7 chars in the mode string
+    int i = 0;
+    for (const char *c = mode; *c != '\0' && i < 8; c++)
+        if (*c != 'b')
+            newmode[i++] = *c;
+    if (i == 8)
+        return NULL;
+
+    // What is left has to be exactly one of r, w, a, optionally followed by a single '+'.
+    // (Plain comparisons: case labels must be constant expressions, and reading the buffer
+    // through a uint64_t pointer was neither one nor safely aligned.)
+    const string stripped(newmode);
+    if (stripped.empty() || stripped.size() > 2 || (stripped.size() == 2 && stripped[1] != '+'))
+        return NULL;
+    const bool update = (stripped.size() == 2);
+
+    bool _read = false;
+    bool _write = false;
+    bool create = false;
+    bool truncate = false;
+    bool append = false;
+
+    switch (stripped[0]) {
+        case 'r':
+            _read = true;
+            _write = update;
+            break;
+        case 'w':
+            _read = update;
+            _write = true;
+            truncate = true;
+            create = true;
+            break;
+        case 'a':
+            // both "a" and "a+" need O_APPEND; SMDataFile tells them apart by O_RDWR
+            _read = update;
+            _write = true;
+            create = true;
+            append = true;
+            break;
+        default:
+            return NULL;
+    }
+
+    int posix_flags = 0;
+    if (_read && _write)
+        posix_flags |= O_RDWR;
+    else if (_read)
+        posix_flags |= O_RDONLY;
+    else if (_write)
+        posix_flags |= O_WRONLY;
+
+    posix_flags |= (create ? O_CREAT : 0);
+    posix_flags |= (truncate ? O_TRUNC : 0);
+    posix_flags |= (append ? O_APPEND : 0);
+
+    SMComm *comm = SMComm::get();
+    struct stat _stat;
+    int err = comm->open(filename, posix_flags, &_stat);
+    if (err)
+        return NULL;
+
+    return new SMDataFile(filename, posix_flags, _stat);
+}
+
+}
diff --git a/utils/cloudio/SMFileFactory.h b/utils/cloudio/SMFileFactory.h
new file mode 100644
index 000000000..81ac23660
--- /dev/null
+++ b/utils/cloudio/SMFileFactory.h
@@ -0,0 +1,20 @@
+// copy licensing stuff here
+
+#ifndef SMFILEFACTORY_H_
+#define SMFILEFACTORY_H_
+
+#include "IDBDataFile.h"
+#include "FileFactoryBase.h"
+
+namespace idbdatafile
+{
+
+// Factory that opens files through SMComm and hands them back as SMDataFile objects.
+class SMFileFactory : public FileFactoryBase
+{
+    public:
+        IDBDataFile* open(const char* fname, const char* mode, unsigned opts, unsigned colWidth);
+};
+
+}
+#endif
diff --git a/utils/cloudio/SMFileSystem.cpp b/utils/cloudio/SMFileSystem.cpp
new file mode 100644
index 000000000..c2e2731e3
--- /dev/null
+++ b/utils/cloudio/SMFileSystem.cpp
@@ -0,0 +1,101 @@
+// copy licensing stuff here
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "SMFileSystem.h"
+#include "SMComm.h"
+#include "sm_exceptions.h"
+
+using namespace std;
+
+namespace idbdatafile
+{
+
+SMFileSystem::SMFileSystem() : IDBFileSystem(IDBFileSystem::CLOUD)
+{
+    SMComm::get();   // get SMComm running
+}
+
+// Declared in SMFileSystem.h; there is nothing to clean up.
+SMFileSystem::~SMFileSystem()
+{
+}
+
+// Always reports success without contacting SM.
+int SMFileSystem::mkdir(const char *path)
+{
+    return 0;
+}
+
+// Returns the size reported by stat, or the stat error code if stat fails.
+off64_t SMFileSystem::size(const char *filename) const
+{
+    struct stat _stat;
+
+    SMComm *smComm = SMComm::get();
+    int err = smComm->stat(filename, &_stat);
+    if (err)
+        return err;
+
+    return _stat.st_size;
+}
+
+off64_t SMFileSystem::compressedSize(const char *filename) const
+{
+    // Yikes, punting on this one.
+    throw NotImplementedYet(__func__);
+}
+
+int SMFileSystem::remove(const char *filename)
+{
+    SMComm *comm = SMComm::get();
+    return comm->unlink(filename);
+}
+
+int SMFileSystem::rename(const char *oldFile, const char *newFile)
+{
+    // This will actually be pretty expensive to do b/c we store the filename in
+    // the key in cloud.  If we do this a lot, we'll have to implement copy() in the SM.
+    throw NotImplementedYet(__func__);
+}
+
+// True when stat succeeds for filename.
+bool SMFileSystem::exists(const char *filename) const
+{
+    struct stat _stat;
+    SMComm *comm = SMComm::get();
+
+    int err = comm->stat(filename, &_stat);
+    return (err == 0);
+}
+
+int SMFileSystem::listDirectory(const char* pathname, std::list<std::string>& contents) const
+{
+    SMComm *comm = SMComm::get();
+    return comm->listDirectory(pathname, &contents);
+}
+
+// True when stat succeeds and reports a directory; false on any stat error.
+bool SMFileSystem::isDir(const char *path) const
+{
+    SMComm *comm = SMComm::get();
+    struct stat _stat;
+
+    int err = comm->stat(path, &_stat);
+    if (err != 0)
+        return false;   // reasonable to throw here?  todo, look at what the other classes do.
+    return S_ISDIR(_stat.st_mode);
+}
+
+int SMFileSystem::copyFile(const char *src, const char *dest) const
+{
+    throw NotImplementedYet(__func__);
+}
+
+bool SMFileSystem::filesystemIsUp() const
+{
+    SMComm *comm = SMComm::get();
+    return (comm->ping() == 0);
+}
+
+}
diff --git a/utils/cloudio/SMFileSystem.h b/utils/cloudio/SMFileSystem.h
new file mode 100644
index 000000000..9555dfcda
--- /dev/null
+++ b/utils/cloudio/SMFileSystem.h
@@ -0,0 +1,35 @@
+// copy licensing stuff here
+
+#ifndef SMFILESYSTEM_H_
+#define SMFILESYSTEM_H_
+
+#include <list>
+#include <string>
+#include <boost/utility.hpp>
+#include "IDBFileSystem.h"
+
+namespace idbdatafile
+{
+
+// IDBFileSystem implementation that forwards file system queries to SMComm.
+class SMFileSystem : public IDBFileSystem, boost::noncopyable
+{
+    public:
+        SMFileSystem();
+        virtual ~SMFileSystem();
+
+        int mkdir(const char* pathname);
+        off64_t size(const char* path) const;
+        off64_t compressedSize(const char* path) const;
+        int remove(const char* pathname);
+        int rename(const char* oldpath, const char* newpath);
+        bool exists(const char* pathname) const;
+        int listDirectory(const char* pathname, std::list<std::string>& contents) const;
+        bool isDir(const char* pathname) const;
+        int copyFile(const char* srcPath, const char* destPath) const;
+        bool filesystemIsUp() const;
+};
+
+}
+
+#endif
diff --git a/utils/cloudio/sm_exceptions.h b/utils/cloudio/sm_exceptions.h
new file mode 100644
index 000000000..15231eafe
--- /dev/null
+++ b/utils/cloudio/sm_exceptions.h
@@ -0,0 +1,21 @@
+#ifndef SM_EXCEPTIONS_H_
+#define SM_EXCEPTIONS_H_
+
+#include <stdexcept>
+#include <string>
+
+/* Thrown by functions that are declared but not written yet.  what() returns the string
+   passed in followed by "() isn't implemented yet."  Derives from std::logic_error because
+   std::exception itself has no constructor that takes a message. */
+class NotImplementedYet : public std::logic_error
+{
+    public:
+        // Defined in the class body (implicitly inline) so that including this header from
+        // several .cpp files does not produce multiple definitions at link time.
+        NotImplementedYet(const std::string &s) :
+            std::logic_error(s + "() isn't implemented yet.")
+        {
+        }
+};
+
+#endif
diff --git a/utils/cloudio/storage-manager b/utils/cloudio/storage-manager
new file mode 160000
index 000000000..20b0d3b88
--- /dev/null
+++ b/utils/cloudio/storage-manager
@@ -0,0 +1 @@
+Subproject commit 20b0d3b88b23dc018ccd4f4b64ddddc9df447798
diff --git a/utils/idbdatafile/IDBDataFile.h b/utils/idbdatafile/IDBDataFile.h
index 6d8046bb8..1106087d9 100644
--- a/utils/idbdatafile/IDBDataFile.h
+++ b/utils/idbdatafile/IDBDataFile.h
@@ -54,6 +54,7 @@ public:
         BUFFERED = 0x0001,
         UNBUFFERED = 0x0002,
         HDFS = 0x0003,
+        CLOUD = 0x0004,
     };
 
     /**
diff --git a/utils/idbdatafile/IDBFileSystem.h b/utils/idbdatafile/IDBFileSystem.h
index 636787031..5979d822f 100644
--- a/utils/idbdatafile/IDBFileSystem.h
+++ b/utils/idbdatafile/IDBFileSystem.h
@@ -41,7 +41,8 @@ public:
     {
         UNKNOWN = 0x00,
         POSIX = 0x0001,
-        HDFS = 0x0002
+        HDFS = 0x0002,
+        CLOUD = 0x0003
     };
 
     /**