From 6f120d26379a05b9ff4f30dfef18e68a11f2de8a Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Thu, 8 Oct 2020 13:21:22 +0000 Subject: [PATCH] MCOL-4328 MCS avoids chown() calls for files that are on S3 MCS now chowns created directories hierarchy not only files and immediate parent directories Minor changes to cpimport's help printout cpimport's -f option is now mandatory with mode 2 --- utils/idbdatafile/IDBFileSystem.h | 13 +++++ utils/idbdatafile/PosixFileSystem.cpp | 12 ++++ utils/idbdatafile/PosixFileSystem.h | 24 ++++---- writeengine/dictionary/we_dctnry.cpp | 3 +- writeengine/shared/we_fileop.cpp | 81 +++++++++----------------- writeengine/shared/we_fileop.h | 5 +- writeengine/shared/we_rbmetawriter.cpp | 12 +++- writeengine/shared/we_typeext.h | 19 +++--- writeengine/splitter/we_cmdargs.cpp | 12 ++-- 9 files changed, 97 insertions(+), 84 deletions(-) diff --git a/utils/idbdatafile/IDBFileSystem.h b/utils/idbdatafile/IDBFileSystem.h index 27ef09be7..98459e9f1 100644 --- a/utils/idbdatafile/IDBFileSystem.h +++ b/utils/idbdatafile/IDBFileSystem.h @@ -134,6 +134,19 @@ public: return true; } + /** + * chown() changes the owner of the object on the FS + * Returns 0 on success. -1 on error. + */ + virtual int chown(const char* objectName, + const uid_t p_uid, + const gid_t p_pid, + int& funcErrno) const + { + return 0; + } + + protected: IDBFileSystem( Types type ); diff --git a/utils/idbdatafile/PosixFileSystem.cpp b/utils/idbdatafile/PosixFileSystem.cpp index f52bdc56a..4ebdeb4ae 100644 --- a/utils/idbdatafile/PosixFileSystem.cpp +++ b/utils/idbdatafile/PosixFileSystem.cpp @@ -306,4 +306,16 @@ int PosixFileSystem::copyFile(const char* srcPath, const char* destPath) const return ret; } +int PosixFileSystem::chown(const char* objectName, + const uid_t p_uid, + const gid_t p_gid, + int& funcErrno) const +{ + int ret = 0; + errno = 0; + if ((ret = ::chown(objectName, p_uid, p_gid))) + funcErrno = errno; + return ret; +} + } diff --git a/utils/idbdatafile/PosixFileSystem.h b/utils/idbdatafile/PosixFileSystem.h index 023cd3818..ab976d8bc 100644 --- a/utils/idbdatafile/PosixFileSystem.h +++ b/utils/idbdatafile/PosixFileSystem.h @@ -27,17 +27,21 @@ class PosixFileSystem : public IDBFileSystem { public: PosixFileSystem(); - /* virtual */ ~PosixFileSystem(); + ~PosixFileSystem(); - /* virtual */ int mkdir(const char* pathname); - /* virtual */ off64_t size(const char* path) const; - /* virtual */ off64_t compressedSize(const char* path) const; - /* virtual */ int remove(const char* pathname); - /* virtual */ int rename(const char* oldpath, const char* newpath); - /* virtual */ bool exists(const char* pathname) const; - /* virtual */ int listDirectory(const char* pathname, std::list& contents) const; - /* virtual */ bool isDir(const char* pathname) const; - /* virtual */ int copyFile(const char* srcPath, const char* destPath) const; + int mkdir(const char* pathname) override; + off64_t size(const char* path) const override; + off64_t compressedSize(const char* path) const override; + int remove(const char* pathname) override; + int rename(const char* oldpath, const char* newpath) override; + bool exists(const char* pathname) const override; + int listDirectory(const char* pathname, std::list& contents) const override; + bool isDir(const char* pathname) const override; + int copyFile(const char* srcPath, const char* destPath) const override; + int chown(const char* objectName, + const uid_t p_uid, + const gid_t p_pid, + int& funcErrno) const override; }; } diff --git a/writeengine/dictionary/we_dctnry.cpp b/writeengine/dictionary/we_dctnry.cpp index b1142175f..aacfdfd7a 100644 --- a/writeengine/dictionary/we_dctnry.cpp +++ b/writeengine/dictionary/we_dctnry.cpp @@ -219,8 +219,7 @@ int Dctnry::createDctnry( const OID& dctnryOID, int colWidth, { // We presume the path will contain / std::string filePath(fileName); - std::ostringstream ossChown; - if (chownDataFileDir(ossChown, filePath)) + if (chownDataPath(filePath)) { return ERR_FILE_CHOWN; } diff --git a/writeengine/shared/we_fileop.cpp b/writeengine/shared/we_fileop.cpp index 66e9b71c6..fe2eb2710 100644 --- a/writeengine/shared/we_fileop.cpp +++ b/writeengine/shared/we_fileop.cpp @@ -791,8 +791,7 @@ int FileOp::extendFile( { // We presume the path will contain / std::string filePath(fileName); - std::ostringstream ossChown; - if (chownDataFileDir(ossChown, filePath)) + if (chownDataPath(filePath)) return ERR_FILE_CHOWN; } @@ -2365,57 +2364,27 @@ int FileOp::oid2FileName( FID fid, return ERR_FILE_NOT_EXIST; } - /* - char dirName[FILE_NAME_SIZE]; - - sprintf( dirName, "%s/%s", Config::getDBRootByNum(dbRoot).c_str(), - dbDir[0] ); - if( !isDir( dirName ) ) - RETURN_ON_ERROR( createDir( dirName )); - - sprintf( dirName, "%s/%s", dirName, dbDir[1] ); - if( !isDir( dirName ) ) - RETURN_ON_ERROR( createDir( dirName )); - - sprintf( dirName, "%s/%s", dirName, dbDir[2] ); - if( !isDir( dirName ) ) - RETURN_ON_ERROR( createDir( dirName )); - - sprintf( dirName, "%s/%s", dirName, dbDir[3] ); - if( !isDir( dirName ) ) - RETURN_ON_ERROR( createDir( dirName )); - - sprintf( dirName, "%s/%s", dirName, dbDir[4] ); - if( !isDir( dirName ) ) - RETURN_ON_ERROR( createDir( dirName )); - */ - std::stringstream aDirName; + for (size_t i = 0; i < MaxDirLevels; i++) + { + if (i == 0) + { + aDirName << Config::getDBRootByNum(dbRoot).c_str() + << "/" << dbDir[i]; + } + else + { + aDirName << "/" << dbDir[i]; + } + if (!isDir(aDirName.str().c_str())) + RETURN_ON_ERROR( createDir(aDirName.str().c_str()) ); - aDirName << Config::getDBRootByNum(dbRoot).c_str() << "/" << dbDir[0]; - - if (!isDir((aDirName.str()).c_str())) - RETURN_ON_ERROR( createDir((aDirName.str()).c_str()) ); - - aDirName << "/" << dbDir[1]; - - if (!isDir(aDirName.str().c_str())) - RETURN_ON_ERROR( createDir(aDirName.str().c_str()) ); - - aDirName << "/" << dbDir[2]; - - if (!isDir(aDirName.str().c_str())) - RETURN_ON_ERROR( createDir(aDirName.str().c_str()) ); - - aDirName << "/" << dbDir[3]; - - if (!isDir(aDirName.str().c_str())) - RETURN_ON_ERROR( createDir(aDirName.str().c_str()) ); - - aDirName << "/" << dbDir[4]; - - if (!isDir(aDirName.str().c_str())) - RETURN_ON_ERROR( createDir(aDirName.str().c_str()) ); + { + std::ostringstream ossChown; + if (chownDataPath(aDirName.str())) + return ERR_FILE_CHOWN; + } + } return NO_ERROR; } @@ -2932,11 +2901,13 @@ void FileOp::setFixFlag(bool isFix) m_isFix = isFix; } -bool FileOp::chownDataFileDir(std::ostringstream& error, - const std::string& fileName) +// Small note. We call chownFileDir in couple places to chown of the +// target file and call in oid2Filename() chowns directories created +bool FileOp::chownDataPath(const std::string& fileName) const { - std::string dirName = fileName.substr(0, fileName.find_last_of('/')); - if (chownFileDir(error, fileName, dirName)) + std::ostringstream error; + idbdatafile::IDBFileSystem& fs = IDBPolicy::getFs(fileName); + if (chownPath(error, fileName, fs)) { logging::Message::Args args; logging::Message message(1); diff --git a/writeengine/shared/we_fileop.h b/writeengine/shared/we_fileop.h index 1519093c4..0b7d094e4 100644 --- a/writeengine/shared/we_fileop.h +++ b/writeengine/shared/we_fileop.h @@ -58,6 +58,8 @@ namespace WriteEngine { +constexpr size_t MaxDirLevels = 5; + /** Class FileOp */ class FileOp : public BlockOp, public WeUIDGID { @@ -503,8 +505,7 @@ public: bool bOptExtension=false ); // Calls a chown and logs an error message - bool chownDataFileDir(std::ostringstream& error, - const std::string& fileName); + bool chownDataPath(const std::string& fileName) const; protected: EXPORT virtual int updateColumnExtent(IDBDataFile* pFile, int nBlocks); diff --git a/writeengine/shared/we_rbmetawriter.cpp b/writeengine/shared/we_rbmetawriter.cpp index bf8b3ed3f..f832e5287 100644 --- a/writeengine/shared/we_rbmetawriter.cpp +++ b/writeengine/shared/we_rbmetawriter.cpp @@ -454,8 +454,12 @@ std::string RBMetaWriter::openMetaFile ( uint16_t dbRoot ) { std::ostringstream ossChown; - if (chownFileDir(ossChown, tmpMetaFileName, bulkRollbackPath)) + idbdatafile::IDBFileSystem& fs = IDBPolicy::getFs(tmpMetaFileName.c_str()); + if (chownPath(ossChown, tmpMetaFileName, fs) + || chownPath(ossChown, bulkRollbackPath, fs)) + { throw WeException(ossChown.str(), ERR_FILE_CHOWN); + } } fMetaDataStream << @@ -1336,8 +1340,12 @@ int RBMetaWriter::writeHWMChunk( { std::ostringstream ossChown; - if (chownFileDir(ossChown, fileName, dirPath)) + idbdatafile::IDBFileSystem& fs = IDBPolicy::getFs(fileName.c_str()); + if (chownPath(ossChown, fileName, fs) + || chownPath(ossChown, dirPath, fs)) + { throw WeException(ossChown.str(), ERR_FILE_CHOWN); + } } return NO_ERROR; diff --git a/writeengine/shared/we_typeext.h b/writeengine/shared/we_typeext.h index 9ac649e9e..91c95f0c1 100644 --- a/writeengine/shared/we_typeext.h +++ b/writeengine/shared/we_typeext.h @@ -32,6 +32,8 @@ #include #include #include +#include "IDBFileSystem.h" + /** Namespace WriteEngine */ namespace WriteEngine @@ -67,8 +69,9 @@ public: virtual ~WeUIDGID() {}; virtual void setUIDGID(const uid_t uid, const gid_t gid); void setUIDGID(const WeUIDGID* id); - bool chownFileDir(std::ostringstream& error, - const std::string& fileName, const std::string& dirName) const; + bool chownPath(std::ostringstream& error, + const std::string& fileName, + const idbdatafile::IDBFileSystem& fs) const; ; private: @@ -87,18 +90,18 @@ inline void WeUIDGID::setUIDGID(const WeUIDGID* id) *this = *id; } -inline bool WeUIDGID::chownFileDir(std::ostringstream& error, - const std::string& fileName, const std::string& dirName) const +inline bool WeUIDGID::chownPath(std::ostringstream& error, + const std::string& fileName, + const idbdatafile::IDBFileSystem& fs) const { if (uid != UID_NONE) { - errno = 0; - if (chown(fileName.c_str(), uid, gid) == -1 || - chown(dirName.c_str(), uid, gid) == -1) + int funcErrno = 0; + if (fs.chown(fileName.c_str(), uid, gid, funcErrno) == -1) { error << "Error calling chown() with uid " << uid << " and gid " << gid << " with the file " - << fileName << " with errno " << errno; + << fileName << " with errno " << funcErrno; return true; } } diff --git a/writeengine/splitter/we_cmdargs.cpp b/writeengine/splitter/we_cmdargs.cpp index 9cb28f5dc..30716252c 100644 --- a/writeengine/splitter/we_cmdargs.cpp +++ b/writeengine/splitter/we_cmdargs.cpp @@ -505,7 +505,7 @@ void WECmdArgs::usage() cout << "\t\t [-r readers] [-j JobID] [-e maxErrs] [-B libBufSize] [-w parsers]\n"; cout << "\t\t [-s c] [-E enclosedChar] [-C escapeChar] [-n NullOption]\n"; cout << "\t\t [-q batchQty] [-p jobPath] [-P list of PMs] [-S] [-i] [-v verbose]\n"; - cout << "\t\t [-I binaryOpt] [-T timeZone] [-U username]\n"; + cout << "\t\t [-I binaryOpt] [-T timeZone]\n"; cout << "Traditional usage without positional parameters (XML job file required):\n"; @@ -514,7 +514,7 @@ void WECmdArgs::usage() cout << "\t\t [-b readBufs] [-p path] [-c readBufSize] [-e maxErrs] [-B libBufSize]\n"; cout << "\t\t [-n NullOption] [-E encloseChar] [-C escapeChar] [-i] [-v verbose]\n"; cout << "\t\t [-d debugLevel] [-q batchQty] [-l loadFile] [-P list of PMs] [-S]\n"; - cout << "\t\t [-I binaryOpt] [-T timeZone] [-U username]\n"; + cout << "\t\t [-I binaryOpt] [-T timeZone]\n"; cout << "\n\nPositional parameters:\n"; cout << "\tdbName Name of the database to load\n"; @@ -566,8 +566,7 @@ void WECmdArgs::usage() << "\t-K\tS3 Authentication Secret (for S3 imports)\n" << "\t-t\tS3 Bucket (for S3 imports)\n" << "\t-H\tS3 Hostname (for S3 imports, Amazon's S3 default)\n" - << "\t-g\tS3 Region (for S3 imports)\n" - << "\t-U\tusername of the data files owner. Default is mysql\n"; + << "\t-g\tS3 Region (for S3 imports)\n"; cout << "\nExample1: Traditional usage\n" << "\tcpimport -j 1234"; @@ -580,7 +579,7 @@ void WECmdArgs::usage() cout << "\nExample4: Import a nation table to only PM1 and PM2 in Mode 1\n" << "\tcpimport -m 1 -P 1,2 tpch nation nation.tbl"; cout << "\nExample5: Import nation.tbl from PMs to nation table in Mode 2\n" - << "\tcpimport -m 2 tpch nation -l nation.tbl"; + << "\tcpimport -m 2 tpch nation -f /var/lib/columnstore/data/bulk/data/import/ -l nation.tbl"; cout << "\nExample6: Import nation.tbl in mode 3\n" << "\tcpimport -m 3 tpch nation nation.tbl\n\n"; @@ -959,6 +958,9 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) checkForBulkLogDir(bulkRootPath); + if (2 == fArgMode && fPmFilePath.empty()) + throw runtime_error("-f option is mandatory with mode 2."); + if (aJobType) { if (0 == fArgMode) throw runtime_error("Incompatible mode and option types");