1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-4328 There is a new option in both cpimport and cpimport.bin to assign

an owner for all data files created by cpimport

The patch consists of two parts: cpimport.bin changes, cpimport splitter
changes

cpimport.bin computes the uid_t and gid_t early and propagates them down the
stack to where MCS creates data files
This commit is contained in:
Roman Nozdrin
2020-10-01 12:19:32 +00:00
parent f584bab846
commit 328ae25650
19 changed files with 200 additions and 43 deletions

View File

@ -45,10 +45,6 @@ int main(int argc, char** argv)
{
const int DEBUG_LVL_TO_DUMP_SYSCAT_RPT = 4;
// set effective ID to root
if( setuid( 0 ) < 0 )
{
std::cerr << " colxml: couldn't set uid " << std::endl;
}
setlocale(LC_ALL, "");
setlocale(LC_NUMERIC, "C");
WriteEngine::Config::initConfigCache(); // load Columnstore.xml config settings

View File

@ -105,7 +105,8 @@ void printUsage()
" [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl <<
" [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
"[-d debugLevel] [-i] " << endl <<
" [-D] [-N] [-L rejectDir] [-T timeZone]" << endl;
" [-D] [-N] [-L rejectDir] [-T timeZone]" << endl <<
" [-U username]" << endl << endl;
cout << endl << "Traditional usage without positional parameters "
"(XML job file required):" << endl <<
@ -115,7 +116,8 @@ void printUsage()
" [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
"[-d debugLevel] [-i] " << endl <<
" [-p path] [-l loadFile]" << endl <<
" [-D] [-N] [-L rejectDir] [-T timeZone]" << endl << endl;
" [-D] [-N] [-L rejectDir] [-T timeZone]" << endl <<
" [-U username]" << endl << endl;
cout << " Positional parameters:" << endl <<
" dbName Name of database to load" << endl <<
@ -171,7 +173,8 @@ void printUsage()
" -K S3 Authentication Secret (for S3 imports)" << endl <<
" -t S3 Bucket (for S3 imports)" << endl <<
" -H S3 Hostname (for S3 imports, Amazon's S3 default)" << endl <<
" -g S3 Regions (for S3 imports)" << endl;
" -g S3 Regions (for S3 imports)" << endl <<
" -U username of new data files owner. Default is mysql" << endl;
cout << " Example1:" << endl <<
" cpimport.bin -j 1234" << endl <<
@ -322,7 +325,7 @@ void parseCmdLineArgs(
std::string jobUUID;
while ( (option = getopt(
argc, argv, "b:c:d:e:f:hij:kl:m:n:p:r:s:u:w:B:C:DE:I:P:R:ST:X:NL:y:K:t:H:g:")) != EOF )
argc, argv, "b:c:d:e:f:hij:kl:m:n:p:r:s:u:w:B:C:DE:I:P:R:ST:X:NL:y:K:t:H:g:U:")) != EOF )
{
switch (option)
{
@ -743,6 +746,11 @@ void parseCmdLineArgs(
break;
}
case 'U':
{
curJob.setUsername(optarg);
break;
}
default :
{
@ -1083,12 +1091,6 @@ int main(int argc, char** argv)
{
#ifdef _MSC_VER
_setmaxstdio(2048);
#else
// set effective ID to root
if( setuid( 0 ) < 0 )
{
std::cerr << " cpimport: couldn't set uid " << std::endl;
}
#endif
setupSignalHandlers();

View File

@ -38,6 +38,7 @@
#include <boost/filesystem/convenience.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <pwd.h>
#include "we_bulkstatus.h"
#include "we_rbmetawriter.h"
@ -157,7 +158,8 @@ BulkLoad::BulkLoad() :
fbContinue(false),
fDisableTimeOut(false),
fUUID(boost::uuids::nil_generator()()),
fTimeZone("SYSTEM")
fTimeZone("SYSTEM"),
fUsername("mysql") // MCOL-4328 default file owner
{
fTableInfo.clear();
setDebugLevel( DEBUG_0 );
@ -484,6 +486,23 @@ int BulkLoad::preProcess( Job& job, int tableNo,
tableInfo->setTimeZone(fTimeZone);
tableInfo->setJobUUID(fUUID);
// MCOL-4328 Get username gid and uid if they are set
// We inject uid and gid into TableInfo and All ColumnInfo-s later.
struct passwd* pwd = nullptr;
errno = 0;
if (fUsername.length() && (pwd = getpwnam(fUsername.c_str())) == nullptr)
{
std::ostringstream oss;
oss << "Error getting pwd for " << fUsername
<< " with errno "
<< errno;
fLog.logMsg( oss.str(), MSGLVL_ERROR );
return ERR_FILE_CHOWN;
}
if (pwd)
tableInfo->setUIDGID(pwd->pw_uid, pwd->pw_gid);
if (fMaxErrors != -1)
tableInfo->setMaxErrorRows(fMaxErrors);
else
@ -685,6 +704,9 @@ int BulkLoad::preProcess( Job& job, int tableNo,
pDBRootExtentTracker,
tableInfo);
if (pwd)
info->setUIDGID(pwd->pw_uid, pwd->pw_gid);
// For auto increment column, we need to get the starting value
if (info->column.autoIncFlag)
{
@ -921,7 +943,7 @@ int BulkLoad::preProcessHwmLbid(
return rc;
}
//------------------------------------------------------------------------------
// DESCRIPTION:
// NO_ERROR if success

View File

@ -160,6 +160,7 @@ public:
void setS3Bucket ( const std::string& bucket );
void setS3Host ( const std::string& host );
void setS3Region ( const std::string& region );
void setUsername ( const std::string& username );
// Timer functions
void startTimer ( );
void stopTimer ( );
@ -244,6 +245,7 @@ private:
std::string fS3Host; // S3 Host
std::string fS3Bucket; // S3 Bucket
std::string fS3Region; // S3 Region
std::string fUsername; // data files owner name mysql by default
//--------------------------------------------------------------------------
// Private Functions
@ -536,6 +538,11 @@ inline void BulkLoad::setS3Region( const std::string& region )
fS3Region = region;
}
// Stores the name of the user who should own the data files (MCOL-4328).
// preProcess() later looks this name up with getpwnam().
inline void BulkLoad::setUsername( const std::string& username )
{
    fUsername.assign( username );
}
inline void BulkLoad::startTimer( )
{
gettimeofday( &fStartTime, 0 );

View File

@ -458,6 +458,9 @@ int ColumnInfo::createDelayedFileIfNeeded( const std::string& tableName )
}
boost::scoped_ptr<Dctnry> refDctnry(tempD);
// MCOL-4328 Define a file owner uid and gid
refDctnry->setUIDGID(this);
rc = tempD->createDctnry(
column.dctnry.dctnryOid,
column.dctnryWidth,
@ -1681,6 +1684,7 @@ int ColumnInfo::openDctnryStore( bool bMustExist )
fStore->setLogger(fLog);
fStore->setColWidth( column.dctnryWidth );
fStore->setUIDGID(this);
if (column.fWithDefault)
fStore->setDefault( column.fDefaultChr );

View File

@ -123,8 +123,9 @@ struct LockInfo
/** @brief Maintains information about a DB column.
*/
struct ColumnInfo
class ColumnInfo: public WeUIDGID
{
public:
//--------------------------------------------------------------------------
// Public Data Members
//--------------------------------------------------------------------------
@ -397,6 +398,8 @@ struct ColumnInfo
*/
unsigned rowsPerExtent( );
void setUIDGID(const uid_t uid, const gid_t gid) override;
protected:
//--------------------------------------------------------------------------
@ -507,6 +510,13 @@ protected:
//------------------------------------------------------------------------------
// Inline functions
//------------------------------------------------------------------------------
// Records the owner uid/gid in this ColumnInfo through the WeUIDGID base and,
// when colOp is set, passes this object on to colOp->setUIDGID().
inline void ColumnInfo::setUIDGID(const uid_t p_uid, const gid_t p_gid)
{
    WeUIDGID::setUIDGID(p_uid, p_gid);

    // Nothing to forward to until a column operation object is attached.
    if (!colOp)
        return;

    colOp->setUIDGID(this);
}
inline boost::mutex& ColumnInfo::colMutex()
{
return fColMutex;

View File

@ -2428,6 +2428,8 @@ int TableInfo::saveBulkRollbackMetaData( Job& job,
} // end of loop through columns
fRBMetaWriter.setUIDGID(this);
try
{
fRBMetaWriter.saveBulkRollbackMetaData(

View File

@ -55,7 +55,7 @@ namespace WriteEngine
/* @brief Class which maintains the information for a table.
*/
class TableInfo
class TableInfo : public WeUIDGID
{
private:

View File

@ -215,6 +215,16 @@ int Dctnry::createDctnry( const OID& dctnryOID, int colWidth,
// if obsolete file exists, "w+b" will truncate and write over
m_dFile = createDctnryFile(fileName, colWidth, "w+b", DEFAULT_BUFSIZ);
{
// We presume the path will contain /
std::string filePath(fileName);
std::ostringstream ossChown;
if (chownDataFileDir(ossChown, filePath))
{
return ERR_FILE_CHOWN;
}
}
}
else
{

View File

@ -248,7 +248,6 @@ public:
return createDctnryFile(name, width, mode, ioBuffSize);
}
//------------------------------------------------------------------------------
// Protected members
//------------------------------------------------------------------------------

View File

@ -153,6 +153,7 @@ const int ERR_VB_FILE_NOT_EXIST = ERR_FILEBASE + 17;// Version buffer file n
const int ERR_FILE_FLUSH = ERR_FILEBASE + 18;// Error flushing file
const int ERR_FILE_GLOBBING = ERR_FILEBASE + 19;// Error globbing a file name
const int ERR_FILE_EOF = ERR_FILEBASE + 20;// EOF
const int ERR_FILE_CHOWN = ERR_FILEBASE + 21;// Error changing owner (chown) of a file or its directory
//--------------------------------------------------------------------------
// XML level error

View File

@ -785,10 +785,18 @@ int FileOp::extendFile(
// if obsolete file exists, "w+b" will truncate and write over
pFile = openFile( fileName, "w+b" );//new file
if (pFile == 0)
return ERR_FILE_CREATE;
{
// We presume the path will contain /
std::string filePath(fileName);
std::ostringstream ossChown;
if (chownDataFileDir(ossChown, filePath))
return ERR_FILE_CHOWN;
}
newFile = true;
if ( isDebug(DEBUG_1) && getLogger() )
@ -2923,5 +2931,24 @@ void FileOp::setFixFlag(bool isFix)
{
m_isFix = isFix;
}
bool FileOp::chownDataFileDir(std::ostringstream& error,
const std::string& fileName)
{
std::string dirName = fileName.substr(0, fileName.find_last_of('/'));
if (chownFileDir(error, fileName, dirName))
{
logging::Message::Args args;
logging::Message message(1);
args.add(error.str());
message.format(args);
logging::LoggingID lid(SUBSYSTEM_ID_WE_BULK);
logging::MessageLog ml(lid);
ml.logErrorMessage( message );
return true;
}
return false;
}
} //end of namespace

View File

@ -59,7 +59,7 @@ namespace WriteEngine
{
/** Class FileOp */
class FileOp : public BlockOp
class FileOp : public BlockOp, public WeUIDGID
{
public:
/**
@ -502,6 +502,10 @@ public:
bool bAbbrevExtent,
bool bOptExtension=false );
// Calls a chown and logs an error message
bool chownDataFileDir(std::ostringstream& error,
const std::string& fileName);
protected:
EXPORT virtual int updateColumnExtent(IDBDataFile* pFile, int nBlocks);
EXPORT virtual int updateDctnryExtent(IDBDataFile* pFile, int nBlocks);

View File

@ -452,6 +452,12 @@ std::string RBMetaWriter::openMetaFile ( uint16_t dbRoot )
throw WeException( oss.str(), ERR_FILE_OPEN );
}
{
std::ostringstream ossChown;
if (chownFileDir(ossChown, tmpMetaFileName, bulkRollbackPath))
throw WeException(ossChown.str(), ERR_FILE_CHOWN);
}
fMetaDataStream <<
"# VERSION: 4" << std::endl <<
"# APPLICATION: " << fAppDesc << std::endl <<
@ -1196,6 +1202,7 @@ int RBMetaWriter::writeHWMChunk(
std::ostringstream ossFile;
ossFile << "/" << columnOID << ".p" << partition << ".s" << segment;
std::string fileName;
std::string dirPath;
int rc = getSubDirPath( dbRoot, fileName );
if (rc != NO_ERROR)
@ -1207,6 +1214,8 @@ int RBMetaWriter::writeHWMChunk(
return ERR_METADATABKUP_COMP_OPEN_BULK_BKUP;
}
dirPath = fileName;
fileName += ossFile.str();
std::string fileNameTmp = fileName;
@ -1325,9 +1334,14 @@ int RBMetaWriter::writeHWMChunk(
return ERR_METADATABKUP_COMP_RENAME;
}
{
std::ostringstream ossChown;
if (chownFileDir(ossChown, fileName, dirPath))
throw WeException(ossChown.str(), ERR_FILE_CHOWN);
}
return NO_ERROR;
}
//------------------------------------------------------------------------------
// Returns the directory path to be used for storing any backup data files.
//

View File

@ -136,7 +136,7 @@ typedef std::set< RBChunkInfo, RBChunkInfoCompare > RBChunkSet;
* parallel by several threads for different dictionary columns.
*/
//------------------------------------------------------------------------------
class RBMetaWriter
class RBMetaWriter: public WeUIDGID
{
public:

View File

@ -30,6 +30,8 @@
#define _WE_TYPEEXT_H_
#include <pwd.h>
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>
#include <cerrno>
#include <sstream>
#include <string>
/** Namespace WriteEngine */
namespace WriteEngine
@ -55,6 +57,53 @@ struct Token
}
};
// Sentinel ids meaning "no owner configured"; chownFileDir() is a no-op
// while the stored uid equals UID_NONE.
constexpr uid_t UID_NONE = static_cast<uid_t>(-1);
constexpr gid_t GID_NONE = static_cast<gid_t>(-1);

/** @brief Holds the uid/gid that created files and directories are chown()-ed to.
 *
 *  Classes that create files inherit from this class and call chownFileDir()
 *  after creating a file. Both ids start out as UID_NONE/GID_NONE.
 */
class WeUIDGID
{
public:
    WeUIDGID(): uid(UID_NONE), gid(GID_NONE) {}
    virtual ~WeUIDGID() = default;

    /** @brief Store the given ids.
     *  Virtual so that a derived class can override it.
     */
    virtual void setUIDGID(const uid_t uid, const gid_t gid);

    /** @brief Copy the ids from another holder.
     *  Does nothing when id is null or its uid is UID_NONE.
     */
    void setUIDGID(const WeUIDGID* id);

    /** @brief chown() a file and then its directory to the stored ids.
     *  @param error    receives a description of the failure, if any
     *  @param fileName path of the file to chown
     *  @param dirName  path of the directory to chown
     *  @return true on failure, false on success or when no uid is set
     */
    bool chownFileDir(std::ostringstream& error,
                      const std::string& fileName,
                      const std::string& dirName) const;

private:
    uid_t uid;
    gid_t gid;
};

inline void WeUIDGID::setUIDGID(const uid_t p_uid, const gid_t p_gid)
{
    uid = p_uid;
    gid = p_gid;
}

inline void WeUIDGID::setUIDGID(const WeUIDGID* id)
{
    // Copy the two ids explicitly rather than assigning the whole object, and
    // tolerate a null source instead of dereferencing it.
    if (id != nullptr && id->uid != UID_NONE)
    {
        uid = id->uid;
        gid = id->gid;
    }
}

inline bool WeUIDGID::chownFileDir(std::ostringstream& error,
                                   const std::string& fileName,
                                   const std::string& dirName) const
{
    if (uid == UID_NONE)
        return false;

    // Remember which path failed so the message names the right one; the
    // directory is only attempted after the file succeeded.
    const std::string* failedPath = nullptr;
    errno = 0;

    if (chown(fileName.c_str(), uid, gid) == -1)
        failedPath = &fileName;
    else if (chown(dirName.c_str(), uid, gid) == -1)
        failedPath = &dirName;

    if (failedPath == nullptr)
        return false;

    // Capture errno before any stream operation has a chance to change it.
    const int savedErrno = errno;
    error << "Error calling chown() with uid " << uid
          << " and gid " << gid << " with the file "
          << *failedPath << " with errno " << savedErrno;
    return true;
}
} //end of namespace

View File

@ -145,6 +145,9 @@ std::string WECmdArgs::getCpImportCmdLine()
aSS << " -f " << fPmFilePath;
}
if (fUsername.length() > 0)
aSS << " -U " << fUsername;
if (fJobId.length() > 0)
aSS << " -j " << fJobId;
@ -502,7 +505,7 @@ void WECmdArgs::usage()
cout << "\t\t [-r readers] [-j JobID] [-e maxErrs] [-B libBufSize] [-w parsers]\n";
cout << "\t\t [-s c] [-E enclosedChar] [-C escapeChar] [-n NullOption]\n";
cout << "\t\t [-q batchQty] [-p jobPath] [-P list of PMs] [-S] [-i] [-v verbose]\n";
cout << "\t\t [-I binaryOpt] [-T timeZone]\n";
cout << "\t\t [-I binaryOpt] [-T timeZone] [-U username]\n";
cout << "Traditional usage without positional parameters (XML job file required):\n";
@ -511,7 +514,7 @@ void WECmdArgs::usage()
cout << "\t\t [-b readBufs] [-p path] [-c readBufSize] [-e maxErrs] [-B libBufSize]\n";
cout << "\t\t [-n NullOption] [-E encloseChar] [-C escapeChar] [-i] [-v verbose]\n";
cout << "\t\t [-d debugLevel] [-q batchQty] [-l loadFile] [-P list of PMs] [-S]\n";
cout << "\t\t [-I binaryOpt] [-T timeZone]\n";
cout << "\t\t [-I binaryOpt] [-T timeZone] [-U username]\n";
cout << "\n\nPositional parameters:\n";
cout << "\tdbName Name of the database to load\n";
@ -563,7 +566,8 @@ void WECmdArgs::usage()
<< "\t-K\tS3 Authentication Secret (for S3 imports)\n"
<< "\t-t\tS3 Bucket (for S3 imports)\n"
<< "\t-H\tS3 Hostname (for S3 imports, Amazon's S3 default)\n"
<< "\t-g\tS3 Region (for S3 imports)\n";
<< "\t-g\tS3 Region (for S3 imports)\n"
<< "\t-U\tusername of the data files owner. Default is mysql\n";
cout << "\nExample1: Traditional usage\n"
<< "\tcpimport -j 1234";
@ -597,19 +601,14 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
if (argc > 0)
fPrgmName = "cpimport.bin"; //argv[0] is splitter but we need cpimport
//Just for testing cpimport invoking in UM
//if(argc>0)
// fPrgmName = "/home/bpaul/genii/export/bin/cpimport";
while ((aCh = getopt(argc, argv,
"d:j:w:s:v:l:r:b:e:B:f:q:ihm:E:C:P:I:n:p:c:ST:Ny:K:t:H:g:"))
"d:j:w:s:v:l:r:b:e:B:f:q:ihm:E:C:P:I:n:p:c:ST:Ny:K:t:H:g:U:"))
!= EOF)
{
switch (aCh)
{
case 'm':
{
//fMode = atoi(optarg);
fArgMode = atoi(optarg);
//cout << "Mode level set to " << fMode << endl;
@ -937,6 +936,12 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
break;
}
case 'U': //-U username of the files owner
{
fUsername = optarg;
break;
}
default:
{
std::string aErr = "Unknown command line option " + aCh;

View File

@ -180,6 +180,8 @@ public:
{
fbTruncationAsError = bTruncationAsError;
}
void setUsername(const std::string& username);
bool isJobLogOnly() const
{
return fJobLogOnly;
@ -266,7 +268,7 @@ public:
{
return fS3Region;
}
std::string& getUsername();
std::string PrepMode2ListOfFiles(std::string& FileName); // Bug 4342
void getColumnList( std::set<std::string>& columnList ) const;
@ -325,10 +327,20 @@ private: // variables for SplitterApp
bool fbTruncationAsError; // Treat string truncation as error
boost::uuids::uuid fUUID;
bool fConsoleOutput; // If false, no output to console.
std::string fTimeZone; // Timezone to use for TIMESTAMP datatype
std::string fTimeZone; // Timezone to use for TIMESTAMP datatype
std::string fUsername; // Username of the data files owner
};
//----------------------------------------------------------------------
// Stores the username of the data files owner.
inline void WECmdArgs::setUsername(const std::string& username)
{
    fUsername.assign(username);
}
inline std::string& WECmdArgs::getUsername()
{
return fUsername;
}
}

View File

@ -520,13 +520,6 @@ void WESplitterApp::updateWithJobFile(int aIdx)
int main(int argc, char** argv)
{
std::string err;
// Why do we need this if we don't care about f()'s rc ?
// @BUG4343
if( setuid( 0 ) < 0 )
{
std::cerr << " we_splitterapp: couldn't set uid " << std::endl;
}
std::cin.sync_with_stdio(false);
try