1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-12-15 12:09:09 +03:00

Several fixes and improvements:

- normalized pathnames passed to the IOCoordinator functions
- fixed a race that could leave files orphaned in cloud storage
- fixed a couple of small things in MetadataFile
- added metadata sanity checks to the Synchronizer functions to detect
  truncation and deletion before they do work
This commit is contained in:
Patrick LeBlanc
2019-05-30 10:56:45 -05:00
parent b85030b164
commit 3fe2a4859c
5 changed files with 109 additions and 41 deletions

View File

@@ -36,6 +36,7 @@ class Cache : public boost::noncopyable
// an 'atomic' existence check & delete. Covers the object and journal. Does not delete the files. // an 'atomic' existence check & delete. Covers the object and journal. Does not delete the files.
// returns 0 if it didn't exist, 1 if the object exists, 2 if the journal exists, and 3 (1 | 2) if both exist // returns 0 if it didn't exist, 1 if the object exists, 2 if the journal exists, and 3 (1 | 2) if both exist
// This should be called while holding the file lock for key because it touches the journal file.
int ifExistsThenDelete(const std::string &key); int ifExistsThenDelete(const std::string &key);
// rename is used when an old obj gets merged with its journal file // rename is used when an old obj gets merged with its journal file

View File

@@ -119,7 +119,7 @@ int IOCoordinator::loadObjectAndJournal(const char *objFilename, const char *jou
return 0; return 0;
} }
int IOCoordinator::read(const char *filename, uint8_t *data, off_t offset, size_t length) int IOCoordinator::read(const char *_filename, uint8_t *data, off_t offset, size_t length)
{ {
/* /*
This is a bit complex and verbose, so for the first cut, it will only return a partial This is a bit complex and verbose, so for the first cut, it will only return a partial
@@ -135,6 +135,8 @@ int IOCoordinator::read(const char *filename, uint8_t *data, off_t offset, size_
release read lock release read lock
put together the response in data put together the response in data
*/ */
bf::path p(_filename);
const char *filename = p.string().c_str();
ScopedReadLock fileLock(this, filename); ScopedReadLock fileLock(this, filename);
MetadataFile meta(filename, MetadataFile::no_create_t()); MetadataFile meta(filename, MetadataFile::no_create_t());
@@ -242,8 +244,11 @@ int IOCoordinator::read(const char *filename, uint8_t *data, off_t offset, size_
return count; return count;
} }
// Write 'length' bytes from 'data' at byte 'offset' of the logical file.
// The caller-supplied path is normalized before locking so that equivalent
// spellings of the same path contend on the same file lock.
int IOCoordinator::write(const char *_filename, const uint8_t *data, off_t offset, size_t length)
{
    // Keep the normalized path in a named std::string: bf::path::string() may
    // return by value (e.g. on Windows builds of Boost.Filesystem), and taking
    // c_str() of that temporary would leave 'filename' dangling for the rest
    // of this function.
    const bf::path p(_filename);
    const std::string normalized = p.string();
    const char *filename = normalized.c_str();

    ScopedWriteLock lock(this, filename);
    return _write(filename, data, offset, length);
}
@@ -372,8 +377,11 @@ int IOCoordinator::_write(const char *filename, const uint8_t *data, off_t offse
return count; return count;
} }
int IOCoordinator::append(const char *filename, const uint8_t *data, size_t length) int IOCoordinator::append(const char *_filename, const uint8_t *data, size_t length)
{ {
bf::path p(_filename);
const char *filename = p.string().c_str();
int err; int err;
size_t count = 0; size_t count = 0;
uint64_t writeLength = 0; uint64_t writeLength = 0;
@@ -474,8 +482,10 @@ int IOCoordinator::append(const char *filename, const uint8_t *data, size_t leng
} }
// TODO: might need to support more open flags, ex: O_EXCL // TODO: might need to support more open flags, ex: O_EXCL
int IOCoordinator::open(const char *filename, int openmode, struct stat *out) int IOCoordinator::open(const char *_filename, int openmode, struct stat *out)
{ {
bf::path p = _filename; // normalize it
const char *filename = p.string().c_str();
ScopedReadLock s(this, filename); ScopedReadLock s(this, filename);
MetadataFile meta(filename, MetadataFile::no_create_t()); MetadataFile meta(filename, MetadataFile::no_create_t());
@@ -513,14 +523,17 @@ int IOCoordinator::listDirectory(const char *dirname, vector<string> *listing)
return 0; return 0;
} }
// stat() the logical file: fills 'out' from the file's metadata.
// Returns whatever MetadataFile::stat() returns (0 on success).
int IOCoordinator::stat(const char *_path, struct stat *out)
{
    // Normalize the path, keeping the normalized string alive in a local so
    // the c_str() pointer stays valid while the lock and metadata use it
    // (bf::path::string() may return a temporary on some platforms).
    const bf::path p(_path);
    const std::string normalized = p.string();
    const char *path = normalized.c_str();

    ScopedReadLock s(this, path);
    MetadataFile meta(path, MetadataFile::no_create_t());
    return meta.stat(out);
}
int IOCoordinator::truncate(const char *path, size_t newSize) int IOCoordinator::truncate(const char *_path, size_t newSize)
{ {
/* /*
grab the write lock. grab the write lock.
@@ -533,6 +546,8 @@ int IOCoordinator::truncate(const char *path, size_t newSize)
tell cache they were deleted tell cache they were deleted
tell synchronizer they were deleted tell synchronizer they were deleted
*/ */
bf::path p(_path);
const char *path = p.string().c_str();
Synchronizer *synchronizer = Synchronizer::get(); // needs to init sync here to break circular dependency... Synchronizer *synchronizer = Synchronizer::get(); // needs to init sync here to break circular dependency...
@@ -559,7 +574,7 @@ int IOCoordinator::truncate(const char *path, size_t newSize)
return 0; return 0;
} }
vector<metadataObject> objects = meta.metadataRead(newSize, filesize); vector<metadataObject> objects = meta.metadataRead(newSize, filesize - newSize);
// truncate the file // truncate the file
if (newSize == objects[0].offset) if (newSize == objects[0].offset)
@@ -575,7 +590,7 @@ int IOCoordinator::truncate(const char *path, size_t newSize)
err = replicator->updateMetadata(path, meta); err = replicator->updateMetadata(path, meta);
if (err) if (err)
return err; return err;
lock.unlock(); //lock.unlock(); <-- ifExistsThenDelete() needs the file lock held during the call
uint i = (newSize == objects[0].offset ? 0 : 1); uint i = (newSize == objects[0].offset ? 0 : 1);
vector<string> deletedObjects; vector<string> deletedObjects;
@@ -608,13 +623,14 @@ void IOCoordinator::deleteMetaFile(const bf::path &file)
Synchronizer *synchronizer = Synchronizer::get(); Synchronizer *synchronizer = Synchronizer::get();
// this is kind of ugly. We need to lock on 'file' relative to metaPath, and without the .meta extension // this is kind of ugly. We need to lock on 'file' relative to metaPath, and without the .meta extension
string pita = file.string(); string pita = file.string().substr(metaPath.string().length()); // get rid of metapath
pita = pita.substr(metaPath.string().length()); pita = pita.substr(0, pita.length() - 5); // get rid of the extension
pita = pita.substr(0, pita.find_last_of('.'));
ScopedWriteLock lock(this, pita); ScopedWriteLock lock(this, pita);
//cout << "file is " << file.string() << " locked on " << pita << endl;
MetadataFile meta(file); MetadataFile meta(file);
replicator->remove(file); replicator->remove(file);
//lock.unlock(); <-- ifExistsThenDelete() needs the file lock held during the call
vector<metadataObject> objects = meta.metadataRead(0, meta.getLength()); vector<metadataObject> objects = meta.metadataRead(0, meta.getLength());
vector<string> deletedObjects; vector<string> deletedObjects;
@@ -701,7 +717,7 @@ struct CFException
string entry; string entry;
}; };
int IOCoordinator::copyFile(const char *filename1, const char *filename2) int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
{ {
/* /*
if filename2 exists, delete it if filename2 exists, delete it
@@ -724,10 +740,15 @@ int IOCoordinator::copyFile(const char *filename1, const char *filename2)
write the new metadata object write the new metadata object
*/ */
const bf::path p1(_filename1);
const bf::path p2(_filename2);
const char *filename1 = p1.string().c_str();
const char *filename2 = p2.string().c_str();
CloudStorage *cs = CloudStorage::get(); CloudStorage *cs = CloudStorage::get();
Synchronizer *sync = Synchronizer::get(); Synchronizer *sync = Synchronizer::get();
bf::path metaFile1 = metaPath/(string(filename1) + ".meta"); bf::path metaFile1 = metaPath/(p1.string() + ".meta");
bf::path metaFile2 = metaPath/(string(filename2) + ".meta"); bf::path metaFile2 = metaPath/(p2.string() + ".meta");
int err; int err;
char errbuf[80]; char errbuf[80];

View File

@@ -233,19 +233,23 @@ vector<metadataObject> MetadataFile::metadataRead(off_t offset, size_t length) c
{ {
if ((uint64_t) offset <= (i->offset + i->length - 1) || if ((uint64_t) offset <= (i->offset + i->length - 1) ||
(i->offset == lastOffset && ((uint64_t) offset <= i->offset + mpConfig->mObjectSize - 1))) (i->offset == lastOffset && ((uint64_t) offset <= i->offset + mpConfig->mObjectSize - 1)))
break; {
foundLen = (i->offset == lastOffset ? mpConfig->mObjectSize : i->length) - (offset - i->offset);
ret.push_back(*i);
++i;
break;
}
++i; ++i;
} }
// append objects until foundLen >= length or EOF
// first time thrus foundLen should be adjusted based on offset
off_t foundOffset = offset - i->offset;
while (i != mObjects.end() && foundLen < length) while (i != mObjects.end() && foundLen < length)
{ {
ret.push_back(*i); ret.push_back(*i);
foundLen += (i->length - foundOffset); foundLen += i->length;
foundOffset = 0; //zero on every other time thru this loop
++i; ++i;
} }
assert(!(offset == 0 && length == getLength()) || (ret.size() == mObjects.size()));
return ret; return ret;
} }
@@ -302,13 +306,23 @@ int MetadataFile::writeMetadata(const char *filename)
return error; return error;
} }
const metadataObject & MetadataFile::getEntry(off_t offset) bool MetadataFile::getEntry(off_t offset, const metadataObject **out) const
{ {
return *(mObjects.find(offset)); const auto &it = mObjects.find(offset);
if (it != mObjects.end())
{
*out = &(*it);
return true;
}
else
return false;
} }
void MetadataFile::removeEntry(off_t offset) void MetadataFile::removeEntry(off_t offset)
{ {
const auto &it = mObjects.find(offset);
assert(it != mObjects.end());
mObjects.erase(offset); mObjects.erase(offset);
} }

View File

@@ -54,7 +54,7 @@ class MetadataFile
void updateEntry(off_t offset, const std::string &newName, size_t newLength); void updateEntry(off_t offset, const std::string &newName, size_t newLength);
void updateEntryLength(off_t offset, size_t newLength); void updateEntryLength(off_t offset, size_t newLength);
metadataObject addMetadataObject(const char *filename, size_t length); metadataObject addMetadataObject(const char *filename, size_t length);
const metadataObject &getEntry(off_t offset); bool getEntry(off_t offset, const metadataObject **out) const;
void removeEntry(off_t offset); void removeEntry(off_t offset);
// TBD: this may have to go; there may be no use case where only the uuid needs to change. // TBD: this may have to go; there may be no use case where only the uuid needs to change.

View File

@@ -123,6 +123,8 @@ void Synchronizer::deletedObjects(const vector<string> &keys)
//makeJob(key); //makeJob(key);
pendingOps[key] = boost::shared_ptr<PendingOps>(new PendingOps(DELETE)); pendingOps[key] = boost::shared_ptr<PendingOps>(new PendingOps(DELETE));
} }
// would be good to signal to the things in opsInProgress that these were deleted. That would
// quiet down the logging somewhat. How to do that efficiently, and w/o gaps or deadlock...
} }
void Synchronizer::flushObject(const string &key) void Synchronizer::flushObject(const string &key)
@@ -272,8 +274,10 @@ void Synchronizer::process(list<string>::iterator name)
boost::shared_ptr<PendingOps> pending = it->second; boost::shared_ptr<PendingOps> pending = it->second;
bool inserted = opsInProgress.insert(*it).second; bool inserted = opsInProgress.insert(*it).second;
if (!inserted) if (!inserted)
{
objNames.erase(name);
return; // the one in pending will have to wait until the next time to avoid clobbering waiting threads return; // the one in pending will have to wait until the next time to avoid clobbering waiting threads
//opsInProgress[key] = it->second; }
string sourceFile = MetadataFile::getSourceFromKey(*name); string sourceFile = MetadataFile::getSourceFromKey(*name);
pendingOps.erase(it); pendingOps.erase(it);
s.unlock(); s.unlock();
@@ -325,7 +329,7 @@ void Synchronizer::process(list<string>::iterator name)
} }
} }
opsInProgress.erase(key); opsInProgress.erase(*name);
objNames.erase(name); objNames.erase(name);
} }
@@ -337,6 +341,22 @@ void Synchronizer::synchronize(const string &sourceFile, list<string>::iterator
char buf[80]; char buf[80];
bool exists = false; bool exists = false;
int err; int err;
MetadataFile md(sourceFile.c_str(), MetadataFile::no_create_t());
if (!md.exists())
{
logger->log(LOG_DEBUG, "synchronize(): no metadata found for %s. It must have been deleted.", sourceFile.c_str());
return;
}
const metadataObject *mdEntry;
bool entryExists = md.getEntry(MetadataFile::getOffsetFromKey(key), &mdEntry);
if (!entryExists)
{
logger->log(LOG_DEBUG, "synchronize(): %s does not exist in metadata for %s. This suggests truncation.", key.c_str(), sourceFile.c_str());
return;
}
assert(key == mdEntry->key);
err = cs->exists(key, &exists); err = cs->exists(key, &exists);
if (err) if (err)
@@ -345,6 +365,7 @@ void Synchronizer::synchronize(const string &sourceFile, list<string>::iterator
if (exists) if (exists)
return; return;
// TODO: should be safe to check with Cache instead of a file existence check
exists = bf::exists(cachePath / key); exists = bf::exists(cachePath / key);
if (!exists) if (!exists)
{ {
@@ -362,10 +383,6 @@ void Synchronizer::synchronizeDelete(const string &sourceFile, list<string>::ite
{ {
ScopedWriteLock s(ioc, sourceFile); ScopedWriteLock s(ioc, sourceFile);
cs->deleteObject(*it); cs->deleteObject(*it);
// delete any pending jobs for *it. There shouldn't be any, this is out of pure paranoia.
boost::unique_lock<boost::mutex> sc(mutex);
pendingOps.erase(*it);
} }
void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>::iterator &lit) void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>::iterator &lit)
@@ -378,10 +395,18 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
if (!md.exists()) if (!md.exists())
{ {
logger->log(LOG_DEBUG, "synchronizeWithJournal(): no metadata found for %s", sourceFile.c_str()); logger->log(LOG_DEBUG, "synchronizeWithJournal(): no metadata found for %s. It must have been deleted.", sourceFile.c_str());
return; return;
} }
const metadataObject &mdEntry = md.getEntry(MetadataFile::getOffsetFromKey(key));
const metadataObject *mdEntry;
bool metaExists = md.getEntry(MetadataFile::getOffsetFromKey(key), &mdEntry);
if (!metaExists)
{
logger->log(LOG_DEBUG, "synchronizeWithJournal(): %s does not exist in metadata for %s. This suggests truncation.", key.c_str(), sourceFile.c_str());
return;
}
assert(key == mdEntry->key);
bf::path oldCachePath = cachePath / key; bf::path oldCachePath = cachePath / key;
string journalName = (journalPath/ (key + ".journal")).string(); string journalName = (journalPath/ (key + ".journal")).string();
@@ -419,7 +444,7 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
int err; int err;
boost::shared_array<uint8_t> data; boost::shared_array<uint8_t> data;
size_t count = 0, size = mdEntry.length; size_t count = 0, size = mdEntry->length;
bool oldObjIsCached = cache->exists(key); bool oldObjIsCached = cache->exists(key);
@@ -440,14 +465,14 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
//TODO!! This sucks. Need a way to pass in a larger array to cloud storage, and also have it not //TODO!! This sucks. Need a way to pass in a larger array to cloud storage, and also have it not
// do any add'l alloc'ing or copying // do any add'l alloc'ing or copying
if (size < mdEntry.length) if (size < mdEntry->length)
{ {
boost::shared_array<uint8_t> tmp(new uint8_t[mdEntry.length]()); boost::shared_array<uint8_t> tmp(new uint8_t[mdEntry->length]());
memcpy(tmp.get(), data.get(), size); memcpy(tmp.get(), data.get(), size);
memset(&tmp[size], 0, mdEntry.length - size); memset(&tmp[size], 0, mdEntry->length - size);
data.swap(tmp); data.swap(tmp);
} }
size = mdEntry.length; // reset length. Using the metadata as a source of truth (truncation), not actual file length. size = mdEntry->length; // reset length. Using the metadata as a source of truth (truncation), not actual file length.
err = ioc->mergeJournalInMem(data, size, journalName.c_str()); err = ioc->mergeJournalInMem(data, size, journalName.c_str());
if (err) if (err)
@@ -536,10 +561,17 @@ void Synchronizer::rename(const string &oldKey, const string &newKey)
boost::unique_lock<boost::mutex> s(mutex); boost::unique_lock<boost::mutex> s(mutex);
auto it = pendingOps.find(oldKey); auto it = pendingOps.find(oldKey);
if (it == pendingOps.end()) if (it != pendingOps.end())
return; {
pendingOps[newKey] = it->second; pendingOps[newKey] = it->second;
pendingOps.erase(it); pendingOps.erase(it);
}
it = opsInProgress.find(oldKey);
if (it != opsInProgress.end())
{
opsInProgress[newKey] = it->second;
opsInProgress.erase(it);
}
for (auto &name: objNames) for (auto &name: objNames)
if (name == oldKey) if (name == oldKey)