You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
Fixed a number of bugs in storage manager, and added code to detect
and recover from being killed while writing new objects. Conflicts: storage-manager/src/Synchronizer.cpp
This commit is contained in:
@ -355,6 +355,20 @@ void Cache::configListener()
|
|||||||
logger->log(LOG_CRIT, "Cache/cache_size is not a number. Using current value = %zi",maxCacheSize);
|
logger->log(LOG_CRIT, "Cache/cache_size is not a number. Using current value = %zi",maxCacheSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Cache::repopulate()
|
||||||
|
{
|
||||||
|
boost::unique_lock<boost::mutex> sl(lru_mutex);
|
||||||
|
|
||||||
|
for (auto &pcache : prefixCaches)
|
||||||
|
pcache.second->repopulate();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cache::repopulate(const boost::filesystem::path &p)
|
||||||
|
{
|
||||||
|
getPCache(p).repopulate();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -92,6 +92,11 @@ class Cache : public boost::noncopyable , public ConfigListener
|
|||||||
void shutdown();
|
void shutdown();
|
||||||
void printKPIs() const;
|
void printKPIs() const;
|
||||||
|
|
||||||
|
// Used to update accounting variables in the PrefixCaches when a potential error
|
||||||
|
// is detected.
|
||||||
|
void repopulate();
|
||||||
|
void repopulate(const boost::filesystem::path &prefix);
|
||||||
|
|
||||||
// test helpers
|
// test helpers
|
||||||
const boost::filesystem::path &getCachePath() const;
|
const boost::filesystem::path &getCachePath() const;
|
||||||
const boost::filesystem::path &getJournalPath() const;
|
const boost::filesystem::path &getJournalPath() const;
|
||||||
|
@ -505,6 +505,8 @@ ssize_t IOCoordinator::_write(const boost::filesystem::path &filename, const uin
|
|||||||
dataRemaining -= err;
|
dataRemaining -= err;
|
||||||
count += err;
|
count += err;
|
||||||
iocBytesWritten += err;
|
iocBytesWritten += err;
|
||||||
|
// get a new name for the object
|
||||||
|
newObject.key = metadata.getNewKeyFromOldKey(newObject.key, err + objectOffset);
|
||||||
metadata.updateEntryLength(newObject.offset, (err + objectOffset));
|
metadata.updateEntryLength(newObject.offset, (err + objectOffset));
|
||||||
cache->newObject(firstDir, newObject.key,err + objectOffset);
|
cache->newObject(firstDir, newObject.key,err + objectOffset);
|
||||||
newObjectKeys.push_back(newObject.key);
|
newObjectKeys.push_back(newObject.key);
|
||||||
@ -634,14 +636,17 @@ ssize_t IOCoordinator::append(const char *_filename, const uint8_t *data, size_t
|
|||||||
count += err;
|
count += err;
|
||||||
dataRemaining -= err;
|
dataRemaining -= err;
|
||||||
iocBytesWritten += err;
|
iocBytesWritten += err;
|
||||||
|
if (err < (int64_t) writeLength)
|
||||||
|
{
|
||||||
|
newObject.key = metadata.getNewKeyFromOldKey(newObject.key, err + newObject.offset);
|
||||||
|
metadata.updateEntry(newObject.offset, newObject.key, err + newObject.offset);
|
||||||
|
}
|
||||||
cache->newObject(firstDir, newObject.key,err);
|
cache->newObject(firstDir, newObject.key,err);
|
||||||
newObjectKeys.push_back(newObject.key);
|
newObjectKeys.push_back(newObject.key);
|
||||||
|
|
||||||
if (err < (int64_t) writeLength)
|
if (err < (int64_t) writeLength)
|
||||||
{
|
{
|
||||||
//logger->log(LOG_ERR,"IOCoordinator::append(): newObject failed to complete write, %u of %u bytes written.",count,length);
|
//logger->log(LOG_ERR,"IOCoordinator::append(): newObject failed to complete write, %u of %u bytes written.",count,length);
|
||||||
// make the object reflect length actually written
|
|
||||||
metadata.updateEntryLength(newObject.offset, err);
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1135,7 +1140,10 @@ boost::shared_array<uint8_t> IOCoordinator::mergeJournal(const char *object, con
|
|||||||
|
|
||||||
objFD = ::open(object, O_RDONLY);
|
objFD = ::open(object, O_RDONLY);
|
||||||
if (objFD < 0)
|
if (objFD < 0)
|
||||||
|
{
|
||||||
|
*_bytesReadOut = 0;
|
||||||
return ret;
|
return ret;
|
||||||
|
}
|
||||||
ScopedCloser s1(objFD);
|
ScopedCloser s1(objFD);
|
||||||
|
|
||||||
ret.reset(new uint8_t[len]);
|
ret.reset(new uint8_t[len]);
|
||||||
@ -1148,11 +1156,12 @@ boost::shared_array<uint8_t> IOCoordinator::mergeJournal(const char *object, con
|
|||||||
int err = ::read(objFD, &ret[count], len - count);
|
int err = ::read(objFD, &ret[count], len - count);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
{
|
{
|
||||||
char buf[80];
|
|
||||||
logger->log(LOG_CRIT, "IOC::mergeJournal(): failed to read %s, got '%s'", object, strerror_r(errno, buf, 80));
|
|
||||||
int l_errno = errno;
|
int l_errno = errno;
|
||||||
|
char buf[80];
|
||||||
|
logger->log(LOG_CRIT, "IOC::mergeJournal(): failed to read %s, got '%s'", object, strerror_r(l_errno, buf, 80));
|
||||||
ret.reset();
|
ret.reset();
|
||||||
errno = l_errno;
|
errno = l_errno;
|
||||||
|
*_bytesReadOut = count;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
else if (err == 0)
|
else if (err == 0)
|
||||||
@ -1171,17 +1180,18 @@ boost::shared_array<uint8_t> IOCoordinator::mergeJournal(const char *object, con
|
|||||||
size_t mjimBytesRead = 0;
|
size_t mjimBytesRead = 0;
|
||||||
int mjimerr = mergeJournalInMem(ret, len, journal, &mjimBytesRead);
|
int mjimerr = mergeJournalInMem(ret, len, journal, &mjimBytesRead);
|
||||||
if (mjimerr)
|
if (mjimerr)
|
||||||
{
|
|
||||||
ret.reset();
|
ret.reset();
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
l_bytesRead += mjimBytesRead;
|
l_bytesRead += mjimBytesRead;
|
||||||
|
*_bytesReadOut = l_bytesRead;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
journalFD = ::open(journal, O_RDONLY);
|
journalFD = ::open(journal, O_RDONLY);
|
||||||
if (journalFD < 0)
|
if (journalFD < 0)
|
||||||
|
{
|
||||||
|
*_bytesReadOut = l_bytesRead;
|
||||||
return ret;
|
return ret;
|
||||||
|
}
|
||||||
ScopedCloser s2(journalFD);
|
ScopedCloser s2(journalFD);
|
||||||
|
|
||||||
boost::shared_array<char> headertxt = seekToEndOfHeader1(journalFD, &l_bytesRead);
|
boost::shared_array<char> headertxt = seekToEndOfHeader1(journalFD, &l_bytesRead);
|
||||||
@ -1219,17 +1229,21 @@ boost::shared_array<uint8_t> IOCoordinator::mergeJournal(const char *object, con
|
|||||||
err = ::read(journalFD, &ret[startReadingAt - offset + count], lengthOfRead - count);
|
err = ::read(journalFD, &ret[startReadingAt - offset + count], lengthOfRead - count);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
{
|
{
|
||||||
|
int l_errno = errno;
|
||||||
char buf[80];
|
char buf[80];
|
||||||
logger->log(LOG_ERR, "mergeJournal: got %s", strerror_r(errno, buf, 80));
|
logger->log(LOG_ERR, "mergeJournal: got %s", strerror_r(l_errno, buf, 80));
|
||||||
ret.reset();
|
ret.reset();
|
||||||
return ret;
|
errno = l_errno;
|
||||||
|
l_bytesRead += count;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
else if (err == 0)
|
else if (err == 0)
|
||||||
{
|
{
|
||||||
logger->log(LOG_ERR, "mergeJournal: got early EOF. offset=%ld, len=%ld, jOffset=%ld, jLen=%ld,"
|
logger->log(LOG_ERR, "mergeJournal: got early EOF. offset=%ld, len=%ld, jOffset=%ld, jLen=%ld,"
|
||||||
" startReadingAt=%ld, lengthOfRead=%ld", offset, len, offlen[0], offlen[1], startReadingAt, lengthOfRead);
|
" startReadingAt=%ld, lengthOfRead=%ld", offset, len, offlen[0], offlen[1], startReadingAt, lengthOfRead);
|
||||||
ret.reset();
|
ret.reset();
|
||||||
return ret;
|
l_bytesRead += count;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
count += err;
|
count += err;
|
||||||
}
|
}
|
||||||
@ -1243,6 +1257,7 @@ boost::shared_array<uint8_t> IOCoordinator::mergeJournal(const char *object, con
|
|||||||
// skip over this journal entry
|
// skip over this journal entry
|
||||||
::lseek(journalFD, offlen[1], SEEK_CUR);
|
::lseek(journalFD, offlen[1], SEEK_CUR);
|
||||||
}
|
}
|
||||||
|
out:
|
||||||
*_bytesReadOut = l_bytesRead;
|
*_bytesReadOut = l_bytesRead;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -127,12 +127,20 @@ PrefixCache::~PrefixCache()
|
|||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
void PrefixCache::populate()
|
void PrefixCache::repopulate()
|
||||||
|
{
|
||||||
|
lru_mutex.lock();
|
||||||
|
populate(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void PrefixCache::populate(bool useSync)
|
||||||
{
|
{
|
||||||
Synchronizer *sync = Synchronizer::get();
|
Synchronizer *sync = Synchronizer::get();
|
||||||
bf::directory_iterator dir(cachePrefix);
|
bf::directory_iterator dir(cachePrefix);
|
||||||
bf::directory_iterator dend;
|
bf::directory_iterator dend;
|
||||||
vector<string> newObjects;
|
vector<string> newObjects;
|
||||||
|
lru.clear();
|
||||||
|
m_lru.clear();
|
||||||
while (dir != dend)
|
while (dir != dend)
|
||||||
{
|
{
|
||||||
// put everything in lru & m_lru
|
// put everything in lru & m_lru
|
||||||
@ -143,13 +151,15 @@ void PrefixCache::populate()
|
|||||||
auto last = lru.end();
|
auto last = lru.end();
|
||||||
m_lru.insert(--last);
|
m_lru.insert(--last);
|
||||||
currentCacheSize += bf::file_size(*dir);
|
currentCacheSize += bf::file_size(*dir);
|
||||||
newObjects.push_back(p.filename().string());
|
if (useSync)
|
||||||
|
newObjects.push_back(p.filename().string());
|
||||||
}
|
}
|
||||||
else if (p != cachePrefix/downloader->getTmpPath())
|
else if (p != cachePrefix/downloader->getTmpPath())
|
||||||
logger->log(LOG_WARNING, "Cache: found something in the cache that does not belong '%s'", p.string().c_str());
|
logger->log(LOG_WARNING, "Cache: found something in the cache that does not belong '%s'", p.string().c_str());
|
||||||
++dir;
|
++dir;
|
||||||
}
|
}
|
||||||
sync->newObjects(firstDir, newObjects);
|
if (useSync)
|
||||||
|
sync->newObjects(firstDir, newObjects);
|
||||||
newObjects.clear();
|
newObjects.clear();
|
||||||
|
|
||||||
// account for what's in the journal dir
|
// account for what's in the journal dir
|
||||||
@ -164,7 +174,8 @@ void PrefixCache::populate()
|
|||||||
{
|
{
|
||||||
size_t s = bf::file_size(*dir);
|
size_t s = bf::file_size(*dir);
|
||||||
currentCacheSize += s;
|
currentCacheSize += s;
|
||||||
newJournals.push_back(pair<string, size_t>(p.stem().string(), s));
|
if (useSync)
|
||||||
|
newJournals.push_back(pair<string, size_t>(p.stem().string(), s));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
logger->log(LOG_WARNING, "Cache: found a file in the journal dir that does not belong '%s'", p.string().c_str());
|
logger->log(LOG_WARNING, "Cache: found a file in the journal dir that does not belong '%s'", p.string().c_str());
|
||||||
@ -174,7 +185,8 @@ void PrefixCache::populate()
|
|||||||
++dir;
|
++dir;
|
||||||
}
|
}
|
||||||
lru_mutex.unlock();
|
lru_mutex.unlock();
|
||||||
sync->newJournalEntries(firstDir, newJournals);
|
if (useSync)
|
||||||
|
sync->newJournalEntries(firstDir, newJournals);
|
||||||
}
|
}
|
||||||
|
|
||||||
// be careful using this! SM should be idle. No ongoing reads or writes.
|
// be careful using this! SM should be idle. No ongoing reads or writes.
|
||||||
@ -380,14 +392,25 @@ void PrefixCache::newJournalEntry(size_t size)
|
|||||||
void PrefixCache::deletedJournal(size_t size)
|
void PrefixCache::deletedJournal(size_t size)
|
||||||
{
|
{
|
||||||
boost::unique_lock<boost::mutex> s(lru_mutex);
|
boost::unique_lock<boost::mutex> s(lru_mutex);
|
||||||
assert(currentCacheSize >= size);
|
|
||||||
currentCacheSize -= size;
|
//assert(currentCacheSize >= size);
|
||||||
|
if (currentCacheSize >= size)
|
||||||
|
currentCacheSize -= size;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ostringstream oss;
|
||||||
|
oss << "PrefixCache::deletedJournal(): Detected an accounting error." <<
|
||||||
|
" Reloading cache metadata, this will pause IO activity briefly.";
|
||||||
|
logger->log(LOG_WARNING, oss.str().c_str());
|
||||||
|
populate(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PrefixCache::deletedObject(const string &key, size_t size)
|
void PrefixCache::deletedObject(const string &key, size_t size)
|
||||||
{
|
{
|
||||||
boost::unique_lock<boost::mutex> s(lru_mutex);
|
boost::unique_lock<boost::mutex> s(lru_mutex);
|
||||||
assert(currentCacheSize >= size);
|
|
||||||
|
//assert(currentCacheSize >= size);
|
||||||
M_LRU_t::iterator mit = m_lru.find(key);
|
M_LRU_t::iterator mit = m_lru.find(key);
|
||||||
assert(mit != m_lru.end());
|
assert(mit != m_lru.end());
|
||||||
|
|
||||||
@ -397,7 +420,16 @@ void PrefixCache::deletedObject(const string &key, size_t size)
|
|||||||
doNotEvict.erase(mit->lit);
|
doNotEvict.erase(mit->lit);
|
||||||
lru.erase(mit->lit);
|
lru.erase(mit->lit);
|
||||||
m_lru.erase(mit);
|
m_lru.erase(mit);
|
||||||
currentCacheSize -= size;
|
if (currentCacheSize >= size)
|
||||||
|
currentCacheSize -= size;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ostringstream oss;
|
||||||
|
oss << "PrefixCache::deletedObject(): Detected an accounting error." <<
|
||||||
|
" Reloading cache metadata, this will pause IO activity briefly.";
|
||||||
|
logger->log(LOG_WARNING, oss.str().c_str());
|
||||||
|
populate(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,6 +77,11 @@ class PrefixCache : public boost::noncopyable
|
|||||||
size_t getMaxCacheSize() const;
|
size_t getMaxCacheSize() const;
|
||||||
void shutdown();
|
void shutdown();
|
||||||
|
|
||||||
|
// clears out cache structures and reloads them from cache/journal dir contents
|
||||||
|
// needed to potentially repair the cache's accounting error after detecting
|
||||||
|
// an error.
|
||||||
|
void repopulate();
|
||||||
|
|
||||||
// test helpers
|
// test helpers
|
||||||
const boost::filesystem::path &getCachePath();
|
const boost::filesystem::path &getCachePath();
|
||||||
const boost::filesystem::path &getJournalPath();
|
const boost::filesystem::path &getJournalPath();
|
||||||
@ -97,7 +102,9 @@ class PrefixCache : public boost::noncopyable
|
|||||||
SMLogging *logger;
|
SMLogging *logger;
|
||||||
Downloader *downloader;
|
Downloader *downloader;
|
||||||
|
|
||||||
void populate();
|
// useSync makes populate() tell Synchronizer about what it finds.
|
||||||
|
// set it to false when the system is already fully up.
|
||||||
|
void populate(bool useSync = true);
|
||||||
void _makeSpace(size_t size);
|
void _makeSpace(size_t size);
|
||||||
|
|
||||||
/* The main PrefixCache structures */
|
/* The main PrefixCache structures */
|
||||||
|
@ -128,7 +128,7 @@ int Replicator::newObject(const boost::filesystem::path &filename, const uint8_t
|
|||||||
OPEN(objectFilename.c_str(), O_WRONLY | O_CREAT);
|
OPEN(objectFilename.c_str(), O_WRONLY | O_CREAT);
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
while (count < length) {
|
while (count < length) {
|
||||||
err = ::pwrite(fd, &data[count], length - count, offset);
|
err = ::pwrite(fd, &data[count], length - count, offset + count);
|
||||||
if (err <= 0)
|
if (err <= 0)
|
||||||
{
|
{
|
||||||
if (count > 0) // return what was successfully written
|
if (count > 0) // return what was successfully written
|
||||||
|
@ -683,7 +683,7 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
|
|||||||
|
|
||||||
while (count < size)
|
while (count < size)
|
||||||
{
|
{
|
||||||
err = ::write(newFD, data.get(), size - count);
|
err = ::write(newFD, &data[count], size - count);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
{
|
{
|
||||||
::unlink(newCachePath.string().c_str());
|
::unlink(newCachePath.string().c_str());
|
||||||
@ -693,8 +693,19 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
|
|||||||
count += err;
|
count += err;
|
||||||
}
|
}
|
||||||
numBytesWritten += size;
|
numBytesWritten += size;
|
||||||
assert(bf::file_size(oldCachePath) == MetadataFile::getLengthFromKey(cloudKey));
|
|
||||||
|
//assert(bf::file_size(oldCachePath) == MetadataFile::getLengthFromKey(cloudKey));
|
||||||
cache->rename(prefix, cloudKey, newCloudKey, size - MetadataFile::getLengthFromKey(cloudKey));
|
cache->rename(prefix, cloudKey, newCloudKey, size - MetadataFile::getLengthFromKey(cloudKey));
|
||||||
|
if (bf::file_size(oldCachePath) != MetadataFile::getLengthFromKey(cloudKey))
|
||||||
|
{
|
||||||
|
ostringstream oss;
|
||||||
|
oss << "Synchronizer::synchronizeWithJournal(): detected a mismatch between file size and " <<
|
||||||
|
"length stored in the object name. object name = " << cloudKey << " length-in-name = " <<
|
||||||
|
MetadataFile::getLengthFromKey(cloudKey) << " real-length = " << bf::file_size(oldCachePath)
|
||||||
|
<< ". Reloading cache metadata, this will pause IO activity briefly.";
|
||||||
|
logger->log(LOG_WARNING, oss.str().c_str());
|
||||||
|
cache->repopulate(prefix);
|
||||||
|
}
|
||||||
replicator->remove(oldCachePath);
|
replicator->remove(oldCachePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user