From cca5e21c2481847c40be638fbf42bff3ddfebb9b Mon Sep 17 00:00:00 2001
From: Patrick LeBlanc
Date: Fri, 28 Jun 2019 14:22:31 -0500
Subject: [PATCH 1/9] Fixed a cache accounting error from a previous bug fix.

---
 src/Replicator.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Replicator.cpp b/src/Replicator.cpp
index a671bba25..3b0a6cef4 100755
--- a/src/Replicator.cpp
+++ b/src/Replicator.cpp
@@ -3,6 +3,7 @@
 #include "IOCoordinator.h"
 #include "SMLogging.h"
 #include "Utilities.h"
+#include "Cache.h"
 #include
 #include
 #include
@@ -134,6 +135,7 @@ int Replicator::addJournalEntry(const char *filename, const uint8_t *data, off_t
         assert((uint) err == header.length() + 1);
         if (err <= 0)
             return err;
+        Cache::get()->newJournalEntry(header.length() + 1);
     }
     else
     {

From 2722da0b40f0114b69c1f497c04ee27a1cd961f7 Mon Sep 17 00:00:00 2001
From: Patrick LeBlanc
Date: Tue, 2 Jul 2019 11:12:29 -0500
Subject: [PATCH 2/9] Checkpointing a bunch of edge-case fixes I made and
 mistakenly mushed together with the cache size consistency code.
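
As a review aid, here is a standalone sketch (NOT part of the patch) of the
recount that the new validateCacheSize() performs. It assumes an idle
StorageManager and directories holding only cache/journal files; recount()
is a stand-in for what Cache::populate() does per directory:

    #include <boost/filesystem.hpp>
    #include <cstdint>
    #include <iostream>

    namespace bf = boost::filesystem;

    // Sum the sizes of the regular files under one prefix, the way populate()
    // rebuilds currentCacheSize for the cache and journal directories.
    static uint64_t recount(const bf::path &prefix)
    {
        uint64_t total = 0;
        for (bf::directory_iterator dir(prefix), end; dir != end; ++dir)
            if (bf::is_regular_file(dir->path()))
                total += bf::file_size(dir->path());
        return total;
    }

    int main(int argc, char **argv)
    {
        if (argc < 2)
            return 1;
        std::cout << "on-disk size: " << recount(argv[1]) << std::endl;
        return 0;
    }

If this number disagrees with currentCacheSize on an idle instance, the
accounting drifted, which is exactly the discrepancy validateCacheSize() logs.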
---
 src/Cache.cpp         | 95 +++++++++++++++++++++++++++++--------------
 src/Cache.h           |  8 +++-
 src/IOCoordinator.cpp | 38 ++++++++++++-----
 src/LocalStorage.cpp  | 37 ++++++++++++++++-
 src/LocalStorage.h    |  7 +++-
 src/MetadataFile.cpp  |  5 +++
 src/MetadataFile.h    |  1 +
 src/SMLogging.cpp     |  2 +-
 src/Synchronizer.cpp  | 13 +++---
 src/Utilities.cpp     |  5 ++-
 src/Utilities.h       |  1 +
 src/main.cpp          |  1 +
 12 files changed, 160 insertions(+), 53 deletions(-)

diff --git a/src/Cache.cpp b/src/Cache.cpp
index f937c81b9..6693a0fb1 100644
--- a/src/Cache.cpp
+++ b/src/Cache.cpp
@@ -132,7 +132,7 @@ void Cache::populate()
             lru.push_back(p.filename().string());
             auto last = lru.end();
             m_lru.insert(--last);
-            currentCacheSize += bf::file_size(*dir);
+            currentCacheSize += bf::file_size(p);
         }
         else
             logger->log(LOG_WARNING, "Cache: found something in the cache that does not belong '%s'", p.string().c_str());
@@ -147,7 +147,7 @@ void Cache::populate()
         if (bf::is_regular_file(p))
         {
             if (p.extension() == ".journal")
-                currentCacheSize += bf::file_size(*dir);
+                currentCacheSize += bf::file_size(p);
             else
                 logger->log(LOG_WARNING, "Cache: found a file in the journal dir that does not belong '%s'", p.string().c_str());
         }
@@ -157,6 +157,31 @@ void Cache::populate()
     }
 }
 
+// be careful using this!  SM should be idle.  No ongoing reads or writes.
+void Cache::validateCacheSize()
+{
+    boost::unique_lock<boost::mutex> s(lru_mutex);
+
+    if (!doNotEvict.empty() || !toBeDeleted.empty())
+    {
+        cout << "Not safe to use validateCacheSize() at the moment." << endl;
+        return;
+    }
+
+    size_t oldSize = currentCacheSize;
+    currentCacheSize = 0;
+    m_lru.clear();
+    lru.clear();
+    populate();
+
+    if (oldSize != currentCacheSize)
+        logger->log(LOG_DEBUG, "Cache::validateCacheSize(): found a discrepancy.  Actual size is %lld, had %lld.",
+            currentCacheSize, oldSize);
+    else
+        logger->log(LOG_DEBUG, "Cache::validateCacheSize(): Cache size accounting agrees with reality for now.");
+}
+
+
 #if 0
 /* Need to simplify this, we keep running into sneaky problems, and I just spotted a couple more.
    Just going to rewrite it.  We can revisit later if the simplified version needs improvement */
@@ -250,7 +275,7 @@ void Cache::read(const vector<string> &keys)
         if (mit != m_lru.end())
         {
             addToDNE(mit->lit);
-            lru.splice(lru.end(), lru, mit->lit);
+            lru.splice(lru.end(), lru, mit->lit);   // move them to the back so they are last to pick for eviction
             // if it's about to be deleted, stop that
             TBD_t::iterator tbd_it = toBeDeleted.find(mit->lit);
             if (tbd_it != toBeDeleted.end())
@@ -260,14 +285,15 @@ void Cache::read(const vector<string> &keys)
             }
         }
         else
+        {
+            addToDNE(key);
             keysToFetch.push_back(&key);
+        }
     }
     if (keysToFetch.empty())
         return;
 
-    assert(s.owns_lock());
     downloader.download(keysToFetch, &dlErrnos, &dlSizes);
-    assert(s.owns_lock());
 
     size_t sum_sizes = 0;
     for (uint i = 0; i < keysToFetch.size(); ++i)
@@ -282,7 +308,6 @@
             lru.push_back(*keysToFetch[i]);
             LRU_t::iterator lit = lru.end();
             m_lru.insert(--lit);   // I dislike this way of grabbing the last iterator in a list.
-            addToDNE(lit);
         }
     }
 
@@ -290,7 +315,7 @@
     for (const string &key : keys)
     {
         mit = m_lru.find(key);
-        if (mit != m_lru.end())   // it could have been deleted by deletedObject() or ifExistsThenDelete()
+        if (mit != m_lru.end())   // all of the files exist, just not all of them are 'owned by' this thread.
             lru.splice(lru.end(), lru, mit->lit);
     }
 
@@ -319,24 +344,26 @@ void Cache::doneWriting()
 
 Cache::DNEElement::DNEElement(const LRU_t::iterator &k) : key(k), refCount(1)
 {
 }
-
-void Cache::addToDNE(const LRU_t::iterator &key)
+
+Cache::DNEElement::DNEElement(const string &k) : sKey(k), refCount(1)
+{
+}
+
+void Cache::addToDNE(const DNEElement &key)
 {
-    DNEElement e(key);
-    DNE_t::iterator it = doNotEvict.find(e);
+    DNE_t::iterator it = doNotEvict.find(key);
     if (it != doNotEvict.end())
     {
         DNEElement &dnee = const_cast<DNEElement &>(*it);
         ++(dnee.refCount);
     }
     else
-        doNotEvict.insert(e);
+        doNotEvict.insert(key);
 }
 
-void Cache::removeFromDNE(const LRU_t::iterator &key)
+void Cache::removeFromDNE(const DNEElement &key)
 {
-    DNEElement e(key);
-    DNE_t::iterator it = doNotEvict.find(e);
+    DNE_t::iterator it = doNotEvict.find(key);
     if (it == doNotEvict.end())
         return;
     DNEElement &dnee = const_cast<DNEElement &>(*it);
@@ -400,7 +427,7 @@ void Cache::deletedObject(const string &key, size_t size)
     M_LRU_t::iterator mit = m_lru.find(key);
     assert(mit != m_lru.end());   // TODO: 5/16/19 - got this assertion using S3 by running test000, then test000 again.
 
-    // if it's being flushed, let it do the deleting
+    // if it's being flushed, let makeSpace() do the deleting
     if (toBeDeleted.find(mit->lit) == toBeDeleted.end())
     {
         doNotEvict.erase(mit->lit);
@@ -456,9 +483,9 @@ void Cache::_makeSpace(size_t size)
         return;
     }
-
-    bf::path cachedFile = prefix / *it;
-    assert(bf::exists(cachedFile));
+    if (!bf::exists(prefix / *it))
+        cout << prefix / *it << " doesn't exist, WTF?" << endl;
+    assert(bf::exists(prefix / *it));
 
     /* tell Synchronizer that this key will be evicted
        delete the file
@@ -468,11 +495,12 @@ void Cache::_makeSpace(size_t size)
     //logger->log(LOG_WARNING, "Cache: flushing!");
 
     toBeDeleted.insert(it);
-
+    string key = *it;   // need to make a copy; it could be in the process of being renamed
+
     lru_mutex.unlock();
     try
     {
-        Synchronizer::get()->flushObject(*it);
+        Synchronizer::get()->flushObject(key);
     }
     catch (...)
     {
@@ -489,9 +517,9 @@ void Cache::_makeSpace(size_t size)
         // if it's still in toBeDeleted then it is safe to delete.
         // if read() happened to access it while it was flushing, it will not
         // be in that set.
-        cachedFile = prefix / *it;
-        toBeDeleted.erase(tbd_it);
+        bf::path cachedFile = prefix / *it;
         m_lru.erase(*it);
+        toBeDeleted.erase(tbd_it);
         lru.erase(it);
         size_t newSize = bf::file_size(cachedFile);
         replicator->remove(cachedFile, Replicator::LOCAL_ONLY);
@@ -557,10 +585,15 @@ int Cache::ifExistsThenDelete(const string &key)
     boost::unique_lock<boost::mutex> s(lru_mutex);
     bool objectExists = false;
-
     auto it = m_lru.find(key);
     if (it != m_lru.end())
     {
+        if (doNotEvict.find(it->lit) != doNotEvict.end())
+        {
+            cout << "almost deleted a file being read, are we really doing that somewhere?" << endl;
+            return 0;
+        }
+
         if (toBeDeleted.find(it->lit) == toBeDeleted.end())
         {
             doNotEvict.erase(it->lit);
@@ -572,7 +605,7 @@
         return 0;
     }
     bool journalExists = bf::exists(journalPath);
-    assert(objectExists == bf::exists(cachedPath));
+    //assert(objectExists == bf::exists(cachedPath));
 
     size_t objectSize = (objectExists ? bf::file_size(cachedPath) : 0);
     //size_t objectSize = (objectExists ? MetadataFile::getLengthFromKey(key) : 0);
@@ -637,12 +670,16 @@ inline bool Cache::KeyEquals::operator()(const M_LRU_element_t &l1, const M_LRU_
 
 inline size_t Cache::DNEHasher::operator()(const DNEElement &l) const
 {
-    return hash<string>()(*(l.key));
+    return (l.sKey.empty() ? hash<string>()(*(l.key)) : hash<string>()(l.sKey));
 }
 
 inline bool Cache::DNEEquals::operator()(const DNEElement &l1, const DNEElement &l2) const
 {
-    return (*(l1.key) == *(l2.key));
+    const string *s1, *s2;
+    s1 = l1.sKey.empty() ? &(*(l1.key)) : &(l1.sKey);
+    s2 = l2.sKey.empty() ? &(*(l2.key)) : &(l2.sKey);
+
+    return (*s1 == *s2);
 }
 
 inline bool Cache::TBDLess::operator()(const LRU_t::iterator &i1, const LRU_t::iterator &i2) const
@@ -651,7 +688,3 @@ inline bool Cache::TBDLess::operator()(const LRU_t::iterator &i1, const LRU_t::i
 }
 
 }
-
-
-
-
diff --git a/src/Cache.h b/src/Cache.h
index 321cac486..21c98670f 100644
--- a/src/Cache.h
+++ b/src/Cache.h
@@ -59,6 +59,8 @@ class Cache : public boost::noncopyable
         const boost::filesystem::path &getJournalPath();
         // this will delete everything in the cache and journal paths, and empty all Cache structures.
         void reset();
+
+        void validateCacheSize();
 
     private:
         Cache();
@@ -104,7 +106,9 @@ class Cache : public boost::noncopyable
         struct DNEElement
         {
             DNEElement(const LRU_t::iterator &);
+            DNEElement(const std::string &);
             LRU_t::iterator key;
+            std::string sKey;
             uint refCount;
         };
 
@@ -120,8 +124,8 @@ class Cache : public boost::noncopyable
         typedef std::unordered_set<DNEElement, DNEHasher, DNEEquals> DNE_t;
         DNE_t doNotEvict;
-        void addToDNE(const LRU_t::iterator &key);
-        void removeFromDNE(const LRU_t::iterator &key);
+        void addToDNE(const DNEElement &);
+        void removeFromDNE(const DNEElement &);
 
         // the to-be-deleted set.  Elements removed from the LRU but not yet deleted will be here.
         // Elements are inserted and removed by makeSpace().  If read() references a file that is in this,
diff --git a/src/IOCoordinator.cpp b/src/IOCoordinator.cpp
index 0e2839049..752947edd 100755
--- a/src/IOCoordinator.cpp
+++ b/src/IOCoordinator.cpp
@@ -150,7 +150,8 @@ int IOCoordinator::read(const char *_filename, uint8_t *data, off_t offset, size
     vector<metadataObject> relevants = meta.metadataRead(offset, length);
     map<string, int> journalFDs, objectFDs;
     map<string, string> keyToJournalName, keyToObjectName;
-    vector<SharedCloser> fdMinders;
+    ScopedCloser fdMinders[relevants.size() * 2];
+    int mindersIndex = 0;
     char buf[80];
 
     // load them into the cache
@@ -174,7 +175,8 @@ int IOCoordinator::read(const char *_filename, uint8_t *data, off_t offset, size
         {
             keyToJournalName[key] = filename;
             journalFDs[key] = fd;
-            fdMinders.push_back(SharedCloser(fd));
+            fdMinders[mindersIndex++].fd = fd;
+            //fdMinders.push_back(SharedCloser(fd));
         }
         else if (errno != ENOENT)
         {
@@ -202,7 +204,8 @@ int IOCoordinator::read(const char *_filename, uint8_t *data, off_t offset, size
         }
         keyToObjectName[key] = filename;
         objectFDs[key] = fd;
-        fdMinders.push_back(SharedCloser(fd));
+        fdMinders[mindersIndex++].fd = fd;
+        //fdMinders.push_back(SharedCloser(fd));
     }
     //fileLock.unlock();
 
@@ -317,7 +320,7 @@ int IOCoordinator::_write(const char *filename, const uint8_t *data, off_t offse
             // There's another block below that looks similar.  Also similar blocks in append().
             if ((count + objectOffset) > i->length)
                 metadata.updateEntryLength(i->offset, (count + objectOffset));
-            metadata.writeMetadata(filename);
+            replicator->updateMetadata(filename, metadata);
             logger->log(LOG_ERR,"IOCoordinator::write(): object failed to complete write, %u of %u bytes written.",count,length);
             return count;
         }
@@ -368,7 +371,7 @@ int IOCoordinator::_write(const char *filename, const uint8_t *data, off_t offse
             // update metadataObject length to reflect what was actually written
             if ((count + objectOffset) > newObject.length)
                 metadata.updateEntryLength(newObject.offset, (count + objectOffset));
-            metadata.writeMetadata(filename);
+            replicator->updateMetadata(filename, metadata);
             logger->log(LOG_ERR,"IOCoordinator::write(): newObject failed to complete write, %u of %u bytes written.",count,length);
             return count;
             //log error and abort
@@ -382,7 +385,7 @@ int IOCoordinator::_write(const char *filename, const uint8_t *data, off_t offse
     }
 
     synchronizer->newObjects(newObjectKeys);
-    metadata.writeMetadata(filename);
+    replicator->updateMetadata(filename, metadata);
     return count;
 }
 
@@ -432,7 +435,7 @@ int IOCoordinator::append(const char *_filename, const uint8_t *data, size_t len
         if (err <= 0)
         {
             metadata.updateEntryLength(i->offset, (count + i->length));
-            metadata.writeMetadata(filename);
+            replicator->updateMetadata(filename, metadata);
             logger->log(LOG_ERR,"IOCoordinator::append(): journal failed to complete write, %u of %u bytes written.",count,length);
             goto out;
         }
@@ -469,7 +472,7 @@ int IOCoordinator::append(const char *_filename, const uint8_t *data, size_t len
         {
             // update metadataObject length to reflect what was actually written
             metadata.updateEntryLength(newObject.offset, (count));
-            metadata.writeMetadata(filename);
+            replicator->updateMetadata(filename, metadata);
             logger->log(LOG_ERR,"IOCoordinator::append(): newObject failed to complete write, %u of %u bytes written.",count,length);
             goto out;
             //log error and abort
@@ -481,7 +484,7 @@ int IOCoordinator::append(const char *_filename, const uint8_t *data, size_t len
         dataRemaining -= writeLength;
     }
     synchronizer->newObjects(newObjectKeys);
-    metadata.writeMetadata(filename);
+    replicator->updateMetadata(filename, metadata);   // had to add this hack to prevent deadlock
 
 out:
@@ -797,10 +800,15 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
     vector<string> newJournalEntries;
 
     ScopedReadLock lock(this, filename1);
+    ScopedWriteLock lock2(this, filename2);
     MetadataFile meta1(metaFile1);
     MetadataFile meta2(metaFile2);
     vector<metadataObject> objects = meta1.metadataRead(0, meta1.getLength());
 
+    if (meta2.exists())
+        cout << "copyFile: overwriting a file" << endl;
+    meta2.removeAllEntries();
+
     // TODO. I dislike large try-catch blocks, and large loops. Maybe a little refactoring is in order.
     try
     {
@@ -871,8 +879,9 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
         return -1;
     }
     lock.unlock();
-    replicator->updateMetadata(filename2, meta2);
+    lock2.unlock();
+
     for (auto &jEntry : newJournalEntries)
         sync->newJournalEntry(jEntry);
     return 0;
@@ -1086,6 +1095,15 @@ int IOCoordinator::mergeJournalInMem(boost::shared_array<uint8_t> &objData, size
         uint64_t startReadingAt = offlen[0];
         uint64_t lengthOfRead = offlen[1];
 
+        // XXXPAT: Speculative change.  Got mem errors from writing past the end of objData.  The length
+        // in the metadata is shorter than this journal entry, and not because it got crazy values.
+        // I think the explanation is a truncation.  Remove the log here if we see good results.
+        if (startReadingAt + lengthOfRead > len)
+        {
+            logger->log(LOG_CRIT, "mergeJournalInMem: possibly bad journal entry in %s.  jStart = %lld, jEnd = %lld, max = %lld",
+                journalPath, startReadingAt, startReadingAt + lengthOfRead, len);
+            lengthOfRead = len - startReadingAt;
+        }
         uint count = 0;
         while (count < lengthOfRead)
         {
diff --git a/src/LocalStorage.cpp b/src/LocalStorage.cpp
index db0b92cb0..3bab4f988 100644
--- a/src/LocalStorage.cpp
+++ b/src/LocalStorage.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 
 #include "LocalStorage.h"
 #include "Config.h"
@@ -30,6 +31,22 @@ LocalStorage::LocalStorage()
             throw e;
         }
     }
+    string stmp = Config::get()->getValue("LocalStorage", "fake_latency");
+    if (!stmp.empty() && (stmp[0] == 'Y' || stmp[0] == 'y'))
+    {
+        fakeLatency = true;
+        stmp = Config::get()->getValue("LocalStorage", "max_latency");
+        usecLatencyCap = strtoull(stmp.c_str(), NULL, 10);
+        if (usecLatencyCap == 0)
+        {
+            logger->log(LOG_CRIT, "LocalStorage: bad value for max_latency");
+            throw runtime_error("LocalStorage: bad value for max_latency");
+        }
+        r_seed = (uint) ::time(NULL);
+        logger->log(LOG_DEBUG, "LocalStorage: Will simulate cloud latency of max %llu us", usecLatencyCap);
+    }
+    else
+        fakeLatency = false;
 }
 
 LocalStorage::~LocalStorage()
@@ -41,8 +58,19 @@ const bf::path & LocalStorage::getPrefix() const
     return prefix;
 }
 
+void LocalStorage::addLatency()
+{
+    if (fakeLatency)
+    {
+        uint64_t usec_delay = ((double) rand_r(&r_seed) / (double) RAND_MAX) * usecLatencyCap;
+        ::usleep(usec_delay);
+    }
+}
+
 int LocalStorage::copy(const bf::path &source, const bf::path &dest)
 {
+    addLatency();
+
     boost::system::error_code err;
     bf::copy_file(source, dest, bf::copy_option::fail_if_exists, err);
     if (err)
@@ -72,6 +100,8 @@ int LocalStorage::getObject(const string &source, const string &dest, size_t *si
 
 int LocalStorage::getObject(const std::string &sourceKey, boost::shared_array<uint8_t> *data, size_t *size)
 {
+    addLatency();
+
     bf::path source = prefix / sourceKey;
     const char *c_source = source.string().c_str();
     //char buf[80];
@@ -115,6 +145,8 @@ int LocalStorage::putObject(const string &source, const string &dest)
 
 int LocalStorage::putObject(boost::shared_array<uint8_t> data,
     size_t len, const string &dest)
 {
+    addLatency();
+
     bf::path destPath = prefix / dest;
     const char *c_dest = destPath.string().c_str();
     //char buf[80];
@@ -155,14 +187,17 @@ int LocalStorage::copyObject(const string &source, const string &dest)
 
 int LocalStorage::deleteObject(const string &key)
 {
-    boost::system::error_code err;
+    addLatency();
 
+    boost::system::error_code err;
     bf::remove(prefix / key, err);
     return 0;
 }
 
 int LocalStorage::exists(const std::string &key, bool *out)
 {
+    addLatency();
+
     *out = bf::exists(prefix / key);
     return 0;
 }
diff --git a/src/LocalStorage.h b/src/LocalStorage.h
index f72e5284a..ec571a639 100644
--- a/src/LocalStorage.h
+++ b/src/LocalStorage.h
@@ -27,8 +27,13 @@ class LocalStorage : public CloudStorage
     private:
         boost::filesystem::path prefix;
-        int copy(const boost::filesystem::path &sourceKey, const boost::filesystem::path &destKey);
+
+        // stuff for faking the latency on cloud ops
+        bool fakeLatency;
+        uint64_t usecLatencyCap;
+        uint r_seed;
+        void addLatency();
 };
 
 }
diff --git a/src/MetadataFile.cpp b/src/MetadataFile.cpp
index 705a3abd0..502a94e0e 100755
--- a/src/MetadataFile.cpp
+++ b/src/MetadataFile.cpp
@@ -326,6 +326,11 @@ void MetadataFile::removeEntry(off_t offset)
     mObjects.erase(it);
 }
 
+void MetadataFile::removeAllEntries()
+{
+    mObjects.clear();
+}
+
 // There are more efficient ways to do it.  Optimize if necessary.
 void MetadataFile::breakout(const string &key, vector<string> &ret)
 {
diff --git a/src/MetadataFile.h b/src/MetadataFile.h
index 9c69d3626..75dd7b41d 100755
--- a/src/MetadataFile.h
+++ b/src/MetadataFile.h
@@ -56,6 +56,7 @@ class MetadataFile
         metadataObject addMetadataObject(const char *filename, size_t length);
         bool getEntry(off_t offset, const metadataObject **out) const;
         void removeEntry(off_t offset);
+        void removeAllEntries();
 
         // TBD: this may have to go; there may be no use case where only the uuid needs to change.
         static std::string getNewKeyFromOldKey(const std::string &oldKey, size_t length=0);
diff --git a/src/SMLogging.cpp b/src/SMLogging.cpp
index c3cd0be69..5d2922ac3 100644
--- a/src/SMLogging.cpp
+++ b/src/SMLogging.cpp
@@ -47,7 +47,7 @@ void SMLogging::log(int priority,const char *format, ...)
     va_list args2;
     va_copy(args2, args);
     vfprintf(stderr, format, args2);
-    printf("\n");
+    fprintf(stderr, "\n");
 #endif
     vsyslog(priority, format, args);
diff --git a/src/Synchronizer.cpp b/src/Synchronizer.cpp
index abd6b1453..72472d30d 100755
--- a/src/Synchronizer.cpp
+++ b/src/Synchronizer.cpp
@@ -359,12 +359,13 @@ void Synchronizer::synchronize(const string &sourceFile, list<string>::iterator
     const metadataObject *mdEntry;
     bool entryExists = md.getEntry(MetadataFile::getOffsetFromKey(key), &mdEntry);
-    if (!entryExists)
+    if (!entryExists || key != mdEntry->key)
     {
         logger->log(LOG_DEBUG, "synchronize(): %s does not exist in metadata for %s.  This suggests truncation.",
             key.c_str(), sourceFile.c_str());
         return;
     }
-    assert(key == mdEntry->key);
+
+    //assert(key == mdEntry->key);  <-- This could fail b/c of truncation + a write/append before this job runs.
 
     err = cs->exists(key, &exists);
     if (err)
@@ -374,7 +375,7 @@ void Synchronizer::synchronize(const string &sourceFile, list<string>::iterator
         return;
 
     // TODO: should be safe to check with Cache instead of a file existence check
-    exists = bf::exists(cachePath / key);
+    exists = cache->exists(key);
     if (!exists)
     {
         logger->log(LOG_DEBUG, "synchronize(): was told to upload %s but it does not exist locally", key.c_str());
@@ -409,12 +410,12 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
     const metadataObject *mdEntry;
     bool metaExists = md.getEntry(MetadataFile::getOffsetFromKey(key), &mdEntry);
-    if (!metaExists)
+    if (!metaExists || key != mdEntry->key)
     {
         logger->log(LOG_DEBUG, "synchronizeWithJournal(): %s does not exist in metadata for %s.  This suggests truncation.",
             key.c_str(), sourceFile.c_str());
         return;
     }
-    assert(key == mdEntry->key);
+    //assert(key == mdEntry->key);  <--- I suspect this can happen in a truncate + write situation + a deep sync queue
 
     bf::path oldCachePath = cachePath / key;
     string journalName = (journalPath/ (key + ".journal")).string();
@@ -431,7 +432,7 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
         throw runtime_error(string("Synchronizer: cs->exists() failed: ") + strerror_r(errno, buf, 80));
     if (!existsOnCloud)
     {
-        if (bf::exists(oldCachePath))
+        if (cache->exists(key))
         {
             logger->log(LOG_DEBUG, "synchronizeWithJournal(): %s has no journal and does not exist in the cloud, calling "
                 "synchronize() instead.  Need to explain how this happens.", key.c_str());
diff --git a/src/Utilities.cpp b/src/Utilities.cpp
index 304cb433c..88af9dcd9 100644
--- a/src/Utilities.cpp
+++ b/src/Utilities.cpp
@@ -54,8 +54,11 @@ void ScopedWriteLock::unlock()
     }
 }
 
+ScopedCloser::ScopedCloser() : fd(-1) { }
 ScopedCloser::ScopedCloser(int f) : fd(f) { assert(f != -1); }
-ScopedCloser::~ScopedCloser() {
+ScopedCloser::~ScopedCloser() {
+    if (fd < 0)
+        return;
     int s_errno = errno;
     ::close(fd);
     errno = s_errno;
diff --git a/src/Utilities.h b/src/Utilities.h
index f71fc925b..1df8766a4 100644
--- a/src/Utilities.h
+++ b/src/Utilities.h
@@ -37,6 +37,7 @@ struct ScopedWriteLock
 
 struct ScopedCloser
 {
+    ScopedCloser();
     ScopedCloser(int f);
     ~ScopedCloser();
     int fd;
diff --git a/src/main.cpp b/src/main.cpp
index 291816c3b..957584de0 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -22,6 +22,7 @@ using namespace storagemanager;
 
 void printCacheUsage(int sig)
 {
+    Cache::get()->validateCacheSize();
     cout << "Current cache size = " << Cache::get()->getCurrentCacheSize() << endl;
     cout << "Cache element count = " << Cache::get()->getCurrentCacheElementCount() << endl;
 }

From 56a51605afe3fa87712ea6c656f3e8127afbf144 Mon Sep 17 00:00:00 2001
From: Patrick LeBlanc
Date: Wed, 3 Jul 2019 13:31:49 -0500
Subject: [PATCH 3/9] Fixed more races and edge cases I found.
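
To make the race easier to reason about, here is a condensed, runnable model
of the download-admission rule this patch adds to Cache::read().
FakeDownloader and shouldFetch are stand-ins; the real members are doNotEvict
and the new Downloader::inProgress():

    #include <iostream>
    #include <set>
    #include <string>

    struct FakeDownloader
    {
        std::set<std::string> unfinished;   // keys whose downloads are still running
        bool inProgress(const std::string &k) const { return unfinished.count(k) != 0; }
    };

    // Fetch unless another reader already owns a *finished* copy that has not
    // been linked into the LRU yet (in the DNE, and no download in progress).
    bool shouldFetch(const std::set<std::string> &dne, const FakeDownloader &dl,
        const std::string &key)
    {
        return dne.count(key) == 0 || dl.inProgress(key);
    }

    int main()
    {
        std::set<std::string> dne = {"k1", "k2"};
        FakeDownloader dl;
        dl.unfinished = {"k2"};
        std::cout << shouldFetch(dne, dl, "k1");                // 0: finished, owned by another reader
        std::cout << shouldFetch(dne, dl, "k2");                // 1: join the in-progress download
        std::cout << shouldFetch(dne, dl, "k3") << std::endl;   // 1: not cached at all
        return 0;
    }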
---
 src/Cache.cpp         | 51 ++++++++++++++++++++++++++++---------------
 src/Downloader.cpp    | 14 ++++++++++++
 src/Downloader.h      |  2 ++
 src/IOCoordinator.cpp | 34 ++++++++++++++++++++++-------
 storagemanager.cnf    |  8 +++++++
 5 files changed, 84 insertions(+), 25 deletions(-)

diff --git a/src/Cache.cpp b/src/Cache.cpp
index 6693a0fb1..280e89e25 100644
--- a/src/Cache.cpp
+++ b/src/Cache.cpp
@@ -132,7 +132,7 @@ void Cache::populate()
             lru.push_back(p.filename().string());
             auto last = lru.end();
             m_lru.insert(--last);
-            currentCacheSize += bf::file_size(p);
+            currentCacheSize += bf::file_size(*dir);
         }
         else
             logger->log(LOG_WARNING, "Cache: found something in the cache that does not belong '%s'", p.string().c_str());
@@ -147,7 +147,7 @@ void Cache::populate()
         if (bf::is_regular_file(p))
         {
             if (p.extension() == ".journal")
-                currentCacheSize += bf::file_size(p);
+                currentCacheSize += bf::file_size(*dir);
             else
                 logger->log(LOG_WARNING, "Cache: found a file in the journal dir that does not belong '%s'", p.string().c_str());
         }
@@ -286,8 +286,17 @@ void Cache::read(const vector<string> &keys)
         }
         else
         {
+            // There's a window where the file has been downloaded but is not yet
+            // added to the lru structs.  However it is in the DNE.  If it is in the DNE, then it is also
+            // in Downloader's map.  So, this thread needs to start the download if it's not in the
+            // DNE or if there's an existing download that hasn't finished yet.  Starting the download
+            // includes waiting for an existing download to finish, which from this class's pov is the
+            // same thing.
+            if (doNotEvict.find(key) == doNotEvict.end() || downloader.inProgress(key))
+                keysToFetch.push_back(&key);
+            else
+                cout << "Cache: detected and stopped a racy download" << endl;
             addToDNE(key);
-            keysToFetch.push_back(&key);
         }
     }
     if (keysToFetch.empty())
@@ -301,13 +310,22 @@ void Cache::read(const vector<string> &keys)
         // downloads with size 0 didn't actually happen, either because it
         // was a preexisting download (another read() call owns it), or because
         // there was an error downloading it.  Use size == 0 as an indication of
-        // what to add to the cache
+        // what to add to the cache.  Also need to verify that the file was not deleted,
+        // indicated by the key still existing in doNotEvict.
         if (dlSizes[i] != 0)
         {
-            sum_sizes += dlSizes[i];
-            lru.push_back(*keysToFetch[i]);
-            LRU_t::iterator lit = lru.end();
-            m_lru.insert(--lit);   // I dislike this way of grabbing the last iterator in a list.
+            if (doNotEvict.find(*keysToFetch[i]) != doNotEvict.end())
+            {
+                sum_sizes += dlSizes[i];
+                lru.push_back(*keysToFetch[i]);
+                LRU_t::iterator lit = lru.end();
+                m_lru.insert(--lit);   // I dislike this way of grabbing the last iterator in a list.
+            }
+            else   // it was downloaded, but a deletion happened so we have to toss it
+            {
+                cout << "removing a file that was deleted by another thread during download" << endl;
+                bf::remove(prefix / (*keysToFetch[i]));
+            }
         }
     }
 
@@ -329,9 +347,14 @@ void Cache::doneReading(const vector<string> &keys)
     boost::unique_lock<boost::mutex> s(lru_mutex);
     for (const string &key : keys)
     {
-        const auto &it = m_lru.find(key);
-        if (it != m_lru.end())
-            removeFromDNE(it->lit);
+        removeFromDNE(key);
+        // most should be in the map.
+        // debatable whether it's faster to look up the list iterator and use it
+        // or whether it's faster to bypass that and use strings only.
+
+        //const auto &it = m_lru.find(key);
+        //if (it != m_lru.end())
+        //    removeFromDNE(it->lit);
     }
     _makeSpace(0);
 }
@@ -588,12 +611,6 @@ int Cache::ifExistsThenDelete(const string &key)
     auto it = m_lru.find(key);
     if (it != m_lru.end())
     {
-        if (doNotEvict.find(it->lit) != doNotEvict.end())
-        {
-            cout << "almost deleted a file being read, are we really doing that somewhere?" << endl;
-            return 0;
-        }
-
         if (toBeDeleted.find(it->lit) == toBeDeleted.end())
         {
             doNotEvict.erase(it->lit);
diff --git a/src/Downloader.cpp b/src/Downloader.cpp
index ae988f0bb..6192c9fe5 100644
--- a/src/Downloader.cpp
+++ b/src/Downloader.cpp
@@ -186,6 +186,15 @@ void Downloader::download(const vector<const string *> &keys, vector<int> *errno
     }
 }
 
+bool Downloader::inProgress(const string &key)
+{
+    boost::shared_ptr<Download> tmp(new Download(key));
+    auto it = downloads.find(tmp);
+    if (it != downloads.end())
+        return !(*it)->finished;
+    return false;
+}
+
 void Downloader::setDownloadPath(const string &path)
 {
     downloadPath = path;
@@ -197,6 +206,11 @@ Downloader::Download::Download(const string &source, const string &_dlPath, boos
 {
 }
 
+Downloader::Download::Download(const string &source) :
+    key(source), dl_errno(0), size(0), lock(NULL), finished(false), itRan(false)
+{
+}
+
 Downloader::Download::~Download()
 {
     assert(!itRan || finished);
diff --git a/src/Downloader.h b/src/Downloader.h
index 07d82a739..518658de4 100644
--- a/src/Downloader.h
+++ b/src/Downloader.h
@@ -28,6 +28,7 @@ class Downloader
         void download(const std::vector<const std::string *> &keys, std::vector<int> *errnos, std::vector<size_t> *sizes);
         void setDownloadPath(const std::string &path);
         void useThisLock(boost::mutex *);
+        bool inProgress(const std::string &);
 
     private:
         uint maxDownloads;
@@ -51,6 +52,7 @@ class Downloader
         struct Download : public ThreadPool::Job
         {
             Download(const std::string &source, const std::string &_dlPath, boost::mutex *_lock);
+            Download(const std::string &source);
             ~Download();
             void operator()();
             boost::filesystem::path dlPath;
diff --git a/src/IOCoordinator.cpp b/src/IOCoordinator.cpp
index 752947edd..78b290d83 100755
--- a/src/IOCoordinator.cpp
+++ b/src/IOCoordinator.cpp
@@ -488,7 +488,7 @@ int IOCoordinator::append(const char *_filename, const uint8_t *data, size_t len
     replicator->updateMetadata(filename, metadata);   // had to add this hack to prevent deadlock
 
 out:
-    // need to release the file lock before calling the cache fcns.
+    // need to release the file lock before telling Cache that we're done writing.
     lock.unlock();
 
     cache->doneWriting();
@@ -802,12 +802,13 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
     ScopedReadLock lock(this, filename1);
     ScopedWriteLock lock2(this, filename2);
     MetadataFile meta1(metaFile1);
-    MetadataFile meta2(metaFile2);
+    MetadataFile meta2(metaFile2.string().c_str(), MetadataFile::no_create_t());
     vector<metadataObject> objects = meta1.metadataRead(0, meta1.getLength());
 
-    if (meta2.exists())
+    if (meta2.exists()) {
         cout << "copyFile: overwriting a file" << endl;
-    meta2.removeAllEntries();
+        meta2.removeAllEntries();
+    }
 
     // TODO. I dislike large try-catch blocks, and large loops. Maybe a little refactoring is in order.
     try
     {
@@ -1089,19 +1090,34 @@ int IOCoordinator::mergeJournalInMem(boost::shared_array<uint8_t> &objData, size
     {
         uint64_t offlen[2];
         int err = ::read(journalFD, &offlen, 16);
-        if (err != 16)   // got EOF
-            break;
+        if (err == 0)   // got EOF
+            break;
+        else if (err < 16)
+        {
+            // punting on this
+            cout << "mergeJournalInMem: failed to read a journal entry header in one attempt.  fixme..."
+                << endl;
+            errno = ENODATA;
+            return -1;
+        }
         uint64_t startReadingAt = offlen[0];
         uint64_t lengthOfRead = offlen[1];
 
         // XXXPAT: Speculative change.  Got mem errors from writing past the end of objData.  The length
         // in the metadata is shorter than this journal entry, and not because it got crazy values.
-        // I think the explanation is a truncation.  Remove the log here if we see good results.
+        // I think the explanation is a truncation.
+        if (startReadingAt > len)
+        {
+            //logger->log(LOG_CRIT, "mergeJournalInMem: skipping a theoretically irrelevant journal entry in %s. "
+            //    "jstart = %llu, max = %llu", journalPath, startReadingAt, len);
+            ::lseek(journalFD, offlen[1], SEEK_CUR);
+            continue;
+        }
+
         if (startReadingAt + lengthOfRead > len)
         {
-            logger->log(LOG_CRIT, "mergeJournalInMem: possibly bad journal entry in %s.  jStart = %lld, jEnd = %lld, max = %lld",
-                journalPath, startReadingAt, startReadingAt + lengthOfRead, len);
+            //logger->log(LOG_CRIT, "mergeJournalInMem: possibly bad journal entry in %s.  jStart = %llu, jEnd = %llu, max = %llu",
+            //    journalPath, startReadingAt, startReadingAt + lengthOfRead, len);
             lengthOfRead = len - startReadingAt;
         }
         uint count = 0;
         while (count < lengthOfRead)
         {
@@ -1124,6 +1140,8 @@ int IOCoordinator::mergeJournalInMem(boost::shared_array<uint8_t> &objData, size
             }
             count += err;
         }
+        if (lengthOfRead < offlen[1])
+            ::lseek(journalFD, offlen[1] - lengthOfRead, SEEK_CUR);
     }
     return 0;
 }
diff --git a/storagemanager.cnf b/storagemanager.cnf
index 6195f992f..26c77ae58 100644
--- a/storagemanager.cnf
+++ b/storagemanager.cnf
@@ -20,6 +20,14 @@ bucket = s3-cs-test2
 [LocalStorage]
 path = ${HOME}/storagemanager/fake-cloud
 
+# introduce latency to fake-cloud operations.  Useful for debugging.
+fake_latency = n
+
+# Values are randomized between 1 and max_latency, in microseconds.
+# Values between 30000-50000 roughly simulate observed latency of S3
+# access from an EC2 node.
+max_latency = 50000
+
 [Cache]
 cache_size = 2g
 path = ${HOME}/storagemanager/cache

From aa65090a61bd8cf2e07b6a49be2c7f0dcc28b1a2 Mon Sep 17 00:00:00 2001
From: Patrick LeBlanc
Date: Wed, 3 Jul 2019 17:18:14 -0500
Subject: [PATCH 4/9] Fixed another race. Also shortened the interval at which
 Sync creates new jobs.

In this one, if a read and then a delete happened on an object being flushed
by makeSpace(), it would leave the toBeDeleted struct with an invalid list
iterator.
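
For review, a toy model of the reworked eviction handshake in _makeSpace().
doNotEvict and toBeDeleted are simplified to string sets here (the real code
keys them by list iterator), and the erase on the "kept" path shown below is
the piece still missing in this commit; it gets added two commits later:

    #include <iostream>
    #include <set>
    #include <string>

    std::set<std::string> doNotEvict, toBeDeleted;

    // Models the tail of _makeSpace() after lru_mutex is re-acquired: the
    // flush ran unlocked, so a reader may have claimed the key meanwhile.
    void finishEviction(const std::string &key)
    {
        if (doNotEvict.count(key) == 0)
            std::cout << "evicting " << key << std::endl;   // remove file + LRU entry
        else
            std::cout << "keeping " << key << " (claimed by a reader)" << std::endl;
        toBeDeleted.erase(key);   // must happen on BOTH paths
    }

    int main()
    {
        toBeDeleted = {"k1", "k2"};
        doNotEvict = {"k2"};        // k2 was read while its flush was in flight
        finishEviction("k1");
        finishEviction("k2");
        std::cout << "leftover entries: " << toBeDeleted.size() << std::endl;   // 0
        return 0;
    }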
---
 src/Cache.cpp      | 22 ++++++----------------
 src/Synchronizer.h |  2 +-
 2 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/src/Cache.cpp b/src/Cache.cpp
index 280e89e25..15a8db3f7 100644
--- a/src/Cache.cpp
+++ b/src/Cache.cpp
@@ -276,13 +276,6 @@ void Cache::read(const vector<string> &keys)
         {
             addToDNE(mit->lit);
             lru.splice(lru.end(), lru, mit->lit);   // move them to the back so they are last to pick for eviction
-            // if it's about to be deleted, stop that
-            TBD_t::iterator tbd_it = toBeDeleted.find(mit->lit);
-            if (tbd_it != toBeDeleted.end())
-            {
-                //cout << "Saved one from being deleted" << endl;
-                toBeDeleted.erase(tbd_it);
-            }
         }
         else
         {
@@ -448,7 +441,7 @@ void Cache::deletedObject(const string &key, size_t size)
     boost::unique_lock<boost::mutex> s(lru_mutex);
     assert(currentCacheSize >= size);
     M_LRU_t::iterator mit = m_lru.find(key);
-    assert(mit != m_lru.end());   // TODO: 5/16/19 - got this assertion using S3 by running test000, then test000 again.
+    assert(mit != m_lru.end());
 
     // if it's being flushed, let makeSpace() do the deleting
     if (toBeDeleted.find(mit->lit) == toBeDeleted.end())
@@ -507,7 +500,7 @@ void Cache::_makeSpace(size_t size)
     }
 
     if (!bf::exists(prefix / *it))
-        cout << prefix / *it << " doesn't exist, WTF?" << endl;
+        cout << prefix / *it << " doesn't exist, WTF?" << endl;   // Have gotten this a couple times
     assert(bf::exists(prefix / *it));
 
     /* tell Synchronizer that this key will be evicted
@@ -518,7 +511,7 @@ void Cache::_makeSpace(size_t size)
     //logger->log(LOG_WARNING, "Cache: flushing!");
 
     toBeDeleted.insert(it);
-    string key = *it;   // need to make a copy; it could be in the process of being renamed
+    string key = *it;   // need to make a copy; it could get changed after unlocking.
 
     lru_mutex.unlock();
     try
@@ -534,15 +527,12 @@ void Cache::_makeSpace(size_t size)
     }
     lru_mutex.lock();
 
-    TBD_t::iterator tbd_it = toBeDeleted.find(it);
-    if (tbd_it != toBeDeleted.end())
+    // check doNotEvict again in case this object is now being read
+    if (doNotEvict.find(it) == doNotEvict.end())
     {
-        // if it's still in toBeDeleted then it is safe to delete.
-        // if read() happened to access it while it was flushing, it will not
-        // be in that set.
         bf::path cachedFile = prefix / *it;
         m_lru.erase(*it);
-        toBeDeleted.erase(tbd_it);
+        toBeDeleted.erase(it);
         lru.erase(it);
         size_t newSize = bf::file_size(cachedFile);
         replicator->remove(cachedFile, Replicator::LOCAL_ONLY);
diff --git a/src/Synchronizer.h b/src/Synchronizer.h
index 7652b2fc2..484fdfeba 100755
--- a/src/Synchronizer.h
+++ b/src/Synchronizer.h
@@ -85,7 +85,7 @@ class Synchronizer : public boost::noncopyable
         // this thread will start jobs for entries in pendingOps every 10 seconds
         bool die;
         boost::thread syncThread;
-        const boost::chrono::seconds syncInterval = boost::chrono::seconds(10);
+        const boost::chrono::seconds syncInterval = boost::chrono::seconds(1);
         void periodicSync();
 
         SMLogging *logger;

From 62c853595f5d6023e897adc68034042a7d97e7ff Mon Sep 17 00:00:00 2001
From: Patrick LeBlanc
Date: Fri, 5 Jul 2019 11:02:55 -0500
Subject: [PATCH 5/9] Changed the sync trigger to include outstanding journal
 size.

Right now it's hardcoded at 50MB for testing. Will parameterize later.
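
The trigger itself is small; the one subtlety is that forceFlush() takes the
same mutex, so the lock has to be dropped first. A runnable sketch of that
shape (stubs only, not the real Synchronizer):

    #include <boost/thread.hpp>
    #include <iostream>

    boost::mutex mtx;
    size_t uncommitted = 0;

    void forceFlushStub()
    {
        boost::unique_lock<boost::mutex> lk(mtx);   // re-acquires the same mutex
        std::cout << "flush triggered" << std::endl;
    }

    void newJournalEntryStub(size_t size, size_t threshold)
    {
        boost::unique_lock<boost::mutex> lk(mtx);
        uncommitted += size;
        if (uncommitted > threshold)
        {
            uncommitted = 0;
            lk.unlock();   // unlock first, or forceFlushStub() self-deadlocks
            forceFlushStub();
        }
    }

    int main()
    {
        for (int i = 0; i < 6; i++)
            newJournalEntryStub(10 << 20, 50000000);   // 10MB entries vs. the 50MB cap
        return 0;
    }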
---
 src/IOCoordinator.cpp | 15 ++++++++-------
 src/Synchronizer.cpp  | 28 +++++++++++++++++++++-------
 src/Synchronizer.h    |  9 +++++----
 src/unit_tests.cpp    |  4 ++--
 4 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/src/IOCoordinator.cpp b/src/IOCoordinator.cpp
index 78b290d83..62cbcb50d 100755
--- a/src/IOCoordinator.cpp
+++ b/src/IOCoordinator.cpp
@@ -330,7 +330,7 @@ int IOCoordinator::_write(const char *filename, const uint8_t *data, off_t offse
 
         cache->newJournalEntry(writeLength+JOURNAL_ENTRY_HEADER_SIZE);
 
-        synchronizer->newJournalEntry(i->key);
+        synchronizer->newJournalEntry(i->key, writeLength+JOURNAL_ENTRY_HEADER_SIZE);
         count += writeLength;
         dataRemaining -= writeLength;
     }
@@ -443,7 +443,7 @@ int IOCoordinator::append(const char *_filename, const uint8_t *data, size_t len
 
         cache->newJournalEntry(writeLength+JOURNAL_ENTRY_HEADER_SIZE);
 
-        synchronizer->newJournalEntry(i->key);
+        synchronizer->newJournalEntry(i->key, writeLength+JOURNAL_ENTRY_HEADER_SIZE);
         count += writeLength;
         dataRemaining -= writeLength;
     }
@@ -798,7 +798,7 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
         return -1;
     }
 
-    vector<string> newJournalEntries;
+    vector<pair<string, size_t> > newJournalEntries;
 
     ScopedReadLock lock(this, filename1);
     ScopedWriteLock lock2(this, filename2);
     MetadataFile meta1(metaFile1);
@@ -853,8 +853,9 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
             try
             {
                 bf::copy_file(journalFile, newJournalFile);
-                cache->newJournalEntry(bf::file_size(newJournalFile));
-                newJournalEntries.push_back(newObj.key);
+                size_t tmp = bf::file_size(newJournalFile);
+                cache->newJournalEntry(tmp);
+                newJournalEntries.push_back(pair<string, size_t>(newObj.key, tmp));
             }
             catch (bf::filesystem_error &e)
             {
@@ -872,7 +873,7 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
             cs->deleteObject(newObject.key);
         for (auto &jEntry : newJournalEntries)
         {
-            bf::path fullJournalPath = journalPath/(jEntry + ".journal");
+            bf::path fullJournalPath = journalPath/(jEntry.first + ".journal");
             cache->deletedJournal(bf::file_size(fullJournalPath));
             bf::remove(fullJournalPath);
         }
@@ -884,7 +885,7 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
     lock2.unlock();
 
     for (auto &jEntry : newJournalEntries)
-        sync->newJournalEntry(jEntry);
+        sync->newJournalEntry(jEntry.first, jEntry.second);
     return 0;
 }
diff --git a/src/Synchronizer.cpp b/src/Synchronizer.cpp
index 72472d30d..2d4ac8dec 100755
--- a/src/Synchronizer.cpp
+++ b/src/Synchronizer.cpp
@@ -58,6 +58,7 @@ Synchronizer::Synchronizer() : maxUploads(0)
     cachePath = cache->getCachePath();
     threadPool.setMaxThreads(maxUploads);
     die = false;
+    uncommittedJournalSize = 0;
     syncThread = boost::thread([this] () { this->periodicSync(); });
 }
 
@@ -82,8 +83,9 @@ enum OpFlags
     NEW_OBJECT = 0x4,
 };
 
-void Synchronizer::_newJournalEntry(const string &key)
+void Synchronizer::_newJournalEntry(const string &key, size_t size)
 {
+    uncommittedJournalSize += size;
     auto it = pendingOps.find(key);
     if (it != pendingOps.end())
     {
@@ -94,17 +96,29 @@
     pendingOps[key] = boost::shared_ptr<PendingOps>(new PendingOps(JOURNAL));
 }
 
-void Synchronizer::newJournalEntry(const string &key)
+void Synchronizer::newJournalEntry(const string &key, size_t size)
 {
     boost::unique_lock<boost::mutex> s(mutex);
-    _newJournalEntry(key);
+    _newJournalEntry(key, size);
+    if (uncommittedJournalSize > 50000000)
+    {
+        uncommittedJournalSize = 0;
+        s.unlock();
+        forceFlush();
+    }
 }
 
-void Synchronizer::newJournalEntries(const vector<string> &keys)
+void Synchronizer::newJournalEntries(const vector<pair<string, size_t> > &keys)
 {
     boost::unique_lock<boost::mutex> s(mutex);
-    for (const string &key : keys)
-        _newJournalEntry(key);
+    for (auto &keysize : keys)
+        _newJournalEntry(keysize.first, keysize.second);
+    if (uncommittedJournalSize > 50000000)
+    {
+        uncommittedJournalSize = 0;
+        s.unlock();
+        forceFlush();
+    }
 }
 
 void Synchronizer::newObjects(const vector<string> &keys)
@@ -228,6 +242,7 @@ void Synchronizer::periodicSync()
             //     threadPool.currentQueueSize() << endl;
             for (auto &job : pendingOps)
                 makeJob(job.first);
+            uncommittedJournalSize = 0;
         }
     }
 
@@ -235,7 +250,6 @@
 void Synchronizer::forceFlush()
 {
     boost::unique_lock<boost::mutex> lock(mutex);
     syncThread.interrupt();
-    lock.unlock();
 }
 
 void Synchronizer::makeJob(const string &key)
diff --git a/src/Synchronizer.h b/src/Synchronizer.h
index 484fdfeba..7d43a7653 100755
--- a/src/Synchronizer.h
+++ b/src/Synchronizer.h
@@ -29,8 +29,8 @@ class Synchronizer : public boost::noncopyable
 
         // these take keys as parameters, not full path names, ex, pass in '12345' not
         // 'cache/12345'.
-        void newJournalEntry(const std::string &key);
-        void newJournalEntries(const std::vector<std::string> &keys);
+        void newJournalEntry(const std::string &key, size_t len);
+        void newJournalEntries(const std::vector<std::pair<std::string, size_t> > &keys);
         void newObjects(const std::vector<std::string> &keys);
         void deletedObjects(const std::vector<std::string> &keys);
         void flushObject(const std::string &key);
@@ -42,7 +42,7 @@ class Synchronizer : public boost::noncopyable
     private:
         Synchronizer();
 
-        void _newJournalEntry(const std::string &key);
+        void _newJournalEntry(const std::string &key, size_t len);
         void process(std::list<std::string>::iterator key);
         void synchronize(const std::string &sourceFile, std::list<std::string>::iterator &it);
         void synchronizeDelete(const std::string &sourceFile, std::list<std::string>::iterator &it);
@@ -85,8 +85,9 @@ class Synchronizer : public boost::noncopyable
         // this thread will start jobs for entries in pendingOps every 10 seconds
         bool die;
         boost::thread syncThread;
-        const boost::chrono::seconds syncInterval = boost::chrono::seconds(1);
+        const boost::chrono::seconds syncInterval = boost::chrono::seconds(10);
         void periodicSync();
+        size_t uncommittedJournalSize;
 
         SMLogging *logger;
         Cache *cache;
diff --git a/src/unit_tests.cpp b/src/unit_tests.cpp
index 3ceac0d40..b342dbb49 100755
--- a/src/unit_tests.cpp
+++ b/src/unit_tests.cpp
@@ -1041,7 +1041,7 @@ bool syncTest1()
     assert(!err);
     assert(exists);
 
-    sync->newJournalEntry(key);
+    sync->newJournalEntry(key, 0);
     sync->forceFlush();
     sleep(1);   // let it do what it does
 
@@ -1080,7 +1080,7 @@ bool syncTest1()
     // make the journal again, call sync->newJournalObject()
     makeTestJournal((journalPath / (newKey + ".journal")).string().c_str());
     cache->newJournalEntry(bf::file_size(journalPath / (newKey + ".journal")));
-    sync->newJournalEntry(newKey);
+    sync->newJournalEntry(newKey, 0);
     sync->forceFlush();
     sleep(1);

From 46265585edbeafe9589e56d1d3bd0b50d775231e Mon Sep 17 00:00:00 2001
From: Patrick LeBlanc
Date: Fri, 5 Jul 2019 11:03:50 -0500
Subject: [PATCH 6/9] Fixed a bug in the new makespace code. toBeDeleted wasn't
 being updated during one of the races fixed a couple commits ago.
---
 src/Cache.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Cache.cpp b/src/Cache.cpp
index 15a8db3f7..f4c51ce95 100644
--- a/src/Cache.cpp
+++ b/src/Cache.cpp
@@ -548,6 +548,8 @@ void Cache::_makeSpace(size_t size)
                 thisMuch = 0;
             }
         }
+        else
+            toBeDeleted.erase(it);
     }
 }

From d0c457a06896a1394a1bdaeed88c7003b9dff548 Mon Sep 17 00:00:00 2001
From: Patrick LeBlanc
Date: Fri, 5 Jul 2019 14:20:39 -0500
Subject: [PATCH 7/9] Parameterized the journal size threshold to trigger a
 flush.

---
 src/Synchronizer.cpp | 5 +++--
 src/Synchronizer.h   | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/Synchronizer.cpp b/src/Synchronizer.cpp
index 2d4ac8dec..ceb77e49e 100755
--- a/src/Synchronizer.cpp
+++ b/src/Synchronizer.cpp
@@ -59,6 +59,7 @@ Synchronizer::Synchronizer() : maxUploads(0)
     threadPool.setMaxThreads(maxUploads);
     die = false;
     uncommittedJournalSize = 0;
+    journalSizeThreshold = cache->getMaxCacheSize() / 2;
     syncThread = boost::thread([this] () { this->periodicSync(); });
 }
 
@@ -100,7 +101,7 @@ void Synchronizer::newJournalEntry(const string &key, size_t size)
 {
     boost::unique_lock<boost::mutex> s(mutex);
     _newJournalEntry(key, size);
-    if (uncommittedJournalSize > 50000000)
+    if (uncommittedJournalSize > journalSizeThreshold)
     {
         uncommittedJournalSize = 0;
         s.unlock();
@@ -113,7 +114,7 @@ void Synchronizer::newJournalEntries(const vector<pair<string, size_t> > &keys)
     boost::unique_lock<boost::mutex> s(mutex);
     for (auto &keysize : keys)
         _newJournalEntry(keysize.first, keysize.second);
-    if (uncommittedJournalSize > 50000000)
+    if (uncommittedJournalSize > journalSizeThreshold)
     {
         uncommittedJournalSize = 0;
         s.unlock();
         forceFlush();
     }
diff --git a/src/Synchronizer.h b/src/Synchronizer.h
index 7d43a7653..7a909842f 100755
--- a/src/Synchronizer.h
+++ b/src/Synchronizer.h
@@ -87,7 +87,7 @@ class Synchronizer : public boost::noncopyable
         boost::thread syncThread;
         const boost::chrono::seconds syncInterval = boost::chrono::seconds(10);
         void periodicSync();
-        size_t uncommittedJournalSize;
+        size_t uncommittedJournalSize, journalSizeThreshold;
 
         SMLogging *logger;
         Cache *cache;

From 948ee1aa5398e91a6510bf270f68ae7e772249d1 Mon Sep 17 00:00:00 2001
From: Patrick LeBlanc
Date: Fri, 5 Jul 2019 16:47:07 -0500
Subject: [PATCH 8/9] Made the log-to-stderr stuff a CMake option.
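
Usage note: the stderr mirror can now be switched on per build with
cmake -DLOG_TO_STDERR=ON (plus the usual options). All the option does is
define DEBUG, which appears to be the macro gating the vfprintf(stderr, ...)
block in SMLogging.cpp; the #ifdef itself is outside the hunk shown in
patch 2.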
<< endl; // Have gotten this a couple times + cout << prefix / *it << " doesn't exist, WTF?" << endl; // ran into this a couple times, still happens as of commit 948ee1aa5 assert(bf::exists(prefix / *it)); /* tell Synchronizer that this key will be evicted