From faa35ebeebd53af28febad76d887fd1c62aa23c0 Mon Sep 17 00:00:00 2001 From: Patrick LeBlanc Date: Fri, 22 May 2020 17:55:19 -0400 Subject: [PATCH] Tentative commit, hunting down a source of misbehavior. Conflicts: storage-manager/src/IOCoordinator.cpp --- storage-manager/src/Downloader.cpp | 8 ++++ storage-manager/src/IOCoordinator.cpp | 57 +++++++++++++++++++++++---- storage-manager/src/LocalStorage.cpp | 6 +++ storage-manager/src/Synchronizer.cpp | 27 +++++++++++-- 4 files changed, 88 insertions(+), 10 deletions(-) diff --git a/storage-manager/src/Downloader.cpp b/storage-manager/src/Downloader.cpp index 0c230f566..c6ea05198 100644 --- a/storage-manager/src/Downloader.cpp +++ b/storage-manager/src/Downloader.cpp @@ -18,6 +18,7 @@ #include "Downloader.h" #include "Config.h" #include "SMLogging.h" +#include "MetadataFile.h" #include #include #include @@ -166,6 +167,13 @@ void Downloader::Download::operator()() bf::remove(tmpFile); size = 0; } + if (size != MetadataFile::getLengthFromKey(key)) + { + ostringstream oss; + SMLogging *logr = SMLogging::get(); + oss << "Downloader: got a file with a bad length field. key = " << key << " actual size = " << size; + logr->log(LOG_ERR, oss.str().c_str()); + } // move it to its proper place boost::system::error_code berr; diff --git a/storage-manager/src/IOCoordinator.cpp b/storage-manager/src/IOCoordinator.cpp index 2dc3c3630..687eb434e 100644 --- a/storage-manager/src/IOCoordinator.cpp +++ b/storage-manager/src/IOCoordinator.cpp @@ -488,6 +488,8 @@ ssize_t IOCoordinator::_write(const boost::filesystem::path &filename, const uin //log error and abort l_errno = errno; logger->log(LOG_ERR,"IOCoordinator::write(): Failed newObject."); + metadata.removeEntry(newObject.offset); + replicator->remove(firstDir/newObject.key); errno = l_errno; if (count == 0) // if no data has been written yet, it's safe to return -1 here. return -1; @@ -509,14 +511,17 @@ ssize_t IOCoordinator::_write(const boost::filesystem::path &filename, const uin // get a new name for the object string oldKey = newObject.key; newObject.key = metadata.getNewKeyFromOldKey(newObject.key, err + objectOffset); - int renameErr = ::rename((firstDir/oldKey).string().c_str(), (firstDir/newObject.key).string().c_str()); + ostringstream os; + os << "IOCoordinator::write(): renaming " << oldKey << " to " << newObject.key; + logger->log(LOG_DEBUG, os.str().c_str()); + int renameErr = ::rename((cachePath/firstDir/oldKey).string().c_str(), (cachePath/firstDir/newObject.key).string().c_str()); int renameErrno = errno; if (renameErr < 0) { ostringstream oss; char buf[80]; - oss << "IOCoordinator::write(): Failed to rename " << (firstDir/oldKey).string() << " to " << - (firstDir/newObject.key).string() << "! Got " << strerror_r(renameErrno, buf, 80); + oss << "IOCoordinator::write(): Failed to rename " << (cachePath/firstDir/oldKey).string() << " to " << + (cachePath/firstDir/newObject.key).string() << "! Got " << strerror_r(renameErrno, buf, 80); logger->log(LOG_ERR, oss.str().c_str()); newObject.key = oldKey; } @@ -527,6 +532,14 @@ ssize_t IOCoordinator::_write(const boost::filesystem::path &filename, const uin newObjectKeys.push_back(newObject.key); goto out; } + + if (bf::file_size(cachePath/firstDir/newObject.key) != MetadataFile::getLengthFromKey(newObject.key)) + { + ostringstream oss; + oss << "IOCoordinator::write(): detected bad length field in " << newObject.key + << " real size = " << bf::file_size(cachePath/firstDir/newObject.key); + logger->log(LOG_ERR, oss.str().c_str()); + } cache->newObject(firstDir, newObject.key,writeLength + objectOffset); newObjectKeys.push_back(newObject.key); @@ -635,6 +648,8 @@ ssize_t IOCoordinator::append(const char *_filename, const uint8_t *data, size_t l_errno = errno; //log error and abort logger->log(LOG_ERR,"IOCoordinator::append(): Failed newObject."); + metadata.removeEntry(newObject.offset); + replicator->remove(firstDir/newObject.key); errno = l_errno; // if no data was written successfully yet, it's safe to return -1 here. if (count == 0) @@ -655,20 +670,32 @@ ssize_t IOCoordinator::append(const char *_filename, const uint8_t *data, size_t { string oldKey = newObject.key; newObject.key = metadata.getNewKeyFromOldKey(newObject.key, err + newObject.offset); - int renameErr = ::rename((firstDir/oldKey).string().c_str(), (firstDir/newObject.key).string().c_str()); + ostringstream os; + os << "IOCoordinator::append(): renaming " << oldKey << " to " << newObject.key; + logger->log(LOG_DEBUG, os.str().c_str()); + int renameErr = ::rename((cachePath/firstDir/oldKey).string().c_str(), (cachePath/firstDir/newObject.key).string().c_str()); int renameErrno = errno; if (renameErr < 0) { ostringstream oss; char buf[80]; - oss << "IOCoordinator::write(): Failed to rename " << (firstDir/oldKey).string() << " to " << - (firstDir/newObject.key).string() << "! Got " << strerror_r(renameErrno, buf, 80); + oss << "IOCoordinator::write(): Failed to rename " << (cachePath/firstDir/oldKey).string() << " to " << + (cachePath/firstDir/newObject.key).string() << "! Got " << strerror_r(renameErrno, buf, 80); logger->log(LOG_ERR, oss.str().c_str()); newObject.key = oldKey; } metadata.updateEntry(newObject.offset, newObject.key, err); } + + if (bf::file_size(cachePath/firstDir/newObject.key) != MetadataFile::getLengthFromKey(newObject.key)) + { + ostringstream oss; + oss << "IOCoordinator::write(): detected bad length field in " << newObject.key + << " real size = " << bf::file_size(cachePath/firstDir/newObject.key); + logger->log(LOG_ERR, oss.str().c_str()); + } + cache->newObject(firstDir, newObject.key,err); newObjectKeys.push_back(newObject.key); @@ -1034,7 +1061,7 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2) for (const auto &object : objects) { bf::path journalFile = journalPath/firstDir1/(object.key + ".journal"); - metadataObject newObj = meta2.addMetadataObject(filename2, object.length); + metadataObject newObj = meta2.addMetadataObject(filename2, MetadataFile::getLengthFromKey(object.key)); assert(newObj.offset == object.offset); err = cs->copyObject(object.key, newObj.key); if (err) @@ -1049,6 +1076,22 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2) ", dest = " + filename2 + ". Object " + object.key + " does not exist in either " "cloud storage or the cache!"); + if (bf::file_size(cachedObjPath) != MetadataFile::getLengthFromKey(object.key)) + { + ostringstream oss; + oss << "CopyFile: found a size mismatch in " << cachedObjPath << + " real size = " << bf::file_size(cachedObjPath); + logger->log(LOG_ERR, oss.str().c_str()); + } + + if (MetadataFile::getLengthFromKey(object.key) != MetadataFile::getLengthFromKey(newObj.key)) + { + ostringstream oss; + oss << "CopyFile: found a size mismatch in src and dest keys src = " << object.key << + " dest = " << newObj.key; + logger->log(LOG_ERR, oss.str().c_str()); + } + // put the copy in cloudstorage err = cs->putObject(cachedObjPath.string(), newObj.key); if (err) diff --git a/storage-manager/src/LocalStorage.cpp b/storage-manager/src/LocalStorage.cpp index 4f6c31958..c80e571fc 100644 --- a/storage-manager/src/LocalStorage.cpp +++ b/storage-manager/src/LocalStorage.cpp @@ -101,8 +101,11 @@ int LocalStorage::copy(const bf::path &source, const bf::path &dest) if (err) { errno = err.value(); + ::unlink(dest.string().c_str()); return -1; } + if (bf::file_size(source) != bf::file_size(dest)) + logger->log(LOG_ERR, "LocalStorage::copy: partially copied a file somehow"); return 0; } @@ -216,6 +219,7 @@ int LocalStorage::putObject(boost::shared_array data, size_t len, const l_errno = errno; //logger->log(LOG_CRIT, "LocalStorage::putObject(): Failed to write to %s, got '%s'", c_dest, strerror_r(errno, buf, 80)); close(fd); + ::unlink(c_dest); errno = l_errno; bytesWritten += count; return err; @@ -240,6 +244,8 @@ int LocalStorage::copyObject(const string &source, const string &dest) size_t _size = bf::file_size(prefix/source); bytesRead += _size; bytesWritten += _size; + if (bf::file_size(prefix/source) != bf::file_size(prefix/dest)) + logger->log(LOG_ERR, "LocalStorage::copyObject(): partially copied a file somehow"); } return ret; } diff --git a/storage-manager/src/Synchronizer.cpp b/storage-manager/src/Synchronizer.cpp index 100a265ef..da2820065 100644 --- a/storage-manager/src/Synchronizer.cpp +++ b/storage-manager/src/Synchronizer.cpp @@ -437,7 +437,7 @@ void Synchronizer::process(list::iterator name) catch(exception &e) { // these are often self-resolving, so we will suppress logging it for 10 iterations, then escalate // to error, then to crit - if (++retryCount >= 10) + //if (++retryCount >= 10) logger->log((retryCount < 20 ? LOG_ERR : LOG_CRIT), "Synchronizer::process(): error sync'ing %s opFlags=%d, got '%s'. Retrying...", key.c_str(), pending->opFlags, e.what()); success = false; @@ -467,7 +467,7 @@ void Synchronizer::synchronize(const string &sourceFile, list::iterator { ScopedReadLock s(ioc, sourceFile); - string &key = *it; + string key = *it; size_t pos = key.find_first_of('/'); bf::path prefix = key.substr(0, pos); string cloudKey = key.substr(pos + 1); @@ -499,7 +499,6 @@ void Synchronizer::synchronize(const string &sourceFile, list::iterator if (exists) return; - // TODO: should be safe to check with Cache instead of a file existence check exists = cache->exists(prefix, cloudKey); if (!exists) { @@ -507,9 +506,17 @@ void Synchronizer::synchronize(const string &sourceFile, list::iterator return; } + if (bf::file_size(cachePath/key) != MetadataFile::getLengthFromKey(cloudKey)) + { + ostringstream oss; + oss << "Synchronizer::synchronize(): found a size mismatch in key = " << cloudKey << + " real size = " << bf::file_size(cachePath/key); + logger->log(LOG_ERR, oss.str().c_str()); + } err = cs->putObject((cachePath / key).string(), cloudKey); if (err) throw runtime_error(string("synchronize(): uploading ") + key + ", got " + strerror_r(errno, buf, 80)); + numBytesRead += mdEntry.length; bytesReadBySync += mdEntry.length; numBytesUploaded += mdEntry.length; @@ -658,6 +665,20 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list // get a new key for the resolved version & upload it string newCloudKey = MetadataFile::getNewKeyFromOldKey(cloudKey, size); string newKey = (prefix/newCloudKey).string(); + +try { + if (size != MetadataFile::getLengthFromKey(newCloudKey)) + { + ostringstream oss; + oss << "SyncWithJournal: detected the file size mismatch on the merged object somehow. " << + "key = " << newCloudKey << "real size = " << bf::file_size(prefix/newCloudKey); + logger->log(LOG_ERR, oss.str().c_str()); + } +} catch(exception &e) +{ + logger->log(LOG_ERR, "DEB4"); +} + err = cs->putObject(data, size, newCloudKey); if (err) {