
Merging the prefix-ownership feature.

Squashed commit of the following:

commit 4a4c3dab2e6acf942bbdfd4d760c000bc9cbfc6a
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Tue Jul 30 10:50:08 2019 -0500

    Standardized a couple status msgs.

commit 1b76f7e6411424c9633dcd4ebe7f61e9fce2f0ac
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Tue Jul 30 09:19:31 2019 -0500

    Fixed the thread-joining-itself problem.

commit 4fdb79e87496eab64c4c5af72321bc57423297ba
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Mon Jul 29 17:15:05 2019 -0500

    Checkpointing.  Need to release ownership differently, realized
    I have 1 thread trying to join itself.

commit 04d0183735e9697d76a2472c6135d90755ca61b5
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Mon Jul 29 16:12:33 2019 -0500

    Checkpointing a whole lot of fixes.  test000 works here.

commit 72e9c998c62b095cad1cf33f885f6c7697bde214
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Fri Jul 26 16:29:15 2019 -0500

    Checkpointing.  Started debugging.  Several small fixes.  Doesn't work yet.

commit ab728e1481debec94d676e697954b1d164302a0c
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Fri Jul 26 13:15:47 2019 -0500

    Checkpointing.  Got everything to build.

commit a2c6d07cdc12c45530c1d5cf4205d3aee8738d80
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Fri Jul 26 12:59:26 2019 -0500

    Checkpointing.  Got the library to build.

commit 9f6bf19a64f512e17e6139b0fc04850cdcdb3b3a
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Fri Jul 26 12:24:46 2019 -0500

    Checkpointing.  Still WIP.  Feature touches everything.

commit a79ca8dc88a99f812432d5dca34ed54474df1933
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Thu Jul 25 16:38:20 2019 -0500

    Checkpointing more changes.

commit a9e81af3e4e00f8a3d30b3796a2c3aa94c04f7c0
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Thu Jul 25 15:07:44 2019 -0500

    Checkpointing changes to the other classes that need to be aware
    of separately managed prefixes.

commit d85dfaa401b49a7bb714701649dec303eb7c068c
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Wed Jul 24 14:24:23 2019 -0500

    Added the new class to CMakeLists.

commit 66d6d550b13be94ada107311574378bd848951cd
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Wed Jul 24 14:23:49 2019 -0500

    Checkpointing.  Got the new class to build except for a
    to-be-implemented fcn in Cache.

commit e1b62dba7f05b37b9f12681a53d6632c6ce66d54
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Wed Jul 24 14:23:09 2019 -0500

    Added some documentation to the object_size param.

commit e671cf37c49ed084fbdec1bac50fbaa5ad7c43f9
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Wed Jul 24 10:34:54 2019 -0500

    Checkpointing a new class to manage ownership of prefixes.

commit e5f234ff4c05b5157d37fa17c44d7f626f5e4eb3
Author: Patrick LeBlanc <patrick.leblanc@mariadb.com>
Date:   Tue Jul 23 15:36:48 2019 -0500

    Fixed some copy/paste typos in MetadataFile config err msgs.
Author: Patrick LeBlanc
Date:   2019-07-30 10:51:05 -05:00
Parent: 5f43f55ea5
Commit: 1c5979576e
21 changed files with 1640 additions and 900 deletions
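Net effect of the branch, before the per-file diffs: the old monolithic Cache (one LRU, one size counter, one cache directory) becomes a thin router, and the new PrefixCache class owns the LRU, do-not-evict set, and size accounting for a single prefix. A minimal sketch of that routing shape, with invented stand-in names (only the map-and-forward pattern is taken from the diff, not the real class bodies):

    #include <map>
    #include <string>
    #include <vector>
    #include <boost/filesystem/path.hpp>
    #include <boost/thread/mutex.hpp>
    namespace bf = boost::filesystem;

    // Stand-in for the real PrefixCache: it owns all per-prefix cache state.
    class PrefixCache
    {
      public:
        explicit PrefixCache(const bf::path &prefix) : prefix(prefix) {}
        void read(const std::vector<std::string> &keys) { /* per-prefix LRU work */ }
      private:
        bf::path prefix;
    };

    class Cache
    {
      public:
        // Every public call now takes the owning prefix and forwards to its PrefixCache.
        void read(const bf::path &prefix, const std::vector<std::string> &keys)
        {
            getPCache(prefix).read(keys);
        }
      private:
        PrefixCache & getPCache(const bf::path &prefix)
        {
            boost::unique_lock<boost::mutex> s(mtx);
            return *caches.at(prefix);   // the real code also waits for in-flight construction
        }
        std::map<bf::path, PrefixCache *> caches;
        boost::mutex mtx;    // protects 'caches' only; per-prefix state has its own lock
    };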

CMakeLists.txt

@@ -86,6 +86,8 @@ set(storagemanager_SRCS
     src/MetadataFile.cpp
     src/Replicator.cpp
     src/Utilities.cpp
+    src/Ownership.cpp
+    src/PrefixCache.cpp
 )
 
 option(TRACE "Enable some tracing output" OFF)

src/Cache.cpp

@@ -34,11 +34,10 @@ Cache * Cache::get()
     return inst;
 }
 
-Cache::Cache() : currentCacheSize(0)
+Cache::Cache()
 {
     Config *conf = Config::get();
     logger = SMLogging::get();
-    replicator = Replicator::get();
 
     string stmp = conf->getValue("Cache", "cache_size");
     if (stmp.empty())
@@ -73,8 +72,8 @@ Cache::Cache() : currentCacheSize(0)
         throw runtime_error("Please set ObjectStorage/object_size to a number");
     }
-    prefix = conf->getValue("Cache", "path");
-    if (prefix.empty())
+    cachePrefix = conf->getValue("Cache", "path");
+    if (cachePrefix.empty())
     {
         logger->log(LOG_CRIT, "Cache/path is not set");
         throw runtime_error("Please set Cache/path in the storagemanager.cnf file");
@@ -82,17 +81,16 @@ Cache::Cache() : currentCacheSize(0)
     try
     {
-        bf::create_directories(prefix);
+        bf::create_directories(cachePrefix);
     }
     catch (exception &e)
     {
-        logger->log(LOG_CRIT, "Failed to create %s, got: %s", prefix.string().c_str(), e.what());
+        logger->log(LOG_CRIT, "Failed to create %s, got: %s", cachePrefix.string().c_str(), e.what());
         throw e;
     }
-    //cout << "Cache got prefix " << prefix << endl;
+    //cout << "Cache got cachePrefix " << cachePrefix << endl;
 
     downloader.reset(new Downloader());
-    downloader->setDownloadPath(prefix.string());
-    downloader->useThisLock(&lru_mutex);
+    downloader->setDownloadPath(cachePrefix);
 
     stmp = conf->getValue("ObjectStorage", "journal_path");
     if (stmp.empty())
@@ -111,64 +109,12 @@ Cache::Cache() : currentCacheSize(0)
         logger->log(LOG_CRIT, "Failed to create %s, got: %s", journalPrefix.string().c_str(), e.what());
         throw e;
     }
-    lru_mutex.lock();   // unlocked by populate() when it's done
-    boost::thread t([this] { this->populate(); });
-    t.detach();
 }
 
 Cache::~Cache()
 {
-}
-
-void Cache::populate()
-{
-    Synchronizer *sync = Synchronizer::get();
-    bf::directory_iterator dir(prefix);
-    bf::directory_iterator dend;
-    vector<string> newObjects;
-    while (dir != dend)
-    {
-        // put everything in lru & m_lru
-        const bf::path &p = dir->path();
-        if (bf::is_regular_file(p))
-        {
-            lru.push_back(p.filename().string());
-            auto last = lru.end();
-            m_lru.insert(--last);
-            currentCacheSize += bf::file_size(*dir);
-            newObjects.push_back(p.filename().string());
-        }
-        else
-            logger->log(LOG_WARNING, "Cache: found something in the cache that does not belong '%s'", p.string().c_str());
-        ++dir;
-    }
-    sync->newObjects(newObjects);
-    newObjects.clear();
-
-    // account for what's in the journal dir
-    vector<pair<string, size_t> > newJournals;
-    dir = bf::directory_iterator(journalPrefix);
-    while (dir != dend)
-    {
-        const bf::path &p = dir->path();
-        if (bf::is_regular_file(p))
-        {
-            if (p.extension() == ".journal")
-            {
-                size_t s = bf::file_size(*dir);
-                currentCacheSize += s;
-                newJournals.push_back(pair<string, size_t>(p.stem().string(), s));
-            }
-            else
-                logger->log(LOG_WARNING, "Cache: found a file in the journal dir that does not belong '%s'", p.string().c_str());
-        }
-        else
-            logger->log(LOG_WARNING, "Cache: found something in the journal dir that does not belong '%s'", p.string().c_str());
-        ++dir;
-    }
-    lru_mutex.unlock();
-    sync->newJournalEntries(newJournals);
+    for (auto it = prefixCaches.begin(); it != prefixCaches.end(); ++it)
+        delete it->second;
 }
 
 // be careful using this!  SM should be idle.  No ongoing reads or writes.
@@ -176,308 +122,88 @@ void Cache::validateCacheSize()
 {
     boost::unique_lock<boost::mutex> s(lru_mutex);
-    if (!doNotEvict.empty() || !toBeDeleted.empty())
-    {
-        cout << "Not safe to use validateCacheSize() at the moment." << endl;
-        return;
-    }
-
-    size_t oldSize = currentCacheSize;
-    currentCacheSize = 0;
-    m_lru.clear();
-    lru.clear();
-    populate();
-    if (oldSize != currentCacheSize)
-        logger->log(LOG_DEBUG, "Cache::validateCacheSize(): found a discrepancy.  Actual size is %lld, had %lld.",
-            currentCacheSize, oldSize);
-    else
-        logger->log(LOG_DEBUG, "Cache::validateCacheSize(): Cache size accounting agrees with reality for now.");
+    for (auto it = prefixCaches.begin(); it != prefixCaches.end(); ++it)
+        it->second->validateCacheSize();
 }
 
-#if 0
-/* Need to simplify this, we keep running into sneaky problems, and I just spotted a couple more.
-Just going to rewrite it.  We can revisit later if the simplified version needs improvement */
-void Cache::read(const vector<string> &keys)
-{
-    /*
-        move existing keys to a do-not-evict map
-        fetch keys that do not exist
-        after fetching, move all keys from do-not-evict to the back of the LRU
-    */
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    vector<const string *> keysToFetch;
-    uint i;
-    M_LRU_t::iterator mit;
-    for (const string &key : keys)
-    {
-        mit = m_lru.find(key);
-        if (mit != m_lru.end())
-            // it's in the cache, add the entry to the do-not-evict set
-            addToDNE(mit->lit);
-        else
-            // not in the cache, put it in the list to download
-            keysToFetch.push_back(&key);
-    }
-    //s.unlock();
-
-    // start downloading the keys to fetch
-    // TODO: there should be a path for the common case, which is that there is nothing
-    // to download.
-    vector<int> dl_errnos;
-    vector<size_t> sizes;
-    if (!keysToFetch.empty())
-        downloader->download(keysToFetch, &dl_errnos, &sizes, lru_mutex);
-    size_t sum_sizes = 0;
-    for (size_t &size : sizes)
-        sum_sizes += size;
-
-    //s.lock();
-    // do makespace() before downloading.  Problem is, until the download is finished, this fcn can't tell which
-    // downloads it was responsible for.  Need Downloader to make the call...?
-    _makeSpace(sum_sizes);
-    currentCacheSize += sum_sizes;
-
-    // move all keys to the back of the LRU
-    for (i = 0; i < keys.size(); i++)
-    {
-        mit = m_lru.find(keys[i]);
-        if (mit != m_lru.end())
-        {
-            lru.splice(lru.end(), lru, mit->lit);
-            LRU_t::iterator lit = lru.end();
-            removeFromDNE(--lit);
-        }
-        else if (dl_errnos[i] == 0)   // successful download
-        {
-            lru.push_back(keys[i]);
-            LRU_t::iterator lit = lru.end();
-            m_lru.insert(M_LRU_element_t(--lit));
-        }
-        else
-        {
-            // Downloader already logged it, anything to do here?
-            /* brainstorming options for handling it.
-               1) Should it be handled?  The caller will log a file-not-found error, and there will be a download
-                  failure in the log already.
-               2) Can't really DO anything can it?
-            */
-        }
-    }
-}
-#endif
-
 // new simplified version
-void Cache::read(const vector<string> &keys)
+void Cache::read(const bf::path &prefix, const vector<string> &keys)
 {
-    /* Move existing keys to the back of the LRU, start downloading nonexistant keys.
-    */
-    vector<const string *> keysToFetch;
-    vector<int> dlErrnos;
-    vector<size_t> dlSizes;
-
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-
-    M_LRU_t::iterator mit;
-    for (const string &key : keys)
-    {
-        mit = m_lru.find(key);
-        if (mit != m_lru.end())
-        {
-            addToDNE(mit->lit);
-            lru.splice(lru.end(), lru, mit->lit);   // move them to the back so they are last to pick for eviction
-        }
-        else
-        {
-            // There's window where the file has been downloaded but is not yet
-            // added to the lru structs.  However it is in the DNE.  If it is in the DNE, then it is also
-            // in Downloader's map.  So, this thread needs to start the download if it's not in the
-            // DNE or if there's an existing download that hasn't finished yet.  Starting the download
-            // includes waiting for an existing download to finish, which from this class's pov is the
-            // same thing.
-            if (doNotEvict.find(key) == doNotEvict.end() || downloader->inProgress(key))
-                keysToFetch.push_back(&key);
-            else
-                cout << "Cache: detected and stopped a racey download" << endl;
-            addToDNE(key);
-        }
-    }
-    if (keysToFetch.empty())
-        return;
-
-    downloader->download(keysToFetch, &dlErrnos, &dlSizes);
-
-    size_t sum_sizes = 0;
-    for (uint i = 0; i < keysToFetch.size(); ++i)
-    {
-        // downloads with size 0 didn't actually happen, either because it
-        // was a preexisting download (another read() call owns it), or because
-        // there was an error downloading it.  Use size == 0 as an indication of
-        // what to add to the cache.  Also needs to verify that the file was not deleted,
-        // indicated by existence in doNotEvict.
-        if (dlSizes[i] != 0)
-        {
-            if (doNotEvict.find(*keysToFetch[i]) != doNotEvict.end())
-            {
-                sum_sizes += dlSizes[i];
-                lru.push_back(*keysToFetch[i]);
-                LRU_t::iterator lit = lru.end();
-                m_lru.insert(--lit);   // I dislike this way of grabbing the last iterator in a list.
-            }
-            else   // it was downloaded, but a deletion happened so we have to toss it
-            {
-                cout << "removing a file that was deleted by another thread during download" << endl;
-                bf::remove(prefix / (*keysToFetch[i]));
-            }
-        }
-    }
-
-    // move everything in keys to the back of the lru (yes, again)
-    for (const string &key : keys)
-    {
-        mit = m_lru.find(key);
-        if (mit != m_lru.end())   // all of the files exist, just not all of them are 'owned by' this thread.
-            lru.splice(lru.end(), lru, mit->lit);
-    }
-
-    // fix cache size
-    //_makeSpace(sum_sizes);
-    currentCacheSize += sum_sizes;
+    getPCache(prefix).read(keys);
 }
 
-void Cache::doneReading(const vector<string> &keys)
+void Cache::doneReading(const bf::path &prefix, const vector<string> &keys)
 {
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    for (const string &key : keys)
-    {
-        removeFromDNE(key);
-        // most should be in the map.
-        // debateable whether it's faster to look up the list iterator and use it
-        // or whether it's faster to bypass that and use strings only.
-        //const auto &it = m_lru.find(key);
-        //if (it != m_lru.end())
-        //    removeFromDNE(it->lit);
-    }
-    _makeSpace(0);
+    getPCache(prefix).doneReading(keys);
 }
 
-void Cache::doneWriting()
+void Cache::doneWriting(const bf::path &prefix)
 {
-    makeSpace(0);
+    getPCache(prefix).makeSpace(0);
 }
 
-Cache::DNEElement::DNEElement(const LRU_t::iterator &k) : key(k), refCount(1)
-{
-}
-
-Cache::DNEElement::DNEElement(const string &k) : sKey(k), refCount(1)
-{
-}
-
-void Cache::addToDNE(const DNEElement &key)
-{
-    DNE_t::iterator it = doNotEvict.find(key);
-    if (it != doNotEvict.end())
-    {
-        DNEElement &dnee = const_cast<DNEElement &>(*it);
-        ++(dnee.refCount);
-    }
-    else
-        doNotEvict.insert(key);
-}
-
-void Cache::removeFromDNE(const DNEElement &key)
-{
-    DNE_t::iterator it = doNotEvict.find(key);
-    if (it == doNotEvict.end())
-        return;
-    DNEElement &dnee = const_cast<DNEElement &>(*it);
-    if (--(dnee.refCount) == 0)
-        doNotEvict.erase(it);
-}
-
-const bf::path & Cache::getCachePath()
+const bf::path & Cache::getCachePath() const
 {
-    return prefix;
+    return cachePrefix;
 }
 
-const bf::path & Cache::getJournalPath()
+const bf::path & Cache::getJournalPath() const
 {
     return journalPrefix;
 }
 
+const bf::path Cache::getCachePath(const bf::path &prefix) const
+{
+    return cachePrefix/prefix;
+}
+
+const bf::path Cache::getJournalPath(const bf::path &prefix) const
+{
+    return journalPrefix/prefix;
+}
+
-void Cache::exists(const vector<string> &keys, vector<bool> *out) const
+void Cache::exists(const bf::path &prefix, const vector<string> &keys, vector<bool> *out)
 {
-    out->resize(keys.size());
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    for (uint i = 0; i < keys.size(); i++)
-        (*out)[i] = (m_lru.find(keys[i]) != m_lru.end());
+    getPCache(prefix).exists(keys, out);
 }
 
-bool Cache::exists(const string &key) const
+bool Cache::exists(const bf::path &prefix, const string &key)
 {
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    return m_lru.find(key) != m_lru.end();
+    return getPCache(prefix).exists(key);
 }
 
-void Cache::newObject(const string &key, size_t size)
+void Cache::newObject(const bf::path &prefix, const string &key, size_t size)
 {
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-
-    assert(m_lru.find(key) == m_lru.end());
-    //_makeSpace(size);
-    lru.push_back(key);
-    LRU_t::iterator back = lru.end();
-    m_lru.insert(--back);
-    currentCacheSize += size;
+    getPCache(prefix).newObject(key, size);
 }
 
-void Cache::newJournalEntry(size_t size)
+void Cache::newJournalEntry(const bf::path &prefix, size_t size)
 {
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    //_makeSpace(size);
-    currentCacheSize += size;
+    getPCache(prefix).newJournalEntry(size);
 }
 
-void Cache::deletedJournal(size_t size)
+void Cache::deletedJournal(const bf::path &prefix, size_t size)
 {
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    assert(currentCacheSize >= size);
-    currentCacheSize -= size;
+    getPCache(prefix).deletedJournal(size);
 }
 
-void Cache::deletedObject(const string &key, size_t size)
+void Cache::deletedObject(const bf::path &prefix, const string &key, size_t size)
 {
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    assert(currentCacheSize >= size);
-    M_LRU_t::iterator mit = m_lru.find(key);
-    assert(mit != m_lru.end());
-
-    // if it's being flushed, let makeSpace() do the deleting
-    if (toBeDeleted.find(mit->lit) == toBeDeleted.end())
-    {
-        doNotEvict.erase(mit->lit);
-        lru.erase(mit->lit);
-        m_lru.erase(mit);
-        currentCacheSize -= size;
-    }
+    getPCache(prefix).deletedObject(key, size);
 }
 
 void Cache::setMaxCacheSize(size_t size)
 {
     boost::unique_lock<boost::mutex> s(lru_mutex);
-    if (size < maxCacheSize)
-        _makeSpace(maxCacheSize - size);
     maxCacheSize = size;
+    for (auto it = prefixCaches.begin(); it != prefixCaches.end(); ++it)
+        it->second->setMaxCacheSize(size);
 }
 
-void Cache::makeSpace(size_t size)
+void Cache::makeSpace(const bf::path &prefix, size_t size)
 {
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    _makeSpace(size);
+    getPCache(prefix).makeSpace(size);
 }
 
 size_t Cache::getMaxCacheSize() const
@@ -485,188 +211,101 @@ size_t Cache::getMaxCacheSize() const
     return maxCacheSize;
 }
 
-// call this holding lru_mutex
-void Cache::_makeSpace(size_t size)
+void Cache::rename(const bf::path &prefix, const string &oldKey, const string &newKey, ssize_t sizediff)
 {
-    ssize_t thisMuch = currentCacheSize + size - maxCacheSize;
-    if (thisMuch <= 0)
-        return;
-
-    LRU_t::iterator it;
-    while (thisMuch > 0 && !lru.empty())
-    {
-        it = lru.begin();
-        // find the first element not being either read() right now or being processed by another
-        // makeSpace() call.
-        while (it != lru.end())
-        {
-            // make sure it's not currently being read or being flushed by another _makeSpace() call
-            if ((doNotEvict.find(it) == doNotEvict.end()) && (toBeDeleted.find(it) == toBeDeleted.end()))
-                break;
-            ++it;
-        }
-        if (it == lru.end())
-        {
-            // nothing can be deleted right now
-            return;
-        }
-
-        if (!bf::exists(prefix / *it))
-            cout << prefix / *it << " doesn't exist, WTF?" << endl;   // ran into this a couple times, still happens as of commit 948ee1aa5
-        assert(bf::exists(prefix / *it));
-        /*
-            tell Synchronizer that this key will be evicted
-            delete the file
-            remove it from our structs
-            update current size
-        */
-        //logger->log(LOG_WARNING, "Cache: flushing!");
-        toBeDeleted.insert(it);
-        string key = *it;   // need to make a copy; it could get changed after unlocking.
-        lru_mutex.unlock();
-        try
-        {
-            Synchronizer::get()->flushObject(key);
-        }
-        catch (...)
-        {
-            // it gets logged by Sync
-            lru_mutex.lock();
-            toBeDeleted.erase(it);
-            continue;
-        }
-        lru_mutex.lock();
-
-        // check doNotEvict again in case this object is now being read
-        if (doNotEvict.find(it) == doNotEvict.end())
-        {
-            bf::path cachedFile = prefix / *it;
-            m_lru.erase(*it);
-            toBeDeleted.erase(it);
-            lru.erase(it);
-            size_t newSize = bf::file_size(cachedFile);
-            replicator->remove(cachedFile, Replicator::LOCAL_ONLY);
-            if (newSize < currentCacheSize)
-            {
-                currentCacheSize -= newSize;
-                thisMuch -= newSize;
-            }
-            else
-            {
-                logger->log(LOG_WARNING, "Cache::makeSpace(): accounting error.  Almost wrapped currentCacheSize on flush.");
-                currentCacheSize = 0;
-                thisMuch = 0;
-            }
-        }
-        else
-            toBeDeleted.erase(it);
-    }
+    getPCache(prefix).rename(oldKey, newKey, sizediff);
 }
 
-void Cache::rename(const string &oldKey, const string &newKey, ssize_t sizediff)
+int Cache::ifExistsThenDelete(const bf::path &prefix, const string &key)
 {
-    // rename it in the LRU
-    // erase/insert to rehash it everywhere else
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    auto it = m_lru.find(oldKey);
-    if (it == m_lru.end())
-        return;
-
-    auto lit = it->lit;
-    m_lru.erase(it);
-    int refCount = 0;
-    auto dne_it = doNotEvict.find(lit);
-    if (dne_it != doNotEvict.end())
-    {
-        refCount = dne_it->refCount;
-        doNotEvict.erase(dne_it);
-    }
-    auto tbd_it = toBeDeleted.find(lit);
-    bool hasTBDEntry = (tbd_it != toBeDeleted.end());
-    if (hasTBDEntry)
-        toBeDeleted.erase(tbd_it);
-
-    *lit = newKey;
-
-    if (hasTBDEntry)
-        toBeDeleted.insert(lit);
-    if (refCount != 0)
-    {
-        pair<DNE_t::iterator, bool> dne_tmp = doNotEvict.insert(lit);
-        const_cast<DNEElement &>(*(dne_tmp.first)).refCount = refCount;
-    }
-    m_lru.insert(lit);
-    currentCacheSize += sizediff;
+    return getPCache(prefix).ifExistsThenDelete(key);
 }
 
-int Cache::ifExistsThenDelete(const string &key)
+size_t Cache::getCurrentCacheSize()
 {
-    bf::path cachedPath = prefix / key;
-    bf::path journalPath = journalPrefix / (key + ".journal");
-
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    bool objectExists = false;
-    auto it = m_lru.find(key);
-    if (it != m_lru.end())
-    {
-        if (toBeDeleted.find(it->lit) == toBeDeleted.end())
-        {
-            doNotEvict.erase(it->lit);
-            lru.erase(it->lit);
-            m_lru.erase(it);
-            objectExists = true;
-        }
-        else   // let makeSpace() delete it if it's already in progress
-            return 0;
-    }
-    bool journalExists = bf::exists(journalPath);
-    //assert(objectExists == bf::exists(cachedPath));
-    size_t objectSize = (objectExists ? bf::file_size(cachedPath) : 0);
-    //size_t objectSize = (objectExists ? MetadataFile::getLengthFromKey(key) : 0);
-    size_t journalSize = (journalExists ? bf::file_size(journalPath) : 0);
-    currentCacheSize -= (objectSize + journalSize);
-    //assert(!objectExists || objectSize == bf::file_size(cachedPath));
-
-    return (objectExists ? 1 : 0) | (journalExists ? 2 : 0);
+    size_t totalSize = 0;
+    boost::unique_lock<boost::mutex> s(lru_mutex);
+    for (auto it = prefixCaches.begin(); it != prefixCaches.end(); ++it)
+        totalSize += it->second->getCurrentCacheSize();
+    return totalSize;
 }
 
-size_t Cache::getCurrentCacheSize() const
+size_t Cache::getCurrentCacheElementCount()
 {
-    return currentCacheSize;
+    size_t totalCount = 0;
+    boost::unique_lock<boost::mutex> s(lru_mutex);
+    for (auto it = prefixCaches.begin(); it != prefixCaches.end(); ++it)
+        totalCount += it->second->getCurrentCacheElementCount();
+    return totalCount;
 }
 
-size_t Cache::getCurrentCacheElementCount() const
+size_t Cache::getCurrentCacheSize(const bf::path &prefix)
 {
-    boost::unique_lock<boost::mutex> s(lru_mutex);
-    assert(m_lru.size() == lru.size());
-    return m_lru.size();
+    return getPCache(prefix).getCurrentCacheSize();
 }
 
+size_t Cache::getCurrentCacheElementCount(const bf::path &prefix)
+{
+    return getPCache(prefix).getCurrentCacheElementCount();
+}
+
 void Cache::reset()
 {
     boost::unique_lock<boost::mutex> s(lru_mutex);
-    m_lru.clear();
-    lru.clear();
-    toBeDeleted.clear();
-    doNotEvict.clear();
-
-    bf::directory_iterator dir;
-    bf::directory_iterator dend;
-    for (dir = bf::directory_iterator(prefix); dir != dend; ++dir)
-        bf::remove_all(dir->path());
-    for (dir = bf::directory_iterator(journalPrefix); dir != dend; ++dir)
-        bf::remove_all(dir->path());
-    currentCacheSize = 0;
+    for (auto it = prefixCaches.begin(); it != prefixCaches.end(); ++it)
+        it->second->reset();
 }
 
+void Cache::newPrefix(const bf::path &prefix)
+{
+    boost::unique_lock<boost::mutex> s(lru_mutex);
+    //cerr << "Cache: making new prefix " << prefix.string() << endl;
+    assert(prefixCaches.find(prefix) == prefixCaches.end());
+    prefixCaches[prefix] = NULL;
+    s.unlock();
+    PrefixCache *pCache = new PrefixCache(prefix);
+    s.lock();
+    prefixCaches[prefix] = pCache;
+}
+
+void Cache::dropPrefix(const bf::path &prefix)
+{
+    boost::unique_lock<boost::mutex> s(lru_mutex);
+    auto *pCache = prefixCaches[prefix];
+    prefixCaches.erase(prefix);
+    s.unlock();
+    delete pCache;
+}
+
+inline PrefixCache & Cache::getPCache(const bf::path &prefix)
+{
+    boost::unique_lock<boost::mutex> s(lru_mutex);
+    //cerr << "Getting pcache for " << prefix.string() << endl;
+    PrefixCache *ret;
+    auto it = prefixCaches.find(prefix);
+    assert(it != prefixCaches.end());
+    ret = it->second;
+    while (ret == NULL)   // wait for the new PC to init
+    {
+        s.unlock();
+        sleep(1);
+        s.lock();
+        ret = prefixCaches[prefix];
+    }
+    return *ret;
+}
+
+Downloader * Cache::getDownloader() const
+{
+    return downloader.get();
+}
+
 void Cache::shutdown()
@@ -675,48 +314,4 @@ void Cache::shutdown()
     downloader.reset();
 }
 
-/* The helper classes */
-
-Cache::M_LRU_element_t::M_LRU_element_t(const string *k) : key(k)
-{}
-
-Cache::M_LRU_element_t::M_LRU_element_t(const string &k) : key(&k)
-{}
-
-Cache::M_LRU_element_t::M_LRU_element_t(const LRU_t::iterator &i) : key(&(*i)), lit(i)
-{}
-
-inline size_t Cache::KeyHasher::operator()(const M_LRU_element_t &l) const
-{
-    return hash<string>()(*(l.key));
-}
-
-inline bool Cache::KeyEquals::operator()(const M_LRU_element_t &l1, const M_LRU_element_t &l2) const
-{
-    return (*(l1.key) == *(l2.key));
-}
-
-inline size_t Cache::DNEHasher::operator()(const DNEElement &l) const
-{
-    return (l.sKey.empty() ? hash<string>()(*(l.key)) : hash<string>()(l.sKey));
-}
-
-inline bool Cache::DNEEquals::operator()(const DNEElement &l1, const DNEElement &l2) const
-{
-    const string *s1, *s2;
-    s1 = l1.sKey.empty() ? &(*(l1.key)) : &(l1.sKey);
-    s2 = l2.sKey.empty() ? &(*(l2.key)) : &(l2.sKey);
-    return (*s1 == *s2);
-}
-
-inline bool Cache::TBDLess::operator()(const LRU_t::iterator &i1, const LRU_t::iterator &i2) const
-{
-    return *i1 < *i2;
-}
 
 }
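A note on the init handshake above: newPrefix() publishes a NULL placeholder under lru_mutex, constructs the PrefixCache outside the lock (construction may be slow), then fills the slot in; getPCache() polls with sleep(1) until the slot is non-NULL. A condition variable is the conventional alternative to that polling loop. A hedged sketch of the same handshake under that assumption (names invented, not the committed code; the PrefixCache stub stands in for the real class this commit adds):

    #include <map>
    #include <boost/filesystem/path.hpp>
    #include <boost/thread/mutex.hpp>
    #include <boost/thread/condition.hpp>
    namespace bf = boost::filesystem;

    // Stub standing in for the PrefixCache class added by this commit.
    struct PrefixCache
    {
        explicit PrefixCache(const bf::path &) { /* slow: scans the cache dir */ }
    };

    std::map<bf::path, PrefixCache *> prefixCaches;   // NULL means "being constructed"
    boost::mutex lru_mutex;
    boost::condition prefixReady;

    void newPrefix(const bf::path &prefix)
    {
        boost::unique_lock<boost::mutex> s(lru_mutex);
        prefixCaches[prefix] = NULL;                 // reserve the slot
        s.unlock();
        PrefixCache *pc = new PrefixCache(prefix);   // construct without holding the lock
        s.lock();
        prefixCaches[prefix] = pc;
        prefixReady.notify_all();                    // wake any getPCache() waiters
    }

    PrefixCache & getPCache(const bf::path &prefix)
    {
        boost::unique_lock<boost::mutex> s(lru_mutex);
        while (prefixCaches[prefix] == NULL)         // wait instead of sleep(1) polling
            prefixReady.wait(s);
        return *prefixCaches[prefix];
    }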

src/Cache.h

@@ -1,15 +1,19 @@
#ifndef CACHE_H_ #ifndef CACHE_H_
#define CACHE_H_ #define CACHE_H_
/* PrefixCache manages the cache for one prefix managed by SM.
Cache is a map of prefix -> Cache, and holds the items
that should be centralized like the Downloader
*/
#include "Downloader.h" #include "Downloader.h"
#include "SMLogging.h" #include "SMLogging.h"
#include "Replicator.h" #include "PrefixCache.h"
#include <string> #include <string>
#include <vector> #include <vector>
#include <list> #include <map>
#include <unordered_set>
#include <boost/utility.hpp> #include <boost/utility.hpp>
#include <boost/filesystem/path.hpp> #include <boost/filesystem/path.hpp>
#include <boost/thread/mutex.hpp> #include <boost/thread/mutex.hpp>
@@ -26,120 +30,66 @@ class Cache : public boost::noncopyable
//reading fcns //reading fcns
// read() marks objects to be read s.t. they do not get flushed. // read() marks objects to be read s.t. they do not get flushed.
// after reading them, unlock the 'logical file', and call doneReading(). // after reading them, unlock the 'logical file', and call doneReading().
void read(const std::vector<std::string> &keys); void read(const boost::filesystem::path &prefix, const std::vector<std::string> &keys);
void doneReading(const std::vector<std::string> &keys); void doneReading(const boost::filesystem::path &prefix, const std::vector<std::string> &keys);
bool exists(const std::string &key) const; bool exists(const boost::filesystem::path &prefix, const std::string &key);
void exists(const std::vector<std::string> &keys, std::vector<bool> *out) const; void exists(const boost::filesystem::path &prefix, const std::vector<std::string> &keys,
std::vector<bool> *out);
// writing fcns // writing fcns
// new*() fcns tell the cache data was added. After writing a set of objects, // new*() fcns tell the Cache data was added. After writing a set of objects,
// unlock the 'logical file', and call doneWriting(). // unlock the 'logical file', and call doneWriting().
void newObject(const std::string &key, size_t size); void newObject(const boost::filesystem::path &prefix, const std::string &key, size_t size);
void newJournalEntry(size_t size); void newJournalEntry(const boost::filesystem::path &prefix, size_t size);
void doneWriting(); void doneWriting(const boost::filesystem::path &prefix);
void deletedObject(const std::string &key, size_t size); void deletedObject(const boost::filesystem::path &prefix, const std::string &key, size_t size);
void deletedJournal(size_t size); void deletedJournal(const boost::filesystem::path &prefix, size_t size);
// an 'atomic' existence check & delete. Covers the object and journal. Does not delete the files. // an 'atomic' existence check & delete. Covers the object and journal. Does not delete the files.
// returns 0 if it didn't exist, 1 if the object exists, 2 if the journal exists, and 3 (1 | 2) if both exist // returns 0 if it didn't exist, 1 if the object exists, 2 if the journal exists, and 3 (1 | 2) if both exist
// This should be called while holding the file lock for key because it touches the journal file. // This should be called while holding the file lock for key because it touches the journal file.
int ifExistsThenDelete(const std::string &key); int ifExistsThenDelete(const boost::filesystem::path &prefix, const std::string &key);
// rename is used when an old obj gets merged with its journal file // rename is used when an old obj gets merged with its journal file
// the size will change in that process; sizediff is by how much // the size will change in that process; sizediff is by how much
void rename(const std::string &oldKey, const std::string &newKey, ssize_t sizediff); void rename(const boost::filesystem::path &prefix, const std::string &oldKey,
const std::string &newKey, ssize_t sizediff);
void setMaxCacheSize(size_t size); void setMaxCacheSize(size_t size);
void makeSpace(size_t size); void makeSpace(const boost::filesystem::path &prefix, size_t size);
size_t getCurrentCacheSize() const; size_t getCurrentCacheSize();
size_t getCurrentCacheElementCount() const; size_t getCurrentCacheElementCount();
size_t getCurrentCacheSize(const boost::filesystem::path &prefix);
size_t getCurrentCacheElementCount(const boost::filesystem::path &prefix);
size_t getMaxCacheSize() const; size_t getMaxCacheSize() const;
Downloader * getDownloader() const;
void newPrefix(const boost::filesystem::path &prefix);
void dropPrefix(const boost::filesystem::path &prefix);
void shutdown(); void shutdown();
// test helpers // test helpers
const boost::filesystem::path &getCachePath(); const boost::filesystem::path &getCachePath() const;
const boost::filesystem::path &getJournalPath(); const boost::filesystem::path &getJournalPath() const;
// this will delete everything in the cache and journal paths, and empty all Cache structures. const boost::filesystem::path getCachePath(const boost::filesystem::path &prefix) const;
const boost::filesystem::path getJournalPath(const boost::filesystem::path &prefix) const;
// this will delete everything in the Cache and journal paths, and empty all Cache structures.
void reset(); void reset();
void validateCacheSize(); void validateCacheSize();
private: private:
Cache(); Cache();
boost::filesystem::path prefix; SMLogging *logger;
boost::filesystem::path cachePrefix;
boost::filesystem::path journalPrefix; boost::filesystem::path journalPrefix;
size_t maxCacheSize; size_t maxCacheSize;
size_t objectSize; size_t objectSize;
size_t currentCacheSize;
boost::scoped_ptr<Downloader> downloader; boost::scoped_ptr<Downloader> downloader;
Replicator *replicator;
SMLogging *logger;
void populate(); PrefixCache & getPCache(const boost::filesystem::path &prefix);
void _makeSpace(size_t size);
/* The main cache structures */ std::map<boost::filesystem::path, PrefixCache *> prefixCaches;
// lru owns the string memory for the filenames it manages. m_lru and DNE point to those strings. mutable boost::mutex lru_mutex; // protects the prefixCaches
typedef std::list<std::string> LRU_t;
LRU_t lru;
struct M_LRU_element_t
{
M_LRU_element_t(const std::string &);
M_LRU_element_t(const std::string *);
M_LRU_element_t(const LRU_t::iterator &);
const std::string *key;
LRU_t::iterator lit;
};
struct KeyHasher
{
size_t operator()(const M_LRU_element_t &l) const;
};
struct KeyEquals
{
bool operator()(const M_LRU_element_t &l1, const M_LRU_element_t &l2) const;
};
typedef std::unordered_set<M_LRU_element_t, KeyHasher, KeyEquals> M_LRU_t;
M_LRU_t m_lru; // the LRU entries as a hash table
/* The do-not-evict list stuff. */
struct DNEElement
{
DNEElement(const LRU_t::iterator &);
DNEElement(const std::string &);
LRU_t::iterator key;
std::string sKey;
uint refCount;
};
struct DNEHasher
{
size_t operator()(const DNEElement &d) const;
};
struct DNEEquals
{
bool operator()(const DNEElement &d1, const DNEElement &d2) const;
};
typedef std::unordered_set<DNEElement, DNEHasher, DNEEquals> DNE_t;
DNE_t doNotEvict;
void addToDNE(const DNEElement &);
void removeFromDNE(const DNEElement &);
// the to-be-deleted set. Elements removed from the LRU but not yet deleted will be here.
// Elements are inserted and removed by makeSpace(). If read() references a file that is in this,
// it will remove it, signalling to makeSpace that it should not be deleted
struct TBDLess
{
bool operator()(const LRU_t::iterator &, const LRU_t::iterator &) const;
};
typedef std::set<LRU_t::iterator, TBDLess> TBD_t;
TBD_t toBeDeleted;
mutable boost::mutex lru_mutex; // protects the main cache structures & the do-not-evict set
}; };
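Taken together, the new header makes the prefix a first-class argument on every call. A plausible call sequence under the new API (a sketch only: the prefix and key values are invented, error handling is omitted, and the storagemanager namespace is taken from the source files above):

    #include <string>
    #include <vector>
    #include <boost/filesystem/path.hpp>
    namespace bf = boost::filesystem;
    using namespace storagemanager;

    void exampleReader()
    {
        Cache *cache = Cache::get();          // singleton accessor, unchanged by this commit
        const bf::path prefix("pfx0001");     // invented prefix value

        cache->newPrefix(prefix);             // once, when this prefix becomes locally owned

        std::vector<std::string> keys;
        keys.push_back("key1");               // invented object keys
        keys.push_back("key2");
        cache->read(prefix, keys);            // pins the objects in this prefix's PrefixCache
        // ... read the cached object/journal files ...
        cache->doneReading(prefix, keys);     // unpins them; eviction may proceed

        cache->dropPrefix(prefix);            // when ownership of the prefix is released
    }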

src/Downloader.cpp

@@ -7,6 +7,7 @@
 #include <boost/filesystem.hpp>
 
 using namespace std;
+namespace bf = boost::filesystem;
 
 namespace storagemanager
 {
@@ -33,107 +34,24 @@ Downloader::~Downloader()
 {
 }
 
-void Downloader::useThisLock(boost::mutex *mutex)
-{
-    lock = mutex;
-}
-
-/*
-inline boost::mutex * Downloader::getDownloadMutex()
-{
-    return &download_mutex;
-}
-*/
-
-#if 0
-/* This is another fcn with a bug too subtle to identify right now.  Writing up a simplified
-version; we can revisit later if necessary */
-void Downloader::download(const vector<const string *> &keys, vector<int> *errnos, vector<size_t> *sizes)
-{
-    uint counter = keys.size();
-    boost::condition condvar;
-    boost::mutex m;
-    DownloadListener listener(&counter, &condvar, &m);
-    boost::shared_ptr<Download> dls[keys.size()];
-    bool inserted[keys.size()];
-    Downloads_t::iterator iterators[keys.size()];
-    uint i;
-
-    for (i = 0; i < keys.size(); i++)
-        dls[i].reset(new Download(keys[i], this));
-
-    boost::unique_lock<boost::mutex> s(download_mutex);
-    for (i = 0; i < keys.size(); i++)
-    {
-        auto &dl = dls[i];
-        pair<Downloads_t::iterator, bool> ret = downloads.insert(dl);
-        iterators[i] = ret.first;
-        inserted[i] = ret.second;
-        if (inserted[i])
-        {
-            dl->listeners.push_back(&listener);
-            workers.addJob(dl);
-        }
-        else
-        {
-            dl = *(ret.first);   // point to the existing download.  Redundant with the iterators array.  Don't care yet.
-            (*iterators[i])->listeners.push_back(&listener);
-        }
-    }
-
-    // wait for the downloads to finish
-    boost::unique_lock<boost::mutex> dl_lock(m);
-    s.unlock();
-    while (counter > 0)
-        condvar.wait(dl_lock);
-    dl_lock.unlock();
-
-    // remove the entries inserted by this call
-    sizes->resize(keys.size());
-    s.lock();
-    for (i = 0; i < keys.size(); i++)
-    {
-        if (inserted[i])
-        {
-            (*sizes)[i] = (*iterators[i])->size;
-            downloads.erase(iterators[i]);
-        }
-        else
-            (*sizes)[i] = 0;
-    }
-    s.unlock();
-
-    // check for errors & propagate
-    errnos->resize(keys.size());
-    char buf[80];
-    for (i = 0; i < keys.size(); i++)
-    {
-        auto &dl = dls[i];
-        (*errnos)[i] = dl->dl_errno;
-        if (dl->dl_errno != 0)
-            logger->log(LOG_ERR, "Downloader: failed to download %s, got '%s'", keys[i]->c_str(), strerror_r(dl->dl_errno, buf, 80));
-    }
-}
-#endif
-
-void Downloader::download(const vector<const string *> &keys, vector<int> *errnos, vector<size_t> *sizes)
+void Downloader::download(const vector<const string *> &keys, vector<int> *errnos, vector<size_t> *sizes,
+    const bf::path &prefix, boost::mutex *cache_lock)
 {
     uint counter = keys.size();
     boost::condition condvar;
     DownloadListener listener(&counter, &condvar);
     vector<boost::shared_ptr<Download> > ownedDownloads(keys.size());
 
-    // the caller is holding a mutex
+    // the caller is holding cache_lock
     /* if a key is already being downloaded, attach listener to that Download instance.
        if it is not already being downloaded, make a new Download instance.
        wait for the listener to tell us that it's done.
     */
+    boost::unique_lock<boost::mutex> s(lock);
     for (uint i = 0; i < keys.size(); i++)
     {
-        boost::shared_ptr<Download> newDL(new Download(*keys[i], downloadPath, lock));
+        boost::shared_ptr<Download> newDL(new Download(*keys[i], prefix, cache_lock));
         auto it = downloads.find(newDL);   // kinda sucks to have to search this way.
         if (it == downloads.end())
         {
@@ -157,15 +75,17 @@ void Downloader::download(const vector<const string *> &keys, vector<int> *errno
             (*it)->listeners.push_back(&listener);
         }
     }
+    s.unlock();
 
     // wait for the downloads to finish
     while (counter > 0)
-        condvar.wait(*lock);
+        condvar.wait(*cache_lock);
 
     // check success, gather sizes from downloads started by this thread
     sizes->resize(keys.size());
     errnos->resize(keys.size());
     char buf[80];
+    s.lock();
     for (uint i = 0; i < keys.size(); i++)
    {
         if (ownedDownloads[i])
@@ -189,19 +109,21 @@ void Downloader::download(const vector<const string *> &keys, vector<int> *errno
 bool Downloader::inProgress(const string &key)
 {
     boost::shared_ptr<Download> tmp(new Download(key));
+    boost::unique_lock<boost::mutex> s(lock);
     auto it = downloads.find(tmp);
     if (it != downloads.end())
         return !(*it)->finished;
     return false;
 }
 
-void Downloader::setDownloadPath(const string &path)
+void Downloader::setDownloadPath(const bf::path &path)
 {
-    downloadPath = path;
+    //downloadPath = path;
 }
 
 /* The helper fcns */
-Downloader::Download::Download(const string &source, const string &_dlPath, boost::mutex *_lock) :
+Downloader::Download::Download(const string &source, const bf::path &_dlPath, boost::mutex *_lock) :
     dlPath(_dlPath), key(source), dl_errno(0), size(0), lock(_lock), finished(false), itRan(false)
 {
 }
@@ -222,11 +144,12 @@ void Downloader::Download::operator()()
     CloudStorage *storage = CloudStorage::get();
     // TODO: we should have it download to a tmp path, then mv it to the cache dir to avoid
     // Cache seeing an incomplete download after a crash
     int err = storage->getObject(key, (dlPath / key).string(), &size);
     if (err != 0)
     {
         dl_errno = errno;
-        boost::filesystem::remove(dlPath / key);
+        bf::remove(dlPath / key);
         size = 0;
     }

src/Downloader.h

@@ -25,15 +25,16 @@ class Downloader
 
     // caller owns the memory for the strings.
     // errors are reported through errnos
-    void download(const std::vector<const std::string *> &keys, std::vector<int> *errnos, std::vector<size_t> *sizes);
-    void setDownloadPath(const std::string &path);
-    void useThisLock(boost::mutex *);
-    bool inProgress(const std::string &);
+    void download(const std::vector<const std::string *> &keys,
+        std::vector<int> *errnos, std::vector<size_t> *sizes, const boost::filesystem::path &prefix,
+        boost::mutex *lock);
+    void setDownloadPath(const boost::filesystem::path &path);
+    bool inProgress(const std::string &);   // call this holding the cache's lock
 
   private:
     uint maxDownloads;
-    std::string downloadPath;
-    boost::mutex *lock;
+    //boost::filesystem::path downloadPath;
+    boost::mutex lock;
 
     class DownloadListener
     {
@@ -51,7 +52,7 @@ class Downloader
     */
     struct Download : public ThreadPool::Job
     {
-        Download(const std::string &source, const std::string &_dlPath, boost::mutex *_lock);
+        Download(const std::string &source, const boost::filesystem::path &_dlPath, boost::mutex *);
         Download(const std::string &source);
         ~Download();
         void operator()();
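With the member mutex replacing useThisLock(), a caller now passes its lock in explicitly and must hold it across the call, because the completion condition variable waits on that lock. A hedged calling sketch built only from the signatures above (variable names and key values invented):

    #include <string>
    #include <vector>
    #include <boost/filesystem/path.hpp>
    #include <boost/thread/mutex.hpp>
    namespace bf = boost::filesystem;
    using namespace storagemanager;

    void fetchExample(boost::mutex &cache_lock)   // in practice, the per-prefix cache's own mutex
    {
        Downloader *dl = Cache::get()->getDownloader();   // the shared Downloader, per the new Cache API

        std::vector<std::string> storage;         // caller owns the string memory
        storage.push_back("key1");                // invented keys
        storage.push_back("key2");
        std::vector<const std::string *> keys;
        for (size_t i = 0; i < storage.size(); i++)
            keys.push_back(&storage[i]);

        std::vector<int> errnos;
        std::vector<size_t> sizes;

        boost::unique_lock<boost::mutex> s(cache_lock);   // download() waits on this lock internally
        dl->download(keys, &errnos, &sizes, bf::path("pfx0001"), &cache_lock);
        // errnos[i] != 0 marks a failed download; the Downloader has already logged it.
    }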

src/IOCoordinator.cpp

@@ -80,11 +80,6 @@ IOCoordinator * IOCoordinator::get()
     return ioc;
 }
 
-void IOCoordinator::willRead(const char *, off_t, size_t)
-{
-    // not sure we will implement this.
-}
-
 int IOCoordinator::loadObject(int fd, uint8_t *data, off_t offset, size_t length) const
 {
     size_t count = 0;
@@ -135,7 +130,8 @@ ssize_t IOCoordinator::read(const char *_filename, uint8_t *data, off_t offset,
        release read lock
        put together the response in data
     */
-    bf::path p(_filename);
+    bf::path p = ownership.get(_filename);
+    const bf::path firstDir = *(p.begin());
     const char *filename = p.string().c_str();
 
     ScopedReadLock fileLock(this, filename);
@@ -158,7 +154,7 @@ ssize_t IOCoordinator::read(const char *_filename, uint8_t *data, off_t offset,
     vector<string> keys;
     for (const auto &object : relevants)
         keys.push_back(object.key);
-    cache->read(keys);
+    cache->read(firstDir, keys);
 
     // open the journal files and objects that exist to prevent them from being
     // deleted mid-operation
@@ -169,11 +165,11 @@ ssize_t IOCoordinator::read(const char *_filename, uint8_t *data, off_t offset,
         // later.  not thinking about it for now.
 
         // open all of the journal files that exist
-        string filename = (journalPath/(key + ".journal")).string();
-        int fd = ::open(filename.c_str(), O_RDONLY);
+        string jFilename = (journalPath/firstDir/(key + ".journal")).string();
+        int fd = ::open(jFilename.c_str(), O_RDONLY);
         if (fd >= 0)
         {
-            keyToJournalName[key] = filename;
+            keyToJournalName[key] = jFilename;
             journalFDs[key] = fd;
             fdMinders[mindersIndex++].fd = fd;
             //fdMinders.push_back(SharedCloser(fd));
@@ -182,27 +178,27 @@ ssize_t IOCoordinator::read(const char *_filename, uint8_t *data, off_t offset,
         {
             int l_errno = errno;
             fileLock.unlock();
-            cache->doneReading(keys);
+            cache->doneReading(firstDir, keys);
             logger->log(LOG_CRIT, "IOCoordinator::read(): Got an unexpected error opening %s, error was '%s'",
-                filename.c_str(), strerror_r(l_errno, buf, 80));
+                jFilename.c_str(), strerror_r(l_errno, buf, 80));
             errno = l_errno;
             return -1;
         }
 
         // open all of the objects
-        filename = (cachePath/key).string();
-        fd = ::open(filename.c_str(), O_RDONLY);
+        string oFilename = (cachePath/firstDir/key).string();
+        fd = ::open(oFilename.c_str(), O_RDONLY);
         if (fd < 0)
         {
             int l_errno = errno;
             fileLock.unlock();
-            cache->doneReading(keys);
+            cache->doneReading(firstDir, keys);
             logger->log(LOG_CRIT, "IOCoordinator::read(): Got an unexpected error opening %s, error was '%s'",
-                filename.c_str(), strerror_r(l_errno, buf, 80));
+                oFilename.c_str(), strerror_r(l_errno, buf, 80));
             errno = l_errno;
             return -1;
         }
-        keyToObjectName[key] = filename;
+        keyToObjectName[key] = oFilename;
         objectFDs[key] = fd;
         fdMinders[mindersIndex++].fd = fd;
         //fdMinders.push_back(SharedCloser(fd));
@@ -239,7 +235,7 @@ ssize_t IOCoordinator::read(const char *_filename, uint8_t *data, off_t offset,
         if (err)
         {
             fileLock.unlock();
-            cache->doneReading(keys);
+            cache->doneReading(firstDir, keys);
             if (count == 0)
                 return -1;
             else
@@ -251,24 +247,26 @@ ssize_t IOCoordinator::read(const char *_filename, uint8_t *data, off_t offset,
 out:
     fileLock.unlock();
-    cache->doneReading(keys);
+    cache->doneReading(firstDir, keys);
     // all done
     return count;
 }
 
 ssize_t IOCoordinator::write(const char *_filename, const uint8_t *data, off_t offset, size_t length)
 {
-    bf::path p(_filename);
+    bf::path p = ownership.get(_filename);
+    const bf::path firstDir = *(p.begin());
     const char *filename = p.string().c_str();
 
     ScopedWriteLock lock(this, filename);
-    int ret = _write(filename, data, offset, length);
+    int ret = _write(filename, data, offset, length, firstDir);
     lock.unlock();
-    cache->doneWriting();
+    cache->doneWriting(firstDir);
     return ret;
 }
 
-ssize_t IOCoordinator::_write(const char *filename, const uint8_t *data, off_t offset, size_t length)
+ssize_t IOCoordinator::_write(const char *filename, const uint8_t *data, off_t offset, size_t length,
+    const bf::path &firstDir)
 {
     int err = 0;
     ssize_t count = 0;
@@ -311,7 +309,7 @@ ssize_t IOCoordinator::_write(const char *filename, const uint8_t *data, off_t o
         }
 
         //cache->makeSpace(writeLength+JOURNAL_ENTRY_HEADER_SIZE);
-        err = replicator->addJournalEntry(i->key.c_str(),&data[count],objectOffset,writeLength);
+        err = replicator->addJournalEntry((firstDir/i->key).string().c_str(),&data[count],objectOffset,writeLength);
         assert((uint) err == writeLength);
 
         if (err <= 0)
@@ -328,9 +326,9 @@ ssize_t IOCoordinator::_write(const char *filename, const uint8_t *data, off_t o
         if ((writeLength + objectOffset) > i->length)
             metadata.updateEntryLength(i->offset, (writeLength + objectOffset));
 
-        cache->newJournalEntry(writeLength+JOURNAL_ENTRY_HEADER_SIZE);
-        synchronizer->newJournalEntry(i->key, writeLength+JOURNAL_ENTRY_HEADER_SIZE);
+        cache->newJournalEntry(firstDir, writeLength+JOURNAL_ENTRY_HEADER_SIZE);
+        synchronizer->newJournalEntry(firstDir, i->key, writeLength+JOURNAL_ENTRY_HEADER_SIZE);
         count += writeLength;
         dataRemaining -= writeLength;
     }
@@ -364,7 +362,7 @@ ssize_t IOCoordinator::_write(const char *filename, const uint8_t *data, off_t o
         metadataObject newObject = metadata.addMetadataObject(filename,(writeLength + objectOffset));
         // send to replicator
-        err = replicator->newObject(newObject.key.c_str(),&data[count],objectOffset,writeLength);
+        err = replicator->newObject((firstDir/newObject.key).string().c_str(),&data[count],objectOffset,writeLength);
         assert((uint) err == writeLength);
         if (err <= 0)
         {
@@ -377,13 +375,13 @@ ssize_t IOCoordinator::_write(const char *filename, const uint8_t *data, off_t o
             //log error and abort
         }
 
-        cache->newObject(newObject.key,writeLength + objectOffset);
+        cache->newObject(firstDir, newObject.key,writeLength + objectOffset);
         newObjectKeys.push_back(newObject.key);
 
         count += writeLength;
         dataRemaining -= writeLength;
     }
 
-    synchronizer->newObjects(newObjectKeys);
+    synchronizer->newObjects(firstDir, newObjectKeys);
     replicator->updateMetadata(filename, metadata);
@@ -392,7 +390,8 @@ ssize_t IOCoordinator::_write(const char *filename, const uint8_t *data, off_t o
 ssize_t IOCoordinator::append(const char *_filename, const uint8_t *data, size_t length)
 {
-    bf::path p(_filename);
+    bf::path p = ownership.get(_filename);
+    const bf::path firstDir = *(p.begin());
     const char *filename = p.string().c_str();
     int err;
@@ -430,7 +429,7 @@ ssize_t IOCoordinator::append(const char *_filename, const uint8_t *data, size_t
         //cache->makeSpace(writeLength+JOURNAL_ENTRY_HEADER_SIZE);
-        err = replicator->addJournalEntry(i->key.c_str(),&data[count],i->length,writeLength);
+        err = replicator->addJournalEntry((firstDir/i->key).string().c_str(),&data[count],i->length,writeLength);
         assert((uint) err == writeLength);
         if (err <= 0)
         {
@@ -441,9 +440,9 @@ ssize_t IOCoordinator::append(const char *_filename, const uint8_t *data, size_t
         }
 
         metadata.updateEntryLength(i->offset, (writeLength + i->length));
-        cache->newJournalEntry(writeLength+JOURNAL_ENTRY_HEADER_SIZE);
-        synchronizer->newJournalEntry(i->key, writeLength+JOURNAL_ENTRY_HEADER_SIZE);
+        cache->newJournalEntry(firstDir, writeLength+JOURNAL_ENTRY_HEADER_SIZE);
+        synchronizer->newJournalEntry(firstDir, i->key, writeLength+JOURNAL_ENTRY_HEADER_SIZE);
         count += writeLength;
         dataRemaining -= writeLength;
     }
@@ -466,7 +465,7 @@ ssize_t IOCoordinator::append(const char *_filename, const uint8_t *data, size_t
         metadataObject newObject = metadata.addMetadataObject(filename,writeLength);
         // write the new object
-        err = replicator->newObject(newObject.key.c_str(),&data[count],0,writeLength);
+        err = replicator->newObject((firstDir/newObject.key).string().c_str(),&data[count],0,writeLength);
         assert((uint) err == writeLength);
         if (err <= 0)
         {
@@ -477,20 +476,20 @@ ssize_t IOCoordinator::append(const char *_filename, const uint8_t *data, size_t
             goto out;
             //log error and abort
         }
-        cache->newObject(newObject.key,writeLength);
+        cache->newObject(firstDir, newObject.key,writeLength);
         newObjectKeys.push_back(newObject.key);
 
         count += writeLength;
         dataRemaining -= writeLength;
     }
 
-    synchronizer->newObjects(newObjectKeys);
+    synchronizer->newObjects(firstDir, newObjectKeys);
     replicator->updateMetadata(filename, metadata);
 
 // had to add this hack to prevent deadlock
 out:
     // need to release the file lock before telling Cache that we're done writing.
     lock.unlock();
-    cache->doneWriting();
+    cache->doneWriting(firstDir);
 
     return count;
 }
@@ -498,26 +497,28 @@ out:
 // TODO: might need to support more open flags, ex: O_EXCL
 int IOCoordinator::open(const char *_filename, int openmode, struct stat *out)
 {
-    bf::path p = _filename;   // normalize it
+    bf::path p = ownership.get(_filename);
     const char *filename = p.string().c_str();
 
-    ScopedReadLock s(this, filename);
+    boost::scoped_ptr<ScopedFileLock> s;
+    if (openmode & O_CREAT || openmode | O_TRUNC)
+        s.reset(new ScopedWriteLock(this, filename));
+    else
+        s.reset(new ScopedReadLock(this, filename));
     MetadataFile meta(filename, MetadataFile::no_create_t());
 
     if ((openmode & O_CREAT) && !meta.exists())
         replicator->updateMetadata(filename, meta);   // this will end up creating filename
 
     if ((openmode & O_TRUNC) && meta.exists())
-    {
-        s.unlock();
-        truncate(filename, 0);
-        s.lock();
-    }
+        _truncate(p, 0, s.get());
 
     return meta.stat(out);
 }
 
 int IOCoordinator::listDirectory(const char *dirname, vector<string> *listing)
 {
-    bf::path p(metaPath / dirname);
+    bf::path p(metaPath / ownership.get(dirname));
 
     listing->clear();
     if (!bf::exists(p))
@@ -544,7 +545,7 @@ int IOCoordinator::listDirectory(const char *dirname, vector<string> *listing)
 int IOCoordinator::stat(const char *_path, struct stat *out)
 {
-    bf::path p(_path);
+    bf::path p = ownership.get(_path);
     const char *path = p.string().c_str();
 
     if (bf::is_directory(metaPath/p))
@@ -556,6 +557,16 @@ int IOCoordinator::stat(const char *_path, struct stat *out)
 }
 
 int IOCoordinator::truncate(const char *_path, size_t newSize)
+{
+    bf::path p = ownership.get(_path);
+    const char *path = p.string().c_str();
+
+    ScopedWriteLock lock(this, path);
+    return _truncate(p, newSize, &lock);
+}
+
+int IOCoordinator::_truncate(const bf::path &bfpath, size_t newSize, ScopedFileLock *lock)
 {
     /*
        grab the write lock.
@@ -568,13 +579,12 @@ int IOCoordinator::truncate(const char *_path, size_t newSize)
        tell cache they were deleted
        tell synchronizer they were deleted
     */
-    bf::path p(_path);
-    const char *path = p.string().c_str();
+    const bf::path firstDir = *(bfpath.begin());
+    const char *path = bfpath.string().c_str();
 
     Synchronizer *synchronizer = Synchronizer::get();   // needs to init sync here to break circular dependency...
 
     int err;
-    ScopedWriteLock lock(this, path);
     MetadataFile meta(path, MetadataFile::no_create_t());
 
     if (!meta.exists())
     {
@@ -590,9 +600,9 @@ int IOCoordinator::truncate(const char *_path, size_t newSize)
     if (filesize < newSize)
     {
         uint8_t zero = 0;
-        err = _write(path, &zero, newSize - 1, 1);
-        lock.unlock();
-        cache->doneWriting();
+        err = _write(path, &zero, newSize - 1, 1, firstDir);
+        lock->unlock();
+        cache->doneWriting(firstDir);
         if (err < 0)
             return -1;
         return 0;
@@ -620,15 +630,15 @@ int IOCoordinator::truncate(const char *_path, size_t newSize)
     vector<string> deletedObjects;
     for (; i < objects.size(); ++i)
     {
-        int result = cache->ifExistsThenDelete(objects[i].key);
+        int result = cache->ifExistsThenDelete(firstDir, objects[i].key);
         if (result & 0x1)
-            replicator->remove(cachePath / objects[i].key);
+            replicator->remove(cachePath/firstDir/objects[i].key);
         if (result & 0x2)
-            replicator->remove(journalPath / (objects[i].key + ".journal"));
+            replicator->remove(journalPath/firstDir/(objects[i].key + ".journal"));
         deletedObjects.push_back(objects[i].key);
     }
     if (!deletedObjects.empty())
-        synchronizer->deletedObjects(deletedObjects);
+        synchronizer->deletedObjects(firstDir, deletedObjects);
 
     return 0;
 }
@@ -647,8 +657,9 @@ void IOCoordinator::deleteMetaFile(const bf::path &file)
     Synchronizer *synchronizer = Synchronizer::get();
 
     // this is kind of ugly.  We need to lock on 'file' relative to metaPath, and without the .meta extension
-    string pita = file.string().substr(metaPath.string().length());   // get rid of metapath
+    string pita = file.string().substr(metaPath.string().length() + 1);   // get rid of metapath
     pita = pita.substr(0, pita.length() - 5);   // get rid of the extension
+    const bf::path firstDir = *(bf::path(pita).begin());
     ScopedWriteLock lock(this, pita);
     //cout << "file is " << file.string() << " locked on " << pita << endl;
@@ -661,14 +672,14 @@ void IOCoordinator::deleteMetaFile(const bf::path &file)
     for (auto &object : objects)
     {
         //cout << "deleting " << object.key << endl;
-        int result = cache->ifExistsThenDelete(object.key);
+        int result = cache->ifExistsThenDelete(firstDir, object.key);
         if (result & 0x1)
-            replicator->remove(cachePath/object.key);
+            replicator->remove(cachePath/firstDir/object.key);
         if (result & 0x2)
-            replicator->remove(journalPath/(object.key + ".journal"));
+            replicator->remove(journalPath/firstDir/(object.key + ".journal"));
         deletedObjects.push_back(object.key);
     }
-    synchronizer->deletedObjects(deletedObjects);
+    synchronizer->deletedObjects(firstDir, deletedObjects);
 }
 
 void IOCoordinator::remove(const bf::path &p)
@@ -718,7 +729,7 @@ int IOCoordinator::unlink(const char *path)
     /* TODO!  We need to make sure the input params to IOC fcns don't go up to parent dirs,
        ex, if path = '../../../blahblah'. */
-    bf::path p(metaPath/path);
+    bf::path p(metaPath/ownership.get(path));
 
     try
     {
@@ -764,8 +775,10 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
        write the new metadata object
     */
-    const bf::path p1(_filename1);
-    const bf::path p2(_filename2);
+    const bf::path p1 = ownership.get(_filename1);
+    const bf::path p2 = ownership.get(_filename2);
+    const bf::path firstDir1 = *(p1.begin());
+    const bf::path firstDir2 = *(p2.begin());
     const char *filename1 = p1.string().c_str();
     const char *filename2 = p2.string().c_str();
@@ -815,7 +828,7 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
     {
         for (const auto &object : objects)
         {
-            bf::path journalFile = journalPath/(object.key + ".journal");
+            bf::path journalFile = journalPath/firstDir1/(object.key + ".journal");
             metadataObject newObj = meta2.addMetadataObject(filename2, object.length);
             assert(newObj.offset == object.offset);
             // TODO: failure here makes a lot of noise in the log file
@@ -826,7 +839,7 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
                 if (errno == ENOENT)
                 {
                     // it's not in cloudstorage, see if it's in the cache
-                    bf::path cachedObjPath = cachePath/object.key;
+                    bf::path cachedObjPath = cachePath/firstDir1/object.key;
                     bool objExists = bf::exists(cachedObjPath);
                     if (!objExists)
                         throw CFException(ENOENT, string("IOCoordinator::copyFile(): source = ") + filename1 +
@@ -849,12 +862,12 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
// if there's a journal file for this object, make a copy // if there's a journal file for this object, make a copy
if (bf::exists(journalFile)) if (bf::exists(journalFile))
{ {
bf::path newJournalFile = journalPath/(newObj.key + ".journal"); bf::path newJournalFile = journalPath/firstDir2/(newObj.key + ".journal");
try try
{ {
bf::copy_file(journalFile, newJournalFile); bf::copy_file(journalFile, newJournalFile);
size_t tmp = bf::file_size(newJournalFile); size_t tmp = bf::file_size(newJournalFile);
cache->newJournalEntry(tmp); cache->newJournalEntry(firstDir2, tmp);
newJournalEntries.push_back(pair<string, size_t>(newObj.key, tmp)); newJournalEntries.push_back(pair<string, size_t>(newObj.key, tmp));
} }
catch (bf::filesystem_error &e) catch (bf::filesystem_error &e)
@@ -873,8 +886,8 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
cs->deleteObject(newObject.key); cs->deleteObject(newObject.key);
for (auto &jEntry : newJournalEntries) for (auto &jEntry : newJournalEntries)
{ {
bf::path fullJournalPath = journalPath/(jEntry.first + ".journal"); bf::path fullJournalPath = journalPath/firstDir2/(jEntry.first + ".journal");
cache->deletedJournal(bf::file_size(fullJournalPath)); cache->deletedJournal(firstDir2, bf::file_size(fullJournalPath));
bf::remove(fullJournalPath); bf::remove(fullJournalPath);
} }
errno = e.l_errno; errno = e.l_errno;
@@ -885,7 +898,7 @@ int IOCoordinator::copyFile(const char *_filename1, const char *_filename2)
lock2.unlock(); lock2.unlock();
for (auto &jEntry : newJournalEntries) for (auto &jEntry : newJournalEntries)
sync->newJournalEntry(jEntry.first, jEntry.second); sync->newJournalEntry(firstDir2, jEntry.first, jEntry.second);
return 0; return 0;
} }
@@ -1147,18 +1160,12 @@ int IOCoordinator::mergeJournalInMem(boost::shared_array<uint8_t> &objData, size
return 0; return 0;
} }
void IOCoordinator::renameObject(const string &oldKey, const string &newKey)
{
// does anything need to be done here?
}
void IOCoordinator::readLock(const string &filename) void IOCoordinator::readLock(const string &filename)
{ {
boost::unique_lock<boost::mutex> s(lockMutex); boost::unique_lock<boost::mutex> s(lockMutex);
//cout << "read-locking " << filename << endl; //cout << "read-locking " << filename << endl;
//assert(filename[0] == '/'); assert(filename[0] != '/');
auto ins = locks.insert(pair<string, RWLock *>(filename, NULL)); auto ins = locks.insert(pair<string, RWLock *>(filename, NULL));
if (ins.second) if (ins.second)
ins.first->second = new RWLock(); ins.first->second = new RWLock();
@@ -1183,7 +1190,7 @@ void IOCoordinator::writeLock(const string &filename)
boost::unique_lock<boost::mutex> s(lockMutex); boost::unique_lock<boost::mutex> s(lockMutex);
//cout << "write-locking " << filename << endl; //cout << "write-locking " << filename << endl;
//assert(filename[0] == '/'); assert(filename[0] != '/');
auto ins = locks.insert(pair<string, RWLock *>(filename, NULL)); auto ins = locks.insert(pair<string, RWLock *>(filename, NULL));
if (ins.second) if (ins.second)
ins.first->second = new RWLock(); ins.first->second = new RWLock();
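In short, every public IOCoordinator entry point now runs its incoming path through ownership.get(), which claims the path's prefix if needed and hands back the path rewritten relative to that prefix; the prefix's first component then selects the per-prefix cache and journal subdirectories. A small standalone illustration of that convention (paths here are hypothetical, not part of the commit):

#include <boost/filesystem/path.hpp>
#include <iostream>

namespace bf = boost::filesystem;

int main()
{
    // With common_prefix_depth = 1, ownership.get("/base/data3/dir/file")
    // would return "data3/dir/file" (claiming the prefix "data3" as a side effect).
    bf::path rewritten("data3/dir/file");
    bf::path firstDir = *(rewritten.begin());   // "data3", as derived in the code above

    // Per-prefix layout used throughout this commit for an object key K:
    //   <cachePath>/<firstDir>/K  and  <journalPath>/<firstDir>/K.journal
    bf::path cachePath("/var/lib/storagemanager/cache");   // hypothetical root
    std::cout << cachePath / firstDir / "K" << std::endl;
    return 0;
}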


@@ -18,6 +18,7 @@
#include "RWLock.h"
#include "Replicator.h"
#include "Utilities.h"
+#include "Ownership.h"
namespace storagemanager
{
@@ -30,7 +31,6 @@ class IOCoordinator : public boost::noncopyable
static IOCoordinator *get();
virtual ~IOCoordinator();
-void willRead(const char *filename, off_t offset, size_t length);
/* TODO: make read, write, append return a ssize_t */
ssize_t read(const char *filename, uint8_t *data, off_t offset, size_t length);
ssize_t write(const char *filename, const uint8_t *data, off_t offset, size_t length);
@@ -58,7 +58,6 @@ class IOCoordinator : public boost::noncopyable
/* Lock manipulation fcns. They can lock on any param given to them. For convention's sake,
the parameter should mostly be the abs filename being accessed. */
-void renameObject(const std::string &oldKey, const std::string &newKey);
void readLock(const std::string &filename);
void writeLock(const std::string &filename);
void readUnlock(const std::string &filename);
@@ -75,6 +74,7 @@ class IOCoordinator : public boost::noncopyable
Cache *cache;
SMLogging *logger;
Replicator *replicator;
+Ownership ownership; // ACK! Need a new name for this!
size_t objectSize;
boost::filesystem::path journalPath;
@@ -87,7 +87,9 @@ class IOCoordinator : public boost::noncopyable
void remove(const boost::filesystem::path &path);
void deleteMetaFile(const boost::filesystem::path &file);
-ssize_t _write(const char *filename, const uint8_t *data, off_t offset, size_t length);
+int _truncate(const boost::filesystem::path &path, size_t newsize, ScopedFileLock *lock);
+ssize_t _write(const char *filename, const uint8_t *data, off_t offset, size_t length,
+const boost::filesystem::path &firstDir);
int loadObjectAndJournal(const char *objFilename, const char *journalFilename,
uint8_t *data, off_t offset, size_t length) const;


@@ -49,7 +49,7 @@ MetadataFile::MetadataConfig::MetadataConfig()
catch (...)
{
logger->log(LOG_CRIT, "ObjectStorage/object_size must be set to a numeric value");
-throw runtime_error("Please set ObjectStorage/journal_path in the storagemanager.cnf file");
+throw runtime_error("Please set ObjectStorage/object_size in the storagemanager.cnf file");
}
try
@@ -57,8 +57,8 @@ MetadataFile::MetadataConfig::MetadataConfig()
msMetadataPath = config->getValue("ObjectStorage", "metadata_path");
if (msMetadataPath.empty())
{
-logger->log(LOG_CRIT, "ObjectStorage/journal_path is not set");
-throw runtime_error("Please set ObjectStorage/journal_path in the storagemanager.cnf file");
+logger->log(LOG_CRIT, "ObjectStorage/metadata_path is not set");
+throw runtime_error("Please set ObjectStorage/metadata_path in the storagemanager.cnf file");
}
}
catch (...)

src/Ownership.cpp Normal file

@@ -0,0 +1,303 @@
#include "Ownership.h"
#include "Config.h"
#include "Cache.h"
#include "Synchronizer.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <boost/filesystem.hpp>
using namespace std;
namespace bf=boost::filesystem;
namespace storagemanager
{
Ownership::Ownership()
{
Config *config = Config::get();
logger = SMLogging::get();
string sPrefixDepth = config->getValue("ObjectStorage", "common_prefix_depth");
if (sPrefixDepth.empty())
{
const char *msg = "Ownership: Need to specify ObjectStorage/common_prefix_depth in the storagemanager.cnf file";
logger->log(LOG_CRIT, msg);
throw runtime_error(msg);
}
try
{
prefixDepth = stoul(sPrefixDepth, NULL, 0);
}
catch (invalid_argument &e)
{
const char *msg = "Ownership: Invalid value in ObjectStorage/common_prefix_depth";
logger->log(LOG_CRIT, msg);
throw runtime_error(msg);
}
metadataPrefix = config->getValue("ObjectStorage", "metadata_path");
if (metadataPrefix.empty())
{
const char *msg = "Ownership: Need to specify ObjectStorage/metadata_path in the storagemanager.cnf file";
logger->log(LOG_CRIT, msg);
throw runtime_error(msg);
}
monitor = new Monitor(this);
}
Ownership::~Ownership()
{
delete monitor;
for (auto &it : ownedPrefixes)
releaseOwnership(it.first, true);
}
bf::path Ownership::get(const bf::path &p)
{
bf::path ret, prefix;
bf::path::const_iterator pit;
uint i;
//cerr << "Ownership::get() param = " << p.string() << endl;
if (prefixDepth > 0)
{
for (i = 0, pit = p.begin(); i <= prefixDepth && pit != p.end(); ++i, ++pit)
;
if (pit != p.end())
prefix = *pit;
//cerr << "prefix is " << prefix.string() << endl;
for (; pit != p.end(); ++pit)
ret /= *pit;
if (ret.empty())
{
//cerr << "returning ''" << endl;
return ret;
}
}
else
{
ret = p;
prefix = *(p.begin());
}
mutex.lock();
if (ownedPrefixes.find(prefix) == ownedPrefixes.end())
{
mutex.unlock();
takeOwnership(prefix);
}
else
{
// todo... replace this polling, and the similar polling in Cache, with proper condition vars.
while (ownedPrefixes[prefix] == false)
{
mutex.unlock();
sleep(1);
mutex.lock();
}
mutex.unlock();
}
//cerr << "returning " << ret.string() << endl;
return ret;
}
// minor timesaver
#define TOUCH(p, f) { \
int fd = ::open((metadataPrefix/p/f).string().c_str(), O_TRUNC | O_CREAT | O_WRONLY, 0660); \
if (fd >= 0) \
::close(fd); \
else \
{ \
char buf[80]; int saved_errno = errno; \
cerr << "failed to touch " << metadataPrefix/p/f << " got " << strerror_r(saved_errno, buf, 80) << endl; \
} \
}
#define DELETE(p, f) ::unlink((metadataPrefix/p/f).string().c_str());
void Ownership::touchFlushing(const bf::path &prefix, volatile bool *doneFlushing) const
{
while (!*doneFlushing)
{
TOUCH(prefix, "FLUSHING");
try
{
boost::this_thread::sleep_for(boost::chrono::seconds(1));
}
catch (boost::thread_interrupted &)
{ }
}
}
void Ownership::releaseOwnership(const bf::path &p, bool isDtor)
{
logger->log(LOG_DEBUG, "Ownership: releasing ownership of %s", p.string().c_str());
boost::unique_lock<boost::mutex> s(mutex);
auto it = ownedPrefixes.find(p);
if (it == ownedPrefixes.end())
{
logger->log(LOG_DEBUG, "Ownership::releaseOwnership(): told to disown %s, but do not own it", p.string().c_str());
return;
}
ownedPrefixes.erase(it);
if (isDtor)
{
// This is a quick release. If this is being destroyed, then it is through the graceful
// shutdown mechanism, which will flush data separately.
DELETE(p, "OWNED");
DELETE(p, "FLUSHING");
return;
}
s.unlock();
volatile bool done = false;
// start flushing
boost::thread xfer([this, &p, &done] { this->touchFlushing(p, &done); });
Cache::get()->dropPrefix(p);
Synchronizer::get()->dropPrefix(p);
done = true;
xfer.interrupt();
xfer.join();
// update state
DELETE(p, "OWNED");
DELETE(p, "FLUSHING");
}
void Ownership::_takeOwnership(const bf::path &p)
{
logger->log(LOG_DEBUG, "Ownership: taking ownership of %s", p.string().c_str());
bf::create_directories(metadataPrefix/p);
DELETE(p, "FLUSHING");
DELETE(p, "REQUEST_TRANSFER");
// TODO: need to consider errors taking ownership
TOUCH(p, "OWNED");
mutex.lock();
ownedPrefixes[p] = true;
mutex.unlock();
Synchronizer::get()->newPrefix(p);
Cache::get()->newPrefix(p);
}
void Ownership::takeOwnership(const bf::path &p)
{
boost::unique_lock<boost::mutex> s(mutex);
auto it = ownedPrefixes.find(p);
if (it != ownedPrefixes.end())
return;
ownedPrefixes[p] = false;
s.unlock();
bool okToTransfer = false;
struct stat statbuf;
int err;
char buf[80];
bf::path ownedPath = metadataPrefix/p/"OWNED";
bf::path flushingPath = metadataPrefix/p/"FLUSHING";
// if it's not already owned, then we can take possession
err = ::stat(ownedPath.string().c_str(), &statbuf);
if (err && errno == ENOENT)
{
_takeOwnership(p);
return;
}
TOUCH(p, "REQUEST_TRANSFER");
time_t lastFlushTime = time(NULL);
while (!okToTransfer && time(NULL) < lastFlushTime + 10)
{
// if the OWNED file is deleted or if the flushing file isn't touched after 10 secs
// it is ok to take possession.
err = ::stat(ownedPath.string().c_str(), &statbuf);
if (err)
{
if (errno == ENOENT)
okToTransfer = true;
else
logger->log(LOG_CRIT, "Ownership::takeOwnership(): got '%s' doing stat of %s", strerror_r(errno, buf, 80),
ownedPath.string().c_str());
}
err = ::stat(flushingPath.string().c_str(), &statbuf);
if (err && errno != ENOENT)
logger->log(LOG_CRIT, "Ownership::takeOwnership(): got '%s' doing stat of %s", strerror_r(errno, buf, 80),
flushingPath.string().c_str());
else
{
logger->log(LOG_DEBUG, "Ownership: waiting to get %s", p.string().c_str());
if (!err)
lastFlushTime = statbuf.st_mtime;
}
if (!okToTransfer)
sleep(1);
}
_takeOwnership(p);
}
Ownership::Monitor::Monitor(Ownership *_owner) : owner(_owner), stop(false)
{
thread = boost::thread([this] { this->watchForInterlopers(); });
}
Ownership::Monitor::~Monitor()
{
stop = true;
thread.interrupt();
thread.join();
}
void Ownership::Monitor::watchForInterlopers()
{
// look for requests to transfer ownership
struct stat statbuf;
int err;
char buf[80];
vector<bf::path> releaseList;
while (!stop)
{
releaseList.clear();
boost::unique_lock<boost::mutex> s(owner->mutex);
for (auto &prefix : owner->ownedPrefixes)
{
if (stop)
break;
if (prefix.second == false)
continue;
bf::path p(owner->metadataPrefix/(prefix.first)/"REQUEST_TRANSFER");
const char *cp = p.string().c_str();
err = ::stat(cp, &statbuf);
// release it if there's a release request only. Log it if there's an error other than
// that the file isn't there.
if (err == 0)
releaseList.push_back(prefix.first);
if (err < 0 && errno != ENOENT)
owner->logger->log(LOG_ERR, "Ownership::Monitor::watchForInterlopers(): failed to stat %s, got %s", cp,
strerror_r(errno, buf, 80));
}
s.unlock();
for (auto &prefix : releaseList)
owner->releaseOwnership(prefix);
if (stop)
break;
try
{
boost::this_thread::sleep_for(boost::chrono::seconds(1));
}
catch (boost::thread_interrupted &)
{ }
}
}
}
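Taken together, the marker files above form a small handshake over the shared metadata directory: the owner keeps OWNED present and, while draining a prefix, touches FLUSHING about once a second; a node that wants the prefix touches REQUEST_TRANSFER and waits until OWNED disappears or FLUSHING goes stale for ~10 seconds. A condensed, illustrative restatement of the requester's side (error handling and logging stripped; the authoritative logic is takeOwnership() above):

#include <boost/filesystem.hpp>
#include <fstream>
#include <ctime>
#include <unistd.h>

namespace bf = boost::filesystem;

// prefixDir is <metadata_path>/<prefix>, e.g. ".../metadata/data3" (hypothetical)
void waitForOwnership(const bf::path &prefixDir)
{
    bf::path owned = prefixDir / "OWNED";
    bf::path flushing = prefixDir / "FLUSHING";

    if (!bf::exists(owned))
        return;                                        // unowned: claim immediately

    // ask the current owner to release the prefix
    std::ofstream((prefixDir / "REQUEST_TRANSFER").string().c_str());

    time_t lastFlush = time(NULL);
    while (time(NULL) < lastFlush + 10)
    {
        if (!bf::exists(owned))
            return;                                    // owner released it
        if (bf::exists(flushing))
            lastFlush = bf::last_write_time(flushing); // owner is still flushing
        sleep(1);
    }
    // FLUSHING went stale for 10s: presume the owner is gone and claim the prefix.
}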

src/Ownership.h Normal file

@@ -0,0 +1,60 @@
#ifndef OWNERSHIP_H_
#define OWNERSHIP_H_
#include <boost/filesystem/path.hpp>
#include <boost/thread.hpp>
#include <map>
#include "SMLogging.h"
/* This class tracks the ownership of each prefix and manages ownership transfer.
Could we come up with a better name btw? */
namespace storagemanager
{
class Ownership : public boost::noncopyable
{
public:
Ownership();
~Ownership();
bool sharedFS();
// returns the path "right shifted" by prefixDepth, and with ownership of that path.
// on error it returns an empty path.
boost::filesystem::path get(const boost::filesystem::path &);
private:
uint prefixDepth;
boost::filesystem::path metadataPrefix;
SMLogging *logger;
void touchFlushing(const boost::filesystem::path &, volatile bool *) const;
void takeOwnership(const boost::filesystem::path &);
void releaseOwnership(const boost::filesystem::path &, bool isDtor = false);
void _takeOwnership(const boost::filesystem::path &);
struct Monitor
{
Monitor(Ownership *);
~Monitor();
boost::thread thread;
Ownership *owner;
volatile bool stop;
void watchForInterlopers();
};
// maps a prefix to a state. ownedPrefixes[p] == false means it's being init'd, == true means it's ready for use.
std::map<boost::filesystem::path, bool> ownedPrefixes;
Monitor *monitor;
boost::mutex mutex;
};
inline bool Ownership::sharedFS()
{
return prefixDepth >= 0;
}
}
#endif
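To make the "right shift" concrete, here is an isolated restatement of the path arithmetic in get(), with hypothetical paths (the real method also records ownership of the returned path's prefix as a side effect):

#include <boost/filesystem/path.hpp>
#include <iostream>

namespace bf = boost::filesystem;

bf::path rightShift(const bf::path &p, unsigned prefixDepth)
{
    bf::path ret;
    bf::path::const_iterator pit = p.begin();
    for (unsigned i = 0; i <= prefixDepth && pit != p.end(); ++i, ++pit)
        ;                                  // skip "/" plus prefixDepth leading dirs
    for (; pit != p.end(); ++pit)
        ret /= *pit;                       // the prefix itself plus the remainder
    return ret;
}

int main()
{
    std::cout << rightShift("/base/data3/foo/bar", 1) << "\n"; // "data3/foo/bar"
    std::cout << rightShift("/base", 1) << "\n";               // "" (path too short)
    return 0;
}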

src/PrefixCache.cpp Normal file

@@ -0,0 +1,631 @@
#include "PrefixCache.h"
#include "Cache.h"
#include "Config.h"
#include "Downloader.h"
#include "Synchronizer.h"
#include <iostream>
#include <syslog.h>
#include <boost/filesystem.hpp>
#include <boost/thread.hpp>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
using namespace std;
namespace bf = boost::filesystem;
namespace storagemanager
{
PrefixCache::PrefixCache(const bf::path &prefix) : firstDir(prefix), currentCacheSize(0)
{
Config *conf = Config::get();
logger = SMLogging::get();
replicator = Replicator::get();
downloader = Cache::get()->getDownloader();
string stmp = conf->getValue("Cache", "cache_size");
if (stmp.empty())
{
logger->log(LOG_CRIT, "Cache/cache_size is not set");
throw runtime_error("Please set Cache/cache_size in the storagemanager.cnf file");
}
try
{
maxCacheSize = stoul(stmp);
}
catch (invalid_argument &)
{
logger->log(LOG_CRIT, "Cache/cache_size is not a number");
throw runtime_error("Please set Cache/cache_size to a number");
}
//cout << "Cache got cache size " << maxCacheSize << endl;
stmp = conf->getValue("ObjectStorage", "object_size");
if (stmp.empty())
{
logger->log(LOG_CRIT, "ObjectStorage/object_size is not set");
throw runtime_error("Please set ObjectStorage/object_size in the storagemanager.cnf file");
}
try
{
objectSize = stoul(stmp);
}
catch (invalid_argument &)
{
logger->log(LOG_CRIT, "ObjectStorage/object_size is not a number");
throw runtime_error("Please set ObjectStorage/object_size to a number");
}
cachePrefix = conf->getValue("Cache", "path");
if (cachePrefix.empty())
{
logger->log(LOG_CRIT, "Cache/path is not set");
throw runtime_error("Please set Cache/path in the storagemanager.cnf file");
}
cachePrefix /= firstDir;
try
{
bf::create_directories(cachePrefix);
}
catch (exception &e)
{
logger->log(LOG_CRIT, "Failed to create %s, got: %s", cachePrefix.string().c_str(), e.what());
throw;
}
stmp = conf->getValue("ObjectStorage", "journal_path");
if (stmp.empty())
{
logger->log(LOG_CRIT, "ObjectStorage/journal_path is not set");
throw runtime_error("Please set ObjectStorage/journal_path in the storagemanager.cnf file");
}
journalPrefix = stmp;
journalPrefix /= firstDir;
try
{
bf::create_directories(journalPrefix);
}
catch (exception &e)
{
logger->log(LOG_CRIT, "Failed to create %s, got: %s", journalPrefix.string().c_str(), e.what());
throw;
}
lru_mutex.lock(); // unlocked by populate() when it's done
boost::thread t([this] { this->populate(); });
t.detach();
}
PrefixCache::~PrefixCache()
{
/* This and shutdown() need to do whatever is necessary to leave cache contents in a safe
state on disk. Does anything need to be done toward that?
*/
}
void PrefixCache::populate()
{
Synchronizer *sync = Synchronizer::get();
bf::directory_iterator dir(cachePrefix);
bf::directory_iterator dend;
vector<string> newObjects;
while (dir != dend)
{
// put everything in lru & m_lru
const bf::path &p = dir->path();
if (bf::is_regular_file(p))
{
lru.push_back(p.filename().string());
auto last = lru.end();
m_lru.insert(--last);
currentCacheSize += bf::file_size(*dir);
newObjects.push_back(p.filename().string());
}
else
logger->log(LOG_WARNING, "Cache: found something in the cache that does not belong '%s'", p.string().c_str());
++dir;
}
sync->newObjects(firstDir, newObjects);
newObjects.clear();
// account for what's in the journal dir
vector<pair<string, size_t> > newJournals;
dir = bf::directory_iterator(journalPrefix);
while (dir != dend)
{
const bf::path &p = dir->path();
if (bf::is_regular_file(p))
{
if (p.extension() == ".journal")
{
size_t s = bf::file_size(*dir);
currentCacheSize += s;
newJournals.push_back(pair<string, size_t>(p.stem().string(), s));
}
else
logger->log(LOG_WARNING, "Cache: found a file in the journal dir that does not belong '%s'", p.string().c_str());
}
else
logger->log(LOG_WARNING, "Cache: found something in the journal dir that does not belong '%s'", p.string().c_str());
++dir;
}
lru_mutex.unlock();
sync->newJournalEntries(firstDir, newJournals);
}
// be careful using this! SM should be idle. No ongoing reads or writes.
void PrefixCache::validateCacheSize()
{
boost::unique_lock<boost::mutex> s(lru_mutex);
if (!doNotEvict.empty() || !toBeDeleted.empty())
{
cout << "Not safe to use validateCacheSize() at the moment." << endl;
return;
}
size_t oldSize = currentCacheSize;
currentCacheSize = 0;
m_lru.clear();
lru.clear();
populate();
if (oldSize != currentCacheSize)
logger->log(LOG_DEBUG, "PrefixCache::validateCacheSize(): found a discrepancy. Actual size is %lld, had %lld.",
currentCacheSize, oldSize);
else
logger->log(LOG_DEBUG, "PrefixCache::validateCacheSize(): Cache size accounting agrees with reality for now.");
}
void PrefixCache::read(const vector<string> &keys)
{
/* Move existing keys to the back of the LRU, and start downloading nonexistent keys.
*/
vector<const string *> keysToFetch;
vector<int> dlErrnos;
vector<size_t> dlSizes;
boost::unique_lock<boost::mutex> s(lru_mutex);
M_LRU_t::iterator mit;
for (const string &key : keys)
{
mit = m_lru.find(key);
if (mit != m_lru.end())
{
addToDNE(mit->lit);
lru.splice(lru.end(), lru, mit->lit); // move them to the back so they are last to pick for eviction
}
else
{
// There's a window where the file has been downloaded but is not yet
// added to the lru structs. However it is in the DNE. If it is in the DNE, then it is also
// in Downloader's map. So, this thread needs to start the download if it's not in the
// DNE or if there's an existing download that hasn't finished yet. Starting the download
// includes waiting for an existing download to finish, which from this class's pov is the
// same thing.
if (doNotEvict.find(key) == doNotEvict.end() || downloader->inProgress(key))
keysToFetch.push_back(&key);
else
cout << "Cache: detected and stopped a racey download" << endl;
addToDNE(key);
}
}
if (keysToFetch.empty())
return;
downloader->download(keysToFetch, &dlErrnos, &dlSizes, cachePrefix, &lru_mutex);
size_t sum_sizes = 0;
for (uint i = 0; i < keysToFetch.size(); ++i)
{
// downloads with size 0 didn't actually happen, either because it
// was a preexisting download (another read() call owns it), or because
// there was an error downloading it. Use size == 0 as an indication of
// what to add to the cache. Also needs to verify that the file was not deleted,
// indicated by existence in doNotEvict.
if (dlSizes[i] != 0)
{
if (doNotEvict.find(*keysToFetch[i]) != doNotEvict.end())
{
sum_sizes += dlSizes[i];
lru.push_back(*keysToFetch[i]);
LRU_t::iterator lit = lru.end();
m_lru.insert(--lit); // I dislike this way of grabbing the last iterator in a list.
}
else // it was downloaded, but a deletion happened so we have to toss it
{
cout << "removing a file that was deleted by another thread during download" << endl;
bf::remove(cachePrefix / (*keysToFetch[i]));
}
}
}
// move everything in keys to the back of the lru (yes, again)
for (const string &key : keys)
{
mit = m_lru.find(key);
if (mit != m_lru.end()) // all of the files exist, just not all of them are 'owned by' this thread.
lru.splice(lru.end(), lru, mit->lit);
}
// fix cache size
//_makeSpace(sum_sizes);
currentCacheSize += sum_sizes;
}
void PrefixCache::doneReading(const vector<string> &keys)
{
boost::unique_lock<boost::mutex> s(lru_mutex);
for (const string &key : keys)
{
removeFromDNE(key);
// most should be in the map.
// debateable whether it's faster to look up the list iterator and use it
// or whether it's faster to bypass that and use strings only.
//const auto &it = m_lru.find(key);
//if (it != m_lru.end())
// removeFromDNE(it->lit);
}
_makeSpace(0);
}
void PrefixCache::doneWriting()
{
makeSpace(0);
}
PrefixCache::DNEElement::DNEElement(const LRU_t::iterator &k) : key(k), refCount(1)
{
}
PrefixCache::DNEElement::DNEElement(const string &k) : sKey(k), refCount(1)
{
}
void PrefixCache::addToDNE(const DNEElement &key)
{
DNE_t::iterator it = doNotEvict.find(key);
if (it != doNotEvict.end())
{
DNEElement &dnee = const_cast<DNEElement &>(*it);
++(dnee.refCount);
}
else
doNotEvict.insert(key);
}
void PrefixCache::removeFromDNE(const DNEElement &key)
{
DNE_t::iterator it = doNotEvict.find(key);
if (it == doNotEvict.end())
return;
DNEElement &dnee = const_cast<DNEElement &>(*it);
if (--(dnee.refCount) == 0)
doNotEvict.erase(it);
}
const bf::path & PrefixCache::getCachePath()
{
return cachePrefix;
}
const bf::path & PrefixCache::getJournalPath()
{
return journalPrefix;
}
void PrefixCache::exists(const vector<string> &keys, vector<bool> *out) const
{
out->resize(keys.size());
boost::unique_lock<boost::mutex> s(lru_mutex);
for (uint i = 0; i < keys.size(); i++)
(*out)[i] = (m_lru.find(keys[i]) != m_lru.end());
}
bool PrefixCache::exists(const string &key) const
{
boost::unique_lock<boost::mutex> s(lru_mutex);
return m_lru.find(key) != m_lru.end();
}
void PrefixCache::newObject(const string &key, size_t size)
{
boost::unique_lock<boost::mutex> s(lru_mutex);
assert(m_lru.find(key) == m_lru.end());
//_makeSpace(size);
lru.push_back(key);
LRU_t::iterator back = lru.end();
m_lru.insert(--back);
currentCacheSize += size;
}
void PrefixCache::newJournalEntry(size_t size)
{
boost::unique_lock<boost::mutex> s(lru_mutex);
//_makeSpace(size);
currentCacheSize += size;
}
void PrefixCache::deletedJournal(size_t size)
{
boost::unique_lock<boost::mutex> s(lru_mutex);
assert(currentCacheSize >= size);
currentCacheSize -= size;
}
void PrefixCache::deletedObject(const string &key, size_t size)
{
boost::unique_lock<boost::mutex> s(lru_mutex);
assert(currentCacheSize >= size);
M_LRU_t::iterator mit = m_lru.find(key);
assert(mit != m_lru.end());
// if it's being flushed, let makeSpace() do the deleting
if (toBeDeleted.find(mit->lit) == toBeDeleted.end())
{
doNotEvict.erase(mit->lit);
lru.erase(mit->lit);
m_lru.erase(mit);
currentCacheSize -= size;
}
}
void PrefixCache::setMaxCacheSize(size_t size)
{
boost::unique_lock<boost::mutex> s(lru_mutex);
if (size < maxCacheSize)
_makeSpace(maxCacheSize - size);
maxCacheSize = size;
}
void PrefixCache::makeSpace(size_t size)
{
boost::unique_lock<boost::mutex> s(lru_mutex);
_makeSpace(size);
}
size_t PrefixCache::getMaxCacheSize() const
{
return maxCacheSize;
}
// call this holding lru_mutex
void PrefixCache::_makeSpace(size_t size)
{
ssize_t thisMuch = currentCacheSize + size - maxCacheSize;
if (thisMuch <= 0)
return;
LRU_t::iterator it;
while (thisMuch > 0 && !lru.empty())
{
it = lru.begin();
// find the first element not being either read() right now or being processed by another
// makeSpace() call.
while (it != lru.end())
{
// make sure it's not currently being read or being flushed by another _makeSpace() call
if ((doNotEvict.find(it) == doNotEvict.end()) && (toBeDeleted.find(it) == toBeDeleted.end()))
break;
++it;
}
if (it == lru.end())
{
// nothing can be deleted right now
return;
}
if (!bf::exists(cachePrefix / *it))
cout << cachePrefix / *it << " doesn't exist, WTF?" << endl; // ran into this a couple times, still happens as of commit 948ee1aa5
assert(bf::exists(cachePrefix / *it));
/*
tell Synchronizer that this key will be evicted
delete the file
remove it from our structs
update current size
*/
//logger->log(LOG_WARNING, "Cache: flushing!");
toBeDeleted.insert(it);
string key = *it; // need to make a copy; it could get changed after unlocking.
lru_mutex.unlock();
try
{
Synchronizer::get()->flushObject(firstDir, key);
}
catch (...)
{
// it gets logged by Sync
lru_mutex.lock();
toBeDeleted.erase(it);
continue;
}
lru_mutex.lock();
// check doNotEvict again in case this object is now being read
if (doNotEvict.find(it) == doNotEvict.end())
{
bf::path cachedFile = cachePrefix / *it;
m_lru.erase(*it);
toBeDeleted.erase(it);
lru.erase(it);
size_t newSize = bf::file_size(cachedFile);
replicator->remove(cachedFile, Replicator::LOCAL_ONLY);
if (newSize < currentCacheSize)
{
currentCacheSize -= newSize;
thisMuch -= newSize;
}
else
{
logger->log(LOG_WARNING, "PrefixCache::makeSpace(): accounting error. Almost wrapped currentCacheSize on flush.");
currentCacheSize = 0;
thisMuch = 0;
}
}
else
toBeDeleted.erase(it);
}
}
void PrefixCache::rename(const string &oldKey, const string &newKey, ssize_t sizediff)
{
// rename it in the LRU
// erase/insert to rehash it everywhere else
boost::unique_lock<boost::mutex> s(lru_mutex);
auto it = m_lru.find(oldKey);
if (it == m_lru.end())
return;
auto lit = it->lit;
m_lru.erase(it);
int refCount = 0;
auto dne_it = doNotEvict.find(lit);
if (dne_it != doNotEvict.end())
{
refCount = dne_it->refCount;
doNotEvict.erase(dne_it);
}
auto tbd_it = toBeDeleted.find(lit);
bool hasTBDEntry = (tbd_it != toBeDeleted.end());
if (hasTBDEntry)
toBeDeleted.erase(tbd_it);
*lit = newKey;
if (hasTBDEntry)
toBeDeleted.insert(lit);
if (refCount != 0)
{
pair<DNE_t::iterator, bool> dne_tmp = doNotEvict.insert(lit);
const_cast<DNEElement &>(*(dne_tmp.first)).refCount = refCount;
}
m_lru.insert(lit);
currentCacheSize += sizediff;
}
int PrefixCache::ifExistsThenDelete(const string &key)
{
bf::path cachedPath = cachePrefix / key;
bf::path journalPath = journalPrefix / (key + ".journal");
boost::unique_lock<boost::mutex> s(lru_mutex);
bool objectExists = false;
auto it = m_lru.find(key);
if (it != m_lru.end())
{
if (toBeDeleted.find(it->lit) == toBeDeleted.end())
{
doNotEvict.erase(it->lit);
lru.erase(it->lit);
m_lru.erase(it);
objectExists = true;
}
else // let makeSpace() delete it if it's already in progress
return 0;
}
bool journalExists = bf::exists(journalPath);
//assert(objectExists == bf::exists(cachedPath));
size_t objectSize = (objectExists ? bf::file_size(cachedPath) : 0);
//size_t objectSize = (objectExists ? MetadataFile::getLengthFromKey(key) : 0);
size_t journalSize = (journalExists ? bf::file_size(journalPath) : 0);
currentCacheSize -= (objectSize + journalSize);
//assert(!objectExists || objectSize == bf::file_size(cachedPath));
return (objectExists ? 1 : 0) | (journalExists ? 2 : 0);
}
size_t PrefixCache::getCurrentCacheSize() const
{
return currentCacheSize;
}
size_t PrefixCache::getCurrentCacheElementCount() const
{
boost::unique_lock<boost::mutex> s(lru_mutex);
assert(m_lru.size() == lru.size());
return m_lru.size();
}
void PrefixCache::reset()
{
boost::unique_lock<boost::mutex> s(lru_mutex);
m_lru.clear();
lru.clear();
toBeDeleted.clear();
doNotEvict.clear();
bf::directory_iterator dir;
bf::directory_iterator dend;
for (dir = bf::directory_iterator(cachePrefix); dir != dend; ++dir)
bf::remove_all(dir->path());
for (dir = bf::directory_iterator(journalPrefix); dir != dend; ++dir)
bf::remove_all(dir->path());
currentCacheSize = 0;
}
void PrefixCache::shutdown()
{
/* Does this need to do something anymore? */
}
/* The helper classes */
PrefixCache::M_LRU_element_t::M_LRU_element_t(const string *k) : key(k)
{}
PrefixCache::M_LRU_element_t::M_LRU_element_t(const string &k) : key(&k)
{}
PrefixCache::M_LRU_element_t::M_LRU_element_t(const LRU_t::iterator &i) : key(&(*i)), lit(i)
{}
inline size_t PrefixCache::KeyHasher::operator()(const M_LRU_element_t &l) const
{
return hash<string>()(*(l.key));
}
inline bool PrefixCache::KeyEquals::operator()(const M_LRU_element_t &l1, const M_LRU_element_t &l2) const
{
return (*(l1.key) == *(l2.key));
}
inline size_t PrefixCache::DNEHasher::operator()(const DNEElement &l) const
{
return (l.sKey.empty() ? hash<string>()(*(l.key)) : hash<string>()(l.sKey));
}
inline bool PrefixCache::DNEEquals::operator()(const DNEElement &l1, const DNEElement &l2) const
{
const string *s1, *s2;
s1 = l1.sKey.empty() ? &(*(l1.key)) : &(l1.sKey);
s2 = l2.sKey.empty() ? &(*(l2.key)) : &(l2.sKey);
return (*s1 == *s2);
}
inline bool PrefixCache::TBDLess::operator()(const LRU_t::iterator &i1, const LRU_t::iterator &i2) const
{
return *i1 < *i2;
}
}
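The intended calling sequence for the read path above, from a hypothetical caller's point of view (in SM proper, the IOCoordinator drives this while holding the logical-file lock):

#include "PrefixCache.h"
#include <string>
#include <vector>

// Sketch only: pc is the PrefixCache for one owned prefix.
void readSomeObjects(storagemanager::PrefixCache &pc,
                     const std::vector<std::string> &keys)
{
    pc.read(keys);          // pins the keys (DNE refcounts) and downloads any that are missing
    // ... read the object files under pc.getCachePath() / key ...
    pc.doneReading(keys);   // unpins them, after which _makeSpace() may evict again
}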

src/PrefixCache.h Normal file

@@ -0,0 +1,153 @@
#ifndef PREFIXCACHE_H_
#define PREFIXCACHE_H_
/* PrefixCache manages the cache for one prefix managed by SM.
Cache is a map of prefix -> PrefixCache, and holds the items
that should be centralized like the Downloader
*/
#include "Downloader.h"
#include "SMLogging.h"
#include "Replicator.h"
#include <string>
#include <vector>
#include <list>
#include <unordered_set>
#include <boost/utility.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/thread/mutex.hpp>
namespace storagemanager
{
class PrefixCache : public boost::noncopyable
{
public:
PrefixCache(const boost::filesystem::path &prefix);
virtual ~PrefixCache();
//reading fcns
// read() marks objects to be read s.t. they do not get flushed.
// after reading them, unlock the 'logical file', and call doneReading().
void read(const std::vector<std::string> &keys);
void doneReading(const std::vector<std::string> &keys);
bool exists(const std::string &key) const;
void exists(const std::vector<std::string> &keys, std::vector<bool> *out) const;
// writing fcns
// new*() fcns tell the PrefixCache data was added. After writing a set of objects,
// unlock the 'logical file', and call doneWriting().
void newObject(const std::string &key, size_t size);
void newJournalEntry(size_t size);
void doneWriting();
void deletedObject(const std::string &key, size_t size);
void deletedJournal(size_t size);
// an 'atomic' existence check & delete. Covers the object and journal. Does not delete the files.
// returns 0 if it didn't exist, 1 if the object exists, 2 if the journal exists, and 3 (1 | 2) if both exist
// This should be called while holding the file lock for key because it touches the journal file.
int ifExistsThenDelete(const std::string &key);
// rename is used when an old obj gets merged with its journal file
// the size will change in that process; sizediff is by how much
void rename(const std::string &oldKey, const std::string &newKey, ssize_t sizediff);
void setMaxCacheSize(size_t size);
void makeSpace(size_t size);
size_t getCurrentCacheSize() const;
size_t getCurrentCacheElementCount() const;
size_t getMaxCacheSize() const;
void shutdown();
// test helpers
const boost::filesystem::path &getCachePath();
const boost::filesystem::path &getJournalPath();
// this will delete everything in the PrefixCache and journal paths, and empty all PrefixCache structures.
void reset();
void validateCacheSize();
private:
PrefixCache();
boost::filesystem::path cachePrefix;
boost::filesystem::path journalPrefix;
boost::filesystem::path firstDir;
size_t maxCacheSize;
size_t objectSize;
size_t currentCacheSize;
Replicator *replicator;
SMLogging *logger;
Downloader *downloader;
void populate();
void _makeSpace(size_t size);
/* The main PrefixCache structures */
// lru owns the string memory for the filenames it manages. m_lru and DNE point to those strings.
typedef std::list<std::string> LRU_t;
LRU_t lru;
struct M_LRU_element_t
{
M_LRU_element_t(const std::string &);
M_LRU_element_t(const std::string *);
M_LRU_element_t(const LRU_t::iterator &);
const std::string *key;
LRU_t::iterator lit;
};
struct KeyHasher
{
size_t operator()(const M_LRU_element_t &l) const;
};
struct KeyEquals
{
bool operator()(const M_LRU_element_t &l1, const M_LRU_element_t &l2) const;
};
typedef std::unordered_set<M_LRU_element_t, KeyHasher, KeyEquals> M_LRU_t;
M_LRU_t m_lru; // the LRU entries as a hash table
/* The do-not-evict list stuff. */
struct DNEElement
{
DNEElement(const LRU_t::iterator &);
DNEElement(const std::string &);
LRU_t::iterator key;
std::string sKey;
uint refCount;
};
struct DNEHasher
{
size_t operator()(const DNEElement &d) const;
};
struct DNEEquals
{
bool operator()(const DNEElement &d1, const DNEElement &d2) const;
};
typedef std::unordered_set<DNEElement, DNEHasher, DNEEquals> DNE_t;
DNE_t doNotEvict;
void addToDNE(const DNEElement &);
void removeFromDNE(const DNEElement &);
// the to-be-deleted set. Elements removed from the LRU but not yet deleted will be here.
// Elements are inserted and removed by makeSpace(). If read() references a file that is in this,
// it will remove it, signalling to makeSpace that it should not be deleted
struct TBDLess
{
bool operator()(const LRU_t::iterator &, const LRU_t::iterator &) const;
};
typedef std::set<LRU_t::iterator, TBDLess> TBD_t;
TBD_t toBeDeleted;
mutable boost::mutex lru_mutex; // protects the main PrefixCache structures & the do-not-evict set
};
}
#endif
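Cache itself is not part of this diff, but the call sites in this commit (cache->ifExistsThenDelete(firstDir, key), cache->newPrefix(p), and so on) imply it keeps one PrefixCache per owned prefix and forwards each call. A purely illustrative sketch of that routing; the class and method names below are hypothetical stand-ins, not the real Cache API:

#include <map>
#include <memory>
#include <string>
#include <boost/filesystem/path.hpp>

class PrefixCacheStub
{
  public:
    int ifExistsThenDelete(const std::string &key) { (void) key; return 0; }
};

class CacheRouter
{
  public:
    void newPrefix(const boost::filesystem::path &p)
    {
        caches.emplace(p, std::make_shared<PrefixCacheStub>());
    }
    void dropPrefix(const boost::filesystem::path &p)
    {
        caches.erase(p);
    }
    int ifExistsThenDelete(const boost::filesystem::path &firstDir, const std::string &key)
    {
        return caches.at(firstDir)->ifExistsThenDelete(key);  // route to that prefix's cache
    }
  private:
    std::map<boost::filesystem::path, std::shared_ptr<PrefixCacheStub> > caches;
};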


@@ -56,7 +56,6 @@ bool ReadTask::run()
// todo: do the reading and writing in chunks
// todo: need to make this use O_DIRECT on the IOC side
-ioc->willRead(cmd->filename, cmd->offset, cmd->count);
ssize_t err;
while ((uint) resp->returnCode < cmd->count)
{


@@ -121,6 +121,7 @@ int Replicator::addJournalEntry(const char *filename, const uint8_t *data, off_t
size_t count = 0;
int version = 1;
string journalFilename = msJournalPath + "/" + string(filename) + ".journal";
+boost::filesystem::path firstDir = *(boost::filesystem::path(filename).begin());
uint64_t thisEntryMaxOffset = (offset + length - 1);
bool exists = boost::filesystem::exists(journalFilename);
@@ -135,7 +136,7 @@ int Replicator::addJournalEntry(const char *filename, const uint8_t *data, off_t
assert((uint) err == header.length() + 1);
if (err <= 0)
return err;
-Cache::get()->newJournalEntry(header.length() + 1);
+Cache::get()->newJournalEntry(firstDir, header.length() + 1);
}
else
{
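The firstDir derivation above works because filenames reaching Replicator are already prefix-relative (they went through Ownership::get() in the IOCoordinator). For example:

#include <boost/filesystem/path.hpp>
#include <iostream>

int main()
{
    // hypothetical journal key "12345" under the owned prefix "data3"
    boost::filesystem::path p("data3/12345");
    std::cout << *(p.begin()) << std::endl;   // prints "data3"
    return 0;
}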


@@ -59,8 +59,8 @@ Synchronizer::Synchronizer() : maxUploads(0)
threadPool.reset(new ThreadPool());
threadPool->setMaxThreads(maxUploads);
die = false;
-uncommittedJournalSize = 0;
journalSizeThreshold = cache->getMaxCacheSize() / 2;
+blockNewJobs = false;
syncThread = boost::thread([this] () { this->periodicSync(); });
}
@@ -84,9 +84,28 @@ enum OpFlags
NEW_OBJECT = 0x4,
};
-void Synchronizer::_newJournalEntry(const string &key, size_t size)
+/* XXXPAT. Need to revisit this later. To make multiple prefix functionality as minimal as possible,
+I limited the changes to key string manipulation where possible. The keys it manages have the prefix they
+belong to prepended. So key 12345 in prefix p1 becomes p1/12345. This is not the most elegant or performant
+option, just the least invasive.
+*/
+void Synchronizer::newPrefix(const bf::path &p)
{
-uncommittedJournalSize += size;
+uncommittedJournalSize[p] = 0;
+}
+void Synchronizer::dropPrefix(const bf::path &p)
+{
+syncNow(p);
+boost::unique_lock<boost::mutex> s(mutex);
+uncommittedJournalSize.erase(p);
+}
+void Synchronizer::_newJournalEntry(const bf::path &prefix, const string &_key, size_t size)
+{
+string key = (prefix/_key).string();
+uncommittedJournalSize[prefix] += size;
auto it = pendingOps.find(key);
if (it != pendingOps.end())
{
@@ -97,61 +116,64 @@ void Synchronizer::_newJournalEntry(const string &key, size_t size)
pendingOps[key] = boost::shared_ptr<PendingOps>(new PendingOps(JOURNAL));
}
-void Synchronizer::newJournalEntry(const string &key, size_t size)
+void Synchronizer::newJournalEntry(const bf::path &prefix, const string &_key, size_t size)
{
boost::unique_lock<boost::mutex> s(mutex);
-_newJournalEntry(key, size);
-if (uncommittedJournalSize > journalSizeThreshold)
+_newJournalEntry(prefix, _key, size);
+if (uncommittedJournalSize[prefix] > journalSizeThreshold)
{
-uncommittedJournalSize = 0;
+uncommittedJournalSize[prefix] = 0;
s.unlock();
forceFlush();
}
}
-void Synchronizer::newJournalEntries(const vector<pair<string, size_t> > &keys)
+void Synchronizer::newJournalEntries(const bf::path &prefix, const vector<pair<string, size_t> > &keys)
{
boost::unique_lock<boost::mutex> s(mutex);
for (auto &keysize : keys)
-_newJournalEntry(keysize.first, keysize.second);
-if (uncommittedJournalSize > journalSizeThreshold)
+_newJournalEntry(prefix, keysize.first, keysize.second);
+if (uncommittedJournalSize[prefix] > journalSizeThreshold)
{
-uncommittedJournalSize = 0;
+uncommittedJournalSize[prefix] = 0;
s.unlock();
forceFlush();
}
}
-void Synchronizer::newObjects(const vector<string> &keys)
+void Synchronizer::newObjects(const bf::path &prefix, const vector<string> &keys)
{
boost::unique_lock<boost::mutex> s(mutex);
-for (const string &key : keys)
+for (const string &_key : keys)
{
-assert(pendingOps.find(key) == pendingOps.end());
+bf::path key(prefix/_key);
+assert(pendingOps.find(key.string()) == pendingOps.end());
//makeJob(key);
-pendingOps[key] = boost::shared_ptr<PendingOps>(new PendingOps(NEW_OBJECT));
+pendingOps[key.string()] = boost::shared_ptr<PendingOps>(new PendingOps(NEW_OBJECT));
}
}
-void Synchronizer::deletedObjects(const vector<string> &keys)
+void Synchronizer::deletedObjects(const bf::path &prefix, const vector<string> &keys)
{
boost::unique_lock<boost::mutex> s(mutex);
-for (const string &key : keys)
+for (const string &_key : keys)
{
-auto it = pendingOps.find(key);
+bf::path key(prefix/_key);
+auto it = pendingOps.find(key.string());
if (it != pendingOps.end())
it->second->opFlags |= DELETE;
else
-pendingOps[key] = boost::shared_ptr<PendingOps>(new PendingOps(DELETE));
+pendingOps[key.string()] = boost::shared_ptr<PendingOps>(new PendingOps(DELETE));
}
// would be good to signal to the things in opsInProgress that these were deleted. That would
// quiet down the logging somewhat. How to do that efficiently, and w/o gaps or deadlock...
}
-void Synchronizer::flushObject(const string &key)
+void Synchronizer::flushObject(const bf::path &prefix, const string &_key)
{
+string key = (prefix/_key).string();
boost::unique_lock<boost::mutex> s(mutex);
// if there is something to do on key, it should be either in pendingOps or opsInProgress
@@ -192,7 +214,7 @@ void Synchronizer::flushObject(const string &key)
bool keyExists, journalExists;
int err;
do {
-err = cs->exists(key.c_str(), &keyExists);
+err = cs->exists(_key.c_str(), &keyExists);
if (err)
{
char buf[80];
@@ -239,14 +261,37 @@ void Synchronizer::periodicSync()
//logger->log(LOG_DEBUG,"Synchronizer Force Flush.");
}
lock.lock();
+if (blockNewJobs)
+continue;
//cout << "Sync'ing " << pendingOps.size() << " objects" << " queue size is " <<
// threadPool.currentQueueSize() << endl;
for (auto &job : pendingOps)
makeJob(job.first);
-uncommittedJournalSize = 0;
+for (auto it = uncommittedJournalSize.begin(); it != uncommittedJournalSize.end(); ++it)
+it->second = 0;
}
}
+void Synchronizer::syncNow(const bf::path &prefix)
+{
+boost::unique_lock<boost::mutex> lock(mutex);
+// this is pretty hacky. when time permits, implement something better.
+//
+// Issue all of the pendingOps for the given prefix
+// recreate the threadpool (dtor returns once all jobs have finished)
+// resume normal operation
+blockNewJobs = true;
+for (auto &job : pendingOps)
+if (job.first.find(prefix.string()) == 0)
+makeJob(job.first);
+uncommittedJournalSize[prefix] = 0;
+threadPool.reset(new ThreadPool());
+threadPool->setMaxThreads(maxUploads);
+blockNewJobs = false;
+}
void Synchronizer::forceFlush()
{
boost::unique_lock<boost::mutex> lock(mutex);
@@ -361,6 +406,9 @@ void Synchronizer::synchronize(const string &sourceFile, list<string>::iterator
ScopedReadLock s(ioc, sourceFile);
string &key = *it;
+size_t pos = key.find_first_of('/');
+bf::path prefix = key.substr(0, pos);
+string cloudKey = key.substr(pos + 1);
char buf[80];
bool exists = false;
int err;
@@ -374,7 +422,7 @@ void Synchronizer::synchronize(const string &sourceFile, list<string>::iterator
const metadataObject *mdEntry;
bool entryExists = md.getEntry(MetadataFile::getOffsetFromKey(key), &mdEntry);
-if (!entryExists || key != mdEntry->key)
+if (!entryExists || cloudKey != mdEntry->key)
{
logger->log(LOG_DEBUG, "synchronize(): %s does not exist in metadata for %s. This suggests truncation.", key.c_str(), sourceFile.c_str());
return;
@@ -382,7 +430,7 @@ void Synchronizer::synchronize(const string &sourceFile, list<string>::iterator
//assert(key == mdEntry->key); <-- This could fail b/c of truncation + a write/append before this job runs.
-err = cs->exists(key, &exists);
+err = cs->exists(cloudKey, &exists);
if (err)
throw runtime_error(string("synchronize(): checking existence of ") + key + ", got " +
strerror_r(errno, buf, 80));
@@ -390,14 +438,14 @@ void Synchronizer::synchronize(const string &sourceFile, list<string>::iterator
return;
// TODO: should be safe to check with Cache instead of a file existence check
-exists = cache->exists(key);
+exists = cache->exists(prefix, cloudKey);
if (!exists)
{
logger->log(LOG_DEBUG, "synchronize(): was told to upload %s but it does not exist locally", key.c_str());
return;
}
-err = cs->putObject((cachePath / key).string(), key);
+err = cs->putObject((cachePath / key).string(), cloudKey);
if (err)
throw runtime_error(string("synchronize(): uploading ") + key + ", got " + strerror_r(errno, buf, 80));
replicator->remove((cachePath/key).string().c_str(), Replicator::NO_LOCAL);
@@ -406,7 +454,8 @@ int Synchronizer::synchronize(const string &sourceFile, list<string>::iterator
void Synchronizer::synchronizeDelete(const string &sourceFile, list<string>::iterator &it)
{
ScopedWriteLock s(ioc, sourceFile);
-cs->deleteObject(*it);
+string cloudKey = it->substr(it->find('/') + 1);
+cs->deleteObject(cloudKey);
}
void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>::iterator &lit)
@@ -415,6 +464,10 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
char buf[80];
string key = *lit;
+size_t pos = key.find_first_of('/');
+bf::path prefix = key.substr(0, pos);
+string cloudKey = key.substr(pos + 1);
MetadataFile md(sourceFile.c_str(), MetadataFile::no_create_t());
if (!md.exists())
@@ -425,7 +478,7 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
const metadataObject *mdEntry;
bool metaExists = md.getEntry(MetadataFile::getOffsetFromKey(key), &mdEntry);
-if (!metaExists || key != mdEntry->key)
+if (!metaExists || cloudKey != mdEntry->key)
{
logger->log(LOG_DEBUG, "synchronizeWithJournal(): %s does not exist in metadata for %s. This suggests truncation.", key.c_str(), sourceFile.c_str());
return;
@@ -442,12 +495,12 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
// sanity check + add'l info. Test whether the object exists in cloud storage. If so, complain,
// and run synchronize() instead.
bool existsOnCloud;
-int err = cs->exists(key, &existsOnCloud);
+int err = cs->exists(cloudKey, &existsOnCloud);
if (err)
throw runtime_error(string("Synchronizer: cs->exists() failed: ") + strerror_r(errno, buf, 80));
if (!existsOnCloud)
{
-if (cache->exists(key))
+if (cache->exists(prefix, cloudKey))
{
logger->log(LOG_DEBUG, "synchronizeWithJournal(): %s has no journal and does not exist in the cloud, calling "
"synchronize() instead. Need to explain how this happens.", key.c_str());
@@ -470,13 +523,13 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
boost::shared_array<uint8_t> data;
size_t count = 0, size = mdEntry->length;
-bool oldObjIsCached = cache->exists(key);
+bool oldObjIsCached = cache->exists(prefix, cloudKey);
// get the base object if it is not already cached
// merge it with its journal file
if (!oldObjIsCached)
{
-err = cs->getObject(key, &data, &size);
+err = cs->getObject(cloudKey, &data, &size);
if (err)
{
if (errno == ENOENT)
@@ -524,14 +577,15 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
}
// get a new key for the resolved version & upload it
-string newKey = MetadataFile::getNewKeyFromOldKey(key, size);
-err = cs->putObject(data, size, newKey);
+string newCloudKey = MetadataFile::getNewKeyFromOldKey(key, size);
+string newKey = (prefix/newCloudKey).string();
+err = cs->putObject(data, size, newCloudKey);
if (err)
{
// try to delete it in cloud storage... unlikely it is there in the first place, and if it is
// this probably won't work
int l_errno = errno;
-cs->deleteObject(newKey);
+cs->deleteObject(newCloudKey);
throw runtime_error(string("Synchronizer: putObject() failed: ") + strerror_r(l_errno, buf, 80));
}
@@ -562,22 +616,21 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
}
count += err;
}
-cache->rename(key, newKey, size - bf::file_size(oldCachePath));
+cache->rename(prefix, cloudKey, newCloudKey, size - bf::file_size(oldCachePath));
replicator->remove(oldCachePath);
}
// update the metadata for the source file
-md.updateEntry(MetadataFile::getOffsetFromKey(key), newKey, size);
+md.updateEntry(MetadataFile::getOffsetFromKey(key), newCloudKey, size);
replicator->updateMetadata(sourceFile.c_str(), md);
rename(key, newKey);
-ioc->renameObject(key, newKey);
// delete the old object & journal file
-cache->deletedJournal(bf::file_size(journalName));
+cache->deletedJournal(prefix, bf::file_size(journalName));
replicator->remove(journalName);
-cs->deleteObject(key);
+cs->deleteObject(cloudKey);
}
void Synchronizer::rename(const string &oldKey, const string &newKey)
@@ -639,4 +692,12 @@ void Synchronizer::PendingOps::wait(boost::mutex *m)
}
}
+Synchronizer::Job::Job(Synchronizer *s, std::list<std::string>::iterator i) : sync(s), it(i)
+{ }
+void Synchronizer::Job::operator()()
+{
+sync->process(it);
+}
}
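Per the XXXPAT note above, every key Synchronizer now tracks is a composite "prefix/cloudKey" string; the composition in _newJournalEntry() and the split in synchronize()/synchronizeWithJournal() are inverses of each other:

#include <boost/filesystem/path.hpp>
#include <iostream>
#include <string>

int main()
{
    namespace bf = boost::filesystem;

    // composing, as in _newJournalEntry(): hypothetical key "12345" in prefix "p1"
    std::string key = (bf::path("p1") / "12345").string();   // "p1/12345"

    // splitting, as in synchronize() / synchronizeWithJournal():
    size_t pos = key.find_first_of('/');
    bf::path prefix = key.substr(0, pos);                    // "p1"
    std::string cloudKey = key.substr(pos + 1);              // "12345"

    std::cout << prefix << " " << cloudKey << std::endl;
    return 0;
}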


@@ -21,7 +21,6 @@ namespace storagemanager
class Cache;   // break circular dependency in header files
class IOCoordinator;

-/* TODO: Need to think about how errors are handled / propagated */
class Synchronizer : public boost::noncopyable
{
    public:
@@ -30,12 +29,16 @@ class Synchronizer : public boost::noncopyable
        // these take keys as parameters, not full path names, ex, pass in '12345' not
        // 'cache/12345'.
-       void newJournalEntry(const std::string &key, size_t len);
-       void newJournalEntries(const std::vector<std::pair<std::string, size_t> > &keys);
-       void newObjects(const std::vector<std::string> &keys);
-       void deletedObjects(const std::vector<std::string> &keys);
-       void flushObject(const std::string &key);
-       void forceFlush();
+       void newJournalEntry(const boost::filesystem::path &firstDir, const std::string &key, size_t len);
+       void newJournalEntries(const boost::filesystem::path &firstDir, const std::vector<std::pair<std::string, size_t> > &keys);
+       void newObjects(const boost::filesystem::path &firstDir, const std::vector<std::string> &keys);
+       void deletedObjects(const boost::filesystem::path &firstDir, const std::vector<std::string> &keys);
+       void flushObject(const boost::filesystem::path &firstDir, const std::string &key);
+       void forceFlush();   // ideally, make a version of this that takes a firstDir parameter
+       void newPrefix(const boost::filesystem::path &p);
+       void dropPrefix(const boost::filesystem::path &p);

        // for testing primarily
        boost::filesystem::path getJournalPath();
@@ -43,7 +46,7 @@ class Synchronizer : public boost::noncopyable
    private:
        Synchronizer();
-       void _newJournalEntry(const std::string &key, size_t len);
+       void _newJournalEntry(const boost::filesystem::path &firstDir, const std::string &key, size_t len);
        void process(std::list<std::string>::iterator key);
        void synchronize(const std::string &sourceFile, std::list<std::string>::iterator &it);
        void synchronizeDelete(const std::string &sourceFile, std::list<std::string>::iterator &it);
@@ -66,8 +69,8 @@ class Synchronizer : public boost::noncopyable
        struct Job : public ThreadPool::Job
        {
-           Job(Synchronizer *s, std::list<std::string>::iterator i) : sync(s), it(i) { }
-           void operator()() { sync->process(it); }
+           Job(Synchronizer *s, std::list<std::string>::iterator i);
+           void operator()();
            Synchronizer *sync;
            std::list<std::string>::iterator it;
        };
@@ -88,7 +91,11 @@ class Synchronizer : public boost::noncopyable
        boost::thread syncThread;
        const boost::chrono::seconds syncInterval = boost::chrono::seconds(10);
        void periodicSync();
-       size_t uncommittedJournalSize, journalSizeThreshold;
+       std::map<boost::filesystem::path, size_t> uncommittedJournalSize;
+       size_t journalSizeThreshold;
+       bool blockNewJobs;
+       void syncNow(const boost::filesystem::path &prefix);   // a synchronous version of forceFlush()

        SMLogging *logger;
        Cache *cache;
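The switch from a single uncommittedJournalSize counter to a std::map keyed by prefix is what lets flush decisions be made per prefix rather than globally. A sketch of that bookkeeping; the class, method names, and threshold value below are illustrative stand-ins, not SM's actual API:

    #include <boost/filesystem.hpp>
    #include <map>

    namespace bf = boost::filesystem;

    // Hypothetical stand-in for the Synchronizer's per-prefix accounting.
    class JournalAccounting
    {
    public:
        // Record len new journal bytes against firstDir; return true if that
        // prefix alone has crossed the flush threshold.
        bool add(const bf::path &firstDir, size_t len)
        {
            // operator[] default-initializes the counter to 0 on first use
            uncommittedJournalSize[firstDir] += len;
            return uncommittedJournalSize[firstDir] >= journalSizeThreshold;
        }

        // After flushing one prefix, only that prefix's counter resets;
        // other prefixes keep accumulating independently.
        void flushed(const bf::path &firstDir)
        {
            uncommittedJournalSize[firstDir] = 0;
        }

    private:
        std::map<bf::path, size_t> uncommittedJournalSize;
        size_t journalSizeThreshold = 100 << 20;   // illustrative: 100 MiB
    };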

View File

@@ -4,10 +4,19 @@
namespace storagemanager
{
-ScopedReadLock::ScopedReadLock(IOCoordinator *i, const std::string &k) : ioc(i), locked(false), key(k)
+ScopedFileLock::ScopedFileLock(IOCoordinator *i, const std::string &k) : ioc(i), locked(false), key(k)
+{
+}
+
+ScopedFileLock::~ScopedFileLock()
+{
+}
+
+ScopedReadLock::ScopedReadLock(IOCoordinator *i, const std::string &k) : ScopedFileLock(i, k)
{
    lock();
}

ScopedReadLock::~ScopedReadLock()
{
    unlock();
@@ -29,10 +38,11 @@ void ScopedReadLock::unlock()
    }
}

-ScopedWriteLock::ScopedWriteLock(IOCoordinator *i, const std::string &k) : ioc(i), locked(false), key(k)
+ScopedWriteLock::ScopedWriteLock(IOCoordinator *i, const std::string &k) : ScopedFileLock(i, k)
{
    lock();
}

ScopedWriteLock::~ScopedWriteLock()
{
    unlock();

View File

@@ -11,28 +11,34 @@ class IOCoordinator;
// a few utility classes we've coded here and there, now de-duped and centralized.
// modify as necessary.

-struct ScopedReadLock
+struct ScopedFileLock
{
-   ScopedReadLock(IOCoordinator *i, const std::string &k);
-   ~ScopedReadLock();
-   void lock();
-   void unlock();
+   ScopedFileLock(IOCoordinator *i, const std::string &k);
+   virtual ~ScopedFileLock();
+
+   virtual void lock() = 0;
+   virtual void unlock() = 0;

    IOCoordinator *ioc;
    bool locked;
    const std::string key;
};

-struct ScopedWriteLock
+struct ScopedReadLock : public ScopedFileLock
{
-   ScopedWriteLock(IOCoordinator *i, const std::string &k);
-   ~ScopedWriteLock();
+   ScopedReadLock(IOCoordinator *i, const std::string &k);
+   virtual ~ScopedReadLock();
    void lock();
    void unlock();
+};

-   IOCoordinator *ioc;
-   bool locked;
-   const std::string key;
+struct ScopedWriteLock : public ScopedFileLock
+{
+   ScopedWriteLock(IOCoordinator *i, const std::string &k);
+   virtual ~ScopedWriteLock();
+   void lock();
+   void unlock();
};

struct ScopedCloser
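With the common base in place, callers can pick the lock flavor at runtime and still rely on RAII release. A usage sketch, assuming the declarations above; the function, readOnly flag, and key value are illustrative, not code from this commit:

    #include <memory>
    #include <string>

    void withFileLock(IOCoordinator *ioc, const std::string &key, bool readOnly)
    {
        // Choose the lock type through the common base.
        std::unique_ptr<ScopedFileLock> flock;
        if (readOnly)
            flock.reset(new ScopedReadLock(ioc, key));
        else
            flock.reset(new ScopedWriteLock(ioc, key));

        // ... operate on the file while the lock is held ...

        // unique_ptr destroys through the virtual base destructor, so the
        // matching derived destructor (and its unlock()) runs whichever
        // type was constructed.
    }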

View File

@@ -68,11 +68,10 @@ int main(int argc, char** argv)
    SessionManager* sm = SessionManager::get();
    ret = sm->start();

    cache->shutdown();
    delete sync;
    delete cache;
    delete ioc;

View File

@@ -891,9 +891,9 @@ bool cacheTest1()
    // make sure nothing shows up in the cache path for files that don't exist
    v_bogus.push_back("does-not-exist");
-   cache->read(v_bogus);
+   cache->read("", v_bogus);
    assert(!bf::exists(cachePath / "does-not-exist"));
-   cache->exists(v_bogus, &exists);
+   cache->exists("", v_bogus, &exists);
    assert(exists.size() == 1);
    assert(!exists[0]);
@@ -901,21 +901,21 @@ bool cacheTest1()
    string realFile("storagemanager.cnf");
    bf::copy_file(realFile, storagePath / realFile, bf::copy_option::overwrite_if_exists);
    v_bogus[0] = realFile;
-   cache->read(v_bogus);
+   cache->read("", v_bogus);
    assert(bf::exists(cachePath / realFile));
    exists.clear();
-   cache->exists(v_bogus, &exists);
+   cache->exists("", v_bogus, &exists);
    assert(exists.size() == 1);
    assert(exists[0]);
    size_t currentSize = cache->getCurrentCacheSize();
    assert(currentSize == bf::file_size(cachePath / realFile));

    // lie about the file being deleted and then replaced
-   cache->deletedObject(realFile, currentSize);
+   cache->deletedObject("", realFile, currentSize);
    assert(cache->getCurrentCacheSize() == 0);
-   cache->newObject(realFile, currentSize);
+   cache->newObject("", realFile, currentSize);
    assert(cache->getCurrentCacheSize() == currentSize);
-   cache->exists(v_bogus, &exists);
+   cache->exists("", v_bogus, &exists);
    assert(exists.size() == 1);
    assert(exists[0]);
@@ -1025,13 +1025,13 @@ bool syncTest1()
    makeTestJournal((journalPath/journalName).string().c_str());
    makeTestMetadata((metaPath/"test-file.meta").string().c_str());
-   cache->newObject(key, bf::file_size(cachePath/key));
-   cache->newJournalEntry(bf::file_size(journalPath/journalName));
+   cache->newObject("", key, bf::file_size(cachePath/key));
+   cache->newJournalEntry("", bf::file_size(journalPath/journalName));
    vector<string> vObj;
    vObj.push_back(key);
-   sync->newObjects(vObj);
+   sync->newObjects("", vObj);
    sync->forceFlush();
    sleep(1);   // wait for the job to run
@@ -1041,12 +1041,12 @@ bool syncTest1()
    assert(!err);
    assert(exists);
-   sync->newJournalEntry(key, 0);
+   sync->newJournalEntry("", key, 0);
    sync->forceFlush();
    sleep(1);   // let it do what it does

    // check that the original objects no longer exist
-   assert(!cache->exists(key));
+   assert(!cache->exists("", key));
    assert(!bf::exists(journalPath/journalName));

    // Replicator doesn't implement all of its functionality yet, need to delete key from the cache manually for now
@@ -1063,24 +1063,24 @@ bool syncTest1()
        foundIt = (MetadataFile::getSourceFromKey(newKey) == "test-file");
        if (foundIt)
        {
-           assert(cache->exists(newKey));
+           assert(cache->exists("", newKey));
            cs->deleteObject(newKey);
            break;
        }
    }
    assert(foundIt);

-   cache->makeSpace(cache->getMaxCacheSize());   // clear the cache & make it call sync->flushObject()
+   cache->makeSpace("", cache->getMaxCacheSize());   // clear the cache & make it call sync->flushObject()

    // the key should now be back in cloud storage and deleted from the cache
-   assert(!cache->exists(newKey));
+   assert(!cache->exists("", newKey));
    err = cs->exists(newKey, &exists);
    assert(!err && exists);

    // make the journal again, call sync->newJournalObject()
    makeTestJournal((journalPath / (newKey + ".journal")).string().c_str());
-   cache->newJournalEntry(bf::file_size(journalPath / (newKey + ".journal")));
-   sync->newJournalEntry(newKey, 0);
+   cache->newJournalEntry("", bf::file_size(journalPath / (newKey + ".journal")));
+   sync->newJournalEntry("", newKey, 0);
    sync->forceFlush();
    sleep(1);
@@ -1102,7 +1102,7 @@ bool syncTest1()
    vector<string> keys;
    for (bf::directory_iterator dir(fakeCloudPath); dir != bf::directory_iterator(); ++dir)
        keys.push_back(dir->path().filename().string());
-   sync->deletedObjects(keys);
+   sync->deletedObjects("", keys);
    sync->forceFlush();
    sleep(1);
    ::unlink((metaPath/"test-file.meta").string().c_str());
@@ -1318,11 +1318,11 @@ void IOCUnlink()
    makeTestObject(cachedObjPath.string().c_str());
    makeTestJournal(cachedJournalPath.string().c_str());
-   cache->newObject(cachedObjPath.filename().string(), bf::file_size(cachedObjPath));
-   cache->newJournalEntry(bf::file_size(cachedJournalPath));
+   cache->newObject("", cachedObjPath.filename().string(), bf::file_size(cachedObjPath));
+   cache->newJournalEntry("", bf::file_size(cachedJournalPath));
    vector<string> keys;
    keys.push_back(cachedObjPath.filename().string());
-   sync->newObjects(keys);
+   sync->newObjects("", keys);
    //sync->newJournalEntry(keys[0]);   don't want to end up renaming it
    sync->forceFlush();
    sleep(1);
@@ -1385,7 +1385,7 @@ void IOCCopyFile1()
    makeTestMetadata(sourcePath.string().c_str());
    makeTestObject((csPath/testObjKey).string().c_str());
    makeTestJournal((journalPath/(string(testObjKey) + ".journal")).string().c_str());
-   cache->newJournalEntry(bf::file_size(journalPath/(string(testObjKey) + ".journal")));
+   cache->newJournalEntry("", bf::file_size(journalPath/(string(testObjKey) + ".journal")));

    int err = ioc->copyFile("copyfile1/source", "copyfile2/dest");
    assert(!err);
@@ -1445,8 +1445,8 @@ void IOCCopyFile3()
    makeTestMetadata(sourcePath.string().c_str());
    makeTestObject((cachePath/testObjKey).string().c_str());
    makeTestJournal((journalPath/(string(testObjKey) + ".journal")).string().c_str());
-   cache->newObject(testObjKey, bf::file_size(cachePath/testObjKey));
-   cache->newJournalEntry(bf::file_size(journalPath/(string(testObjKey) + ".journal")));
+   cache->newObject("", testObjKey, bf::file_size(cachePath/testObjKey));
+   cache->newJournalEntry("", bf::file_size(journalPath/(string(testObjKey) + ".journal")));

    int err = ioc->copyFile("copyfile3/source", "copyfile4/dest");
    assert(!err);

View File

@@ -1,11 +1,41 @@
[ObjectStorage]
service = LocalStorage

+# This is a tuneable value, but also implies a maximum capacity.
+# Each file managed by StorageManager is broken into chunks of
+# object_size bytes. Each chunk is stored in the cache as a file,
+# so the filesystem the cache is put on needs to have enough inodes to
+# support at least cache_size/object_size files.
+#
+# Regarding tuning, object stores do not support modifying stored data;
+# entire objects must be replaced on modification, and entire
+# objects are fetched on read. An access pattern that includes
+# frequently accessing small amounts of data will benefit from
+# a smaller object_size. An access pattern where data
+# is accessed in large chunks will benefit from a larger object_size.
+#
+# Another limitation to consider is the get/put rate imposed by the
+# cloud provider. If that is the limitation, increasing object_size
+# will result in higher transfer rates.
object_size = 5M
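As a worked example of the inode requirement described in the new comment block: a 500 GB cache with the default 5 MB object_size can hold up to 500,000 MB / 5 MB = 100,000 cached objects, so the cache filesystem needs at least 100,000 free inodes, plus headroom for journal and metadata files. The cache size used here is illustrative.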
metadata_path = ${HOME}/storagemanager/metadata
journal_path = ${HOME}/storagemanager/journal

max_concurrent_downloads = 20
max_concurrent_uploads = 20
+
+# This is the depth of the common prefix that all files managed by SM have.
+# Ex: /usr/local/mariadb/columnstore/data1 and
+# /usr/local/mariadb/columnstore/data2 differ at the 5th directory element,
+# so they have a common prefix depth of 4.
+#
+# This value is used to manage the ownership of prefixes between
+# StorageManager instances that share a filesystem.
+#
+# -1 is a special value indicating that there is no filesystem shared
+# between SM instances.
+common_prefix_depth = 4
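To make the depth counting concrete, here is a sketch of pulling the owned prefix out of a managed path. It assumes common_prefix_depth counts the directory elements after the root; the diff does not show how SM handles the root element, so treat that detail and the sample path as illustrative:

    #include <boost/filesystem.hpp>
    #include <cassert>

    namespace bf = boost::filesystem;

    int main()
    {
        bf::path p("/usr/local/mariadb/columnstore/data1/000.dir/000.dir");
        int depth = 4;                     // common_prefix_depth from the config

        auto it = p.begin();
        ++it;                              // skip the root element "/"
        for (int i = 0; i < depth; i++)
            ++it;                          // skip usr, local, mariadb, columnstore

        assert(it->string() == "data1");   // the prefix an SM instance would own
        return 0;
    }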
[S3]
region = us-east-2
bucket = s3-cs-test2