You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
Wrote a version of mergeJournal that uses more memory but reduces the I/O-operation
requirement. Added a safety valve: if the journal file is > 100MB, it will fall back to the previous I/O-op-heavy but memory-friendly version. This also includes a compiler warning fix for smls & smput.
This commit is contained in:
@ -1083,19 +1083,6 @@ boost::shared_array<uint8_t> IOCoordinator::mergeJournal(const char *object, con
|
|||||||
if (objFD < 0)
|
if (objFD < 0)
|
||||||
return ret;
|
return ret;
|
||||||
ScopedCloser s1(objFD);
|
ScopedCloser s1(objFD);
|
||||||
journalFD = ::open(journal, O_RDONLY);
|
|
||||||
if (journalFD < 0)
|
|
||||||
return ret;
|
|
||||||
ScopedCloser s2(journalFD);
|
|
||||||
|
|
||||||
// grab the journal header, make sure the version is 1, and get the max offset
|
|
||||||
|
|
||||||
boost::shared_array<char> headertxt = seekToEndOfHeader1(journalFD, &l_bytesRead);
|
|
||||||
stringstream ss;
|
|
||||||
ss << headertxt.get();
|
|
||||||
boost::property_tree::ptree header;
|
|
||||||
boost::property_tree::json_parser::read_json(ss, header);
|
|
||||||
assert(header.get<int>("version") == 1);
|
|
||||||
|
|
||||||
ret.reset(new uint8_t[len]);
|
ret.reset(new uint8_t[len]);
|
||||||
|
|
||||||
@ -1117,14 +1104,39 @@ boost::shared_array<uint8_t> IOCoordinator::mergeJournal(const char *object, con
|
|||||||
else if (err == 0)
|
else if (err == 0)
|
||||||
{
|
{
|
||||||
// at the EOF of the object. The journal may contain entries that append to the data,
|
// at the EOF of the object. The journal may contain entries that append to the data,
|
||||||
// so 0-fill the remaining bytes.
|
|
||||||
memset(&ret[count], 0, len-count);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
count += err;
|
count += err;
|
||||||
}
|
}
|
||||||
l_bytesRead += count;
|
l_bytesRead += count;
|
||||||
|
|
||||||
|
// mergeJournalInMem has a lower IOPS requirement than the fully general code in this fcn. Use
|
||||||
|
// that if the caller requested the whole object to be merged
|
||||||
|
if (offset == 0 && (ssize_t) len >= ::lseek(objFD, 0, SEEK_END))
|
||||||
|
{
|
||||||
|
size_t mjimBytesRead = 0;
|
||||||
|
int mjimerr = mergeJournalInMem(ret, len, journal, &mjimBytesRead);
|
||||||
|
if (mjimerr)
|
||||||
|
{
|
||||||
|
ret.reset();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
l_bytesRead += mjimBytesRead;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
journalFD = ::open(journal, O_RDONLY);
|
||||||
|
if (journalFD < 0)
|
||||||
|
return ret;
|
||||||
|
ScopedCloser s2(journalFD);
|
||||||
|
|
||||||
|
boost::shared_array<char> headertxt = seekToEndOfHeader1(journalFD, &l_bytesRead);
|
||||||
|
stringstream ss;
|
||||||
|
ss << headertxt.get();
|
||||||
|
boost::property_tree::ptree header;
|
||||||
|
boost::property_tree::json_parser::read_json(ss, header);
|
||||||
|
assert(header.get<int>("version") == 1);
|
||||||
|
|
||||||
// start processing the entries
|
// start processing the entries
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
@ -1134,8 +1146,7 @@ boost::shared_array<uint8_t> IOCoordinator::mergeJournal(const char *object, con
|
|||||||
break;
|
break;
|
||||||
assert(err == 16);
|
assert(err == 16);
|
||||||
l_bytesRead += 16;
|
l_bytesRead += 16;
|
||||||
|
|
||||||
//cout << "MJ: got offset " << offlen[0] << " length " << offlen[1] << endl;
|
|
||||||
// if this entry overlaps, read the overlapping section
|
// if this entry overlaps, read the overlapping section
|
||||||
uint64_t lastJournalOffset = offlen[0] + offlen[1];
|
uint64_t lastJournalOffset = offlen[0] + offlen[1];
|
||||||
uint64_t lastBufOffset = offset + len;
|
uint64_t lastBufOffset = offset + len;
|
||||||
@ -1144,8 +1155,6 @@ boost::shared_array<uint8_t> IOCoordinator::mergeJournal(const char *object, con
|
|||||||
uint64_t startReadingAt = max(offlen[0], (uint64_t) offset);
|
uint64_t startReadingAt = max(offlen[0], (uint64_t) offset);
|
||||||
uint64_t lengthOfRead = min(lastBufOffset, lastJournalOffset) - startReadingAt;
|
uint64_t lengthOfRead = min(lastBufOffset, lastJournalOffset) - startReadingAt;
|
||||||
|
|
||||||
//cout << "MJ: startReadingAt = " << startReadingAt << " offlen[0] = " << offlen[0] << endl;
|
|
||||||
|
|
||||||
// seek to the portion of the entry to start reading at
|
// seek to the portion of the entry to start reading at
|
||||||
if (startReadingAt != offlen[0])
|
if (startReadingAt != offlen[0])
|
||||||
::lseek(journalFD, startReadingAt - offlen[0], SEEK_CUR);
|
::lseek(journalFD, startReadingAt - offlen[0], SEEK_CUR);
|
||||||
@ -1184,9 +1193,95 @@ boost::shared_array<uint8_t> IOCoordinator::mergeJournal(const char *object, con
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// MergeJournalInMem is a specialized version of mergeJournal(). TODO: refactor if possible.
|
// MergeJournalInMem is a specialized version of mergeJournal(). This is currently only used by Synchronizer
|
||||||
|
// and mergeJournal(), and only for merging the whole object with the whole journal.
|
||||||
int IOCoordinator::mergeJournalInMem(boost::shared_array<uint8_t> &objData, size_t len, const char *journalPath,
|
int IOCoordinator::mergeJournalInMem(boost::shared_array<uint8_t> &objData, size_t len, const char *journalPath,
|
||||||
size_t *_bytesReadOut) const
|
size_t *_bytesReadOut) const
|
||||||
|
{
|
||||||
|
// if the journal is over some size threshold (100MB for now why not),
|
||||||
|
// use the original low-mem-usage version
|
||||||
|
if (len > (100 << 20))
|
||||||
|
return mergeJournalInMem_bigJ(objData, len, journalPath, _bytesReadOut);
|
||||||
|
|
||||||
|
size_t l_bytesRead = 0;
|
||||||
|
int journalFD = ::open(journalPath, O_RDONLY);
|
||||||
|
if (journalFD < 0)
|
||||||
|
return -1;
|
||||||
|
ScopedCloser s(journalFD);
|
||||||
|
|
||||||
|
// grab the journal header and make sure the version is 1
|
||||||
|
boost::shared_array<char> headertxt = seekToEndOfHeader1(journalFD, &l_bytesRead);
|
||||||
|
stringstream ss;
|
||||||
|
ss << headertxt.get();
|
||||||
|
boost::property_tree::ptree header;
|
||||||
|
boost::property_tree::json_parser::read_json(ss, header);
|
||||||
|
assert(header.get<int>("version") == 1);
|
||||||
|
|
||||||
|
// read the journal file into memory
|
||||||
|
size_t journalBytes = ::lseek(journalFD, 0, SEEK_END) - l_bytesRead;
|
||||||
|
::lseek(journalFD, l_bytesRead, SEEK_SET);
|
||||||
|
boost::scoped_array<uint8_t> journalData(new uint8_t[journalBytes]);
|
||||||
|
size_t readCount = 0;
|
||||||
|
while (readCount < journalBytes)
|
||||||
|
{
|
||||||
|
ssize_t err = ::read(journalFD, &journalData[readCount], journalBytes - readCount);
|
||||||
|
if (err < 0)
|
||||||
|
{
|
||||||
|
char buf[80];
|
||||||
|
int l_errno = errno;
|
||||||
|
logger->log(LOG_ERR, "mergeJournalInMem: got %s", strerror_r(errno, buf, 80));
|
||||||
|
errno = l_errno;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
else if (err == 0)
|
||||||
|
{
|
||||||
|
logger->log(LOG_ERR, "mergeJournalInMem: got early EOF");
|
||||||
|
errno = ENODATA; // is there a better errno for early EOF?
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
readCount += err;
|
||||||
|
l_bytesRead += err;
|
||||||
|
}
|
||||||
|
|
||||||
|
// start processing the entries
|
||||||
|
size_t offset = 0;
|
||||||
|
while (offset < journalBytes)
|
||||||
|
{
|
||||||
|
if (offset + 16 >= journalBytes)
|
||||||
|
{
|
||||||
|
logger->log(LOG_ERR, "mergeJournalInMem: got early EOF");
|
||||||
|
errno = ENODATA; // is there a better errno for early EOF?
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
uint64_t *offlen = (uint64_t *) &journalData[offset];
|
||||||
|
offset += 16;
|
||||||
|
|
||||||
|
uint64_t startReadingAt = offlen[0];
|
||||||
|
uint64_t lengthOfRead = offlen[1];
|
||||||
|
|
||||||
|
if (startReadingAt > len)
|
||||||
|
{
|
||||||
|
offset += offlen[1];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (startReadingAt + lengthOfRead > len)
|
||||||
|
lengthOfRead = len - startReadingAt;
|
||||||
|
if (offset + lengthOfRead > journalBytes)
|
||||||
|
{
|
||||||
|
logger->log(LOG_ERR, "mergeJournalInMem: got early EOF");
|
||||||
|
errno = ENODATA; // is there a better errno for early EOF?
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
memcpy(&objData[startReadingAt], &journalData[offset], lengthOfRead);
|
||||||
|
offset += offlen[1];
|
||||||
|
}
|
||||||
|
*_bytesReadOut = l_bytesRead;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int IOCoordinator::mergeJournalInMem_bigJ(boost::shared_array<uint8_t> &objData, size_t len, const char *journalPath,
|
||||||
|
size_t *_bytesReadOut) const
|
||||||
{
|
{
|
||||||
size_t l_bytesRead = 0;
|
size_t l_bytesRead = 0;
|
||||||
int journalFD = ::open(journalPath, O_RDONLY);
|
int journalFD = ::open(journalPath, O_RDONLY);
|
||||||
@ -1201,7 +1296,6 @@ int IOCoordinator::mergeJournalInMem(boost::shared_array<uint8_t> &objData, size
|
|||||||
boost::property_tree::ptree header;
|
boost::property_tree::ptree header;
|
||||||
boost::property_tree::json_parser::read_json(ss, header);
|
boost::property_tree::json_parser::read_json(ss, header);
|
||||||
assert(header.get<int>("version") == 1);
|
assert(header.get<int>("version") == 1);
|
||||||
//size_t maxJournalOffset = header.get<size_t>("max_offset");
|
|
||||||
|
|
||||||
// start processing the entries
|
// start processing the entries
|
||||||
while (1)
|
while (1)
|
||||||
@ -1222,23 +1316,15 @@ int IOCoordinator::mergeJournalInMem(boost::shared_array<uint8_t> &objData, size
|
|||||||
uint64_t startReadingAt = offlen[0];
|
uint64_t startReadingAt = offlen[0];
|
||||||
uint64_t lengthOfRead = offlen[1];
|
uint64_t lengthOfRead = offlen[1];
|
||||||
|
|
||||||
// XXXPAT: Speculative change. Got mem errors from writing past the end of objData. The length
|
|
||||||
// in the metadata is shorter than this journal entry, and not because it got crazy values.
|
|
||||||
// I think the explanation is a truncation.
|
|
||||||
if (startReadingAt > len)
|
if (startReadingAt > len)
|
||||||
{
|
{
|
||||||
//logger->log(LOG_CRIT, "mergeJournalInMem: skipping a theoretically irrelevant journal entry in %s. "
|
|
||||||
// "jstart = %llu, max = %llu", journalPath, startReadingAt, len);
|
|
||||||
::lseek(journalFD, offlen[1], SEEK_CUR);
|
::lseek(journalFD, offlen[1], SEEK_CUR);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (startReadingAt + lengthOfRead > len)
|
if (startReadingAt + lengthOfRead > len)
|
||||||
{
|
|
||||||
//logger->log(LOG_CRIT, "mergeJournalInMem: possibly bad journal entry in %s. jStart = %llu, jEnd = %llu, max = %llu",
|
|
||||||
// journalPath, startReadingAt, startReadingAt + lengthOfRead, len);
|
|
||||||
lengthOfRead = len - startReadingAt;
|
lengthOfRead = len - startReadingAt;
|
||||||
}
|
|
||||||
uint count = 0;
|
uint count = 0;
|
||||||
while (count < lengthOfRead)
|
while (count < lengthOfRead)
|
||||||
{
|
{
|
||||||
@ -1267,8 +1353,6 @@ int IOCoordinator::mergeJournalInMem(boost::shared_array<uint8_t> &objData, size
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void IOCoordinator::readLock(const string &filename)
|
void IOCoordinator::readLock(const string &filename)
|
||||||
{
|
{
|
||||||
boost::unique_lock<boost::mutex> s(lockMutex);
|
boost::unique_lock<boost::mutex> s(lockMutex);
|
||||||
|
@ -63,10 +63,15 @@ class IOCoordinator : public boost::noncopyable
|
|||||||
boost::shared_array<uint8_t> mergeJournal(const char *objectPath, const char *journalPath, off_t offset,
|
boost::shared_array<uint8_t> mergeJournal(const char *objectPath, const char *journalPath, off_t offset,
|
||||||
size_t len, size_t *sizeRead) const;
|
size_t len, size_t *sizeRead) const;
|
||||||
|
|
||||||
// this version modifies object data in memory, given the journal filename
|
// this version modifies object data in memory, given the journal filename. Processes the whole object
|
||||||
|
// and whole journal file.
|
||||||
int mergeJournalInMem(boost::shared_array<uint8_t> &objData, size_t len, const char *journalPath,
|
int mergeJournalInMem(boost::shared_array<uint8_t> &objData, size_t len, const char *journalPath,
|
||||||
size_t *sizeRead) const;
|
size_t *sizeRead) const;
|
||||||
|
|
||||||
|
// this version of MJIM has a higher IOPS requirement and lower mem usage.
|
||||||
|
int mergeJournalInMem_bigJ(boost::shared_array<uint8_t> &objData, size_t len, const char *journalPath,
|
||||||
|
size_t *sizeRead) const;
|
||||||
|
|
||||||
// this version takes already-open file descriptors, and an already-allocated buffer as input.
|
// this version takes already-open file descriptors, and an already-allocated buffer as input.
|
||||||
// file descriptor are positioned, eh, best not to assume anything about their positions
|
// file descriptor are positioned, eh, best not to assume anything about their positions
|
||||||
// on return.
|
// on return.
|
||||||
|
@ -669,7 +669,8 @@ void Synchronizer::synchronizeWithJournal(const string &sourceFile, list<string>
|
|||||||
count += err;
|
count += err;
|
||||||
}
|
}
|
||||||
numBytesWritten += size;
|
numBytesWritten += size;
|
||||||
cache->rename(prefix, cloudKey, newCloudKey, size - bf::file_size(oldCachePath));
|
assert(bf::file_size(oldCachePath) == MetadataFile::getLengthFromKey(cloudKey));
|
||||||
|
cache->rename(prefix, cloudKey, newCloudKey, size - MetadataFile::getLengthFromKey(cloudKey));
|
||||||
replicator->remove(oldCachePath);
|
replicator->remove(oldCachePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,12 +157,12 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
char prefix[8192];
|
char prefix[8192];
|
||||||
makePathPrefix(prefix, 8192);
|
int prefixlen = makePathPrefix(prefix, 8192);
|
||||||
|
|
||||||
if (SMOnline())
|
if (SMOnline())
|
||||||
lsOnline(strncat(prefix, argv[1], 8192));
|
lsOnline(strncat(prefix, argv[1], 8192 - prefixlen));
|
||||||
else
|
else
|
||||||
lsOffline(strncat(prefix, argv[1], 8192));
|
lsOffline(strncat(prefix, argv[1], 8192 - prefixlen));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -172,9 +172,9 @@ int main(int argc, char **argv)
|
|||||||
int prefixlen = makePathPrefix(prefix, 8192);
|
int prefixlen = makePathPrefix(prefix, 8192);
|
||||||
|
|
||||||
if (SMOnline())
|
if (SMOnline())
|
||||||
putOnline(strncat(prefix, argv[1], 8192), prefixlen);
|
putOnline(strncat(prefix, argv[1], 8192 - prefixlen), prefixlen);
|
||||||
else
|
else
|
||||||
putOffline(strncat(prefix, argv[1], 8192), prefixlen);
|
putOffline(strncat(prefix, argv[1], 8192 - prefixlen), prefixlen);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user