From 06f24df724bf6ed40463cb0279573dfcaf07e8a1 Mon Sep 17 00:00:00 2001 From: Patrice Linel Date: Wed, 17 Apr 2019 22:32:09 -0500 Subject: [PATCH] Change the signature array to a std::set: lookup cost is now O(log N). About a 10x speedup can be expected on cpimport of data containing varchars. Signed-off-by: Patrice Linel --- writeengine/bulk/we_colbufmgr.cpp | 2 +- writeengine/dictionary/we_dctnry.cpp | 41 +++++++++++------------ writeengine/dictionary/we_dctnry.h | 13 ++++++- writeengine/dictionary/we_dctnrystore.cpp | 2 +- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/writeengine/bulk/we_colbufmgr.cpp b/writeengine/bulk/we_colbufmgr.cpp index 123847260..36ecc218b 100644 --- a/writeengine/bulk/we_colbufmgr.cpp +++ b/writeengine/bulk/we_colbufmgr.cpp @@ -45,7 +45,7 @@ namespace { // Minimum time to wait for a condition, so as to periodically wake up and // check the global job status, to see if the job needs to terminate. -const int COND_WAIT_SECONDS = 3; +const int COND_WAIT_SECONDS = 1; } namespace WriteEngine diff --git a/writeengine/dictionary/we_dctnry.cpp b/writeengine/dictionary/we_dctnry.cpp index 4e93ca60a..d477349ad 100644 --- a/writeengine/dictionary/we_dctnry.cpp +++ b/writeengine/dictionary/we_dctnry.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -108,7 +109,6 @@ Dctnry::Dctnry() : &m_endHeader, HDR_UNIT_SIZE); m_curFbo = INVALID_NUM; m_curLbid = INVALID_LBID; - memset(m_sigArray, 0, MAX_STRING_CACHE_SIZE * sizeof(Signature)); m_arraySize = 0; clear();//files @@ -130,14 +130,16 @@ Dctnry::~Dctnry() ******************************************************************************/ void Dctnry::freeStringCache( ) { - for (int i = 0; i < m_arraySize; i++) + std::set::iterator it; + for (it=m_sigArray.begin(); it!=m_sigArray.end(); it++) { - delete [] m_sigArray[i].signature; - m_sigArray[i].signature = 0; + Signature sig = *it; + delete [] sig.signature; + sig.signature = 0; } - memset(m_sigArray, 0, 
MAX_STRING_CACHE_SIZE * sizeof(Signature)); m_arraySize = 0; + m_sigArray.clear(); } /******************************************************************************* @@ -161,7 +163,6 @@ int Dctnry::init() m_curOp = 0; memset( m_curBlock.data, 0, sizeof(m_curBlock.data)); m_curBlock.lbid = INVALID_LBID; - memset(m_sigArray, 0, MAX_STRING_CACHE_SIZE * sizeof(Signature)); m_arraySize = 0; return NO_ERROR; @@ -623,19 +624,17 @@ int Dctnry::openDctnry(const OID& dctnryOID, ******************************************************************************/ bool Dctnry::getTokenFromArray(Signature& sig) { - for (int i = 0; i < (int)m_arraySize ; i++ ) - { - if (sig.size == m_sigArray[i].size) - { - if (!memcmp(sig.signature, m_sigArray[i].signature, sig.size)) - { - sig.token = m_sigArray[i].token; - return true; - }//endif sig compare - }//endif size compare - } + std::set::iterator it; + it = m_sigArray.find(sig); + if ( it == m_sigArray.end()){ + return false; + }else{ + Signature sigfound = *it; + sig.token = sigfound.token; + return true; + } - return false; + return false; } /******************************************************************************* @@ -1329,7 +1328,7 @@ void Dctnry::preLoadStringCache( const DataBlock& fileBlock ) memcpy(aSig.signature, &fileBlock.data[offBeg], len); aSig.token.op = op; aSig.token.fbo = m_curLbid; - m_sigArray[op - 1] = aSig; + m_sigArray.insert(aSig); offEnd = offBeg; hdrOffsetBeg += HDR_UNIT_SIZE; @@ -1368,7 +1367,7 @@ void Dctnry::addToStringCache( const Signature& newSig ) memcpy(asig.signature, newSig.signature, newSig.size ); asig.size = newSig.size; asig.token = newSig.token; - m_sigArray[m_arraySize] = asig; + m_sigArray.insert(asig); m_arraySize++; } @@ -1461,7 +1460,7 @@ int Dctnry::updateDctnry(unsigned char* sigValue, int& sigSize, sig.signature = new unsigned char[sigSize]; memcpy (sig.signature, sigValue, sigSize); sig.token = token; - m_sigArray[m_arraySize] = sig; + m_sigArray.insert(sig); m_arraySize++; } diff 
--git a/writeengine/dictionary/we_dctnry.h b/writeengine/dictionary/we_dctnry.h index 58d429142..0417a906f 100644 --- a/writeengine/dictionary/we_dctnry.h +++ b/writeengine/dictionary/we_dctnry.h @@ -56,6 +56,17 @@ typedef struct Signature Token token; } Signature; +struct sig_compare { + bool operator() (const Signature& a, const Signature& b) const { + if (a.size == b.size){ + return memcmp(a.signature,b.signature,a.size)<0;} + else if (a.size& oids); virtual int numOfBlocksInFile(); - Signature m_sigArray[MAX_STRING_CACHE_SIZE]; // string cache + std::set m_sigArray; int m_arraySize; // num strings in m_sigArray // m_dctnryHeader used for hdr when readSubBlockEntry is used to read a blk diff --git a/writeengine/dictionary/we_dctnrystore.cpp b/writeengine/dictionary/we_dctnrystore.cpp index 8e2982b57..4a7fefe73 100644 --- a/writeengine/dictionary/we_dctnrystore.cpp +++ b/writeengine/dictionary/we_dctnrystore.cpp @@ -133,7 +133,7 @@ const int DctnryStore::updateDctnryStore(unsigned char* sigValue, sig.signature = new unsigned char[sigSize]; memcpy (sig.signature, sigValue, sigSize); sig.token = token; - m_dctnry.m_sigArray[m_dctnry.m_arraySize] = sig; + m_dctnry.m_sigArray.insert(sig) = sig; m_dctnry.m_arraySize++; }