diff --git a/ndb/include/ndbapi/NdbIndexScanOperation.hpp b/ndb/include/ndbapi/NdbIndexScanOperation.hpp index f854fa93945..82aed04a9fc 100644 --- a/ndb/include/ndbapi/NdbIndexScanOperation.hpp +++ b/ndb/include/ndbapi/NdbIndexScanOperation.hpp @@ -86,26 +86,25 @@ public: /** * Define bound on index key in range scan. * - * Each index key can have not null lower and/or upper bound, or can - * be set equal to not null value. The bounds can be defined in any - * order but a duplicate definition is an error. + * Each index key can have lower and/or upper bound, or can be set + * equal to a value. The bounds can be defined in any order but + * a duplicate definition is an error. * - * The scan is most effective when bounds are given for an initial - * sequence of non-nullable index keys, and all but the last one is an - * equality. In this case the scan returns a contiguous range from - * each ordered index fragment. + * The bounds must specify a single range, i.e. they are on an initial + * sequence of index keys and the condition is equality for all but + * (at most) the last key which has a lower and/or upper bound. * - * @note This release implements only the case described above, - * except for the non-nullable limitation. Other sets of - * bounds return error or empty result set. + * NULL is treated like a normal value which is less than any not-NULL + * value and equal to another NULL value. To search for NULL use + * setBound with null pointer (0). * - * @note In this release a null key value satisfies any lower - * bound and no upper bound. This may change. + * An index also stores all-NULL keys (this may become optional). + * Doing an index scan with an empty bound set returns all table tuples. * * @param attrName Attribute name, alternatively: - * @param anAttrId Index column id (starting from 0). 
+ * @param anAttrId Index column id (starting from 0) * @param type Type of bound - * @param value Pointer to bound value + * @param value Pointer to bound value, 0 for NULL * @param len Value length in bytes. * Fixed per datatype and can be omitted * @return 0 if successful otherwise -1 diff --git a/ndb/src/kernel/blocks/dbtux/Dbtux.hpp b/ndb/src/kernel/blocks/dbtux/Dbtux.hpp index c1a56bfe86e..c5732eea01b 100644 --- a/ndb/src/kernel/blocks/dbtux/Dbtux.hpp +++ b/ndb/src/kernel/blocks/dbtux/Dbtux.hpp @@ -446,6 +446,7 @@ private: Uint32 m_descPage; // descriptor page Uint16 m_descOff; // offset within the page Uint16 m_numAttrs; + bool m_storeNullKey; union { Uint32 nextPool; }; @@ -469,6 +470,7 @@ private: Uint32 m_descPage; // copy from index level Uint16 m_descOff; Uint16 m_numAttrs; + bool m_storeNullKey; TreeHead m_tree; TupLoc m_freeLoc; // one node pre-allocated for insert DLList m_scanList; // current scans on this fragment @@ -993,7 +995,8 @@ Dbtux::Index::Index() : m_numFrags(0), m_descPage(RNIL), m_descOff(0), - m_numAttrs(0) + m_numAttrs(0), + m_storeNullKey(false) { for (unsigned i = 0; i < MaxIndexFragments; i++) { m_fragId[i] = ZNIL; @@ -1012,6 +1015,7 @@ Dbtux::Frag::Frag(ArrayPool& scanOpPool) : m_descPage(RNIL), m_descOff(0), m_numAttrs(ZNIL), + m_storeNullKey(false), m_tree(), m_freeLoc(), m_scanList(scanOpPool), diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp index 6ae3c3c1197..1b8755a1dc4 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp @@ -62,15 +62,15 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData searchKey, Cons } } else { jam(); - // not NULL < NULL - ret = -1; + // not NULL > NULL + ret = +1; break; } } else { if (! 
entryData.ah().isNULL()) { jam(); - // NULL > not NULL - ret = +1; + // NULL < not NULL + ret = -1; break; } } @@ -116,15 +116,15 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData searchKey, Tabl } } else { jam(); - // not NULL < NULL - ret = -1; + // not NULL > NULL + ret = +1; break; } } else { if (*entryKey != 0) { jam(); - // NULL > not NULL - ret = +1; + // NULL < not NULL + ret = -1; break; } } @@ -180,36 +180,41 @@ Dbtux::cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigne // get and skip bound type type = boundInfo[0]; boundInfo += 1; - ndbrequire(! boundInfo.ah().isNULL()); - if (! entryData.ah().isNULL()) { - jam(); - // current attribute - const unsigned index = boundInfo.ah().getAttributeId(); - const DescAttr& descAttr = descEnt.m_descAttr[index]; - const unsigned typeId = descAttr.m_typeId; - ndbrequire(entryData.ah().getAttributeId() == descAttr.m_primaryAttrId); - // full data size - const unsigned size1 = boundInfo.ah().getDataSize(); - ndbrequire(size1 != 0 && size1 == entryData.ah().getDataSize()); - const unsigned size2 = min(size1, len2); - len2 -= size2; - // compare - const Uint32* const p1 = &boundInfo[AttributeHeaderSize]; - const Uint32* const p2 = &entryData[AttributeHeaderSize]; - int ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size2); - // XXX until data format errors are handled - ndbrequire(ret != NdbSqlUtil::CmpError); - if (ret != 0) { + if (! boundInfo.ah().isNULL()) { + if (! 
entryData.ah().isNULL()) { jam(); - return ret; + // current attribute + const unsigned index = boundInfo.ah().getAttributeId(); + const DescAttr& descAttr = descEnt.m_descAttr[index]; + const unsigned typeId = descAttr.m_typeId; + ndbrequire(entryData.ah().getAttributeId() == descAttr.m_primaryAttrId); + // full data size + const unsigned size1 = boundInfo.ah().getDataSize(); + ndbrequire(size1 != 0 && size1 == entryData.ah().getDataSize()); + const unsigned size2 = min(size1, len2); + len2 -= size2; + // compare + const Uint32* const p1 = &boundInfo[AttributeHeaderSize]; + const Uint32* const p2 = &entryData[AttributeHeaderSize]; + int ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size2); + // XXX until data format errors are handled + ndbrequire(ret != NdbSqlUtil::CmpError); + if (ret != 0) { + jam(); + return ret; + } + } else { + jam(); + // not NULL > NULL + return +1; } } else { jam(); - /* - * NULL is bigger than any bound, thus the boundary is always to - * the left of NULL. - */ - return -1; + if (! entryData.ah().isNULL()) { + jam(); + // NULL < not NULL + return -1; + } } boundInfo += AttributeHeaderSize + boundInfo.ah().getDataSize(); entryData += AttributeHeaderSize + entryData.ah().getDataSize(); @@ -258,32 +263,37 @@ Dbtux::cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigne // get and skip bound type type = boundInfo[0]; boundInfo += 1; - ndbrequire(! 
boundInfo.ah().isNULL()); - if (*entryKey != 0) { - jam(); - // current attribute - const unsigned index = boundInfo.ah().getAttributeId(); - const DescAttr& descAttr = descEnt.m_descAttr[index]; - const unsigned typeId = descAttr.m_typeId; - // full data size - const unsigned size1 = AttributeDescriptor::getSizeInWords(descAttr.m_attrDesc); - // compare - const Uint32* const p1 = &boundInfo[AttributeHeaderSize]; - const Uint32* const p2 = *entryKey; - int ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size1); - // XXX until data format errors are handled - ndbrequire(ret != NdbSqlUtil::CmpError); - if (ret != 0) { + if (! boundInfo.ah().isNULL()) { + if (*entryKey != 0) { jam(); - return ret; + // current attribute + const unsigned index = boundInfo.ah().getAttributeId(); + const DescAttr& descAttr = descEnt.m_descAttr[index]; + const unsigned typeId = descAttr.m_typeId; + // full data size + const unsigned size1 = AttributeDescriptor::getSizeInWords(descAttr.m_attrDesc); + // compare + const Uint32* const p1 = &boundInfo[AttributeHeaderSize]; + const Uint32* const p2 = *entryKey; + int ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size1); + // XXX until data format errors are handled + ndbrequire(ret != NdbSqlUtil::CmpError); + if (ret != 0) { + jam(); + return ret; + } + } else { + jam(); + // not NULL > NULL + return +1; } } else { jam(); - /* - * NULL is bigger than any bound, thus the boundary is always to - * the left of NULL. 
- */ - return -1; + if (*entryKey != 0) { + jam(); + // NULL < not NULL + return -1; + } } boundInfo += AttributeHeaderSize + boundInfo.ah().getDataSize(); entryKey += 1; diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp index 471752ea031..24b030bf8ec 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp @@ -82,8 +82,8 @@ Dbtux::execTUX_MAINT_REQ(Signal* signal) ent.m_fragBit = fragBit; // read search key readKeyAttrs(frag, ent, 0, c_searchKey); - // check if all keys are null - { + if (! frag.m_storeNullKey) { + // check if all keys are null const unsigned numAttrs = frag.m_numAttrs; bool allNull = true; for (unsigned i = 0; i < numAttrs; i++) { diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp index 0612f191830..83944f96b96 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp @@ -85,6 +85,7 @@ Dbtux::execTUXFRAGREQ(Signal* signal) fragPtr.p->m_fragOff = req->fragOff; fragPtr.p->m_fragId = req->fragId; fragPtr.p->m_numAttrs = req->noOfAttr; + fragPtr.p->m_storeNullKey = true; // not yet configurable fragPtr.p->m_tupIndexFragPtrI = req->tupIndexFragPtrI; fragPtr.p->m_tupTableFragPtrI[0] = req->tupTableFragPtrI[0]; fragPtr.p->m_tupTableFragPtrI[1] = req->tupTableFragPtrI[1]; @@ -111,6 +112,7 @@ Dbtux::execTUXFRAGREQ(Signal* signal) indexPtr.p->m_tableId = req->primaryTableId; indexPtr.p->m_fragOff = req->fragOff; indexPtr.p->m_numAttrs = req->noOfAttr; + indexPtr.p->m_storeNullKey = true; // not yet configurable // allocate attribute descriptors if (! 
allocDescEnt(indexPtr)) { jam(); diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp index 8280ee0b7d5..706e40ecbe0 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp @@ -137,7 +137,7 @@ Dbtux::execTUX_BOUND_INFO(Signal* signal) const Uint32* const data = (Uint32*)sig + TuxBoundInfo::SignalLength; unsigned offset = 5; // walk through entries - while (offset + 2 < req->boundAiLength) { + while (offset + 2 <= req->boundAiLength) { jam(); const unsigned type = data[offset]; if (type > 4) { diff --git a/ndb/src/kernel/blocks/dbtux/Times.txt b/ndb/src/kernel/blocks/dbtux/Times.txt index c272f464c84..c4744a23c07 100644 --- a/ndb/src/kernel/blocks/dbtux/Times.txt +++ b/ndb/src/kernel/blocks/dbtux/Times.txt @@ -21,11 +21,11 @@ shows ms / 1000 rows for each and pct overhead c 1 million rows, index on PK, full table scan, full index scan -shows ms / 1000 rows for each and index time pct +shows ms / 1000 rows for each and index time overhead d 1 million rows, index on PK, read table via each pk, scan index for each pk -shows ms / 1000 rows for each and index time pct +shows ms / 1000 rows for each and index time overhead samples 10% of all PKs (100,000 pk reads, 100,000 scans) 040616 mc02/a 40 ms 87 ms 114 pct @@ -66,12 +66,20 @@ optim 11 mc02/a 43 ms 63 ms 46 pct optim 12 mc02/a 38 ms 55 ms 43 pct mc02/b 47 ms 77 ms 63 pct - mc02/c 10 ms 14 ms 147 pct - mc02/d 176 ms 281 ms 159 pct + mc02/c 10 ms 14 ms 47 pct + mc02/d 176 ms 281 ms 59 pct optim 13 mc02/a 40 ms 57 ms 42 pct mc02/b 47 ms 77 ms 61 pct - mc02/c 9 ms 13 ms 150 pct - mc02/d 170 ms 256 ms 150 pct + mc02/c 9 ms 13 ms 50 pct + mc02/d 170 ms 256 ms 50 pct + +after wl-1884 store all-NULL keys (the tests have pctnull=10 per column) +[ what happened to PK read performance? 
] + +optim 13 mc02/a 39 ms 59 ms 50 pct + mc02/b 47 ms 77 ms 61 pct + mc02/c 9 ms 12 ms 44 pct + mc02/d 246 ms 289 ms 17 pct vim: set et: diff --git a/ndb/src/ndbapi/NdbScanOperation.cpp b/ndb/src/ndbapi/NdbScanOperation.cpp index a880f308d24..9630dbd453c 100644 --- a/ndb/src/ndbapi/NdbScanOperation.cpp +++ b/ndb/src/ndbapi/NdbScanOperation.cpp @@ -1125,7 +1125,6 @@ NdbIndexScanOperation::setBound(const NdbColumnImpl* tAttrInfo, if (theOperationType == OpenRangeScanRequest && theStatus == SetBound && (0 <= type && type <= 4) && - aValue != NULL && len <= 8000) { // bound type @@ -1136,20 +1135,22 @@ NdbIndexScanOperation::setBound(const NdbColumnImpl* tAttrInfo, setErrorCodeAbort(4209); return -1; } - len = sizeInBytes; + len = aValue != NULL ? sizeInBytes : 0; Uint32 tIndexAttrId = tAttrInfo->m_attrId; Uint32 sizeInWords = (len + 3) / 4; AttributeHeader ah(tIndexAttrId, sizeInWords); insertATTRINFO(ah.m_value); - // attribute data - if ((UintPtr(aValue) & 0x3) == 0 && (len & 0x3) == 0) - insertATTRINFOloop((const Uint32*)aValue, sizeInWords); - else { - Uint32 temp[2000]; - memcpy(temp, aValue, len); - while ((len & 0x3) != 0) - ((char*)temp)[len++] = 0; - insertATTRINFOloop(temp, sizeInWords); + if (len != 0) { + // attribute data + if ((UintPtr(aValue) & 0x3) == 0 && (len & 0x3) == 0) + insertATTRINFOloop((const Uint32*)aValue, sizeInWords); + else { + Uint32 temp[2000]; + memcpy(temp, aValue, len); + while ((len & 0x3) != 0) + ((char*)temp)[len++] = 0; + insertATTRINFOloop(temp, sizeInWords); + } } /** @@ -1236,7 +1237,7 @@ NdbIndexScanOperation::compare(Uint32 skip, Uint32 cols, Uint32 * d2 = (Uint32*)r2->aRef(); unsigned r1_null = r1->isNULL(); if((r1_null ^ (unsigned)r2->isNULL())){ - return (r1_null ? 1 : -1); + return (r1_null ? 
-1 : 1); } Uint32 type = NdbColumnImpl::getImpl(* r1->m_column).m_extType; Uint32 size = (r1->theAttrSize * r1->theArraySize + 3) / 4; diff --git a/ndb/test/ndbapi/testOIBasic.cpp b/ndb/test/ndbapi/testOIBasic.cpp index 8dd904b7579..59640262f55 100644 --- a/ndb/test/ndbapi/testOIBasic.cpp +++ b/ndb/test/ndbapi/testOIBasic.cpp @@ -85,7 +85,7 @@ printhelp() << " -dups allow duplicate tuples from index scan [" << d.m_dups << "]" << endl << " -fragtype T fragment type single/small/medium/large" << endl << " -index xyz only given index numbers (digits 1-9)" << endl - << " -loop N loop count full suite forever=0 [" << d.m_loop << "]" << endl + << " -loop N loop count full suite 0=forever [" << d.m_loop << "]" << endl << " -nologging create tables in no-logging mode" << endl << " -rows N rows per thread [" << d.m_rows << "]" << endl << " -samples N samples for some timings (0=all) [" << d.m_samples << "]" << endl @@ -102,6 +102,12 @@ printhelp() printtables(); } +// not yet configurable +static const bool g_store_null_key = true; + +// compare NULL like normal value (NULL < not NULL, NULL == NULL) +static const bool g_compare_null = true; + // log and error macros static NdbMutex ndbout_mutex = NDB_MUTEX_INITIALIZER; @@ -306,8 +312,8 @@ Tmr::pct(const Tmr& t1) const char* Tmr::over(const Tmr& t1) { - if (0 < t1.m_ms && t1.m_ms < m_ms) { - sprintf(m_text, "%u pct", (100 * (m_ms - t1.m_ms)) / t1.m_ms); + if (0 < t1.m_ms) { + sprintf(m_text, "%d pct", (100 * (m_ms - t1.m_ms)) / t1.m_ms); } else { sprintf(m_text, "[cannot measure]"); } @@ -1168,9 +1174,9 @@ Val::cmp(const Val& val2) const assert(col.m_type == col2.m_type && col.m_length == col2.m_length); if (m_null || val2.m_null) { if (! m_null) - return -1; - if (! val2.m_null) return +1; + if (! val2.m_null) + return -1; return 0; } // verify data formats @@ -1695,8 +1701,8 @@ int BVal::setbnd(Par par) const { Con& con = par.con(); - const char* addr = (const char*)dataaddr(); - assert(! 
m_null); + assert(g_compare_null || ! m_null); + const char* addr = ! m_null ? (const char*)dataaddr() : 0; const ICol& icol = m_icol; CHK(con.setBound(icol.m_num, m_type, addr) == 0); return 0; @@ -1785,7 +1791,8 @@ BSet::calc(Par par) if (k + 1 < itab.m_icols) bval.m_type = 4; // value generation parammeters - par.m_pctnull = 0; + if (! g_compare_null) + par.m_pctnull = 0; par.m_pctrange = 50; // bit higher do { bval.calc(par, 0); @@ -1842,18 +1849,20 @@ BSet::filter(const Set& set, Set& set2) const if (! set.exist(i)) continue; const Row& row = *set.m_row[i]; - bool ok1 = false; - for (unsigned k = 0; k < itab.m_icols; k++) { - const ICol& icol = itab.m_icol[k]; - const Col& col = icol.m_col; - const Val& val = *row.m_val[col.m_num]; - if (! val.m_null) { - ok1 = true; - break; + if (! g_store_null_key) { + bool ok1 = false; + for (unsigned k = 0; k < itab.m_icols; k++) { + const ICol& icol = itab.m_icol[k]; + const Col& col = icol.m_col; + const Val& val = *row.m_val[col.m_num]; + if (! val.m_null) { + ok1 = true; + break; + } } + if (! ok1) + continue; } - if (! 
ok1) - continue; bool ok2 = true; for (unsigned j = 0; j < m_bvals; j++) { const BVal& bval = *m_bval[j]; @@ -2727,13 +2736,13 @@ tpkops(Par par) RUNSTEP(par, pkinsert, MT); RUNSTEP(par, createindex, ST); RUNSTEP(par, invalidateindex, MT); - RUNSTEP(par, readverify, MT); + RUNSTEP(par, readverify, ST); for (unsigned i = 0; i < par.m_subloop; i++) { RUNSTEP(par, pkupdatescanread, MT); - RUNSTEP(par, readverify, MT); + RUNSTEP(par, readverify, ST); } RUNSTEP(par, pkdelete, MT); - RUNSTEP(par, readverify, MT); + RUNSTEP(par, readverify, ST); return 0; } @@ -2746,10 +2755,10 @@ tmixedops(Par par) RUNSTEP(par, pkinsert, MT); RUNSTEP(par, createindex, ST); RUNSTEP(par, invalidateindex, MT); - RUNSTEP(par, readverify, MT); + RUNSTEP(par, readverify, ST); for (unsigned i = 0; i < par.m_subloop; i++) { RUNSTEP(par, mixedoperations, MT); - RUNSTEP(par, readverify, MT); + RUNSTEP(par, readverify, ST); } return 0; } @@ -2832,7 +2841,7 @@ ttimescan(Par par) } LL1("full scan table - " << t1.time()); LL1("full scan PK index - " << t2.time()); - LL1("index time pct - " << t2.pct(t1)); + LL1("overhead - " << t2.over(t1)); return 0; } @@ -2854,7 +2863,7 @@ ttimepkread(Par par) } LL1("pk read table - " << t1.time()); LL1("pk read PK index - " << t2.time()); - LL1("index time pct - " << t2.pct(t1)); + LL1("overhead - " << t2.over(t1)); return 0; }