/* Copyright (C) 2014 InfiniDB, Inc. Copyright (c) 2019 MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ // $Id: wf_percentile.cpp 3932 2013-06-25 16:08:10Z xlou $ //#define NDEBUG #include #include #include #include using namespace std; #include #include using namespace boost; #include "loggingid.h" #include "errorcodes.h" #include "idberrorinfo.h" using namespace logging; #include "rowgroup.h" using namespace rowgroup; #include "idborderby.h" using namespace ordering; #include "joblisttypes.h" #include "calpontsystemcatalog.h" #include "constantcolumn.h" using namespace execplan; #include "windowfunctionstep.h" using namespace joblist; #include "wf_percentile.h" namespace windowfunction { template boost::shared_ptr WF_percentile::makeFunction(int id, const string& name, int ct, WindowFunctionColumn* wc) { boost::shared_ptr func; if (id == WF__PERCENTILE_DISC) { switch (ct) { case CalpontSystemCatalog::TINYINT: case CalpontSystemCatalog::SMALLINT: case CalpontSystemCatalog::MEDINT: case CalpontSystemCatalog::INT: case CalpontSystemCatalog::BIGINT: { func.reset(new WF_percentile(id, name)); break; } case CalpontSystemCatalog::UTINYINT: case CalpontSystemCatalog::USMALLINT: case CalpontSystemCatalog::UMEDINT: case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UBIGINT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::TIMESTAMP: case CalpontSystemCatalog::TIME: { func.reset(new WF_percentile(id, name)); break; } case CalpontSystemCatalog::DECIMAL: case CalpontSystemCatalog::UDECIMAL: { decltype(datatypes::MAXDECIMALWIDTH) width = wc->resultType().colWidth; if (width < datatypes::MAXDECIMALWIDTH) { if (ct == CalpontSystemCatalog::UDECIMAL) func.reset(new WF_percentile(id, name)); else func.reset(new WF_percentile(id, name)); } else if (width == datatypes::MAXDECIMALWIDTH) { func.reset(new WF_percentile(id, name)); } break; } case CalpontSystemCatalog::DOUBLE: case CalpontSystemCatalog::UDOUBLE: { func.reset(new WF_percentile(id, name)); break; } case CalpontSystemCatalog::FLOAT: case CalpontSystemCatalog::UFLOAT: { func.reset(new WF_percentile(id, name)); break; } case CalpontSystemCatalog::LONGDOUBLE: { func.reset(new WF_percentile(id, name)); break; } default: { if (id == WF__PERCENTILE_DISC) { func.reset(new WF_percentile(id, name)); } else { string errStr = name + "(" + colType2String[ct] + ")"; errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); cerr << errStr << endl; throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); } break; } } } else { switch (ct) { case CalpontSystemCatalog::TINYINT: case CalpontSystemCatalog::SMALLINT: case CalpontSystemCatalog::MEDINT: case CalpontSystemCatalog::INT: case CalpontSystemCatalog::BIGINT: case CalpontSystemCatalog::DECIMAL: case CalpontSystemCatalog::UTINYINT: case CalpontSystemCatalog::USMALLINT: case CalpontSystemCatalog::UMEDINT: case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UBIGINT: case CalpontSystemCatalog::UDECIMAL: case CalpontSystemCatalog::DOUBLE: case CalpontSystemCatalog::UDOUBLE: case CalpontSystemCatalog::FLOAT: case CalpontSystemCatalog::UFLOAT: { func.reset(new WF_percentile(id, name)); break; } case CalpontSystemCatalog::LONGDOUBLE: { func.reset(new WF_percentile(id, name)); break; } default: { string errStr = name + "(" + colType2String[ct] + ")"; errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); cerr << errStr << endl; throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); break; } } } return func; } template WindowFunctionType* WF_percentile::clone() const { return new WF_percentile(*this); } template void WF_percentile::resetData() { WindowFunctionType::resetData(); } template void WF_percentile::parseParms(const std::vector& parms) { // parms[0]: nve ConstantColumn* cc = dynamic_cast(parms[0].get()); if (cc != NULL) { fNveNull = false; fNve = cc->getDoubleVal(fRow, fNveNull); // row not used, no need to setData. if (!fNveNull && (fNve < 0 || fNve > 1)) { ostringstream oss; oss << fNve; throw IDBExcept(IDBErrorInfo::instance()->errorMsg(ERR_WF_ARG_OUT_OF_RANGE, oss.str()), ERR_WF_ARG_OUT_OF_RANGE); } } // workaround for the within group order by column index idbassert(fPeer->fIndex.size() > 0); fFieldIndex.push_back(fPeer->fIndex[0]); } template void WF_percentile::operator()(int64_t b, int64_t e, int64_t c) { int64_t idx = fFieldIndex[1]; fRow.setData(getPointer(fRowData->at(b))); if (idx != -1) { if (idx != -1) { fNveNull = fRow.isNullValue(idx); implicit2T(idx, fNve, 0); if (!fNveNull && (fNve < 0 || fNve > 1)) { ostringstream oss; oss << fNve; throw IDBExcept(IDBErrorInfo::instance()->errorMsg(ERR_WF_ARG_OUT_OF_RANGE, oss.str()), ERR_WF_ARG_OUT_OF_RANGE); } } } if (fNveNull) { for (c = b; c <= e; c++) { if (c % 1000 == 0 && fStep->cancelled()) break; fRow.setData(getPointer(fRowData->at(c))); setValue(fRow.getColType(fFieldIndex[0]), b, e, c, (T*)NULL); } return; } idx = fFieldIndex[2]; int64_t rank = 0; int64_t dups = 0; int64_t b1 = -1; int64_t e1 = -1; scoped_array rk(new int64_t[e - b + 1]); for (c = b; c <= e; c++) { if (c % 1000 == 0 && fStep->cancelled()) break; fRow.setData(getPointer(fRowData->at(c))); if (fRow.isNullValue(idx)) continue; // ignore nulls if (b1 == -1) b1 = c; e1 = c; if (fFunctionId == WF__PERCENTILE_DISC) { // need cume_rank if (c != b && fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(c - 1)))) { dups++; } else { rank++; rank += dups; dups = 0; } rk[c - b] = rank; } } T* p = NULL; T v; int ct = (fFunctionId == WF__PERCENTILE_CONT) ? CalpontSystemCatalog::DOUBLE : fRow.getColType(idx); if (b1 != -1) { double cnt = (e1 - b1 + 1); if (fFunctionId == WF__PERCENTILE_CONT) { // @bug5820, this "rn" is the normalized row number, not the real row number. // Using real row number here will introduce a small calculation error in double result. double rn = fNve * (cnt - 1); double crn = ceil(rn); double frn = floor(rn); double vd = 0; if (crn == rn && rn == frn) { fRow.setData(getPointer(fRowData->at((size_t)rn + (size_t)b1))); implicit2T(idx, vd, 0); } else { double cv = 0.0, fv = 0.0; fRow.setData(getPointer(fRowData->at((size_t)frn + (size_t)b1))); implicit2T(idx, fv, 0); fRow.setData(getPointer(fRowData->at((size_t)crn + (size_t)b1))); implicit2T(idx, cv, 0); vd = (crn - rn) * fv + (rn - frn) * cv; } double tempvd[2]; tempvd[0] = vd; tempvd[1] = 0; v = *(reinterpret_cast(&tempvd[0])); // old code that referred to 'vd' var triggered partial out-of-bounds warning. p = &v; } else // (fFunctionId == WF__PERCENTILE_DISC) { int prevRank = ++rank + dups; double cumeDist = 1; fRow.setData(getPointer(fRowData->at(e1))); for (c = e1; c >= b1; c--) { int currRank = rk[c - b]; if (currRank != prevRank) { cumeDist = ((double)(prevRank - 1)) / cnt; if (cumeDist < fNve) break; prevRank = currRank; } } c++; fRow.setData(getPointer(fRowData->at(c))); getValue(idx, v); p = &v; } } for (c = b; c <= e; c++) { if (c % 1000 == 0 && fStep->cancelled()) break; fRow.setData(getPointer(fRowData->at(c))); setValue(ct, b, e, c, p); } } template boost::shared_ptr WF_percentile::makeFunction(int, const string&, int, WindowFunctionColumn*); } // namespace windowfunction