/* Copyright (C) 2014 InfiniDB, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /*********************************************************************** * $Id: rowestimator.h 5449 2009-06-19 19:58:27Z wweeks $ * * ***********************************************************************/ /** @file */ #pragma once #include #include "joblisttypes.h" #include "calpontsystemcatalog.h" #include "columncommand-jl.h" #include "brmtypes.h" #include "bytestream.h" #include #include #include "brm.h" namespace joblist { /** @brief estimates row counts for a TupleBPS. * * Class RowEstimator uses Casual Partitioning information and the filter string pertaining to a particular * TupleBPS object to estimate cardinality. It is used to determine which table to use as the large side * table in a multijoin operation. */ class RowEstimator { public: /** @brief ctor */ RowEstimator() : fExtentsToSample(20) , fIntDistinctAdjust(1) , fDecDistinctAdjust(1) , fChar1DistinctAdjust(1) , fChar2Thru7DistinctAdjust(1) , fDictDistinctAdjust(1) , fDateDistinctAdjust(1) { } /** @brief estimate the number of rows that will be returned for a particular tuple batch primitive step. * * @param cpColVec vector of ColumnCommandJL pointers associated to the step. * @param scanFlags vector of flags with one entry per extent populated by the casual * partitioning evaluation, each that will be scanned are true, the ones that * were eliminated by casual partitioining are false. * @param dbrm DBRM object used to get the HWM. * @parm oid The objectid for the first column in the step. * */ uint64_t estimateRows(const std::vector& cpColVec, const std::vector& scanFlags, BRM::DBRM& dbrm, const execplan::CalpontSystemCatalog::OID oid); /** @brief Estimate the number of rows that will be returned for a particular table given * a ColumnCommandJL for non casual partitioning column. Added for bug 3503. * * @param colCmd The ColumnCommandJL. * */ uint64_t estimateRowsForNonCPColumn(ColumnCommandJL& colCmd); private: /** @brief adjusts column values so that they can be compared via ranges. * * This function provides a value for dates, datetimes, and strings that can be used for distinct value * estimation and comparisons. * * @param ct The column type. * @param value The column value. * */ uint64_t adjustValue(const execplan::CalpontSystemCatalog::ColType& ct, const uint64_t& value); uint32_t daysThroughMonth(uint32_t mth); template uint32_t estimateDistinctValues(const execplan::CalpontSystemCatalog::ColType& ct, const T& min, const T& max, const char cpStatus); /** @brief returns a factor between 0 and 1 for the estimate of rows that will qualify the given individual * operation. * * This function works for a single operation such as "col1 = 5". * * @param ct The column type. * @param min The minimum value in the range. * @param max The maximum value in the range. * @parm cpStatus The status of the CP data (whether it's valid). * */ template float estimateOpFactor(const T& min, const T& max, const T& value, char op, uint8_t lcf, uint32_t distinctValues, char cpStatus, const execplan::CalpontSystemCatalog::ColType& ct); /** @brief returns a factor between 0 and 1 for the estimate of rows that will qualify * the given operation(s). * * This function works for multiple operations against the same column such as * "col1 = 5 or col1 = 10". It calls estimateOpFactor for each individual operation. * * @param emEntry The extent map entry for the extent being evaluated. * @param msgDataPtr The filter string. * @param ct The column type. * @param BOP The binary operator for the filter predicates (eg. OR for col1 = 5 or col1 = 10) * @param rowsInExtent The number of rows in the extent being evaluated. * */ float estimateRowReturnFactor(const BRM::EMEntry& emEntry, const messageqcpp::ByteStream* msgDataPtr, const uint16_t NOPS, const execplan::CalpontSystemCatalog::ColType& ct, const uint8_t BOP, const uint32_t& rowsInExtent); // Configurables read from Columnstore.xml - future. uint32_t fExtentsToSample; uint32_t fIntDistinctAdjust; uint32_t fDecDistinctAdjust; uint32_t fChar1DistinctAdjust; uint32_t fChar2Thru7DistinctAdjust; uint32_t fDictDistinctAdjust; uint32_t fDateDistinctAdjust; static const uint32_t fRowsPerExtent = 8192 * 1024; static const uint32_t fBlockSize = 8192; // Block size in bytes. // Limits the number of comparisons for each extent. Example, in clause w/ 1000 values will limit the // checks to the number below. static const uint32_t fMaxComparisons = 10; }; // RowEstimator } // namespace joblist