1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00
Files
mariadb-columnstore-engine/dbcon/joblist/rowestimator.h
2022-01-21 16:43:49 +00:00

152 lines
5.6 KiB
C++

/* Copyright (C) 2014 InfiniDB, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id: rowestimator.h 5449 2009-06-19 19:58:27Z wweeks $
*
*
***********************************************************************/
/** @file */
#pragma once
#include <boost/shared_ptr.hpp>
#include "joblisttypes.h"
#include "calpontsystemcatalog.h"
#include "columncommand-jl.h"
#include "brmtypes.h"
#include "bytestream.h"
#include <iostream>
#include <vector>
#include "brm.h"
namespace joblist
{
/** @brief Estimates row counts for a TupleBPS.
 *
 * RowEstimator combines Casual Partitioning information with the filter string that belongs to a
 * particular TupleBPS object in order to estimate cardinality. The estimate is used to decide which
 * table becomes the large side table in a multijoin operation.
 */
class RowEstimator
{
 public:
  /** @brief Default ctor.
   *
   * All tunables start from their in-class defaults: 20 extents to sample and a distinct-value
   * adjustment of 1 for every column category.
   */
  RowEstimator() = default;

  /** @brief Estimate the number of rows that a particular tuple batch primitive step will return.
   *
   * @param cpColVec  Vector of ColumnCommandJL pointers associated with the step.
   * @param scanFlags Vector of flags, one entry per extent, populated by the casual partitioning
   *                  evaluation: extents that will be scanned are true, extents that were
   *                  eliminated by casual partitioning are false.
   * @param dbrm      DBRM object used to get the HWM.
   * @param oid       The object id of the first column in the step.
   * @return The estimated row count.
   */
  uint64_t estimateRows(const std::vector<ColumnCommandJL*>& cpColVec, const std::vector<bool>& scanFlags,
                        BRM::DBRM& dbrm, const execplan::CalpontSystemCatalog::OID oid);

  /** @brief Estimate the number of rows that will be returned for a particular table, given a
   * ColumnCommandJL for a non casual partitioning column. Added for bug 3503.
   *
   * @param colCmd The ColumnCommandJL.
   * @return The estimated row count.
   */
  uint64_t estimateRowsForNonCPColumn(ColumnCommandJL& colCmd);

 private:
  /** @brief Adjusts a column value so that it can be compared via ranges.
   *
   * Produces a value for dates, datetimes, and strings that is usable for distinct value estimation
   * and for comparisons.
   *
   * @param ct    The column type.
   * @param value The column value.
   * @return The adjusted value.
   */
  uint64_t adjustValue(const execplan::CalpontSystemCatalog::ColType& ct, const uint64_t& value);

  // NOTE(review): undocumented helper; judging by the name it presumably yields the number of days
  // elapsed through month `mth` - confirm against the implementation.
  uint32_t daysThroughMonth(uint32_t mth);

  // NOTE(review): undocumented helper; it presumably estimates how many distinct values lie in
  // [min, max] for a column of type `ct`, with `cpStatus` describing whether the CP data is valid -
  // confirm against the implementation.
  template <typename T>
  uint32_t estimateDistinctValues(const execplan::CalpontSystemCatalog::ColType& ct, const T& min,
                                  const T& max, const char cpStatus);

  /** @brief Returns a factor between 0 and 1 estimating the share of rows that qualify for a single
   * operation.
   *
   * Handles one individual operation such as "col1 = 5".
   *
   * @param min            The minimum value in the range.
   * @param max            The maximum value in the range.
   * @param value          NOTE(review): presumably the constant the column is compared against - confirm.
   * @param op             NOTE(review): presumably the comparison operator code - confirm.
   * @param lcf            NOTE(review): meaning is not visible in this header - confirm.
   * @param distinctValues NOTE(review): presumably the distinct value estimate for the range - confirm.
   * @param cpStatus       The status of the CP data (whether it's valid).
   * @param ct             The column type.
   */
  template <typename T>
  float estimateOpFactor(const T& min, const T& max, const T& value, char op, uint8_t lcf,
                         uint32_t distinctValues, char cpStatus,
                         const execplan::CalpontSystemCatalog::ColType& ct);

  /** @brief Returns a factor between 0 and 1 estimating the share of rows that qualify for the
   * given operation(s).
   *
   * Handles several operations against the same column, such as "col1 = 5 or col1 = 10", by
   * calling estimateOpFactor once per individual operation.
   *
   * @param emEntry      The extent map entry for the extent being evaluated.
   * @param msgDataPtr   The filter string.
   * @param NOPS         NOTE(review): presumably the number of operations in the filter - confirm.
   * @param ct           The column type.
   * @param BOP          The binary operator joining the filter predicates (e.g. OR for
   *                     col1 = 5 or col1 = 10).
   * @param rowsInExtent The number of rows in the extent being evaluated.
   */
  float estimateRowReturnFactor(const BRM::EMEntry& emEntry, const messageqcpp::ByteStream* msgDataPtr,
                                const uint16_t NOPS, const execplan::CalpontSystemCatalog::ColType& ct,
                                const uint8_t BOP, const uint32_t& rowsInExtent);

  // Tunables; intended to be read from Columnstore.xml in the future. Until then they keep the
  // defaults given here.
  uint32_t fExtentsToSample = 20;
  uint32_t fIntDistinctAdjust = 1;
  uint32_t fDecDistinctAdjust = 1;
  uint32_t fChar1DistinctAdjust = 1;
  uint32_t fChar2Thru7DistinctAdjust = 1;
  uint32_t fDictDistinctAdjust = 1;
  uint32_t fDateDistinctAdjust = 1;

  static const uint32_t fRowsPerExtent = 8192 * 1024;
  static const uint32_t fBlockSize = 8192;  // Block size in bytes.

  // Caps the number of comparisons made for each extent. For example, an IN clause with 1000 values
  // is limited to this many checks.
  static const uint32_t fMaxComparisons = 10;
};  // RowEstimator
} // namespace joblist