Merge branch 'bb-10.4-release' into bb-10.5-release

2025-07-29 05:21:33 +03:00 · 2023-05-02 13:54:54 +02:00
parent d821fd7fab edd0b03e60
commit edf8ce5b97
72 changed files with 1109 additions and 474 deletions
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -3901,50 +3901,16 @@ double Histogram::point_selectivity(double pos, double avg_sel)
  }
  else
  {
-    /* 
+    /*
      The value 'pos' fits within one single histogram bucket.

-      Histogram buckets have the same numbers of rows, but they cover
-      different ranges of values.
-
-      We assume that values are uniformly distributed across the [0..1] value
-      range.
-    */
-
-    /* 
-      If all buckets covered value ranges of the same size, the width of
-      value range would be:
+      We also have avg_sel, which is the per-table average selectivity of
+      col=const. If there are popular values, avg_sel may be larger than the
+      selectivity of one bucket, so cap the returned number by the latter.
    */
    double avg_bucket_width= 1.0 / (get_width() + 1);
-    
-    /*
-      Let's see what is the width of value range that our bucket is covering.
-        (min==max currently. they are kept in the formula just in case we 
-         will want to extend it to handle multi-bucket case)
-    */
-    double inv_prec_factor= (double) 1.0 / prec_factor(); 
-    double current_bucket_width= 
-        (max + 1 == get_width() ?  1.0 : (get_value(max) * inv_prec_factor)) -
-        (min == 0 ?  0.0 : (get_value(min-1) * inv_prec_factor));

-    DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */
-
-    /*
-      So:
-      - each bucket has the same #rows 
-      - values are unformly distributed across the [min_value,max_value] domain.
-
-      If a bucket has value range that's N times bigger then average, than
-      each value will have to have N times fewer rows than average.
-    */
-    sel= avg_sel * avg_bucket_width / current_bucket_width;
-
-    /*
-      (Q: if we just follow this proportion we may end up in a situation
-      where number of different values we expect to find in this bucket
-      exceeds the number of rows that this histogram has in a bucket. Are 
-      we ok with this or we would want to have certain caps?)
-    */
+    sel= MY_MIN(avg_bucket_width, avg_sel);
  }
  return sel;
 }