Fix for BUG#5117:

* Renamed handler::estimate_number_of_rows() to handler::estimate_rows_upper_bound(), which can also return "unknown" (HA_POS_ERROR).
* Made filesort use the full sort buffer if the number of rows to sort is not known.
2025-07-30 16:24:05 +03:00 · 2004-09-13 06:14:25 +04:00
parent 0afcaac5a8
commit 13fdbc99cf
7 changed files with 102 additions and 15 deletions
--- a/sql/examples/ha_tina.h
+++ b/sql/examples/ha_tina.h
@ -90,6 +90,12 @@ class ha_tina: public handler
  /* The next method will never be called */
  virtual double read_time(ha_rows rows) { DBUG_ASSERT(0); return((double) rows /  20.0+1); }
  virtual bool fast_key_read() { return 1;}
+  /* 
+    TODO: return actual upper bound of number of records in the table.
+    (e.g. save number of records seen on full table scan and/or use file size
+    as upper bound)
+  */
+  ha_rows estimate_rows_upper_bound() { return HA_POS_ERROR; }

  int open(const char *name, int mode, uint test_if_locked);
  int close(void);
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@ -169,7 +169,13 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
  else
 #endif
  {
-    records=table->file->estimate_number_of_rows();
+    records= table->file->estimate_rows_upper_bound();
+    /*
+      If the number of records is not known, use as much of the sort buffer
+      as possible.
+    */
+    if (records == HA_POS_ERROR)
+      records--;  // we use 'records+1' below.
    selected_records_file= 0;
  }

@ -315,7 +321,7 @@ static char **make_char_array(register uint fields, uint length, myf my_flag)
 } /* make_char_array */


-	/* Read all buffer pointers into memory */
+/* Read 'count' number of buffer pointers into memory */

 static BUFFPEK *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count)
 {
@ -336,8 +342,40 @@ static BUFFPEK *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count)
 }


+/* 
+  Find all sort keys and write them into tempfile.
+  SYNOPSIS
+    find_all_keys()
+      param             Sorting parameter
+      select            Use this to get source data
+      sort_keys         Array of pointers to sort key + addon buffers.
+      buffpek_pointers  File to write BUFFPEKs describing sorted segments
+                        in tempfile.
+      tempfile          File to write sorted sequences of sortkeys to.
+      indexfile         If !NULL, use it for source data (contains rowids)
  
-	/* Search after sort_keys and place them in a temp. file */
+  NOTE
+    Basic idea:
+      while (get_next_sortkey())
+      {
+        if (no free space in sort_keys buffers) 
+        {
+          sort sort_keys buffer;
+          dump sorted sequence to 'tempfile';
+          dump BUFFPEK describing sequence location into 'buffpek_pointers';
+        }
+        put sort key into 'sort_keys';
+      }
+      if (sort_keys has some elements && dumped at least once)
+        sort-dump-dump as above;
+      else
+        don't sort, leave sort_keys array to be sorted by caller.
+    
+     All produced sequences are guaranteed to be non-empty.
+  RETURN
+    Number of records written on success.
+    HA_POS_ERROR on error.
+*/

 static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
 			     uchar **sort_keys,
@ -452,7 +490,25 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
 } /* find_all_keys */


-	/* Skriver en buffert med nycklar till filen */
+/*
+  Sort the buffer and write:
+    1) the sorted sequence to tempfile
+    2) a BUFFPEK describing the sorted sequence position to buffpek_pointers
+  (was, in Swedish: "Skriver en buffert med nycklar till filen",
+   i.e. "Writes a buffer of keys to the file")
+  SYNOPSIS
+    write_keys()
+      param             Sort parameters
+      sort_keys         Array of pointers to keys to sort
+      count             Number of elements in sort_keys array 
+      buffpek_pointers  One 'BUFFPEK' struct will be written into this file.
+                        The BUFFPEK::{file_pos, count} will indicate where 
+                        the sorted data was stored.
+      tempfile          The sorted sequence will be written into this file.
+    
+  RETURN
+    0 OK
+    1 Error
+*/

 static int
 write_keys(SORTPARAM *param, register uchar **sort_keys, uint count,
@ -784,7 +840,21 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,


 /* 
-   Merge buffers to one buffer 
+  Merge buffers to one buffer
+  SYNOPSIS
+    merge_buffers()
+      param        Sort parameter
+      from_file    File with source data (BUFFPEKs point to this file)
+      to_file      File to write the sorted result data.
+      sort_buffer  Buffer for data to store up to MERGEBUFF2 sort keys.
+      lastbuff     OUT: the BUFFPEK describing the data written to to_file
+                   is stored here
+      Fb           First element in source BUFFPEKs array
+      Tb           Last element in source BUFFPEKs array
+      flag
+
+  RETURN
+    0     - OK
+    other - error
 */

 int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
@ -822,6 +892,9 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
  strpos= (uchar*) sort_buffer;
  org_max_rows=max_rows= param->max_rows;

+  /* The following will fire if there is not enough space in sort_buffer */
+  DBUG_ASSERT(maxcount!=0);
+  
  if (init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(BUFFPEK,key), 0,
                 (queue_compare) (cmp= get_ptr_compare(sort_length)),
                 (void*) &sort_length))
--- a/sql/ha_berkeley.cc
+++ b/sql/ha_berkeley.cc
@ -25,7 +25,7 @@
    We will need an updated Berkeley DB version for this.
  - Killing threads that has got a 'deadlock'
  - SHOW TABLE STATUS should give more information about the table.
-  - Get a more accurate count of the number of rows (estimate_number_of_rows()).
+  - Get a more accurate count of the number of rows (estimate_rows_upper_bound()).
    We could store the found number of rows when the table is scanned and
    then increment the counter for each attempted write.
  - We will need to extend the manager thread to make checkpoints at
@ -63,7 +63,7 @@
 #define HA_BERKELEY_ROWS_IN_TABLE 10000 /* to get optimization right */
 #define HA_BERKELEY_RANGE_COUNT   100
 #define HA_BERKELEY_MAX_ROWS	  10000000 /* Max rows in table */
-/* extra rows for estimate_number_of_rows() */
+/* extra rows for estimate_rows_upper_bound() */
 #define HA_BERKELEY_EXTRA_ROWS	  100

 /* Bits for share->status */
@ -2556,7 +2556,7 @@ end:
  Used when sorting to allocate buffers and by the optimizer.
 */

-ha_rows ha_berkeley::estimate_number_of_rows()
+ha_rows ha_berkeley::estimate_rows_upper_bound()
 {
  return share->rows + HA_BERKELEY_EXTRA_ROWS;
 }
--- a/sql/ha_berkeley.h
+++ b/sql/ha_berkeley.h
@ -100,7 +100,7 @@ class ha_berkeley: public handler
  ulong table_flags(void) const { return int_table_flags; }
  uint max_supported_keys()        const { return MAX_KEY-1; }
  uint extra_rec_buf_length()	 { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; }
-  ha_rows estimate_number_of_rows();
+  ha_rows estimate_rows_upper_bound();
  const key_map *keys_to_use_for_scanning() { return &key_map_full; }
  bool has_transactions()  { return 1;}

--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@ -4115,7 +4115,7 @@ Gives an UPPER BOUND to the number of rows in a table. This is used in
 filesort.cc. */

 ha_rows
-ha_innobase::estimate_number_of_rows(void)
+ha_innobase::estimate_rows_upper_bound(void)
 /*======================================*/
 			/* out: upper bound of rows */
 {
@ -4124,7 +4124,7 @@ ha_innobase::estimate_number_of_rows(void)
 	ulonglong	estimate;
 	ulonglong	local_data_file_length;

- 	DBUG_ENTER("estimate_number_of_rows");
+ 	DBUG_ENTER("estimate_rows_upper_bound");

 	/* We do not know if MySQL can call this function before calling
 	external_lock(). To be safe, update the thd of the current table
@ -4204,7 +4204,7 @@ ha_innobase::read_time(

 	time_for_scan = scan_time();

-	if ((total_rows = estimate_number_of_rows()) < rows)
+	if ((total_rows = estimate_rows_upper_bound()) < rows)
 	  return time_for_scan;

 	return (ranges + (double) rows / (double) total_rows * time_for_scan);
--- a/sql/ha_innodb.h
+++ b/sql/ha_innodb.h
@ -150,7 +150,7 @@ class ha_innobase: public handler

  	void position(byte *record);
  	ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key);
-	ha_rows estimate_number_of_rows();
+	ha_rows estimate_rows_upper_bound();

  	int create(const char *name, register TABLE *form,
 					HA_CREATE_INFO *create_info);
--- a/sql/handler.h
+++ b/sql/handler.h
@ -300,7 +300,15 @@ public:
  virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
  virtual bool has_transactions(){ return 0;}
  virtual uint extra_rec_buf_length() { return 0; }
-  virtual ha_rows estimate_number_of_rows() { return records+EXTRA_RECORDS; }
+  
+  /*
+    Return an upper bound on the current number of records in the table
+    (the maximum number of records a full table scan can retrieve).
+    If the upper bound is not known, HA_POS_ERROR should be returned as the
+    maximum possible upper bound.
+  */
+  virtual ha_rows estimate_rows_upper_bound()
+  { return records+EXTRA_RECORDS; }

  virtual const char *index_type(uint key_number) { DBUG_ASSERT(0); return "";}