diff --git a/main.mk b/main.mk index a194c48af8..e9e01b9a96 100644 --- a/main.mk +++ b/main.mk @@ -367,6 +367,19 @@ fulltest: testfixture$(EXE) sqlite3$(EXE) crashtest test: testfixture$(EXE) sqlite3$(EXE) ./testfixture$(EXE) $(TOP)/test/quick.test +sqlite3_analyzer$(EXE): $(TOP)/src/tclsqlite.c libsqlite3.a $(TESTSRC) \ + $(TOP)/tool/spaceanal.tcl + sed \ + -e '/^#/d' \ + -e 's,\\,\\\\,g' \ + -e 's,",\\",g' \ + -e 's,^,",' \ + -e 's,$$,\\n",' \ + $(TOP)/tool/spaceanal.tcl >spaceanal_tcl.h + $(TCCX) $(TCL_FLAGS) -DTCLSH=2 -DSQLITE_TEST=1 -static -o \ + sqlite3_analyzer$(EXE) $(TESTSRC) $(TOP)/src/tclsqlite.c \ + libsqlite3.a $(LIBTCL) $(THREADLIB) + # Rules used to build documentation # arch.html: $(TOP)/www/arch.tcl diff --git a/manifest b/manifest index 0ca0ce9110..b0e9b76d97 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Home\spage\supdates.\s(CVS\s1861) -D 2004-07-22T19:06:32 +C First\scut\sat\sa\sanalysis\stool\sfor\sversion\s3.0\sdatabases.\s(CVS\s1862) +D 2004-07-23T00:01:39 F Makefile.in 4a5e570a9e2d35b09c31b3cf01b78cea764ade4b F Makefile.linux-gcc a9e5a0d309fa7c38e7c14d3ecf7690879d3a5457 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -17,7 +17,7 @@ F doc/lemon.html f0f682f50210928c07e562621c3b7e8ab912a538 F doc/report1.txt a031aaf37b185e4fa540223cb516d3bccec7eeac F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 F ltmain.sh f6b283068efa69f06eb8aa1fe4bddfdbdeb35826 -F main.mk 1957885390af52f5a69679bb43ec4cd8904c7a9f +F main.mk 430cb919fb25515058186674ea0ab7d27fa19c95 F mkdll.sh 68d34a961a1fdfa15ef27fc4f4740be583112124 F publish.sh fa3c5ad1a6ac59925f2b37e422db378a7bec621c F spec.template b2f6c4e488cbc3b993a57deba22cbc36203c4da3 @@ -27,8 +27,8 @@ F sqlite3.def 7610bb4092dcfa7db8fe6d9a92d3e51adce23566 F sqlite3.pc.in 985b9bf34192a549d7d370e0f0b6b34a4f61369a F src/attach.c 784456629b3d7e50e4691f496700658fd1f16441 F src/auth.c 60db23b98bb94c8b0178180faaf49dc116674217 -F src/btree.c 9f1727185200c5b9488f3e99a1bdb250f841c876 -F src/btree.h fab5e800b5d91a5700c0b344d711b98de6f7ad0e +F src/btree.c edf4ece708350dec7f28ebd4620c6d33afe6993a +F src/btree.h 94dfec0a1722d33359b23e7e310f2b64ffedf029 F src/build.c 3a1356286569266873d5d1c910e7797a3bfd5761 F src/date.c e1bb384a7856c18dce9cadb0afbe6934ba5ddb00 F src/delete.c e81545e546f6bc87d7508a93a09ca70695265af3 @@ -62,10 +62,10 @@ F src/shell.c 93c96c847228c02fb84bb381875d87ee71fbbeb4 F src/sqlite.h.in 80de11cde2c9f78eff4dab0aad1eb5196d6e2a3f F src/sqliteInt.h 049cb4f716f7b7ea3657f26c7e26a4f821aca53c F src/table.c 4521c278892f60e4d630788c0ea5cf4db1e75c49 -F src/tclsqlite.c 8652b55cac11dc1025cd4a336180330ebade3cd7 +F src/tclsqlite.c 3ce001b3c301876a9c8163472077a4c10e0d49f3 F src/test1.c ef00096c283ccfec1b2ae5fdaccb85fb06e24281 F src/test2.c f4c2f3928f1998fd8cb75a81e33a60e025ea85d4 -F src/test3.c 8576bb977937265e2c1a4d3fab3793e1974153e8 +F src/test3.c 94d0a2a90bccd85802488cb42c69ec8afd2e4646 F src/test4.c a921a69821fd30209589228e64f94e9f715b6fe2 F src/test5.c b001fa7f1b9e2dc5c2331de62fc641b5ab2bd7a1 F src/tokenize.c 900374b6b37f04748bcd48c2d29a41c251542935 @@ -196,6 +196,7 @@ F tool/report1.txt 9eae07f26a8fc53889b45fc833a66a33daa22816 F tool/showdb.c 3559eac5a3b46e9b558d50856946b25e77633236 F tool/showjournal.c ec3b171be148656827c4949fbfb8ab4370822f87 F tool/space_used.tcl f714c41a59e326b8b9042f415b628b561bafa06b +F tool/spaceanal.tcl c8c39c466fbbc01dab10fc6c4a816db3c8168ab3 F tool/speedtest.tcl 06c76698485ccf597b9e7dbb1ac70706eb873355 F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F www/arch.fig d5f9752a4dbf242e9cfffffd3f5762b6c63b3bcf @@ -239,7 +240,7 @@ F www/tclsqlite.tcl 06a86cba4d7fc88e2bcd633b57702d3d16abebb5 F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9 F www/version3.tcl 092a01f5ef430d2c4acc0ae558d74c4bb89638a0 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4 -P 068b15ae2a5187d9fc1e88c8c3fd21d647733788 -R fc88a818b5909dc4d4132616865487d0 +P 15774aab7d20eb4dfcb47378c8255e26b8cbbe19 +R 1451250ebcf80a0882a7e76f969566bf U drh -Z 93b763a273687565702f371a2eb11088 +Z af23712a2660f12a62a563c7e13af761 diff --git a/manifest.uuid b/manifest.uuid index 172feaa5dc..0cd7ca1e25 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -15774aab7d20eb4dfcb47378c8255e26b8cbbe19 \ No newline at end of file +7c7f698b2eda7b1b34f5ca7fe104145d8bbd8f56 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 5790ddf787..19162598c1 100644 --- a/src/btree.c +++ b/src/btree.c @@ -9,7 +9,7 @@ ** May you share freely, never taking more than you give. ** ************************************************************************* -** $Id: btree.c,v 1.179 2004/07/22 02:40:38 drh Exp $ +** $Id: btree.c,v 1.180 2004/07/23 00:01:39 drh Exp $ ** ** This file implements a external (disk-based) database using BTrees. ** For a detailed discussion of BTrees, refer to @@ -3901,31 +3901,44 @@ int sqlite3BtreePageDump(Btree *pBt, int pgno, int recursive){ ** aResult[0] = The page number ** aResult[1] = The entry number ** aResult[2] = Total number of entries on this page -** aResult[3] = Size of this entry +** aResult[3] = Cell size (local payload + header) ** aResult[4] = Number of free bytes on this page ** aResult[5] = Number of free blocks on the page -** aResult[6] = Page number of the left child of this entry -** aResult[7] = Page number of the right child for the whole page +** aResult[6] = Total payload size (local + overflow) +** aResult[7] = Header size in bytes +** aResult[8] = Local payload size +** aResult[9] = Parent page number ** ** This routine is used for testing and debugging only. */ -int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult){ +int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult, int upCnt){ int cnt, idx; MemPage *pPage = pCur->pPage; + BtCursor tmpCur; pageIntegrity(pPage); assert( pPage->isInit ); + getTempCursor(pCur, &tmpCur); + while( upCnt-- ){ + moveToParent(&tmpCur); + } + pPage = tmpCur.pPage; + pageIntegrity(pPage); aResult[0] = sqlite3pager_pagenumber(pPage->aData); assert( aResult[0]==pPage->pgno ); - aResult[1] = pCur->idx; + aResult[1] = tmpCur.idx; aResult[2] = pPage->nCell; - if( pCur->idx>=0 && pCur->idxnCell ){ - u8 *pCell = findCell(pPage, pCur->idx); - aResult[3] = cellSizePtr(pPage, pCell); - aResult[6] = pPage->leaf ? 0 : get4byte(pCell); + if( tmpCur.idx>=0 && tmpCur.idxnCell ){ + getCellInfo(&tmpCur); + aResult[3] = tmpCur.info.nSize; + aResult[6] = tmpCur.info.nData; + aResult[7] = tmpCur.info.nHeader; + aResult[8] = tmpCur.info.nLocal; }else{ aResult[3] = 0; aResult[6] = 0; + aResult[7] = 0; + aResult[8] = 0; } aResult[4] = pPage->nFree; cnt = 0; @@ -3935,7 +3948,12 @@ int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult){ idx = get2byte(&pPage->aData[idx]); } aResult[5] = cnt; - aResult[7] = pPage->leaf ? 0 : get4byte(&pPage->aData[pPage->hdrOffset+8]); + if( pPage->pParent==0 || isRootPage(pPage) ){ + aResult[9] = 0; + }else{ + aResult[9] = pPage->pParent->pgno; + } + releaseTempCursor(&tmpCur); return SQLITE_OK; } #endif diff --git a/src/btree.h b/src/btree.h index 6b08c953cd..3f247b77ae 100644 --- a/src/btree.h +++ b/src/btree.h @@ -13,7 +13,7 @@ ** subsystem. See comments in the source code for a detailed description ** of what each interface routine does. ** -** @(#) $Id: btree.h,v 1.57 2004/07/22 02:40:38 drh Exp $ +** @(#) $Id: btree.h,v 1.58 2004/07/23 00:01:39 drh Exp $ */ #ifndef _BTREE_H_ #define _BTREE_H_ @@ -115,7 +115,7 @@ struct Pager *sqlite3BtreePager(Btree*); #ifdef SQLITE_TEST -int sqlite3BtreeCursorInfo(BtCursor*, int*); +int sqlite3BtreeCursorInfo(BtCursor*, int*, int); void sqlite3BtreeCursorList(Btree*); int sqlite3BtreePageDump(Btree*, int, int recursive); #endif diff --git a/src/tclsqlite.c b/src/tclsqlite.c index b2953f9c3c..6e92f404ee 100644 --- a/src/tclsqlite.c +++ b/src/tclsqlite.c @@ -11,7 +11,7 @@ ************************************************************************* ** A TCL Interface to SQLite ** -** $Id: tclsqlite.c,v 1.96 2004/07/22 02:40:39 drh Exp $ +** $Id: tclsqlite.c,v 1.97 2004/07/23 00:01:39 drh Exp $ */ #ifndef NO_TCL /* Omit this whole file if TCL is unavailable */ @@ -1131,21 +1131,17 @@ int Tclsqlite3_SafeInit(Tcl_Interp *interp){ return TCL_OK; } -#if 0 -/* -** If compiled using mktclapp, this routine runs to initialize -** everything. +#ifdef TCLSH +/***************************************************************************** +** The code that follows is used to build standalone TCL interpreters */ -int Et_AppInit(Tcl_Interp *interp){ - return Sqlite3_Init(interp); -} -#endif /* -** If the macro TCLSH is defined and is one, then put in code for the -** "main" routine that will initialize Tcl. +** If the macro TCLSH is one, then put in code this for the +** "main" routine that will initialize Tcl and take input from +** standard input. */ -#if defined(TCLSH) && TCLSH==1 +#if TCLSH==1 static char zMainloop[] = "set line {}\n" "while {![eof stdin]} {\n" @@ -1168,6 +1164,17 @@ static char zMainloop[] = "}\n" "}\n" ; +#endif + +/* +** If the macro TCLSH is two, then get the main loop code out of +** the separate file "spaceanal_tcl.h". +*/ +#if TCLSH==2 +static char zMainloop[] = +#include "spaceanal_tcl.h" +; +#endif #define TCLSH_MAIN main /* Needed to fake out mktclapp */ int TCLSH_MAIN(int argc, char **argv){ @@ -1191,7 +1198,7 @@ int TCLSH_MAIN(int argc, char **argv){ Md5_Init(interp); } #endif - if( argc>=2 ){ + if( argc>=2 || TCLSH==2 ){ int i; Tcl_SetVar(interp,"argv0",argv[1],TCL_GLOBAL_ONLY); Tcl_SetVar(interp,"argv", "", TCL_GLOBAL_ONLY); @@ -1199,13 +1206,14 @@ int TCLSH_MAIN(int argc, char **argv){ Tcl_SetVar(interp, "argv", argv[i], TCL_GLOBAL_ONLY | TCL_LIST_ELEMENT | TCL_APPEND_VALUE); } - if( Tcl_EvalFile(interp, argv[1])!=TCL_OK ){ + if( TCLSH==1 && Tcl_EvalFile(interp, argv[1])!=TCL_OK ){ const char *zInfo = Tcl_GetVar(interp, "errorInfo", TCL_GLOBAL_ONLY); if( zInfo==0 ) zInfo = interp->result; fprintf(stderr,"%s: %s\n", *argv, zInfo); return 1; } - }else{ + } + if( argc<=1 || TCLSH==2 ){ Tcl_GlobalEval(interp, zMainloop); } return 0; diff --git a/src/test3.c b/src/test3.c index 9904f36e78..fc009673ba 100644 --- a/src/test3.c +++ b/src/test3.c @@ -13,7 +13,7 @@ ** is not included in the SQLite library. It is used for automated ** testing of the SQLite library. ** -** $Id: test3.c,v 1.49 2004/07/22 01:19:35 drh Exp $ +** $Id: test3.c,v 1.50 2004/07/23 00:01:39 drh Exp $ */ #include "sqliteInt.h" #include "pager.h" @@ -1162,19 +1162,21 @@ static int btree_payload_size( } /* -** Usage: btree_cursor_info ID +** Usage: btree_cursor_info ID ?UP-CNT? ** -** Return eight integers containing information about the entry the +** Return integers containing information about the entry the ** cursor is pointing to: ** ** aResult[0] = The page number ** aResult[1] = The entry number ** aResult[2] = Total number of entries on this page -** aResult[3] = Size of this entry +** aResult[3] = Cell size (local payload + header) ** aResult[4] = Number of free bytes on this page ** aResult[5] = Number of free blocks on the page -** aResult[6] = Page number of the left child of this entry -** aResult[7] = Page number of the right child for the whole page +** aResult[6] = Total payload size (local + overflow) +** aResult[7] = Header size in bytes +** aResult[8] = Local payload size +** aResult[9] = Parent page number */ static int btree_cursor_info( void *NotUsed, @@ -1185,16 +1187,22 @@ static int btree_cursor_info( BtCursor *pCur; int rc; int i, j; - int aResult[8]; + int up; + int aResult[10]; char zBuf[400]; - if( argc!=2 ){ + if( argc!=2 && argc!=3 ){ Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0], - " ID\"", 0); + " ID ?UP-CNT?\"", 0); return TCL_ERROR; } if( Tcl_GetInt(interp, argv[1], (int*)&pCur) ) return TCL_ERROR; - rc = sqlite3BtreeCursorInfo(pCur, aResult); + if( argc==3 ){ + if( Tcl_GetInt(interp, argv[2], &up) ) return TCL_ERROR; + }else{ + up = 0; + } + rc = sqlite3BtreeCursorInfo(pCur, aResult, up); if( rc ){ Tcl_AppendResult(interp, errorName(rc), 0); return TCL_ERROR; diff --git a/tool/spaceanal.tcl b/tool/spaceanal.tcl new file mode 100644 index 0000000000..e42fb28de4 --- /dev/null +++ b/tool/spaceanal.tcl @@ -0,0 +1,559 @@ +# Run this TCL script using "testfixture" in order get a report that shows +# how much disk space is used by a particular data to actually store data +# versus how much space is unused. +# + +# Get the name of the database to analyze +# +set argv $argv0 +if {[llength $argv]!=1} { + puts stderr "Usage: $argv0 database-name" + exit 1 +} +set file_to_analyze [lindex $argv 0] +if {![file exists $file_to_analyze]} { + puts stderr "No such file: $file_to_analyze" + exit 1 +} +if {![file readable $file_to_analyze]} { + puts stderr "File is not readable: $file_to_analyze" + exit 1 +} +if {[file size $file_to_analyze]<512} { + puts stderr "Empty or malformed database: $file_to_analyze" + exit 1 +} + +# Open the database +# +sqlite3 db [lindex $argv 0] +set DB [btree_open [lindex $argv 0] 1000 0] + +# In-memory database for collecting statistics +# +sqlite3 mem :memory: +set tabledef\ +{CREATE TABLE space_used( + name clob, -- Name of a table or index in the database file + tblname clob, -- Name of associated table + is_index boolean, -- TRUE if it is an index, false for a table + nentry int, -- Number of entries in the BTree + leaf_entries int, -- Number of leaf entries + payload int, -- Total amount of data stored in this table or index + ovfl_payload int, -- Total amount of data stored on overflow pages + ovfl_cnt int, -- Number of entries that use overflow + mx_payload int, -- Maximum payload size + int_pages int, -- Number of interior pages used + leaf_pages int, -- Number of leaf pages used + ovfl_pages int, -- Number of overflow pages used + int_unused int, -- Number of unused bytes on interior pages + leaf_unused int, -- Number of unused bytes on primary pages + ovfl_unused int -- Number of unused bytes on overflow pages +);} +mem eval $tabledef + +# This query will be used to find the root page number for every table +# in the database. +# +set sql { + SELECT name, rootpage + FROM sqlite_master WHERE type='table' + UNION ALL + SELECT 'sqlite_master', 1 + ORDER BY 1 +} + +# Quote a string for SQL +# +proc quote txt { + regsub -all ' $txt '' q + return '$q' +} + +# Analyze every table in the database, one at a time. +# +set pageSize [db eval {PRAGMA page_size}] +foreach {name rootpage} [db eval $sql] { + puts stderr "Analyzing table $name..." + set cursor [btree_cursor $DB $rootpage 0] + set go [btree_first $cursor] + catch {unset seen} + set total_payload 0 ;# Payload space used by all entries + set total_ovfl 0 ;# Payload space on overflow pages + set unused_int 0 ;# Unused space on interior nodes + set unused_leaf 0 ;# Unused space on leaf nodes + set unused_ovfl 0 ;# Unused space on overflow pages + set cnt_ovfl 0 ;# Number of entries that use overflows + set cnt_leaf_entry 0 ;# Number of leaf entries + set cnt_int_entry 0 ;# Number of interor entries + set mx_payload 0 ;# Maximum payload size + set ovfl_pages 0 ;# Number of overflow pages used + set leaf_pages 0 ;# Number of leaf pages + set int_pages 0 ;# Number of interior pages + while {$go==0} { + incr cnt_leaf_entry + set stat [btree_cursor_info $cursor] + set payload [lindex $stat 6] + if {$payload>$mx_payload} {set mx_payload $payload} + incr total_payload $payload + set local [lindex $stat 8] + set ovfl [expr {$payload-$local}] + if {$ovfl} { + incr cnt_ovfl + incr total_ovfl $ovfl + set n [expr {int(ceil($ovfl/($pageSize-4.0)))}] + incr ovfl_pages $n + incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}] + } + set pgno [lindex $stat 0] + if {![info exists seen($pgno)]} { + set seen($pgno) 1 + incr leaf_pages + incr unused_leaf [lindex $stat 4] + set parent [lindex $stat 9] + set up 0 + while {$parent!=0 && ![info exists seen($parent)]} { + incr up + set stat [btree_cursor_info $cursor $up] + set seen($parent) 1 + incr int_pages + incr cnt_int_entry [lindex $stat 2] + incr unused_int [lindex $stat 4] + set parent [lindex $stat 9] + } + } + set go [btree_next $cursor] + } + btree_close_cursor $cursor + if {[llength [array names seen]]==0} { + set leaf_pages 1 + set unused_leaf [expr {$pageSize-8}] + } elseif {$rootpage==1 && ![info exists seen(1)]} { + incr int_pages + incr unused_int [expr {$pageSize-112}] + } + set sql "INSERT INTO space_used VALUES(" + append sql [quote $name] + append sql ",[quote $name]" + append sql ",0" + append sql ",[expr {$cnt_leaf_entry+$cnt_int_entry}]" + append sql ",$cnt_leaf_entry" + append sql ",$total_payload" + append sql ",$total_ovfl" + append sql ",$cnt_ovfl" + append sql ",$mx_payload" + append sql ",$int_pages" + append sql ",$leaf_pages" + append sql ",$ovfl_pages" + append sql ",$unused_int" + append sql ",$unused_leaf" + append sql ",$unused_ovfl" + append sql ); + mem eval $sql +} + +# This query will be used to find the root page number for every index +# in the database. +# +set sql { + SELECT name, tbl_name, rootpage + FROM sqlite_master WHERE type='index' + ORDER BY 2, 1 +} + +# Analyze every index in the database, one at a time. +# +set pageSize [db eval {PRAGMA page_size}] +foreach {name tbl_name rootpage} [db eval $sql] { + puts stderr "Analyzing index $name of table $tbl_name..." + set cursor [btree_cursor $DB $rootpage 0] + set go [btree_first $cursor] + catch {unset seen} + set total_payload 0 ;# Payload space used by all entries + set total_ovfl 0 ;# Payload space on overflow pages + set unused_leaf 0 ;# Unused space on leaf nodes + set unused_ovfl 0 ;# Unused space on overflow pages + set cnt_ovfl 0 ;# Number of entries that use overflows + set cnt_leaf_entry 0 ;# Number of leaf entries + set mx_payload 0 ;# Maximum payload size + set ovfl_pages 0 ;# Number of overflow pages used + set leaf_pages 0 ;# Number of leaf pages + while {$go==0} { + incr cnt_leaf_entry + set stat [btree_cursor_info $cursor] + set payload [btree_keysize $cursor] + if {$payload>$mx_payload} {set mx_payload $payload} + incr total_payload $payload + set local [lindex $stat 8] + set ovfl [expr {$payload-$local}] + if {$ovfl} { + incr cnt_ovfl + incr total_ovfl $ovfl + set n [expr {int(ceil($ovfl/($pageSize-4.0)))}] + incr ovfl_pages $n + incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}] + } + set pgno [lindex $stat 0] + if {![info exists seen($pgno)]} { + set seen($pgno) 1 + incr leaf_pages + incr unused_leaf [lindex $stat 4] + } + set go [btree_next $cursor] + } + btree_close_cursor $cursor + if {[llength [array names seen]]==0} { + set leaf_pages 1 + set unused_leaf [expr {$pageSize-8}] + } + set sql "INSERT INTO space_used VALUES(" + append sql [quote $name] + append sql ",[quote $tbl_name]" + append sql ",1" + append sql ",$cnt_leaf_entry" + append sql ",$cnt_leaf_entry" + append sql ",$total_payload" + append sql ",$total_ovfl" + append sql ",$cnt_ovfl" + append sql ",$mx_payload" + append sql ",0" + append sql ",$leaf_pages" + append sql ",$ovfl_pages" + append sql ",0" + append sql ",$unused_leaf" + append sql ",$unused_ovfl" + append sql ); + mem eval $sql +} + +# Generate a single line of output in the statistics section of the +# report. +# +proc statline {title value {extra {}}} { + set len [string length $title] + set dots [string range {......................................} $len end] + set len [string length $value] + set sp2 [string range { } $len end] + if {$extra ne ""} { + set extra " $extra" + } + puts "$title$dots $value$sp2$extra" +} + +# Generate a formatted percentage value for $num/$denom +# +proc percent {num denom {of {}}} { + if {$denom==0.0} {return ""} + set v [expr {$num*100.0/$denom}] + set of {} + if {$v==1.0 || $v==0.0 || ($v>1.0 && $v<99.0)} { + return [format {%5.1f%% %s} $v $of] + } elseif {$v<0.1 || $v>99.9} { + return [format {%7.3f%% %s} $v $of] + } else { + return [format {%6.2f%% %s} $v $of] + } +} + +# Generate a subreport that covers some subset of the database. +# the $where clause determines which subset to analyze. +# +proc subreport {title where} { + global pageSize + set hit 0 + mem eval " + SELECT + sum(nentry) AS nentry, + sum(leaf_entries) AS nleaf, + sum(payload) AS payload, + sum(ovfl_payload) AS ovfl_payload, + max(mx_payload) AS mx_payload, + sum(ovfl_cnt) as ovfl_cnt, + sum(leaf_pages) AS leaf_pages, + sum(int_pages) AS int_pages, + sum(ovfl_pages) AS ovfl_pages, + sum(leaf_unused) AS leaf_unused, + sum(int_unused) AS int_unused, + sum(ovfl_unused) AS ovfl_unused + FROM space_used WHERE $where" {} {set hit 1} + if {!$hit} {return 0} + puts "" + set len [string length $title] + incr len 5 + set stars "***********************************" + append stars $stars + set stars [string range $stars $len end] + puts "*** $title $stars" + puts "" + set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] + statline "Percentage of total database" [percent $total_pages $::file_pgcnt] + statline "Number of entries" $nleaf + set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}] + set storage [expr {$total_pages*$pageSize}] + statline "Bytes of storage consumed" $storage + statline "Bytes of payload" $payload \ + [percent $payload $storage {of storage consumed}] + statline "Average payload per entry" [expr {$nleaf>0?$payload/$nleaf:0}] + set avgunused [expr {$nleaf>0?$total_unused/$nleaf:0}] + statline "Average unused bytes per entry" $avgunused + set nint [expr {$nentry-$nleaf}] + if {$int_pages>0} { + statline "Average fanout" [format %.2f [expr {($nint+0.0)/$int_pages}]] + } + statline "Maximum payload per entry" $mx_payload + statline "Entries that use overflow" $ovfl_cnt \ + [percent $ovfl_cnt $nleaf {of all entries}] + if {$int_pages>0} { + statline "Index pages used" $int_pages + } + statline "Primary pages used" $leaf_pages + statline "Overflow pages used" $ovfl_pages + statline "Total pages used" $total_pages + if {$int_unused>0} { + statline "Unused bytes on index pages" $int_unused \ + [percent $int_unused [expr {$int_pages*$pageSize}] {of index space}] + } + statline "Unused bytes on primary pages" $leaf_unused \ + [percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}] + statline "Unused bytes on overflow pages" $ovfl_unused \ + [percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}] + statline "Unused bytes on all pages" $total_unused \ + [percent $total_unused $storage {of all space}] + return 1 +} + +# Output summary statistics: +# +puts "/** Disk-Space Utilization Report For $file_to_analyze" +puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]" +puts "" +statline {Page size in bytes} $pageSize +set fsize [file size $file_to_analyze] +set file_pgcnt [expr {$fsize/$pageSize}] +set usedcnt [mem eval \ + {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}] +set freecnt [expr {$file_pgcnt-$usedcnt}] +set freecnt2 [lindex [btree_get_meta $DB] 0] +statline {Pages in the whole file (measured)} $file_pgcnt +set file_pgcnt2 [expr {$usedcnt+$freecnt2}] +statline {Pages in the whole file (calculated)} $file_pgcnt2 +statline {Pages that store data} $usedcnt [percent $usedcnt $file_pgcnt] +statline {Pages on the freelist (per header)}\ + $freecnt2 [percent $freecnt2 $file_pgcnt] +statline {Pages on the freelist (calculated)}\ + $freecnt [percent $freecnt $file_pgcnt] + +set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}] +statline {Number of tables in the database} $ntable +set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}] +set autoindex [db eval {SELECT count(*) FROM sqlite_master + WHERE type='index' AND name LIKE '(% autoindex %)'}] +set manindex [expr {$nindex-$autoindex}] +statline {Number of indices} $nindex +statline {Number of named indices} $manindex +statline {Automatically generated indices} $autoindex +set total_payload [mem eval "SELECT sum(payload) FROM space_used"] +statline "Size of the file in bytes" $fsize +set user_payload [mem one {SELECT sum(payload) FROM space_used + WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}] +statline "Bytes of user payload stored" $user_payload \ + [percent $user_payload $fsize] + +# Output table rankings +# +puts "" +puts "*** Page counts for all tables with their indices ********************" +puts "" +mem eval {SELECT tblname, count(*) AS cnt, + sum(int_pages+leaf_pages+ovfl_pages) AS size + FROM space_used GROUP BY tblname ORDER BY size DESC, tblname} {} { + statline [string toupper $tblname] $size [percent $size $file_pgcnt] +} + +# Output subreports +# +if {$nindex>0} { + subreport {All tables and indices} 1 +} +subreport {All tables} {NOT is_index} +if {$nindex>0} { + subreport {All indices} {is_index} +} +foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index + ORDER BY name}] { + regsub ' $tbl '' qn + set name [string toupper $tbl] + set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"] + if {$n>1} { + subreport "Table $name and all its indices" "tblname='$qn'" + subreport "Table $name w/o any indices" "name='$qn'" + subreport "Indices of table $name" "tblname='$qn' AND is_index" + } else { + subreport "Table $name" "name='$qn'" + } +} + +# Output instructions on what the numbers above mean. +# +puts { +*** Definitions ****************************************************** + +Page size in bytes + + The number of bytes in a single page of the database file. + Usually 1024. + +Number of pages in the whole file +} +puts \ +" The number of $pageSize-byte pages that go into forming the complete + database" +puts \ +{ +Pages that store data + + The number of pages that store data, either as primary B*Tree pages or + as overflow pages. The number at the right is the data pages divided by + the total number of pages in the file. + +Pages on the freelist + + The number of pages that are not currently in use but are reserved for + future use. The percentage at the right is the number of freelist pages + divided by the total number of pages in the file. + +Number of tables in the database + + The number of tables in the database, including the SQLITE_MASTER table + used to store schema information. + +Number of indices + + The total number of indices in the database. + +Number of named indices + + The number of indices created using an explicit CREATE INDEX statement. + +Automatically generated indices + + The number of indices used to implement PRIMARY KEY or UNIQUE constraints + on tables. + +Size of the file in bytes + + The total amount of disk space used by the entire database files. + +Bytes of user payload stored + + The total number of bytes of user payload stored in the database. The + schema information in the SQLITE_MASTER table is not counted when + computing this number. The percentage at the right shows the payload + divided by the total file size. + +Percentage of total database + + The amount of the complete database file that is devoted to storing + information described by this category. + +Number of entries + + The total number of B-Tree key/value pairs stored under this category. + +Bytes of storage consumed + + The total amount of disk space required to store all B-Tree entries + under this category. The is the total number of pages used times + the pages size. + +Bytes of payload + + The amount of payload stored under this category. Payload is the data + part of table entries and the key part of index entries. The percentage + at the right is the bytes of payload divided by the bytes of storage + consumed. + +Average payload per entry + + The average amount of payload on each entry. This is just the bytes of + payload divided by the number of entries. + +Average unused bytes per entry + + The average amount of free space remaining on all pages under this + category on a per-entry basis. This is the number of unused bytes on + all pages divided by the number of entries. + +Maximum payload per entry + + The largest payload size of any entry. + +Entries that use overflow + + The number of entries that user one or more overflow pages. + +Total pages used + + This is the number of pages used to hold all information in the current + category. This is the sum of index, primary, and overflow pages. + +Index pages used + + This is the number of pages in a table B-tree that hold only key (rowid) + information and no data. + +Primary pages used + + This is the number of B-tree pages that hold both key and data. + +Overflow pages used + + The total number of overflow pages used for this category. + +Unused bytes on index pages + + The total number of bytes of unused space on all index pages. The + percentage at the right is the number of unused bytes divided by the + total number of bytes on index pages. + +Unused bytes on primary pages + + The total number of bytes of unused space on all primary pages. The + percentage at the right is the number of unused bytes divided by the + total number of bytes on primary pages. + +Unused bytes on overflow pages + + The total number of bytes of unused space on all overflow pages. The + percentage at the right is the number of unused bytes divided by the + total number of bytes on overflow pages. + +Unused bytes on all pages + + The total number of bytes of unused space on all primary and overflow + pages. The percentage at the right is the number of unused bytes + divided by the total number of bytes. +} + +# Output the database +# +puts "**********************************************************************" +puts "The entire text of this report can be sourced into any SQL database" +puts "engine for further analysis. All of the text above is an SQL comment." +puts "The data used to generate this report follows:" +puts "*/" +puts "BEGIN;" +puts $tabledef +unset -nocomplain x +mem eval {SELECT * FROM space_used} x { + puts -nonewline "INSERT INTO space_used VALUES" + set sep ( + foreach col $x(*) { + set v $x($col) + if {$v=="" || ![string is double $v]} {set v [quote $v]} + puts -nonewline $sep$v + set sep , + } + puts ");" +} +puts "COMMIT;"