From 7b39686749fa6b5e56b13830afc45b4d6194146e Mon Sep 17 00:00:00 2001 From: drh Date: Wed, 1 Jan 2003 23:06:20 +0000 Subject: [PATCH] If compiled with the -DVDBE_PROFILE=1 option, special code is inserted that uses the pentium RDTSC instruction to compute very precise runtimes on all VDBE opcodes. (This only works on i586 processors, of course.) The results are written into the vdbe_profile.out file for analysis. Hopefully, this new feature will reveal hot spots that can be optimized to make the VDBE run faster. (CVS 807) FossilOrigin-Name: a1c071ea18766932c90275c704e078134c67be68 --- manifest | 14 +++++----- manifest.uuid | 2 +- src/vdbe.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++----- src/vdbe.h | 6 ++++- 4 files changed, 79 insertions(+), 15 deletions(-) diff --git a/manifest b/manifest index ddc27008b1..1ada763034 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Version\s2.7.5\s(CVS\s806) -D 2002-12-28T01:26:07 +C If\scompiled\swith\sthe\s-DVDBE_PROFILE=1\soption,\sspecial\scode\sis\sinserted\sthat\nuses\sthe\spentium\sRDTSC\sinstruction\sto\scompute\svery\sprecise\sruntimes\son\sall\nVDBE\sopcodes.\s\s(This\sonly\sworks\son\si586\sprocessors,\sof\scourse.)\s\sThe\sresults\nare\swritten\sinto\sthe\svdbe_profile.out\sfile\sfor\sanalysis.\sHopefully,\sthis\nnew\sfeature\swill\sreveal\shot\sspots\sthat\scan\sbe\soptimized\sto\smake\sthe\sVDBE\srun\nfaster.\s(CVS\s807) +D 2003-01-01T23:06:21 F Makefile.in 868c17a1ae1c07603d491274cc8f86c04acf2a1e F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -52,8 +52,8 @@ F src/tokenize.c 75e3bb37305b64e118e709752066f494c4f93c30 F src/trigger.c 5ba917fc226b96065108da28186c2efaec53e481 F src/update.c 881e4c8e7c786545da4fd2d95da19252b2e31137 F src/util.c 8f19c71e45d1a5a3ff2e9a3eef8f36296d87ea43 -F src/vdbe.c aa6165ae4f2303795e4c5531293576c541363e40 -F src/vdbe.h b7584044223104ba7896a7f87b66daebdd6022ba +F src/vdbe.c 
0baebb3f0c624e59bf6710cd9fc15db01c7fc31e +F src/vdbe.h 754eba497cfe0c3e352b9c101ab2f811f10d0a55 F src/where.c af235636b7bc7f7f42ee1c7162d1958ad0102cab F test/all.test 873d30e25a41b3aa48fec5633a7ec1816e107029 F test/bigfile.test 1cd8256d4619c39bea48147d344f348823e78678 @@ -152,7 +152,7 @@ F www/speed.tcl a20a792738475b68756ea7a19321600f23d1d803 F www/sqlite.tcl ae3dcfb077e53833b59d4fcc94d8a12c50a44098 F www/tclsqlite.tcl 1db15abeb446aad0caf0b95b8b9579720e4ea331 F www/vdbe.tcl 2013852c27a02a091d39a766bc87cff329f21218 -P b0864cc9c89a3c2350ac46634ef3c420f97d11c0 -R c140ba28091bbfab511edfabbd2b5376 +P ee95eefe12c48f28412461125c231122e0d4277a +R c487959d68c24b3808f56b254c4d4b49 U drh -Z 438f9127f8c98ed0533abf3177ee5b63 +Z 2e2dd169d3dae32e223fb4d52d6a727e diff --git a/manifest.uuid b/manifest.uuid index cd74d84610..f7c88b97af 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ee95eefe12c48f28412461125c231122e0d4277a \ No newline at end of file +a1c071ea18766932c90275c704e078134c67be68 \ No newline at end of file diff --git a/src/vdbe.c b/src/vdbe.c index 58a5374767..1da38c96cd 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -36,7 +36,7 @@ ** in this file for details. If in doubt, do not deviate from existing ** commenting and indentation practices when changing or adding code. ** -** $Id: vdbe.c,v 1.187 2002/12/04 22:29:29 drh Exp $ +** $Id: vdbe.c,v 1.188 2003/01/01 23:06:21 drh Exp $ */ #include "sqliteInt.h" #include @@ -1299,7 +1299,7 @@ static char *vdbe_fgets(char *zBuf, int nBuf, FILE *in){ return i>0 ? zBuf : 0; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(VDBE_PROFILE) /* ** Print a single opcode. This routine is used for debugging only. */ @@ -1338,6 +1338,22 @@ static int expandCursorArraySize(Vdbe *p, int mxCursor){ return 0; } +#ifdef VDBE_PROFILE +/* +** The following routine only works on pentium-class processors. +** It uses the RDTSC opcode to read cycle count value out of the +** processor and returns that value. 
This can be used for high-res +** profiling. +*/ +__inline__ unsigned long long int hwtime(void){ + unsigned long long int x; + __asm__("rdtsc\n\t" + "mov %%edx, %%ecx\n\t" + :"=A" (x)); + return x; +} +#endif + /* ** Execute the program in the VDBE. ** @@ -1386,6 +1402,9 @@ int sqliteVdbeExec( char zBuf[100]; /* Space to sprintf() an integer */ int returnStack[100]; /* Return address stack for OP_Gosub & OP_Return */ int returnDepth = 0; /* Next unused element in returnStack[] */ +#ifdef VDBE_PROFILE + unsigned long long start; +#endif /* No instruction ever pushes more than a single element onto the @@ -1399,6 +1418,15 @@ int sqliteVdbeExec( zStack = p->zStack; aStack = p->aStack; p->tos = -1; +#ifdef VDBE_PROFILE + { + int i; + for(i=0; i<p->nOp; i++){ + p->aOp[i].cnt = 0; + p->aOp[i].cycles = 0; + } + } +#endif /* Initialize the aggregrate hash table. */ @@ -1414,6 +1442,9 @@ int sqliteVdbeExec( if( sqlite_malloc_failed ) goto no_mem; for(pc=0; !sqlite_malloc_failed && rc==SQLITE_OK && pc<p->nOp VERIFY(&& pc>=0); pc++){ +#ifdef VDBE_PROFILE + start = hwtime(); +#endif pOp = &p->aOp[pc]; /* Interrupt processing if requested. @@ -4065,22 +4096,24 @@ case OP_Rewind: { case OP_Prev: case OP_Next: { int i = pOp->p1; + Cursor *pC; BtCursor *pCrsr; - if( VERIFY( i>=0 && i<p->nCursor && ) (pCrsr = p->aCsr[i].pCursor)!=0 ){ + if( VERIFY( i>=0 && i<p->nCursor && ) + (pCrsr = (pC = &p->aCsr[i])->pCursor)!=0 ){ int res; - if( p->aCsr[i].nullRow ){ + if( pC->nullRow ){ res = 1; }else{ rc = pOp->opcode==OP_Next ? 
sqliteBtreeNext(pCrsr, &res) : sqliteBtreePrevious(pCrsr, &res); - p->aCsr[i].nullRow = res; + pC->nullRow = res; } if( res==0 ){ pc = pOp->p2 - 1; sqlite_search_count++; } - p->aCsr[i].recnoIsValid = 0; + pC->recnoIsValid = 0; } break; } @@ -5321,6 +5354,11 @@ default: { *****************************************************************************/ } +#ifdef VDBE_PROFILE + pOp->cycles += hwtime() - start; + pOp->cnt++; +#endif + /* The following code adds nothing to the actual functionality ** of the program. It is only here for testing and debugging. ** On the other hand, it does burn CPU cycles every time through @@ -5417,6 +5455,28 @@ cleanup: sqliteBtreeCommitCkpt(pBt); if( db->pBeTemp ) sqliteBtreeCommitCkpt(db->pBeTemp); assert( p->tosnOp; i++){ + fprintf(out, "%02x", p->aOp[i].opcode); + } + fprintf(out, "\n"); + for(i=0; i<p->nOp; i++){ + fprintf(out, "%6d %10lld %8lld ", + p->aOp[i].cnt, + p->aOp[i].cycles, + p->aOp[i].cnt>0 ? p->aOp[i].cycles/p->aOp[i].cnt : 0 + ); + vdbePrintOp(out, i, &p->aOp[i]); + } + fclose(out); + } + } +#endif return rc; /* Jump to here if a malloc() fails. It's hard to get a malloc() diff --git a/src/vdbe.h b/src/vdbe.h index 53d6a1cdba..6986463b74 100644 --- a/src/vdbe.h +++ b/src/vdbe.h @@ -15,7 +15,7 @@ ** or VDBE. The VDBE implements an abstract machine that runs a ** simple program to access and modify the underlying database. ** -** $Id: vdbe.h,v 1.60 2002/09/08 00:04:53 drh Exp $ +** $Id: vdbe.h,v 1.61 2003/01/01 23:06:21 drh Exp $ */ #ifndef _SQLITE_VDBE_H_ #define _SQLITE_VDBE_H_ @@ -39,6 +39,10 @@ struct VdbeOp { int p2; /* Second parameter (often the jump destination) */ char *p3; /* Third parameter */ int p3type; /* P3_STATIC, P3_DYNAMIC or P3_POINTER */ +#ifdef VDBE_PROFILE + int cnt; /* Number of times this instruction was executed */ + long long cycles; /* Total time spend executing this instruction */ +#endif }; typedef struct VdbeOp VdbeOp;