From 5527f8ff4aa55969d4d872b38de0ed51a0e73c90 Mon Sep 17 00:00:00 2001
From: drh <drh@noemail.net>
Date: Thu, 13 Feb 2003 02:54:03 +0000
Subject: [PATCH] Update the documentation for the new journal format to be
 introduced in version 2.8.0. (CVS 865)

FossilOrigin-Name: e05a7a552f694158ee449d8682f5c137f1c2f2ac
---
 manifest           | 18 +++++------
 manifest.uuid      |  2 +-
 www/changes.tcl    |  6 ++++
 www/fileformat.tcl | 80 +++++++++++++++++++++++++++++++++++-----------
 www/formatchng.tcl | 20 +++++++++++-
 www/lang.tcl       | 40 +++++++++++++----------
 6 files changed, 120 insertions(+), 46 deletions(-)
diff --git a/manifest b/manifest
index b0bfb240f7..e9508f66f8 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Fix\sa\sbug\sin\sthe\srollback\slogic\sfor\sthe\snew\sjournal\sformat.\s(CVS\s864)
-D 2003-02-13T01:58:21
+C Update\sthe\sdocumentation\sfor\sthe\snew\sjournal\sformat\sto\sbe\sintroduced\sin\nversion\s2.8.0.\s(CVS\s865)
+D 2003-02-13T02:54:03
 F Makefile.in 6606854b1512f185b8e8c779b8d7fc2750463d64
 F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906
 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd
@@ -135,17 +135,17 @@ F www/arch.png 82ef36db1143828a7abc88b1e308a5f55d4336f4
 F www/arch.tcl 679a0c48817f71bc91d5911ef386e5ef35d4f178
 F www/audit.tcl 90e09d580f79c7efec0c7d6f447b7ec5c2dce5c0
 F www/c_interface.tcl bca0aea880d043ed1bc0ad3bb39e24043f88b5bf
-F www/changes.tcl b48068eabfd0ff603d90f75b01bed295f23198e2
+F www/changes.tcl 546f966c8df2c872d1117da0c6e5c8304b0e9577
 F www/conflict.tcl 81dd21f9a679e60aae049e9dd8ab53d59570cda2
 F www/crosscompile.tcl 3622ebbe518927a3854a12de51344673eb2dd060
 F www/datatypes.tcl 0cb28565580554fa7e03e8fcb303e87ce57757ae
 F www/download.tcl 0932d7f4f0e8b2adbbd22fac73132f86e43ab4a9
 F www/dynload.tcl 02eb8273aa78cfa9070dd4501dca937fb22b466c
 F www/faq.tcl 06276ff6c3e369374bb83034cc9d4a7d3a2a34a1
-F www/fileformat.tcl 5e3009b1451364602916da986501b94d8516bbb4
-F www/formatchng.tcl b4449e065d2da38b6563bdf12cf46cfe1d4d765e
+F www/fileformat.tcl d9b586416c0d099b82e02e469d532c9372f98f3f
+F www/formatchng.tcl cbaf0f410096c71f86a7537cf9249fa04b9a659c
 F www/index.tcl b5265ca54a5124ec40bffb7c7943e072e074d61a
-F www/lang.tcl 7ad51d873059368a98bcc2afec60d6ba4bb5688a
+F www/lang.tcl 9bd9380dceba83d11fe268e0142c05ee06c757db
 F www/mingw.tcl f1c7c0a7f53387dd9bb4f8c7e8571b7561510ebc
 F www/nulls.tcl 29497dac2bc5b437aa7e2e94577dad4d8933ed26
 F www/omitted.tcl 118062f40a203fcb88b8d68ef1d7c0073ac191ec
@@ -155,7 +155,7 @@ F www/speed.tcl 4d463e2aea41f688ed320a937f93ff885be918c3
 F www/sqlite.tcl ae3dcfb077e53833b59d4fcc94d8a12c50a44098
 F www/tclsqlite.tcl 1db15abeb446aad0caf0b95b8b9579720e4ea331
 F www/vdbe.tcl 2013852c27a02a091d39a766bc87cff329f21218
-P 8968bc063607856775ad63b6594d40c55cf288c0
-R 5afba469118d5ff096906c926faadaaa
+P 7c22aa3f817e737cfd943d903856756468e8678b
+R 496fbf0dd37f4b8503d955d31fa52734
 U drh
-Z dea82f51b0a81ba078f1ce14af670302
+Z 1eae5b92d00b5a748205e87d73d3dd5a
diff --git a/manifest.uuid b/manifest.uuid
index ec013a9d2c..bece6852f8 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-7c22aa3f817e737cfd943d903856756468e8678b
\ No newline at end of file
+e05a7a552f694158ee449d8682f5c137f1c2f2ac
\ No newline at end of file
diff --git a/www/changes.tcl b/www/changes.tcl
index 5645542530..69182d395a 100644
--- a/www/changes.tcl
+++ b/www/changes.tcl
@@ -25,6 +25,12 @@ proc chng {date desc} {
   puts "<DD><P><UL>$desc</UL></P></DD>"
 }
 
+chng {2003 Feb 14 (2.8.0)} {
+<li>Modified the journal file format to make it more resistant to corruption
+    that can occur after an OS crash or power failure.</li>
+<li>Added a new C/C++ API that does not use a callback for returning data.</li>
+}
+
 chng {2003 Jan 25 (2.7.6)} {
 <li>Performance improvements.  The library is now much faster.</li>
 <li>Added the <b>sqlite_set_authorizer()</b> API.  Formal documentation has
diff --git a/www/fileformat.tcl b/www/fileformat.tcl
index 077a128e37..8b91d6941b 100644
--- a/www/fileformat.tcl
+++ b/www/fileformat.tcl
@@ -1,7 +1,7 @@
 #
 # Run this script to generated a fileformat.html output file
 #
-set rcsid {$Id: fileformat.tcl,v 1.7 2003/02/12 14:09:45 drh Exp $}
+set rcsid {$Id: fileformat.tcl,v 1.8 2003/02/13 02:54:04 drh Exp $}
 
 puts {<html>
 <head>
@@ -72,22 +72,37 @@ the journal file.
 
 <p>
 A journal file begins with 8 bytes as follows:
-0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, and 0xd5.
+0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, and 0xd6.
 Processes that are attempting to rollback a journal use these 8 bytes
 as a sanity check to make sure the file they think is a journal really
-is a valid journal.  There is no significance to the choice of
-bytes here - the values were obtained from /dev/random. 
+is a valid journal.  Prior versions of SQLite used different journal
+file formats.  The magic numbers for these prior formats are different
+so that if a new version of the library attempts to rollback a journal
+created by an earlier version, it can detect that the journal uses
+an obsolete format and make the necessary adjustments.  This article
+describes only the newest journal format - supported as of version
+2.8.0.
 </p>
 
 <p>
-Following the 8 byte prefix is a single 4-byte integer that is the
+Following the 8 byte prefix are three 4-byte integers that tell us
+the number of pages that have been committed to the journal,
+a magic number used for
+sanity checking each page, and the
 original size of the main database file before the transaction was
-started.  The main database file is truncated back to this size
-as part of the rollback process.
-The size is expressed in pages (1024 bytes per page) and is
-a big-endian number.  That means that the most significant byte
-occurs first.  All multi-byte integers in the journal file are
-written as big-endian numbers.  That way, a journal file that is
+started.  The number of committed pages is used to limit how far
+into the journal to read.  The use of the checksum magic number is
+described below.
+The original size of the database is used to restore the database
+file back to its original size.
+The size is expressed in pages (1024 bytes per page).
+</p>
+
+<p>
+All three integers in the journal header and all other multi-byte
+numbers used in the journal file are big-endian.
+That means that the most significant byte
+occurs first.  That way, a journal file that is
 originally created on one machine can be rolled back by another
 machine that uses a different byte order.  So, for example, a
 transaction that failed to complete on your big-endian SparcStation
@@ -95,10 +110,11 @@ can still be rolled back on your little-endian Linux box.
 </p>
 
 <p>
-After the 8-byte prefix and the 4-byte initial database size, the
+After the 8-byte prefix and the three 4-byte integers, the
 journal file consists of zero or more page records.  Each page
 record is a 4-byte (big-endian) page number followed by 1024 bytes
-of data.  The data is the original content of the database page
+of data and a 4-byte checksum.  
+The data is the original content of the database page
 before the transaction was started.  So to roll back the transaction,
 the data is simply written into the corresponding page of the
 main database file.  Pages can appear in the journal in any order,
@@ -107,17 +123,37 @@ between 1 and the maximum specified by the page size integer that
 appeared at the beginning of the journal.
 </p>
 
+<p>
+The so-called checksum at the end of each record is not really a
+checksum - it is the sum of the page number and the magic number which
+was the second integer in the journal header.  The purpose of this
+value is to try to detect journal corruption that might have occurred
+because of a power loss or OS crash that occurred while the journal
+file was being written to disk.  It could have been the case that the
+meta-data for the journal file, specifically the size of the file, had
+been written to the disk so that when the machine reboots it appears that
+the file is large enough to hold the current record.  But even though the
+file size has changed, the data for the file might not have made it to
+the disk surface at the time of the OS crash or power loss.  This means
+that after reboot, the end of the journal file will contain quasi-random
+garbage data.  The checksum is an attempt to detect such corruption.  If
+the checksum does not match, that page of the journal is not rolled back.
+</p>
+
 <p>
 Here is a summary of the journal file format:
 </p>
 
 <ul>
-<li>8 byte prefix: 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, x0d5</li>
-<li>4 byte initial database page count, big-endian.</li>
+<li>8 byte prefix: 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6</li>
+<li>4 byte number of records in journal</li>
+<li>4 byte magic number used for page checksums</li>
+<li>4 byte initial database page count</li>
 <li>Zero or more instances of the following:
    <ul>
-   <li>4 byte page number - big-endian</li>
+   <li>4 byte page number</li>
    <li>1024 bytes of original data for the page</li>
+   <li>4 byte checksum</li>
    </ul>
 </li>
 </ul>
@@ -235,8 +271,8 @@ Here is a summary of the information contained on page 1 in the b-tree layer:
 <li>4 byte integer used to determine the byte-order</li>
 <li>4 byte integer which is the first page of the freelist</li>
 <li>4 byte integer which is the number of pages on the freelist</li>
-<li>16 bytes of meta-data arranged as four 4-byte integers</li>
-<li>948 bytes of unused space</li>
+<li>36 bytes of meta-data arranged as nine 4-byte integers</li>
+<li>928 bytes of unused space</li>
 </ul>
 
 <h3>3.2 &nbsp; Structure Of A Single B-Tree Page</h3>
@@ -741,7 +777,13 @@ disabled.
 </p>
 
 <p>
-The fourth meta-value is currently unused.
+The fourth meta-value is a safety level added in version 2.8.0.
+A value of 1 corresponds to a SYNCHRONOUS setting of OFF.  In other
+words, SQLite does not pause to wait for journal data to reach the disk
+surface before overwriting pages of the database.  A value of 2 corresponds
+to a SYNCHRONOUS setting of NORMAL.  A value of 3 corresponds to a
+SYNCHRONOUS setting of FULL. If the value is 0, that means it has not
+been initialized so the default synchronous setting of NORMAL is used.
 </p>
 
 }
diff --git a/www/formatchng.tcl b/www/formatchng.tcl
index 9456c1b981..688ca42762 100644
--- a/www/formatchng.tcl
+++ b/www/formatchng.tcl
@@ -1,7 +1,7 @@
 #
 # Run this Tcl script to generate the formatchng.html file.
 #
-set rcsid {$Id: formatchng.tcl,v 1.7 2002/08/13 23:02:59 drh Exp $ }
+set rcsid {$Id: formatchng.tcl,v 1.8 2003/02/13 02:54:04 drh Exp $ }
 
 puts {<html>
 <head>
@@ -157,6 +157,24 @@ occurred since version 1.0.0:
   and later of SQLite will read earlier database version.</p>
   </td>
 </tr>
+<tr>
+  <td valign="top">2.7.6 to 2.8.0</td>
+  <td valign="top">2003-Feb-14</td>
+  <td><p>Version 2.8.0 introduces a change to the format of the rollback
+  journal file.  The main database file format is unchanged.  Versions
+  2.7.6 and earlier can read and write 2.8.0 databases and vice versa.
+  Version 2.8.0 can rollback a transaction that was started by version
+  2.7.6 and earlier.  But version 2.7.6 and earlier cannot rollback a
+  transaction started by version 2.8.0 or later.</p>
+
+  <p>The only time this would ever be an issue is when you have a program
+  using version 2.8.0 or later that crashes with an incomplete
+  transaction, then you try to examine the database using version 2.7.6 or
+  earlier.  The 2.7.6 code will not be able to read the journal file
+  and thus will not be able to rollback the incomplete transaction
+  to restore the database.</p>
+  </td>
+</tr>
 </table>
 </blockquote>
 
diff --git a/www/lang.tcl b/www/lang.tcl
index 113c42dc25..e37d366800 100644
--- a/www/lang.tcl
+++ b/www/lang.tcl
@@ -1,7 +1,7 @@
 #
 # Run this Tcl script to generate the sqlite.html file.
 #
-set rcsid {$Id: lang.tcl,v 1.49 2003/01/29 22:58:27 drh Exp $}
+set rcsid {$Id: lang.tcl,v 1.50 2003/02/13 02:54:04 drh Exp $}
 
 puts {<html>
 <head>
@@ -1080,23 +1080,30 @@ with caution.</p>
     everytime you reopen the database.</p></li>
 
 <li><p><b>PRAGMA default_synchronous;
-       <br>PRAGMA default_synchronous = ON;
+       <br>PRAGMA default_synchronous = FULL;
+       <br>PRAGMA default_synchronous = NORMAL;
        <br>PRAGMA default_synchronous = OFF;</b></p>
     <p>Query or change the setting of the "synchronous" flag in
-    the database.  When synchronous is on (the default), the SQLite database
-    engine will pause at critical moments to make sure that data has actually
-    be written to the disk surface.  (In other words, it invokes the
-    equivalent of the <b>fsync()</b> system call.)  In synchronous mode,
-    an SQLite database should be fully recoverable even if the operating
-    system crashes or power is interrupted unexpectedly.  The penalty for
-    this assurance is that some database operations take longer because the
-    engine has to wait on the (relatively slow) disk drive.  The alternative
-    is to turn synchronous off.  With synchronous off, SQLite continues
-    processing as soon as it has handed data off to the operating system.
+    the database.  When synchronous is FULL, the SQLite database engine will
+    pause at critical moments to make sure that data has actually been 
+    written to the disk surface before continuing.  This ensures that if
+    the operating system crashes or if there is a power failure, the database
+    will be uncorrupted after rebooting.  FULL synchronous is very 
+    safe, but it is also slow.  
+    When synchronous is NORMAL (the default), the SQLite database
+    engine will still pause at the most critical moments, but less often
+    than in FULL mode.  There is a very small (though non-zero) chance that
+    a power failure at just the wrong time could corrupt the database in
+    NORMAL mode.  But in practice, you are more likely to suffer
+    a catastrophic disk failure or some other unrecoverable hardware
+    fault.  So NORMAL is the default mode.
+    With synchronous OFF, SQLite continues without pausing
+    as soon as it has handed data off to the operating system.
     If the application running SQLite crashes, the data will be safe, but
-    the database could (in theory) become corrupted if the operating system
-    crashes or the computer suddenly loses power.  On the other hand, some
-    operations are as much as 50 or more times faster with synchronous off.
+    the database might become corrupted if the operating system
+    crashes or the computer loses power before that data has been written
+    to the disk surface.  On the other hand, some
+    operations are as much as 50 or more times faster with synchronous OFF.
     </p>
     <p>This pragma changes the synchronous mode persistently.  Once changed,
     the mode stays as set even if the database is closed and reopened.  The
@@ -1179,7 +1186,8 @@ with caution.</p>
     </td></table></blockquote></li>
 
 <li><p><b>PRAGMA synchronous;
-       <br>PRAGMA synchronous = ON;
+       <br>PRAGMA synchronous = FULL;
+       <br>PRAGMA synchronous = NORMAL;
        <br>PRAGMA synchronous = OFF;</b></p>
     <p>Query or change the setting of the "synchronous" flag in
     the database for the duration of the current database connect.