mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-30 19:03:16 +03:00
Add the fossildelta.c extension in ext/misc with implementations of the Fossil
delta functions. FossilOrigin-Name: b80cafa6f8a5c6ff1dc9efd2f670777ab131ace2df1eb431cedc8cfa901baf18
This commit is contained in:
791
ext/misc/fossildelta.c
Normal file
791
ext/misc/fossildelta.c
Normal file
@ -0,0 +1,791 @@
|
||||
/*
|
||||
** 2019-02-19
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This SQLite extension implements the delta functions used by Fossil.
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT1
|
||||
|
||||
/*
|
||||
** The "u32" type must be an unsigned 32-bit integer. Adjust this
|
||||
*/
|
||||
typedef unsigned int u32;
|
||||
|
||||
/*
|
||||
** Must be a 16-bit value
|
||||
*/
|
||||
typedef short int s16;
|
||||
typedef unsigned short int u16;
|
||||
|
||||
|
||||
/*
|
||||
** The width of a hash window in bytes. The algorithm only works if this
|
||||
** is a power of 2.
|
||||
*/
|
||||
#define NHASH 16
|
||||
|
||||
/*
|
||||
** The current state of the rolling hash.
|
||||
**
|
||||
** z[] holds the values that have been hashed. z[] is a circular buffer.
|
||||
** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of
|
||||
** the window.
|
||||
**
|
||||
** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted
|
||||
** sum. Hash.b is z[i]*NHASH + z[i+1]*(NHASH-1) + ... + z[i+NHASH-1]*1.
|
||||
** (Each index for z[] should be module NHASH, of course. The %NHASH operator
|
||||
** is omitted in the prior expression for brevity.)
|
||||
*/
|
||||
typedef struct hash hash;
|
||||
struct hash {
|
||||
u16 a, b; /* Hash values */
|
||||
u16 i; /* Start of the hash window */
|
||||
char z[NHASH]; /* The values that have been hashed */
|
||||
};
|
||||
|
||||
/*
|
||||
** Initialize the rolling hash using the first NHASH characters of z[]
|
||||
*/
|
||||
static void hash_init(hash *pHash, const char *z){
|
||||
u16 a, b, i;
|
||||
a = b = z[0];
|
||||
for(i=1; i<NHASH; i++){
|
||||
a += z[i];
|
||||
b += a;
|
||||
}
|
||||
memcpy(pHash->z, z, NHASH);
|
||||
pHash->a = a & 0xffff;
|
||||
pHash->b = b & 0xffff;
|
||||
pHash->i = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Advance the rolling hash by a single character "c"
|
||||
*/
|
||||
static void hash_next(hash *pHash, int c){
|
||||
u16 old = pHash->z[pHash->i];
|
||||
pHash->z[pHash->i] = c;
|
||||
pHash->i = (pHash->i+1)&(NHASH-1);
|
||||
pHash->a = pHash->a - old + c;
|
||||
pHash->b = pHash->b - NHASH*old + pHash->a;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a 32-bit hash value
|
||||
*/
|
||||
static u32 hash_32bit(hash *pHash){
|
||||
return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16);
|
||||
}
|
||||
|
||||
/*
|
||||
** Compute a hash on NHASH bytes.
|
||||
**
|
||||
** This routine is intended to be equivalent to:
|
||||
** hash h;
|
||||
** hash_init(&h, zInput);
|
||||
** return hash_32bit(&h);
|
||||
*/
|
||||
static u32 hash_once(const char *z){
|
||||
u16 a, b, i;
|
||||
a = b = z[0];
|
||||
for(i=1; i<NHASH; i++){
|
||||
a += z[i];
|
||||
b += a;
|
||||
}
|
||||
return a | (((u32)b)<<16);
|
||||
}
|
||||
|
||||
/*
|
||||
** Write an base-64 integer into the given buffer.
|
||||
*/
|
||||
static void putInt(unsigned int v, char **pz){
|
||||
static const char zDigits[] =
|
||||
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~";
|
||||
/* 123456789 123456789 123456789 123456789 123456789 123456789 123 */
|
||||
int i, j;
|
||||
char zBuf[20];
|
||||
if( v==0 ){
|
||||
*(*pz)++ = '0';
|
||||
return;
|
||||
}
|
||||
for(i=0; v>0; i++, v>>=6){
|
||||
zBuf[i] = zDigits[v&0x3f];
|
||||
}
|
||||
for(j=i-1; j>=0; j--){
|
||||
*(*pz)++ = zBuf[j];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Read bytes from *pz and convert them into a positive integer. When
|
||||
** finished, leave *pz pointing to the first character past the end of
|
||||
** the integer. The *pLen parameter holds the length of the string
|
||||
** in *pz and is decremented once for each character in the integer.
|
||||
*/
|
||||
static unsigned int getInt(const char **pz, int *pLen){
|
||||
static const signed char zValue[] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
|
||||
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, 36,
|
||||
-1, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
|
||||
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, -1, -1, -1, 63, -1,
|
||||
};
|
||||
unsigned int v = 0;
|
||||
int c;
|
||||
unsigned char *z = (unsigned char*)*pz;
|
||||
unsigned char *zStart = z;
|
||||
while( (c = zValue[0x7f&*(z++)])>=0 ){
|
||||
v = (v<<6) + c;
|
||||
}
|
||||
z--;
|
||||
*pLen -= z - zStart;
|
||||
*pz = (char*)z;
|
||||
return v;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return the number digits in the base-64 representation of a positive integer
|
||||
*/
|
||||
static int digit_count(int v){
|
||||
unsigned int i, x;
|
||||
for(i=1, x=64; v>=x; i++, x <<= 6){}
|
||||
return i;
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
|
||||
#else
|
||||
# define GCC_VERSION 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Compute a 32-bit big-endian checksum on the N-byte buffer. If the
|
||||
** buffer is not a multiple of 4 bytes length, compute the sum that would
|
||||
** have occurred if the buffer was padded with zeros to the next multiple
|
||||
** of four bytes.
|
||||
*/
|
||||
static unsigned int checksum(const char *zIn, size_t N){
|
||||
static const int byteOrderTest = 1;
|
||||
const unsigned char *z = (const unsigned char *)zIn;
|
||||
const unsigned char *zEnd = (const unsigned char*)&zIn[N&~3];
|
||||
unsigned sum = 0;
|
||||
assert( (z - (const unsigned char*)0)%4==0 ); /* Four-byte alignment */
|
||||
if( 0==*(char*)&byteOrderTest ){
|
||||
/* This is a big-endian machine */
|
||||
while( z<zEnd ){
|
||||
sum += *(unsigned*)z;
|
||||
z += 4;
|
||||
}
|
||||
}else{
|
||||
/* A little-endian machine */
|
||||
#if GCC_VERSION>=4003000
|
||||
while( z<zEnd ){
|
||||
sum += __builtin_bswap32(*(unsigned*)z);
|
||||
z += 4;
|
||||
}
|
||||
#elif defined(_MSC_VER) && _MSC_VER>=1300
|
||||
while( z<zEnd ){
|
||||
sum += _byteswap_ulong(*(unsigned*)z);
|
||||
z += 4;
|
||||
}
|
||||
#else
|
||||
unsigned sum0 = 0;
|
||||
unsigned sum1 = 0;
|
||||
unsigned sum2 = 0;
|
||||
while(N >= 16){
|
||||
sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]);
|
||||
sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]);
|
||||
sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]);
|
||||
sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
|
||||
z += 16;
|
||||
N -= 16;
|
||||
}
|
||||
while(N >= 4){
|
||||
sum0 += z[0];
|
||||
sum1 += z[1];
|
||||
sum2 += z[2];
|
||||
sum += z[3];
|
||||
z += 4;
|
||||
N -= 4;
|
||||
}
|
||||
sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24);
|
||||
#endif
|
||||
}
|
||||
switch(N&3){
|
||||
case 3: sum += (z[2] << 8);
|
||||
case 2: sum += (z[1] << 16);
|
||||
case 1: sum += (z[0] << 24);
|
||||
default: ;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
/*
|
||||
** Create a new delta.
|
||||
**
|
||||
** The delta is written into a preallocated buffer, zDelta, which
|
||||
** should be at least 60 bytes longer than the target file, zOut.
|
||||
** The delta string will be NUL-terminated, but it might also contain
|
||||
** embedded NUL characters if either the zSrc or zOut files are
|
||||
** binary. This function returns the length of the delta string
|
||||
** in bytes, excluding the final NUL terminator character.
|
||||
**
|
||||
** Output Format:
|
||||
**
|
||||
** The delta begins with a base64 number followed by a newline. This
|
||||
** number is the number of bytes in the TARGET file. Thus, given a
|
||||
** delta file z, a program can compute the size of the output file
|
||||
** simply by reading the first line and decoding the base-64 number
|
||||
** found there. The delta_output_size() routine does exactly this.
|
||||
**
|
||||
** After the initial size number, the delta consists of a series of
|
||||
** literal text segments and commands to copy from the SOURCE file.
|
||||
** A copy command looks like this:
|
||||
**
|
||||
** NNN@MMM,
|
||||
**
|
||||
** where NNN is the number of bytes to be copied and MMM is the offset
|
||||
** into the source file of the first byte (both base-64). If NNN is 0
|
||||
** it means copy the rest of the input file. Literal text is like this:
|
||||
**
|
||||
** NNN:TTTTT
|
||||
**
|
||||
** where NNN is the number of bytes of text (base-64) and TTTTT is the text.
|
||||
**
|
||||
** The last term is of the form
|
||||
**
|
||||
** NNN;
|
||||
**
|
||||
** In this case, NNN is a 32-bit bigendian checksum of the output file
|
||||
** that can be used to verify that the delta applied correctly. All
|
||||
** numbers are in base-64.
|
||||
**
|
||||
** Pure text files generate a pure text delta. Binary files generate a
|
||||
** delta that may contain some binary data.
|
||||
**
|
||||
** Algorithm:
|
||||
**
|
||||
** The encoder first builds a hash table to help it find matching
|
||||
** patterns in the source file. 16-byte chunks of the source file
|
||||
** sampled at evenly spaced intervals are used to populate the hash
|
||||
** table.
|
||||
**
|
||||
** Next we begin scanning the target file using a sliding 16-byte
|
||||
** window. The hash of the 16-byte window in the target is used to
|
||||
** search for a matching section in the source file. When a match
|
||||
** is found, a copy command is added to the delta. An effort is
|
||||
** made to extend the matching section to regions that come before
|
||||
** and after the 16-byte hash window. A copy command is only issued
|
||||
** if the result would use less space that just quoting the text
|
||||
** literally. Literal text is added to the delta for sections that
|
||||
** do not match or which can not be encoded efficiently using copy
|
||||
** commands.
|
||||
*/
|
||||
static int delta_create(
|
||||
const char *zSrc, /* The source or pattern file */
|
||||
unsigned int lenSrc, /* Length of the source file */
|
||||
const char *zOut, /* The target file */
|
||||
unsigned int lenOut, /* Length of the target file */
|
||||
char *zDelta /* Write the delta into this buffer */
|
||||
){
|
||||
int i, base;
|
||||
char *zOrigDelta = zDelta;
|
||||
hash h;
|
||||
int nHash; /* Number of hash table entries */
|
||||
int *landmark; /* Primary hash table */
|
||||
int *collide; /* Collision chain */
|
||||
int lastRead = -1; /* Last byte of zSrc read by a COPY command */
|
||||
|
||||
/* Add the target file size to the beginning of the delta
|
||||
*/
|
||||
putInt(lenOut, &zDelta);
|
||||
*(zDelta++) = '\n';
|
||||
|
||||
/* If the source file is very small, it means that we have no
|
||||
** chance of ever doing a copy command. Just output a single
|
||||
** literal segment for the entire target and exit.
|
||||
*/
|
||||
if( lenSrc<=NHASH ){
|
||||
putInt(lenOut, &zDelta);
|
||||
*(zDelta++) = ':';
|
||||
memcpy(zDelta, zOut, lenOut);
|
||||
zDelta += lenOut;
|
||||
putInt(checksum(zOut, lenOut), &zDelta);
|
||||
*(zDelta++) = ';';
|
||||
return zDelta - zOrigDelta;
|
||||
}
|
||||
|
||||
/* Compute the hash table used to locate matching sections in the
|
||||
** source file.
|
||||
*/
|
||||
nHash = lenSrc/NHASH;
|
||||
collide = sqlite3_malloc64( (sqlite3_int64)nHash*2*sizeof(int) );
|
||||
memset(collide, -1, nHash*2*sizeof(int));
|
||||
landmark = &collide[nHash];
|
||||
for(i=0; i<lenSrc-NHASH; i+=NHASH){
|
||||
int hv = hash_once(&zSrc[i]) % nHash;
|
||||
collide[i/NHASH] = landmark[hv];
|
||||
landmark[hv] = i/NHASH;
|
||||
}
|
||||
|
||||
/* Begin scanning the target file and generating copy commands and
|
||||
** literal sections of the delta.
|
||||
*/
|
||||
base = 0; /* We have already generated everything before zOut[base] */
|
||||
while( base+NHASH<lenOut ){
|
||||
int iSrc, iBlock;
|
||||
unsigned int bestCnt, bestOfst=0, bestLitsz=0;
|
||||
hash_init(&h, &zOut[base]);
|
||||
i = 0; /* Trying to match a landmark against zOut[base+i] */
|
||||
bestCnt = 0;
|
||||
while( 1 ){
|
||||
int hv;
|
||||
int limit = 250;
|
||||
|
||||
hv = hash_32bit(&h) % nHash;
|
||||
iBlock = landmark[hv];
|
||||
while( iBlock>=0 && (limit--)>0 ){
|
||||
/*
|
||||
** The hash window has identified a potential match against
|
||||
** landmark block iBlock. But we need to investigate further.
|
||||
**
|
||||
** Look for a region in zOut that matches zSrc. Anchor the search
|
||||
** at zSrc[iSrc] and zOut[base+i]. Do not include anything prior to
|
||||
** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen].
|
||||
**
|
||||
** Set cnt equal to the length of the match and set ofst so that
|
||||
** zSrc[ofst] is the first element of the match. litsz is the number
|
||||
** of characters between zOut[base] and the beginning of the match.
|
||||
** sz will be the overhead (in bytes) needed to encode the copy
|
||||
** command. Only generate copy command if the overhead of the
|
||||
** copy command is less than the amount of literal text to be copied.
|
||||
*/
|
||||
int cnt, ofst, litsz;
|
||||
int j, k, x, y;
|
||||
int sz;
|
||||
int limitX;
|
||||
|
||||
/* Beginning at iSrc, match forwards as far as we can. j counts
|
||||
** the number of characters that match */
|
||||
iSrc = iBlock*NHASH;
|
||||
y = base+i;
|
||||
limitX = ( lenSrc-iSrc <= lenOut-y ) ? lenSrc : iSrc + lenOut - y;
|
||||
for(x=iSrc; x<limitX; x++, y++){
|
||||
if( zSrc[x]!=zOut[y] ) break;
|
||||
}
|
||||
j = x - iSrc - 1;
|
||||
|
||||
/* Beginning at iSrc-1, match backwards as far as we can. k counts
|
||||
** the number of characters that match */
|
||||
for(k=1; k<iSrc && k<=i; k++){
|
||||
if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
|
||||
}
|
||||
k--;
|
||||
|
||||
/* Compute the offset and size of the matching region */
|
||||
ofst = iSrc-k;
|
||||
cnt = j+k+1;
|
||||
litsz = i-k; /* Number of bytes of literal text before the copy */
|
||||
/* sz will hold the number of bytes needed to encode the "insert"
|
||||
** command and the copy command, not counting the "insert" text */
|
||||
sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3;
|
||||
if( cnt>=sz && cnt>bestCnt ){
|
||||
/* Remember this match only if it is the best so far and it
|
||||
** does not increase the file size */
|
||||
bestCnt = cnt;
|
||||
bestOfst = iSrc-k;
|
||||
bestLitsz = litsz;
|
||||
}
|
||||
|
||||
/* Check the next matching block */
|
||||
iBlock = collide[iBlock];
|
||||
}
|
||||
|
||||
/* We have a copy command that does not cause the delta to be larger
|
||||
** than a literal insert. So add the copy command to the delta.
|
||||
*/
|
||||
if( bestCnt>0 ){
|
||||
if( bestLitsz>0 ){
|
||||
/* Add an insert command before the copy */
|
||||
putInt(bestLitsz,&zDelta);
|
||||
*(zDelta++) = ':';
|
||||
memcpy(zDelta, &zOut[base], bestLitsz);
|
||||
zDelta += bestLitsz;
|
||||
base += bestLitsz;
|
||||
}
|
||||
base += bestCnt;
|
||||
putInt(bestCnt, &zDelta);
|
||||
*(zDelta++) = '@';
|
||||
putInt(bestOfst, &zDelta);
|
||||
*(zDelta++) = ',';
|
||||
if( bestOfst + bestCnt -1 > lastRead ){
|
||||
lastRead = bestOfst + bestCnt - 1;
|
||||
}
|
||||
bestCnt = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* If we reach this point, it means no match is found so far */
|
||||
if( base+i+NHASH>=lenOut ){
|
||||
/* We have reached the end of the file and have not found any
|
||||
** matches. Do an "insert" for everything that does not match */
|
||||
putInt(lenOut-base, &zDelta);
|
||||
*(zDelta++) = ':';
|
||||
memcpy(zDelta, &zOut[base], lenOut-base);
|
||||
zDelta += lenOut-base;
|
||||
base = lenOut;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Advance the hash by one character. Keep looking for a match */
|
||||
hash_next(&h, zOut[base+i+NHASH]);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
/* Output a final "insert" record to get all the text at the end of
|
||||
** the file that does not match anything in the source file.
|
||||
*/
|
||||
if( base<lenOut ){
|
||||
putInt(lenOut-base, &zDelta);
|
||||
*(zDelta++) = ':';
|
||||
memcpy(zDelta, &zOut[base], lenOut-base);
|
||||
zDelta += lenOut-base;
|
||||
}
|
||||
/* Output the final checksum record. */
|
||||
putInt(checksum(zOut, lenOut), &zDelta);
|
||||
*(zDelta++) = ';';
|
||||
sqlite3_free(collide);
|
||||
return zDelta - zOrigDelta;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return the size (in bytes) of the output from applying
|
||||
** a delta.
|
||||
**
|
||||
** This routine is provided so that an procedure that is able
|
||||
** to call delta_apply() can learn how much space is required
|
||||
** for the output and hence allocate nor more space that is really
|
||||
** needed.
|
||||
*/
|
||||
static int delta_output_size(const char *zDelta, int lenDelta){
|
||||
int size;
|
||||
size = getInt(&zDelta, &lenDelta);
|
||||
if( *zDelta!='\n' ){
|
||||
/* ERROR: size integer not terminated by "\n" */
|
||||
return -1;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Apply a delta.
|
||||
**
|
||||
** The output buffer should be big enough to hold the whole output
|
||||
** file and a NUL terminator at the end. The delta_output_size()
|
||||
** routine will determine this size for you.
|
||||
**
|
||||
** The delta string should be null-terminated. But the delta string
|
||||
** may contain embedded NUL characters (if the input and output are
|
||||
** binary files) so we also have to pass in the length of the delta in
|
||||
** the lenDelta parameter.
|
||||
**
|
||||
** This function returns the size of the output file in bytes (excluding
|
||||
** the final NUL terminator character). Except, if the delta string is
|
||||
** malformed or intended for use with a source file other than zSrc,
|
||||
** then this routine returns -1.
|
||||
**
|
||||
** Refer to the delta_create() documentation above for a description
|
||||
** of the delta file format.
|
||||
*/
|
||||
static int delta_apply(
|
||||
const char *zSrc, /* The source or pattern file */
|
||||
int lenSrc, /* Length of the source file */
|
||||
const char *zDelta, /* Delta to apply to the pattern */
|
||||
int lenDelta, /* Length of the delta */
|
||||
char *zOut /* Write the output into this preallocated buffer */
|
||||
){
|
||||
unsigned int limit;
|
||||
unsigned int total = 0;
|
||||
#ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
|
||||
char *zOrigOut = zOut;
|
||||
#endif
|
||||
|
||||
limit = getInt(&zDelta, &lenDelta);
|
||||
if( *zDelta!='\n' ){
|
||||
/* ERROR: size integer not terminated by "\n" */
|
||||
return -1;
|
||||
}
|
||||
zDelta++; lenDelta--;
|
||||
while( *zDelta && lenDelta>0 ){
|
||||
unsigned int cnt, ofst;
|
||||
cnt = getInt(&zDelta, &lenDelta);
|
||||
switch( zDelta[0] ){
|
||||
case '@': {
|
||||
zDelta++; lenDelta--;
|
||||
ofst = getInt(&zDelta, &lenDelta);
|
||||
if( lenDelta>0 && zDelta[0]!=',' ){
|
||||
/* ERROR: copy command not terminated by ',' */
|
||||
return -1;
|
||||
}
|
||||
zDelta++; lenDelta--;
|
||||
total += cnt;
|
||||
if( total>limit ){
|
||||
/* ERROR: copy exceeds output file size */
|
||||
return -1;
|
||||
}
|
||||
if( ofst+cnt > lenSrc ){
|
||||
/* ERROR: copy extends past end of input */
|
||||
return -1;
|
||||
}
|
||||
memcpy(zOut, &zSrc[ofst], cnt);
|
||||
zOut += cnt;
|
||||
break;
|
||||
}
|
||||
case ':': {
|
||||
zDelta++; lenDelta--;
|
||||
total += cnt;
|
||||
if( total>limit ){
|
||||
/* ERROR: insert command gives an output larger than predicted */
|
||||
return -1;
|
||||
}
|
||||
if( cnt>lenDelta ){
|
||||
/* ERROR: insert count exceeds size of delta */
|
||||
return -1;
|
||||
}
|
||||
memcpy(zOut, zDelta, cnt);
|
||||
zOut += cnt;
|
||||
zDelta += cnt;
|
||||
lenDelta -= cnt;
|
||||
break;
|
||||
}
|
||||
case ';': {
|
||||
zDelta++; lenDelta--;
|
||||
zOut[0] = 0;
|
||||
#ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
|
||||
if( cnt!=checksum(zOrigOut, total) ){
|
||||
/* ERROR: bad checksum */
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
if( total!=limit ){
|
||||
/* ERROR: generated size does not match predicted size */
|
||||
return -1;
|
||||
}
|
||||
return total;
|
||||
}
|
||||
default: {
|
||||
/* ERROR: unknown delta operator */
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* ERROR: unterminated delta */
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
** Analyze a delta. Figure out the total number of bytes copied from
|
||||
** source to target, and the total number of bytes inserted by the delta,
|
||||
** and return both numbers.
|
||||
*/
|
||||
static int delta_analyze(
|
||||
const char *zDelta, /* Delta to apply to the pattern */
|
||||
int lenDelta, /* Length of the delta */
|
||||
int *pnCopy, /* OUT: Number of bytes copied */
|
||||
int *pnInsert /* OUT: Number of bytes inserted */
|
||||
){
|
||||
unsigned int nInsert = 0;
|
||||
unsigned int nCopy = 0;
|
||||
|
||||
(void)getInt(&zDelta, &lenDelta);
|
||||
if( *zDelta!='\n' ){
|
||||
/* ERROR: size integer not terminated by "\n" */
|
||||
return -1;
|
||||
}
|
||||
zDelta++; lenDelta--;
|
||||
while( *zDelta && lenDelta>0 ){
|
||||
unsigned int cnt;
|
||||
cnt = getInt(&zDelta, &lenDelta);
|
||||
switch( zDelta[0] ){
|
||||
case '@': {
|
||||
zDelta++; lenDelta--;
|
||||
(void)getInt(&zDelta, &lenDelta);
|
||||
if( lenDelta>0 && zDelta[0]!=',' ){
|
||||
/* ERROR: copy command not terminated by ',' */
|
||||
return -1;
|
||||
}
|
||||
zDelta++; lenDelta--;
|
||||
nCopy += cnt;
|
||||
break;
|
||||
}
|
||||
case ':': {
|
||||
zDelta++; lenDelta--;
|
||||
nInsert += cnt;
|
||||
if( cnt>lenDelta ){
|
||||
/* ERROR: insert count exceeds size of delta */
|
||||
return -1;
|
||||
}
|
||||
zDelta += cnt;
|
||||
lenDelta -= cnt;
|
||||
break;
|
||||
}
|
||||
case ';': {
|
||||
*pnCopy = nCopy;
|
||||
*pnInsert = nInsert;
|
||||
return 0;
|
||||
}
|
||||
default: {
|
||||
/* ERROR: unknown delta operator */
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* ERROR: unterminated delta */
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
** SQL functions: fossildelta_create(X,Y)
|
||||
**
|
||||
** Return a delta for carrying X into Y.
|
||||
*/
|
||||
static void deltaCreateFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
const char *aOrig; int nOrig; /* old blob */
|
||||
const char *aNew; int nNew; /* new blob */
|
||||
char *aOut; int nOut; /* output delta */
|
||||
|
||||
assert( argc==2 );
|
||||
if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
|
||||
if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
|
||||
nOrig = sqlite3_value_bytes(argv[0]);
|
||||
aOrig = (const char*)sqlite3_value_blob(argv[0]);
|
||||
nNew = sqlite3_value_bytes(argv[1]);
|
||||
aNew = (const char*)sqlite3_value_blob(argv[1]);
|
||||
aOut = sqlite3_malloc64(nNew+70);
|
||||
if( aOut==0 ){
|
||||
sqlite3_result_error_nomem(context);
|
||||
}else{
|
||||
nOut = delta_create(aOrig, nOrig, aNew, nNew, aOut);
|
||||
if( nOut<0 ){
|
||||
sqlite3_free(aOut);
|
||||
sqlite3_result_error(context, "cannot create fossil delta", -1);
|
||||
}else{
|
||||
sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** SQL functions: fossildelta_apply(X,D)
|
||||
**
|
||||
** Return the result of applying delta D to input X.
|
||||
*/
|
||||
static void deltaApplyFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
const char *aOrig; int nOrig; /* The X input */
|
||||
const char *aDelta; int nDelta; /* The input delta (D) */
|
||||
char *aOut; int nOut, nOut2; /* The output */
|
||||
|
||||
assert( argc==2 );
|
||||
if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
|
||||
if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
|
||||
nOrig = sqlite3_value_bytes(argv[0]);
|
||||
aOrig = (const char*)sqlite3_value_blob(argv[0]);
|
||||
nDelta = sqlite3_value_bytes(argv[1]);
|
||||
aDelta = (const char*)sqlite3_value_blob(argv[1]);
|
||||
|
||||
/* Figure out the size of the output */
|
||||
nOut = delta_output_size(aDelta, nDelta);
|
||||
if( nOut<0 ){
|
||||
sqlite3_result_error(context, "corrupt fossil delta", -1);
|
||||
return;
|
||||
}
|
||||
aOut = sqlite3_malloc64((sqlite3_int64)nOut+1);
|
||||
if( aOut==0 ){
|
||||
sqlite3_result_error_nomem(context);
|
||||
}else{
|
||||
nOut2 = delta_apply(aOrig, nOrig, aDelta, nDelta, aOut);
|
||||
if( nOut2!=nOut ){
|
||||
sqlite3_free(aOut);
|
||||
sqlite3_result_error(context, "corrupt fossil delta", -1);
|
||||
}else{
|
||||
sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** SQL functions: fossildelta_output_size(D)
|
||||
**
|
||||
** Return the size of the output that results from applying delta D.
|
||||
*/
|
||||
static void deltaOutputSizeFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
const char *aDelta; int nDelta; /* The input delta (D) */
|
||||
int nOut; /* Size of output */
|
||||
assert( argc==1 );
|
||||
if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
|
||||
nDelta = sqlite3_value_bytes(argv[0]);
|
||||
aDelta = (const char*)sqlite3_value_blob(argv[0]);
|
||||
|
||||
/* Figure out the size of the output */
|
||||
nOut = delta_output_size(aDelta, nDelta);
|
||||
if( nOut<0 ){
|
||||
sqlite3_result_error(context, "corrupt fossil delta", -1);
|
||||
return;
|
||||
}else{
|
||||
sqlite3_result_int(context, nOut);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
__declspec(dllexport)
|
||||
#endif
|
||||
int sqlite3_fossildelta_init(
|
||||
sqlite3 *db,
|
||||
char **pzErrMsg,
|
||||
const sqlite3_api_routines *pApi
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
SQLITE_EXTENSION_INIT2(pApi);
|
||||
(void)pzErrMsg; /* Unused parameter */
|
||||
rc = sqlite3_create_function(db, "delta_create", 2, SQLITE_UTF8, 0,
|
||||
deltaCreateFunc, 0, 0);
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3_create_function(db, "delta_apply", 2, SQLITE_UTF8, 0,
|
||||
deltaApplyFunc, 0, 0);
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3_create_function(db, "delta_output_size", 1, SQLITE_UTF8, 0,
|
||||
deltaOutputSizeFunc, 0, 0);
|
||||
}
|
||||
return rc;
|
||||
}
|
Reference in New Issue
Block a user