mirror of
https://github.com/sqlite/sqlite.git
synced 2025-11-08 03:22:21 +03:00
A new approach for UTF-8 translation. (CVS 4004)
FossilOrigin-Name: 6c8ad2790eaede90b3f1ef62614e667178b2a8c4
This commit is contained in:
@@ -11,7 +11,7 @@
|
||||
*************************************************************************
|
||||
** Internal interface definitions for SQLite.
|
||||
**
|
||||
** @(#) $Id: sqliteInt.h,v 1.566 2007/05/12 12:08:51 drh Exp $
|
||||
** @(#) $Id: sqliteInt.h,v 1.567 2007/05/15 11:55:09 drh Exp $
|
||||
*/
|
||||
#ifndef _SQLITEINT_H_
|
||||
#define _SQLITEINT_H_
|
||||
@@ -1536,6 +1536,63 @@ typedef struct {
|
||||
*/
|
||||
extern int sqlite3_always_code_trigger_setup;
|
||||
|
||||
/*
|
||||
** A lookup table used by the SQLITE_READ_UTF8 macro. The definition
|
||||
** is in utf.c.
|
||||
*/
|
||||
extern const unsigned char sqlite3UtfTrans1[];
|
||||
|
||||
/*
|
||||
** Macros for reading UTF8 characters.
|
||||
**
|
||||
** SQLITE_READ_UTF8(x,c) reads a single UTF8 value out of x and writes
|
||||
** that value into c. The type of x must be unsigned char*. The type
|
||||
** of c must be unsigned int.
|
||||
**
|
||||
** SQLITE_SKIP_UTF8(x) advances x forward by one character. The type of
|
||||
** x must be unsigned char*.
|
||||
**
|
||||
** Notes On Invalid UTF-8:
|
||||
**
|
||||
** * These macros never allow a 7-bit character (0x00 through 0x7f) to
|
||||
** be encoded as a multi-byte character. Any multi-byte character that
|
||||
** attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd.
|
||||
**
|
||||
** * These macros never allow a UTF16 surrogate value to be encoded.
|
||||
** If a multi-byte character attempts to encode a value between
|
||||
** 0xd800 and 0xe000 then it is rendered as 0xfffd.
|
||||
**
|
||||
** * Bytes in the range of 0x80 through 0xbf which occur as the first
|
||||
** byte of a character are interpreted as single-byte characters
|
||||
** and rendered as themselves even though they are technically
|
||||
** invalid characters.
|
||||
**
|
||||
** * These routines accept an infinite number of different UTF8 encodings
|
||||
** for unicode values 0x80 and greater. They do not change over-length
|
||||
** encodings to 0xfffd as some systems recommend.
|
||||
**
|
||||
*/
|
||||
/*
** SQLITE_READ_UTF8(zIn, c)
**
** Decode one UTF-8 character starting at zIn, store its value in c and
** leave zIn pointing just past the bytes that were consumed.  zIn must be
** a modifiable pointer to unsigned char and c an unsigned int lvalue;
** both arguments are evaluated more than once, so they must not have
** side effects.
**
**   * A first byte below 0xc0 is returned unchanged as the character.
**   * Otherwise the first byte is mapped through sqlite3UtfTrans1[] and
**     every following byte of the form 10xxxxxx contributes six more
**     bits.  The number of continuation bytes is not bounded; the loop
**     stops at the first byte that is not a continuation byte.
**   * A multi-byte result that is below 0x80, lies in 0xD800..0xDFFF, or
**     equals 0xFFFE or 0xFFFF is replaced by 0xFFFD.
**
** The expansion is a bare brace block (not do{}while(0)), so it must not
** be used as the unbraced body of an if that has an else clause.
*/
#define SQLITE_READ_UTF8(zIn, c) {                          \
  (c) = *((zIn)++);                                         \
  if( (c)>=0xc0 ){                                          \
    (c) = sqlite3UtfTrans1[(c)-0xc0];                       \
    while( (*(zIn) & 0xc0)==0x80 ){                         \
      (c) = ((c)<<6) + (0x3f & *((zIn)++));                 \
    }                                                       \
    if( (c)<0x80                                            \
        || ((c)&0xFFFFF800)==0xD800                         \
        || ((c)&0xFFFFFFFE)==0xFFFE ){  (c) = 0xFFFD; }     \
  }                                                         \
}
|
||||
#define SQLITE_SKIP_UTF8(zIn) { \
|
||||
if( (*(zIn++))>=0xc0 ){ \
|
||||
while( (*zIn & 0xc0)==0x80 ){ zIn++; } \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** The SQLITE_CORRUPT_BKPT macro can be either a constant (for production
|
||||
** builds) or a function call (for debugging). If it is a function call,
|
||||
@@ -1753,7 +1810,7 @@ int sqlite3GetInt32(const char *, int*);
|
||||
int sqlite3FitsIn64Bits(const char *);
|
||||
int sqlite3Utf16ByteLen(const void *pData, int nChar);
|
||||
int sqlite3Utf8CharLen(const char *pData, int nByte);
|
||||
int sqlite3ReadUtf8(const unsigned char *);
|
||||
u32 sqlite3ReadUtf8(const unsigned char *);
|
||||
int sqlite3PutVarint(unsigned char *, u64);
|
||||
int sqlite3GetVarint(const unsigned char *, u64 *);
|
||||
int sqlite3GetVarint32(const unsigned char *, u32 *);
|
||||
|
||||
Reference in New Issue
Block a user