1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-11-09 14:21:03 +03:00

Treat byte-order marks (BOMs) at the start of a token as whitespace.

This enhancement is inspired by
[forum:/forumpost/ed8f696a20|forum post ed8f696a20].

FossilOrigin-Name: 3d55c21c167631f42d155aadec544e629bd078de9992aa5a74694d08bc52052b
This commit is contained in:
drh
2021-04-24 12:24:08 +00:00
parent 971ae9f66a
commit ba9ebc2d12
3 changed files with 24 additions and 15 deletions

View File

@@ -56,6 +56,7 @@
#define CC_ID 27 /* unicode characters usable in IDs */
#define CC_ILLEGAL 28 /* Illegal character */
#define CC_NUL 29 /* 0x00 */
#define CC_BOM 30 /* First byte of UTF8 BOM: 0xEF 0xBB 0xBF */
static const unsigned char aiClass[] = {
#ifdef SQLITE_ASCII
@@ -68,14 +69,14 @@ static const unsigned char aiClass[] = {
/* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 9, 28, 28, 28, 2,
/* 6x */ 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 28, 10, 28, 25, 28,
/* 8x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 9x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Ax */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Bx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Cx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Dx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Ex */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Fx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/* 8x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* 9x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* Ax */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* Bx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* Cx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* Dx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* Ex */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 30,
/* Fx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27
#endif
#ifdef SQLITE_EBCDIC
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */
@@ -535,6 +536,14 @@ int sqlite3GetToken(const unsigned char *z, int *tokenType){
i = 1;
break;
}
case CC_BOM: {
if( z[1]==0xbb && z[2]==0xbf ){
*tokenType = TK_SPACE;
return 3;
}
i = 1;
break;
}
case CC_NUL: {
*tokenType = TK_ILLEGAL;
return 0;