mirror of
https://github.com/postgres/postgres.git
synced 2025-11-09 06:21:09 +03:00
Change case-folding of keywords to conform to SQL99 and fix misbehavior
in Turkish locale. Keywords are now checked under pure ASCII case-folding
rules ('A'-'Z'->'a'-'z' and nothing else). However, once a word is
determined not to be a keyword, it will be case-folded under the current
locale, same as before. See pghackers discussion 20-Feb-01.
This commit is contained in:
@@ -1,23 +1,22 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* keywords.c
|
||||
* lexical token lookup for reserved words in postgres SQL
|
||||
* lexical token lookup for reserved words in PostgreSQL
|
||||
*
|
||||
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.88 2001/01/24 19:43:01 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.89 2001/02/21 18:53:46 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <ctype.h>
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "nodes/parsenodes.h"
|
||||
#include "nodes/pg_list.h"
|
||||
#include "parser/keywords.h"
|
||||
#include "parser/parse.h"
|
||||
|
||||
@@ -286,18 +285,62 @@ static ScanKeyword ScanKeywords[] = {
|
||||
{"zone", ZONE},
|
||||
};
|
||||
|
||||
/*
|
||||
* ScanKeywordLookup - see if a given word is a keyword
|
||||
*
|
||||
* Returns a pointer to the ScanKeyword table entry, or NULL if no match.
|
||||
*
|
||||
* The match is done case-insensitively. Note that we deliberately use a
|
||||
* dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
|
||||
* even if we are in a locale where tolower() would produce more or different
|
||||
* translations. This is to conform to the SQL99 spec, which says that
|
||||
* keywords are to be matched in this way even though non-keyword identifiers
|
||||
* receive a different case-normalization mapping.
|
||||
*/
|
||||
ScanKeyword *
|
||||
ScanKeywordLookup(char *text)
|
||||
{
|
||||
ScanKeyword *low = &ScanKeywords[0];
|
||||
ScanKeyword *high = endof(ScanKeywords) - 1;
|
||||
ScanKeyword *middle;
|
||||
int difference;
|
||||
int len,
|
||||
i;
|
||||
char word[NAMEDATALEN];
|
||||
ScanKeyword *low;
|
||||
ScanKeyword *high;
|
||||
|
||||
len = strlen(text);
|
||||
/* We assume all keywords are shorter than NAMEDATALEN. */
|
||||
if (len >= NAMEDATALEN)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Apply an ASCII-only downcasing. We must not use tolower() since
|
||||
* it may produce the wrong translation in some locales (eg, Turkish),
|
||||
* and we don't trust isupper() very much either. In an ASCII-based
|
||||
* encoding the tests against A and Z are sufficient, but we also check
|
||||
* isupper() so that we will work correctly under EBCDIC. The actual
|
||||
* case conversion step should work for either ASCII or EBCDIC.
|
||||
*/
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
char ch = text[i];
|
||||
|
||||
if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch))
|
||||
ch += 'a' - 'A';
|
||||
word[i] = ch;
|
||||
}
|
||||
word[len] = '\0';
|
||||
|
||||
/*
|
||||
* Now do a binary search using plain strcmp() comparison.
|
||||
*/
|
||||
low = &ScanKeywords[0];
|
||||
high = endof(ScanKeywords) - 1;
|
||||
while (low <= high)
|
||||
{
|
||||
ScanKeyword *middle;
|
||||
int difference;
|
||||
|
||||
middle = low + (high - low) / 2;
|
||||
difference = strcmp(middle->name, text);
|
||||
difference = strcmp(middle->name, word);
|
||||
if (difference == 0)
|
||||
return middle;
|
||||
else if (difference < 0)
|
||||
|
||||
Reference in New Issue
Block a user