mirror of
https://github.com/postgres/postgres.git
synced 2025-07-27 12:41:57 +03:00
Clean up scan.l's handling of \r vs \n --- they are now reliably treated as equivalent, which should make Windows and Mac clients happier.
Also fix failure to handle SQL comments between segments of a multiline quoted literal.
This commit is contained in:
@ -9,7 +9,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.63 2000/01/26 05:56:43 momjian Exp $
|
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.64 2000/02/19 04:17:25 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -41,15 +41,19 @@ static char *parseCh;
|
|||||||
|
|
||||||
/* set up my input handler --- need one flavor for flex, one for lex */
|
/* set up my input handler --- need one flavor for flex, one for lex */
|
||||||
#if defined(FLEX_SCANNER)
|
#if defined(FLEX_SCANNER)
|
||||||
|
|
||||||
#define YY_NO_UNPUT
|
#define YY_NO_UNPUT
|
||||||
static int myinput(char* buf, int max);
|
static int myinput(char* buf, int max);
|
||||||
#undef YY_INPUT
|
#undef YY_INPUT
|
||||||
#define YY_INPUT(buf,result,max) {result = myinput(buf,max);}
|
#define YY_INPUT(buf,result,max) {result = myinput(buf,max);}
|
||||||
#else
|
|
||||||
|
#else /* !FLEX_SCANNER */
|
||||||
|
|
||||||
#undef input
|
#undef input
|
||||||
int input();
|
int input();
|
||||||
#undef unput
|
#undef unput
|
||||||
void unput(char);
|
void unput(char);
|
||||||
|
|
||||||
#endif /* FLEX_SCANNER */
|
#endif /* FLEX_SCANNER */
|
||||||
|
|
||||||
extern YYSTYPE yylval;
|
extern YYSTYPE yylval;
|
||||||
@ -68,27 +72,22 @@ static int literalalloc; /* current allocated buffer size */
|
|||||||
static void addlit(char *ytext, int yleng);
|
static void addlit(char *ytext, int yleng);
|
||||||
|
|
||||||
%}
|
%}
|
||||||
/* OK, here is a short description of lex/flex rules behavior.
|
/*
|
||||||
|
* OK, here is a short description of lex/flex rules behavior.
|
||||||
* The longest pattern which matches an input string is always chosen.
|
* The longest pattern which matches an input string is always chosen.
|
||||||
* For equal-length patterns, the first occurring in the rules list is chosen.
|
* For equal-length patterns, the first occurring in the rules list is chosen.
|
||||||
* INITIAL is the starting condition, to which all non-conditional rules apply.
|
* INITIAL is the starting state, to which all non-conditional rules apply.
|
||||||
* When in an exclusive condition, only those rules defined for that condition apply.
|
* Exclusive states change parsing rules while the state is active. When in
|
||||||
|
* an exclusive state, only those rules defined for that state apply.
|
||||||
*
|
*
|
||||||
* Exclusive states change parsing rules while the state is active.
|
* We use exclusive states for quoted strings, extended comments,
|
||||||
* There are exclusive states for quoted strings, extended comments,
|
* and to eliminate parsing troubles for numeric strings.
|
||||||
* and to eliminate parsing troubles for numeric strings.
|
|
||||||
* Exclusive states:
|
* Exclusive states:
|
||||||
* <xb> binary numeric string - thomas 1997-11-16
|
* <xb> binary numeric string - thomas 1997-11-16
|
||||||
* <xc> extended C-style comments - tgl 1997-07-12
|
* <xc> extended C-style comments - tgl 1997-07-12
|
||||||
* <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
|
* <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
|
||||||
* <xh> hexadecimal numeric string - thomas 1997-11-16
|
* <xh> hexadecimal numeric string - thomas 1997-11-16
|
||||||
* <xq> quoted strings - tgl 1997-07-30
|
* <xq> quoted strings - tgl 1997-07-30
|
||||||
*
|
|
||||||
* The "extended comment" syntax closely resembles allowable operator syntax.
|
|
||||||
* So, when in condition <xc>, only strings which would terminate the
|
|
||||||
* "extended comment" trigger any action other than "ignore".
|
|
||||||
* Be sure to match _any_ candidate comment, including those with appended
|
|
||||||
* operator-like symbols. - thomas 1997-07-14
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
%x xb
|
%x xb
|
||||||
@ -101,29 +100,29 @@ static void addlit(char *ytext, int yleng);
|
|||||||
*/
|
*/
|
||||||
xbstart [bB]{quote}
|
xbstart [bB]{quote}
|
||||||
xbstop {quote}
|
xbstop {quote}
|
||||||
xbinside [^']*
|
xbinside [^']+
|
||||||
xbcat {quote}{space}*\n{space}*{quote}
|
xbcat {quote}{whitespace_with_newline}{quote}
|
||||||
|
|
||||||
/* Hexadecimal number
|
/* Hexadecimal number
|
||||||
*/
|
*/
|
||||||
xhstart [xX]{quote}
|
xhstart [xX]{quote}
|
||||||
xhstop {quote}
|
xhstop {quote}
|
||||||
xhinside [^']*
|
xhinside [^']+
|
||||||
xhcat {quote}{space}*\n{space}*{quote}
|
xhcat {quote}{whitespace_with_newline}{quote}
|
||||||
|
|
||||||
/* Extended quote
|
/* Extended quote
|
||||||
* xqdouble implements SQL92 embedded quote
|
* xqdouble implements SQL92 embedded quote
|
||||||
* xqcat allows strings to cross input lines
|
* xqcat allows strings to cross input lines
|
||||||
* Note: reduction of '' and \ sequences to output text is done in scanstr(),
|
* Note: reduction of '' and \ sequences to output text is done in scanstr(),
|
||||||
* not by rules here.
|
* not by rules here. But we do get rid of xqcat sequences here.
|
||||||
*/
|
*/
|
||||||
quote '
|
quote '
|
||||||
xqstart {quote}
|
xqstart {quote}
|
||||||
xqstop {quote}
|
xqstop {quote}
|
||||||
xqdouble {quote}{quote}
|
xqdouble {quote}{quote}
|
||||||
xqinside [^\\']*
|
xqinside [^\\']+
|
||||||
xqliteral [\\](.|\n)
|
xqliteral [\\](.|\n)
|
||||||
xqcat {quote}{space}*\n{space}*{quote}
|
xqcat {quote}{whitespace_with_newline}{quote}
|
||||||
|
|
||||||
/* Delimited quote
|
/* Delimited quote
|
||||||
* Allows embedded spaces and other special characters into identifiers.
|
* Allows embedded spaces and other special characters into identifiers.
|
||||||
@ -131,16 +130,28 @@ xqcat {quote}{space}*\n{space}*{quote}
|
|||||||
dquote \"
|
dquote \"
|
||||||
xdstart {dquote}
|
xdstart {dquote}
|
||||||
xdstop {dquote}
|
xdstop {dquote}
|
||||||
xdinside [^"]*
|
xdinside [^"]+
|
||||||
|
|
||||||
/* Comments
|
/* C-style comments
|
||||||
* Ignored by the scanner and parser.
|
* Ignored by the scanner and parser.
|
||||||
|
*
|
||||||
|
* The "extended comment" syntax closely resembles allowable operator syntax.
|
||||||
|
* The tricky part here is to get lex to recognize a string starting with
|
||||||
|
* slash-star as a comment, when interpreting it as an operator would produce
|
||||||
|
* a longer match --- remember lex will prefer a longer match! So, we have
|
||||||
|
* to provide a special rule for xcline (a complete comment that could
|
||||||
|
* otherwise look like an operator), as well as append {op_and_self}* to
|
||||||
|
* xcstart so that it matches at least as much as {operator} would.
|
||||||
|
* Then the tie-breaker (first matching rule of same length) wins.
|
||||||
|
* There is still a problem if someone writes, eg, slash-star-star-slash-plus.
|
||||||
|
* It'll be taken as an xcstart, rather than xcline and an operator as one
|
||||||
|
* could wish. I don't see any way around that given lex's behavior;
|
||||||
|
* that someone will just have to write a space after the comment.
|
||||||
*/
|
*/
|
||||||
xcline [\/][\*].*[\*][\/]{space}*\n*
|
xcline \/\*{op_and_self}*\*\/
|
||||||
xcstart [\/][\*]{op_and_self}*
|
xcstart \/\*{op_and_self}*
|
||||||
xcstop {op_and_self}*[\*][\/]({space}*|\n)
|
xcstop \*+\/
|
||||||
xcinside [^*]*
|
xcinside ([^*]+)|(\*+[^/])
|
||||||
xcstar [^/]
|
|
||||||
|
|
||||||
digit [0-9]
|
digit [0-9]
|
||||||
letter [\200-\377_A-Za-z]
|
letter [\200-\377_A-Za-z]
|
||||||
@ -161,13 +172,44 @@ operator {op_and_self}+
|
|||||||
|
|
||||||
integer {digit}+
|
integer {digit}+
|
||||||
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
|
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
|
||||||
real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
|
real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
|
||||||
|
|
||||||
param \${integer}
|
param \${integer}
|
||||||
|
|
||||||
comment ("--"|"//").*
|
/*
|
||||||
|
* In order to make the world safe for Windows and Mac clients as well as
|
||||||
|
* Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
|
||||||
|
* sequence will be seen as two successive newlines, but that doesn't cause
|
||||||
|
* any problems. SQL92-style comments, which start with -- and extend to the
|
||||||
|
* next newline, are treated as equivalent to a single whitespace character.
|
||||||
|
*
|
||||||
|
* NOTE a fine point: if there is no newline following --, we will absorb
|
||||||
|
* everything to the end of the input as a comment. This is correct. Older
|
||||||
|
* versions of Postgres failed to recognize -- as a comment if the input
|
||||||
|
* did not end with a newline.
|
||||||
|
*
|
||||||
|
* XXX perhaps \f (formfeed) should be treated as a newline as well?
|
||||||
|
*/
|
||||||
|
|
||||||
space [ \t\n\r\f]
|
space [ \t\n\r\f]
|
||||||
|
horiz_space [ \t\f]
|
||||||
|
newline [\n\r]
|
||||||
|
non_newline [^\n\r]
|
||||||
|
|
||||||
|
comment (("--"|"//"){non_newline}*)
|
||||||
|
|
||||||
|
whitespace ({space}|{comment})
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SQL92 requires at least one newline in the whitespace separating
|
||||||
|
* string literals that are to be concatenated. Silly, but who are we
|
||||||
|
* to argue? Note that {whitespace_with_newline} should not have * after
|
||||||
|
* it, whereas {whitespace} should generally have a * after it...
|
||||||
|
*/
|
||||||
|
|
||||||
|
horiz_whitespace ({horiz_space}|{comment})
|
||||||
|
whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
|
||||||
|
|
||||||
other .
|
other .
|
||||||
|
|
||||||
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
|
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
|
||||||
@ -181,14 +223,16 @@ other .
|
|||||||
* of escaped-quote "\'".
|
* of escaped-quote "\'".
|
||||||
* Other embedded escaped characters are matched explicitly and the leading
|
* Other embedded escaped characters are matched explicitly and the leading
|
||||||
* backslash is dropped from the string. - thomas 1997-09-24
|
* backslash is dropped from the string. - thomas 1997-09-24
|
||||||
|
* Note that xcline must appear before xcstart, which must appear before
|
||||||
|
* operator, as explained above! Also whitespace (comment) must appear
|
||||||
|
* before operator.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
%%
|
%%
|
||||||
{comment} { /* ignore */ }
|
{whitespace} { /* ignore */ }
|
||||||
|
|
||||||
{xcline} { /* ignore */ }
|
{xcline} { /* ignore */ }
|
||||||
|
|
||||||
<xc>{xcstar} |
|
|
||||||
{xcstart} { BEGIN(xc); }
|
{xcstart} { BEGIN(xc); }
|
||||||
|
|
||||||
<xc>{xcstop} { BEGIN(INITIAL); }
|
<xc>{xcstop} { BEGIN(INITIAL); }
|
||||||
@ -216,6 +260,7 @@ other .
|
|||||||
}
|
}
|
||||||
<xh>{xhcat} |
|
<xh>{xhcat} |
|
||||||
<xb>{xbcat} {
|
<xb>{xbcat} {
|
||||||
|
/* ignore */
|
||||||
}
|
}
|
||||||
|
|
||||||
{xhstart} {
|
{xhstart} {
|
||||||
@ -249,6 +294,7 @@ other .
|
|||||||
addlit(yytext, yyleng);
|
addlit(yytext, yyleng);
|
||||||
}
|
}
|
||||||
<xq>{xqcat} {
|
<xq>{xqcat} {
|
||||||
|
/* ignore */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -270,18 +316,18 @@ other .
|
|||||||
{self} { return yytext[0]; }
|
{self} { return yytext[0]; }
|
||||||
|
|
||||||
{operator} {
|
{operator} {
|
||||||
if (strcmp((char*)yytext,"!=") == 0)
|
if (strcmp((char*)yytext, "!=") == 0)
|
||||||
yylval.str = pstrdup("<>"); /* compatability */
|
yylval.str = pstrdup("<>"); /* compatibility */
|
||||||
else
|
else
|
||||||
yylval.str = pstrdup((char*)yytext);
|
yylval.str = pstrdup((char*)yytext);
|
||||||
return Op;
|
return Op;
|
||||||
}
|
}
|
||||||
|
|
||||||
{param} {
|
{param} {
|
||||||
yylval.ival = atoi((char*)&yytext[1]);
|
yylval.ival = atoi((char*)&yytext[1]);
|
||||||
return PARAM;
|
return PARAM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
{integer} {
|
{integer} {
|
||||||
char* endptr;
|
char* endptr;
|
||||||
|
|
||||||
@ -354,7 +400,6 @@ other .
|
|||||||
return IDENT;
|
return IDENT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
{space} { /* ignore */ }
|
|
||||||
|
|
||||||
{other} { return yytext[0]; }
|
{other} { return yytext[0]; }
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user