mirror of
https://github.com/postgres/postgres.git
synced 2025-07-27 12:41:57 +03:00
Rename default text search parser's "uri" token type to "url_path",
per recommendation from Alvaro. This doesn't force initdb, since the numeric token type in the catalogs doesn't change; note, however, that the expected regression test output has changed.
This commit is contained in:
@ -1,4 +1,4 @@
|
|||||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.27 2007/10/27 00:19:45 tgl Exp $ -->
|
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.28 2007/10/27 16:01:08 tgl Exp $ -->
|
||||||
|
|
||||||
<chapter id="textsearch">
|
<chapter id="textsearch">
|
||||||
<title id="textsearch-title">Full Text Search</title>
|
<title id="textsearch-title">Full Text Search</title>
|
||||||
@ -1815,8 +1815,8 @@ LIMIT 10;
|
|||||||
<entry><literal>example.com</literal></entry>
|
<entry><literal>example.com</literal></entry>
|
||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry><literal>uri</></entry>
|
<entry><literal>url_path</></entry>
|
||||||
<entry>URI</entry>
|
<entry>URL path</entry>
|
||||||
<entry><literal>/stuff/index.html</literal>, in the context of a URL</entry>
|
<entry><literal>/stuff/index.html</literal>, in the context of a URL</entry>
|
||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
@ -1907,7 +1907,7 @@ SELECT alias, description, token FROM ts_debug('http://example.com/stuff/index.h
|
|||||||
protocol | Protocol head | http://
|
protocol | Protocol head | http://
|
||||||
url | URL | example.com/stuff/index.html
|
url | URL | example.com/stuff/index.html
|
||||||
host | Host | example.com
|
host | Host | example.com
|
||||||
uri | URI | /stuff/index.html
|
url_path | URL path | /stuff/index.html
|
||||||
</programlisting>
|
</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
@ -2632,7 +2632,7 @@ ALTER TEXT SEARCH CONFIGURATION pg
|
|||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
ALTER TEXT SEARCH CONFIGURATION pg
|
ALTER TEXT SEARCH CONFIGURATION pg
|
||||||
DROP MAPPING FOR email, url, sfloat, uri, float;
|
DROP MAPPING FOR email, url, url_path, sfloat, float;
|
||||||
</programlisting>
|
</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
@ -2939,7 +2939,7 @@ SELECT * FROM ts_token_type('default');
|
|||||||
15 | numhword | Hyphenated word, letters and digits
|
15 | numhword | Hyphenated word, letters and digits
|
||||||
16 | asciihword | Hyphenated word, all ASCII
|
16 | asciihword | Hyphenated word, all ASCII
|
||||||
17 | hword | Hyphenated word, all letters
|
17 | hword | Hyphenated word, all letters
|
||||||
18 | uri | URI
|
18 | url_path | URL path
|
||||||
19 | file | File or path name
|
19 | file | File or path name
|
||||||
20 | float | Decimal notation
|
20 | float | Decimal notation
|
||||||
21 | int | Signed integer
|
21 | int | Signed integer
|
||||||
@ -3308,8 +3308,8 @@ Parser: "pg_catalog.default"
|
|||||||
numword | simple
|
numword | simple
|
||||||
sfloat | simple
|
sfloat | simple
|
||||||
uint | simple
|
uint | simple
|
||||||
uri | simple
|
|
||||||
url | simple
|
url | simple
|
||||||
|
url_path | simple
|
||||||
version | simple
|
version | simple
|
||||||
word | russian_stem
|
word | russian_stem
|
||||||
</programlisting>
|
</programlisting>
|
||||||
@ -3398,8 +3398,8 @@ Parser: "pg_catalog.default"
|
|||||||
sfloat | Scientific notation
|
sfloat | Scientific notation
|
||||||
tag | HTML tag
|
tag | HTML tag
|
||||||
uint | Unsigned integer
|
uint | Unsigned integer
|
||||||
uri | URI
|
|
||||||
url | URL
|
url | URL
|
||||||
|
url_path | URL path
|
||||||
version | Version number
|
version | Version number
|
||||||
word | Word, all letters
|
word | Word, all letters
|
||||||
(23 rows)
|
(23 rows)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
-- $PostgreSQL: pgsql/src/backend/snowball/snowball.sql.in,v 1.5 2007/10/23 20:46:12 tgl Exp $$
|
-- $PostgreSQL: pgsql/src/backend/snowball/snowball.sql.in,v 1.6 2007/10/27 16:01:08 tgl Exp $$
|
||||||
|
|
||||||
-- text search configuration for _LANGNAME_ language
|
-- text search configuration for _LANGNAME_ language
|
||||||
CREATE TEXT SEARCH DICTIONARY _DICTNAME_
|
CREATE TEXT SEARCH DICTIONARY _DICTNAME_
|
||||||
@ -12,7 +12,8 @@ CREATE TEXT SEARCH CONFIGURATION _CFGNAME_
|
|||||||
COMMENT ON TEXT SEARCH CONFIGURATION _CFGNAME_ IS 'configuration for _LANGNAME_ language';
|
COMMENT ON TEXT SEARCH CONFIGURATION _CFGNAME_ IS 'configuration for _LANGNAME_ language';
|
||||||
|
|
||||||
ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING
|
ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING
|
||||||
FOR email, url, host, sfloat, version, uri, file, float, int, uint,
|
FOR email, url, url_path, host, file, version,
|
||||||
|
sfloat, float, int, uint,
|
||||||
numword, hword_numpart, numhword
|
numword, hword_numpart, numhword
|
||||||
WITH simple;
|
WITH simple;
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.4 2007/10/23 20:46:12 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.5 2007/10/27 16:01:08 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -41,7 +41,7 @@
|
|||||||
#define NUMHWORD 15
|
#define NUMHWORD 15
|
||||||
#define ASCIIHWORD 16
|
#define ASCIIHWORD 16
|
||||||
#define HWORD 17
|
#define HWORD 17
|
||||||
#define URI 18
|
#define URLPATH 18
|
||||||
#define FILEPATH 19
|
#define FILEPATH 19
|
||||||
#define DECIMAL 20
|
#define DECIMAL 20
|
||||||
#define SIGNEDINT 21
|
#define SIGNEDINT 21
|
||||||
@ -69,7 +69,7 @@ static const char * const tok_alias[] = {
|
|||||||
"numhword",
|
"numhword",
|
||||||
"asciihword",
|
"asciihword",
|
||||||
"hword",
|
"hword",
|
||||||
"uri",
|
"url_path",
|
||||||
"file",
|
"file",
|
||||||
"float",
|
"float",
|
||||||
"int",
|
"int",
|
||||||
@ -96,7 +96,7 @@ static const char * const lex_descr[] = {
|
|||||||
"Hyphenated word, letters and digits",
|
"Hyphenated word, letters and digits",
|
||||||
"Hyphenated word, all ASCII",
|
"Hyphenated word, all ASCII",
|
||||||
"Hyphenated word, all letters",
|
"Hyphenated word, all letters",
|
||||||
"URI",
|
"URL path",
|
||||||
"File or path name",
|
"File or path name",
|
||||||
"Decimal notation",
|
"Decimal notation",
|
||||||
"Signed integer",
|
"Signed integer",
|
||||||
@ -164,9 +164,9 @@ typedef enum
|
|||||||
TPS_InPathSecond,
|
TPS_InPathSecond,
|
||||||
TPS_InFile,
|
TPS_InFile,
|
||||||
TPS_InFileNext,
|
TPS_InFileNext,
|
||||||
TPS_InURIFirst,
|
TPS_InURLPathFirst,
|
||||||
TPS_InURIStart,
|
TPS_InURLPathStart,
|
||||||
TPS_InURI,
|
TPS_InURLPath,
|
||||||
TPS_InFURL,
|
TPS_InFURL,
|
||||||
TPS_InProtocolFirst,
|
TPS_InProtocolFirst,
|
||||||
TPS_InProtocolSecond,
|
TPS_InProtocolSecond,
|
||||||
@ -624,7 +624,7 @@ p_ishost(TParser * prs)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
p_isURI(TParser * prs)
|
p_isURLPath(TParser * prs)
|
||||||
{
|
{
|
||||||
TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte);
|
TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte);
|
||||||
int res = 0;
|
int res = 0;
|
||||||
@ -632,7 +632,7 @@ p_isURI(TParser * prs)
|
|||||||
tmpprs->state = newTParserPosition(tmpprs->state);
|
tmpprs->state = newTParserPosition(tmpprs->state);
|
||||||
tmpprs->state->state = TPS_InFileFirst;
|
tmpprs->state->state = TPS_InFileFirst;
|
||||||
|
|
||||||
if (TParserGet(tmpprs) && (tmpprs->type == URI || tmpprs->type == FILEPATH))
|
if (TParserGet(tmpprs) && (tmpprs->type == URLPATH || tmpprs->type == FILEPATH))
|
||||||
{
|
{
|
||||||
prs->state->posbyte += tmpprs->lenbytelexeme;
|
prs->state->posbyte += tmpprs->lenbytelexeme;
|
||||||
prs->state->poschar += tmpprs->lencharlexeme;
|
prs->state->poschar += tmpprs->lencharlexeme;
|
||||||
@ -995,7 +995,7 @@ static TParserStateActionItem actionTPS_InHostDomain[] = {
|
|||||||
{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
|
{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
|
||||||
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
|
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
|
||||||
{p_isdigit, 0, A_POP, TPS_Null, 0, NULL},
|
{p_isdigit, 0, A_POP, TPS_Null, 0, NULL},
|
||||||
{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURIStart, HOST, NULL},
|
{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},
|
||||||
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
|
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
|
||||||
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
|
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
|
||||||
};
|
};
|
||||||
@ -1009,7 +1009,7 @@ static TParserStateActionItem actionTPS_InPortFirst[] = {
|
|||||||
static TParserStateActionItem actionTPS_InPort[] = {
|
static TParserStateActionItem actionTPS_InPort[] = {
|
||||||
{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
|
{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
|
||||||
{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
|
{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
|
||||||
{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURIStart, HOST, NULL},
|
{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},
|
||||||
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
|
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
|
||||||
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
|
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
|
||||||
};
|
};
|
||||||
@ -1042,7 +1042,7 @@ static TParserStateActionItem actionTPS_InFileFirst[] = {
|
|||||||
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
|
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
|
||||||
{p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL},
|
{p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL},
|
||||||
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
|
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
|
||||||
{p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
|
{p_iseqC, '?', A_PUSH, TPS_InURLPathFirst, 0, NULL},
|
||||||
{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
|
{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
|
||||||
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
||||||
};
|
};
|
||||||
@ -1089,7 +1089,7 @@ static TParserStateActionItem actionTPS_InFile[] = {
|
|||||||
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
|
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
|
||||||
{p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
|
{p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
|
||||||
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
|
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
|
||||||
{p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
|
{p_iseqC, '?', A_PUSH, TPS_InURLPathFirst, 0, NULL},
|
||||||
{NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
|
{NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1101,29 +1101,29 @@ static TParserStateActionItem actionTPS_InFileNext[] = {
|
|||||||
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
static TParserStateActionItem actionTPS_InURIFirst[] = {
|
static TParserStateActionItem actionTPS_InURLPathFirst[] = {
|
||||||
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
|
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
|
||||||
{p_iseqC, '"', A_POP, TPS_Null, 0, NULL},
|
{p_iseqC, '"', A_POP, TPS_Null, 0, NULL},
|
||||||
{p_iseqC, '\'', A_POP, TPS_Null, 0, NULL},
|
{p_iseqC, '\'', A_POP, TPS_Null, 0, NULL},
|
||||||
{p_isnotspace, 0, A_CLEAR, TPS_InURI, 0, NULL},
|
{p_isnotspace, 0, A_CLEAR, TPS_InURLPath, 0, NULL},
|
||||||
{NULL, 0, A_POP, TPS_Null, 0, NULL},
|
{NULL, 0, A_POP, TPS_Null, 0, NULL},
|
||||||
};
|
};
|
||||||
|
|
||||||
static TParserStateActionItem actionTPS_InURIStart[] = {
|
static TParserStateActionItem actionTPS_InURLPathStart[] = {
|
||||||
{NULL, 0, A_NEXT, TPS_InURI, 0, NULL}
|
{NULL, 0, A_NEXT, TPS_InURLPath, 0, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
static TParserStateActionItem actionTPS_InURI[] = {
|
static TParserStateActionItem actionTPS_InURLPath[] = {
|
||||||
{p_isEOF, 0, A_BINGO, TPS_Base, URI, NULL},
|
{p_isEOF, 0, A_BINGO, TPS_Base, URLPATH, NULL},
|
||||||
{p_iseqC, '"', A_BINGO, TPS_Base, URI, NULL},
|
{p_iseqC, '"', A_BINGO, TPS_Base, URLPATH, NULL},
|
||||||
{p_iseqC, '\'', A_BINGO, TPS_Base, URI, NULL},
|
{p_iseqC, '\'', A_BINGO, TPS_Base, URLPATH, NULL},
|
||||||
{p_isnotspace, 0, A_NEXT, TPS_InURI, 0, NULL},
|
{p_isnotspace, 0, A_NEXT, TPS_InURLPath, 0, NULL},
|
||||||
{NULL, 0, A_BINGO, TPS_Base, URI, NULL}
|
{NULL, 0, A_BINGO, TPS_Base, URLPATH, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
static TParserStateActionItem actionTPS_InFURL[] = {
|
static TParserStateActionItem actionTPS_InFURL[] = {
|
||||||
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
|
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
|
||||||
{p_isURI, 0, A_BINGO | A_CLRALL, TPS_Base, URL_T, SpecialFURL},
|
{p_isURLPath, 0, A_BINGO | A_CLRALL, TPS_Base, URL_T, SpecialFURL},
|
||||||
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1344,9 +1344,9 @@ static const TParserStateAction Actions[] = {
|
|||||||
{TPS_InPathSecond, actionTPS_InPathSecond},
|
{TPS_InPathSecond, actionTPS_InPathSecond},
|
||||||
{TPS_InFile, actionTPS_InFile},
|
{TPS_InFile, actionTPS_InFile},
|
||||||
{TPS_InFileNext, actionTPS_InFileNext},
|
{TPS_InFileNext, actionTPS_InFileNext},
|
||||||
{TPS_InURIFirst, actionTPS_InURIFirst},
|
{TPS_InURLPathFirst, actionTPS_InURLPathFirst},
|
||||||
{TPS_InURIStart, actionTPS_InURIStart},
|
{TPS_InURLPathStart, actionTPS_InURLPathStart},
|
||||||
{TPS_InURI, actionTPS_InURI},
|
{TPS_InURLPath, actionTPS_InURLPath},
|
||||||
{TPS_InFURL, actionTPS_InFURL},
|
{TPS_InFURL, actionTPS_InFURL},
|
||||||
{TPS_InProtocolFirst, actionTPS_InProtocolFirst},
|
{TPS_InProtocolFirst, actionTPS_InProtocolFirst},
|
||||||
{TPS_InProtocolSecond, actionTPS_InProtocolSecond},
|
{TPS_InProtocolSecond, actionTPS_InProtocolSecond},
|
||||||
|
@ -227,7 +227,7 @@ SELECT * FROM ts_token_type('default');
|
|||||||
15 | numhword | Hyphenated word, letters and digits
|
15 | numhword | Hyphenated word, letters and digits
|
||||||
16 | asciihword | Hyphenated word, all ASCII
|
16 | asciihword | Hyphenated word, all ASCII
|
||||||
17 | hword | Hyphenated word, all letters
|
17 | hword | Hyphenated word, all letters
|
||||||
18 | uri | URI
|
18 | url_path | URL path
|
||||||
19 | file | File or path name
|
19 | file | File or path name
|
||||||
20 | float | Decimal notation
|
20 | float | Decimal notation
|
||||||
21 | int | Signed integer
|
21 | int | Signed integer
|
||||||
|
Reference in New Issue
Block a user