mirror of
https://github.com/postgres/postgres.git
synced 2025-07-26 01:22:12 +03:00
August 13, 2002
Use parser of OpenFTS v0.33. -- Teodor Sigaev
This commit is contained in:
@ -4,6 +4,11 @@ a searchable data type (textual) with indexed access.
|
|||||||
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
|
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
|
||||||
(oleg@sai.msu.su).
|
(oleg@sai.msu.su).
|
||||||
|
|
||||||
|
CHANGES:
|
||||||
|
|
||||||
|
August 13, 2002
|
||||||
|
Use parser of OpenFTS v0.33.
|
||||||
|
|
||||||
IMPORTANT NOTICE:
|
IMPORTANT NOTICE:
|
||||||
|
|
||||||
This is a first step of our work on integration of OpenFTS
|
This is a first step of our work on integration of OpenFTS
|
||||||
|
@ -2,28 +2,33 @@
|
|||||||
#define __DEFLEX_H__
|
#define __DEFLEX_H__
|
||||||
|
|
||||||
/* remember !!!! */
|
/* remember !!!! */
|
||||||
#define LASTNUM 19
|
#define LASTNUM 23
|
||||||
|
|
||||||
#define LATWORD 1
|
#define LATWORD 1
|
||||||
#define NONLATINWORD 2
|
#define CYRWORD 2
|
||||||
#define UWORD 3
|
#define UWORD 3
|
||||||
#define EMAIL 4
|
#define EMAIL 4
|
||||||
#define FURL 5
|
#define FURL 5
|
||||||
#define HOST 6
|
#define HOST 6
|
||||||
#define FLOAT 7
|
#define SCIENTIFIC 7
|
||||||
#define FINT 8
|
#define VERSIONNUMBER 8
|
||||||
#define PARTWORD 9
|
#define PARTHYPHENWORD 9
|
||||||
#define NONLATINPARTWORD 10
|
#define CYRPARTHYPHENWORD 10
|
||||||
#define LATPARTWORD 11
|
#define LATPARTHYPHENWORD 11
|
||||||
#define SPACE 12
|
#define SPACE 12
|
||||||
#define SYMTAG 13
|
#define TAG 13
|
||||||
#define HTTP 14
|
#define HTTP 14
|
||||||
#define DEFISWORD 15
|
#define HYPHENWORD 15
|
||||||
#define DEFISLATWORD 16
|
#define LATHYPHENWORD 16
|
||||||
#define DEFISNONLATINWORD 17
|
#define CYRHYPHENWORD 17
|
||||||
#define URI 18
|
#define URI 18
|
||||||
#define FILEPATH 19
|
#define FILEPATH 19
|
||||||
|
#define DECIMAL 20
|
||||||
|
#define SIGNEDINT 21
|
||||||
|
#define UNSIGNEDINT 22
|
||||||
|
#define HTMLENTITY 23
|
||||||
|
|
||||||
extern const char *descr[];
|
extern const char *descr[];
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -689,9 +689,9 @@ SELECT count(*) FROM test_txtidx WHERE a ## '(eq|yt)&(wR|qh)';
|
|||||||
select txt2txtidx('345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
select txt2txtidx('345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||||
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
||||||
<i <b> wow < jqw <> qwerty');
|
<i <b> wow < jqw <> qwerty');
|
||||||
txt2txtidx
|
txt2txtidx
|
||||||

|

|
||||||
'ad' 'dw' 'jf' '234' '345' '4.2' '455' 'jqw' 'qwe' 'wer' 'wow' 'asdf' 'ewr1' 'qwer' 'sdjk' '5.005' 'ewri2' 'qwqwe' 'wefjn' 'gist.c' 'gist.h' 'qwerti' '234.435' ':8100/?' 'qwe-wer' 'readlin' 'www.com' '+4.0e-10' 'gist.h.c' 'rewt/ewr' 'qwe@efd.r' '/?ad=qwe&dw' '/wqe-324/ewr' 'aew.werc.ewr' '1aew.werc.ewr' '2aew.werc.ewr' '3aew.werc.ewr' '4aew.werc.ewr' '5aew.werc.ewr' '6aew.werc.ewr' '7aew.werc.ewr' '/usr/local/fff' '/awdf/dwqe/4325' ':8100/?ad=qwe&dw' 'teodor@stack.net' '5aew.werc.ewr:8100/?' ':8100/?ad=qwe&dw=%20%32' 'aew.werc.ewr/?ad=qwe&dw' '1aew.werc.ewr/?ad=qwe&dw' '3aew.werc.ewr/?ad=qwe&dw' '6aew.werc.ewr:8100/?ad=qwe&dw' '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32'
|
'ad' 'dw' 'jf' '234' '345' '4.2' '455' 'jqw' 'qwe' 'wer' 'wow' 'asdf' 'ewr1' 'qwer' 'sdjk' '5.005' 'ewri2' 'qwqwe' 'wefjn' 'gist.c' 'gist.h' 'qwerti' '234.435' ':8100/?' 'qwe-wer' 'readlin' 'www.com' '+4.0e-10' 'gist.h.c' 'rewt/ewr' 'qwe@efd.r' 'readline-4' '/?ad=qwe&dw' '/wqe-324/ewr' 'aew.werc.ewr' '1aew.werc.ewr' '2aew.werc.ewr' '3aew.werc.ewr' '4aew.werc.ewr' '5aew.werc.ewr' '6aew.werc.ewr' '7aew.werc.ewr' '/usr/local/fff' '/awdf/dwqe/4325' ':8100/?ad=qwe&dw' 'teodor@stack.net' '5aew.werc.ewr:8100/?' ':8100/?ad=qwe&dw=%20%32' 'aew.werc.ewr/?ad=qwe&dw' '1aew.werc.ewr/?ad=qwe&dw' '3aew.werc.ewr/?ad=qwe&dw' '6aew.werc.ewr:8100/?ad=qwe&dw' '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
select txtidxsize(txt2txtidx('345 qw'));
|
select txtidxsize(txt2txtidx('345 qw'));
|
||||||
@ -705,7 +705,7 @@ select txtidxsize(txt2txtidx('345 qwe@efd.r \' http://www.com/ http://aew.werc.e
|
|||||||
<i <b> wow < jqw <> qwerty'));
|
<i <b> wow < jqw <> qwerty'));
|
||||||
txtidxsize
|
txtidxsize
|
||||||
------------
|
------------
|
||||||
52
|
53
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
insert into test_txtidx (a) values ('345 qwerty');
|
insert into test_txtidx (a) values ('345 qwerty');
|
||||||
|
@ -75,19 +75,23 @@ static MAPDICT mapdict[] = {
|
|||||||
{NODICT, NODICT}, /* EMAIL */
|
{NODICT, NODICT}, /* EMAIL */
|
||||||
{NODICT, NODICT}, /* FURL */
|
{NODICT, NODICT}, /* FURL */
|
||||||
{NODICT, NODICT}, /* HOST */
|
{NODICT, NODICT}, /* HOST */
|
||||||
{NODICT, NODICT}, /* FLOAT */
|
{NODICT, NODICT}, /* SCIENTIFIC */
|
||||||
{NODICT, NODICT}, /* FINT */
|
{NODICT, NODICT}, /* VERSIONNUMBER */
|
||||||
{BYLOCALE, DEFAULTDICT}, /* PARTWORD */
|
{BYLOCALE, DEFAULTDICT}, /* PARTHYPHENWORD */
|
||||||
{BYLOCALE, NODICT}, /* NONLATINPARTWORD */
|
{BYLOCALE, NODICT}, /* CYRPARTHYPHENWORD */
|
||||||
{DEFAULTDICT, NODICT}, /* LATPARTWORD */
|
{DEFAULTDICT, NODICT}, /* LATPARTHYPHENWORD */
|
||||||
{STOPLEXEM, NODICT}, /* SPACE */
|
{STOPLEXEM, NODICT}, /* SPACE */
|
||||||
{STOPLEXEM, NODICT}, /* SYMTAG */
|
{STOPLEXEM, NODICT}, /* TAG */
|
||||||
{STOPLEXEM, NODICT}, /* HTTP */
|
{STOPLEXEM, NODICT}, /* HTTP */
|
||||||
{BYLOCALE, DEFAULTDICT}, /* DEFISWORD */
|
{BYLOCALE, DEFAULTDICT}, /* HYPHENWORD */
|
||||||
{DEFAULTDICT, NODICT}, /* DEFISLATWORD */
|
{DEFAULTDICT, NODICT}, /* LATHYPHENWORD */
|
||||||
{BYLOCALE, NODICT}, /* DEFISNONLATINWORD */
|
{BYLOCALE, NODICT}, /* CYRHYPHENWORD */
|
||||||
{NODICT, NODICT}, /* URI */
|
{NODICT, NODICT}, /* URI */
|
||||||
{NODICT, NODICT} /* FILEPATH */
|
{NODICT, NODICT}, /* FILEPATH */
|
||||||
|
{NODICT, NODICT}, /* DECIMAL */
|
||||||
|
{NODICT, NODICT}, /* SIGNEDINT */
|
||||||
|
{NODICT, NODICT}, /* UNSIGNEDINT */
|
||||||
|
{STOPLEXEM, NODICT} /* HTMLENTITY */
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool inited = false;
|
static bool inited = false;
|
||||||
|
@ -5,18 +5,17 @@
|
|||||||
|
|
||||||
/* postgres allocation function */
|
/* postgres allocation function */
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
#define free pfree
|
#define free pfree
|
||||||
#define malloc palloc
|
#define malloc palloc
|
||||||
#define realloc repalloc
|
#define realloc repalloc
|
||||||
|
|
||||||
#ifdef strdup
|
#ifdef strdup
|
||||||
#undef strdup
|
#undef strdup
|
||||||
#endif
|
#endif
|
||||||
#define strdup pstrdup
|
#define strdup pstrdup
|
||||||
|
|
||||||
|
|
||||||
char *token = NULL; /* pointer to token */
|
char *token = NULL; /* pointer to token */
|
||||||
char *s = NULL; /* for returning full defis-word */
|
char *s = NULL; /* to return WHOLE hyphenated-word */
|
||||||
|
|
||||||
YY_BUFFER_STATE buf = NULL; /* buffer to parse; needed for parsing from a string */
|
YY_BUFFER_STATE buf = NULL; /* buffer to parse; needed for parsing from a string */
|
||||||
|
|
||||||
@ -57,21 +56,21 @@ int bytestoread = 0; /* for limiting read from filehandle */
|
|||||||
%option nounput
|
%option nounput
|
||||||
%option noyywrap
|
%option noyywrap
|
||||||
|
|
||||||
|
/* parser's state for parsing hyphenated-word */
|
||||||
/* parser's state for parsing defis-word */
|
|
||||||
%x DELIM
|
%x DELIM
|
||||||
/* parser's state for parsing URL*/
|
/* parser's state for parsing URL*/
|
||||||
%x URL
|
%x URL
|
||||||
%x SERVER
|
%x SERVER
|
||||||
|
|
||||||
/* parser's state for parsing filepath */
|
/* parser's state for parsing TAGS */
|
||||||
|
|
||||||
%x INTAG
|
%x INTAG
|
||||||
%x QINTAG
|
%x QINTAG
|
||||||
|
%x INCOMMENT
|
||||||
|
%x INSCRIPT
|
||||||
|
|
||||||
/* NONLATIN char */
|
/* cyrillic koi8 char */
|
||||||
NONLATINALNUM [0-9\200-\377]
|
CYRALNUM [0-9\200-\377]
|
||||||
NONLATINALPHA [\200-\377]
|
CYRALPHA [\200-\377]
|
||||||
ALPHA [a-zA-Z\200-\377]
|
ALPHA [a-zA-Z\200-\377]
|
||||||
ALNUM [0-9a-zA-Z\200-\377]
|
ALNUM [0-9a-zA-Z\200-\377]
|
||||||
|
|
||||||
@ -81,66 +80,59 @@ URI [-_[:alnum:]/%,\.;=&?#]+
|
|||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
"<"[[:alpha:]] { BEGIN INTAG;
|
"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
|
||||||
token = tsearch_yytext;
|
|
||||||
tokenlen = tsearch_yyleng;
|
|
||||||
return SYMTAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
"</"[[:alpha:]] { BEGIN INTAG;
|
<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
|
||||||
token = tsearch_yytext;
|
BEGIN INITIAL;
|
||||||
tokenlen = tsearch_yyleng;
|
*tsearch_yytext=' '; *(tsearch_yytext+1) = '\0';
|
||||||
return SYMTAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
"<>" {
|
|
||||||
token = tsearch_yytext;
|
|
||||||
tokenlen = tsearch_yyleng;
|
|
||||||
return SYMTAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
"<"[^>[:alpha:]] {
|
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return SPACE;
|
return SPACE;
|
||||||
}
|
}
|
||||||
|
|
||||||
<INTAG>"\"" { BEGIN QINTAG;
|
"<!--" { BEGIN INCOMMENT; }
|
||||||
token = tsearch_yytext;
|
|
||||||
tokenlen = tsearch_yyleng;
|
|
||||||
return SYMTAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
<QINTAG>"\\\"" {
|
<INCOMMENT>"-->" {
|
||||||
|
BEGIN INITIAL;
|
||||||
|
*tsearch_yytext=' '; *(tsearch_yytext+1) = '\0';
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return SYMTAG;
|
return SPACE;
|
||||||
}
|
}
|
||||||
|
|
||||||
<QINTAG>"\"" { BEGIN INTAG;
|
|
||||||
token = tsearch_yytext;
|
|
||||||
tokenlen = tsearch_yyleng;
|
|
||||||
return SYMTAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
<QINTAG>.|\n {
|
"<"[\![:alpha:]] { BEGIN INTAG; }
|
||||||
|
|
||||||
|
"</"[[:alpha:]] { BEGIN INTAG; }
|
||||||
|
|
||||||
|
<INTAG>"\"" { BEGIN QINTAG; }
|
||||||
|
|
||||||
|
<QINTAG>"\\\"" ;
|
||||||
|
|
||||||
|
<QINTAG>"\"" { BEGIN INTAG; }
|
||||||
|
|
||||||
|
<INTAG>">" {
|
||||||
|
BEGIN INITIAL;
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
*tsearch_yytext=' ';
|
||||||
return SYMTAG;
|
token = tsearch_yytext;
|
||||||
|
tokenlen = 1;
|
||||||
|
return TAG;
|
||||||
}
|
}
|
||||||
|
|
||||||
<INTAG>">" { BEGIN INITIAL;
|
<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n ;
|
||||||
token = tsearch_yytext;
|
|
||||||
tokenlen = tsearch_yyleng;
|
|
||||||
return SYMTAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
<INTAG>.|\n {
|
\&(quot|amp|nbsp|lt|gt)\; {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return SYMTAG;
|
return HTMLENTITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
\&\#[0-9][0-9]?[0-9]?\; {
|
||||||
|
token = tsearch_yytext;
|
||||||
|
tokenlen = tsearch_yyleng;
|
||||||
|
return HTMLENTITY;
|
||||||
|
}
|
||||||
|
|
||||||
[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ {
|
[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
@ -148,22 +140,34 @@ URI [-_[:alnum:]/%,\.;=&?#]+
|
|||||||
return EMAIL;
|
return EMAIL;
|
||||||
}
|
}
|
||||||
|
|
||||||
<DELIM,INITIAL>[0-9] /* digit's and point (might be a version) */ {
|
[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+ /* float */ {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return FINT;
|
return SCIENTIFIC;
|
||||||
}
|
}
|
||||||
|
|
||||||
<DELIM,INITIAL>[0-9]+[0-9\.]*[0-9] /* digit's and point (might be a version) */ {
|
[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return FINT;
|
return VERSIONNUMBER;
|
||||||
}
|
}
|
||||||
|
|
||||||
[+-]?[0-9\.]+[eE][+-]?[0-9]+ /* float */ {
|
[+-]?[0-9]+\.[0-9]+ {
|
||||||
|
token = tsearch_yytext;
|
||||||
|
tokenlen = tsearch_yyleng;
|
||||||
|
return DECIMAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
[+-][0-9]+ {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return FLOAT;
|
return SIGNEDINT;
|
||||||
|
}
|
||||||
|
|
||||||
|
<DELIM,INITIAL>[0-9]+ {
|
||||||
|
token = tsearch_yytext;
|
||||||
|
tokenlen = tsearch_yyleng;
|
||||||
|
return UNSIGNEDINT;
|
||||||
}
|
}
|
||||||
|
|
||||||
http"://" {
|
http"://" {
|
||||||
@ -208,52 +212,58 @@ ftp"://" {
|
|||||||
return FILEPATH;
|
return FILEPATH;
|
||||||
}
|
}
|
||||||
|
|
||||||
({NONLATINALNUM}+-)+{NONLATINALPHA}+ /* composite-word */ {
|
({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */ {
|
||||||
BEGIN DELIM;
|
BEGIN DELIM;
|
||||||
if (s) { free(s); s=NULL; }
|
if (s) { free(s); s=NULL; }
|
||||||
s = strdup( tsearch_yytext );
|
s = strdup( tsearch_yytext );
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
yyless( 0 );
|
yyless( 0 );
|
||||||
token = s;
|
token = s;
|
||||||
return DEFISNONLATINWORD;
|
return CYRHYPHENWORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
([[:alnum:]]+-)+[[:alpha:]]+ /* composite-word */ {
|
([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */ {
|
||||||
BEGIN DELIM;
|
BEGIN DELIM;
|
||||||
if (s) { free(s); s=NULL; }
|
if (s) { free(s); s=NULL; }
|
||||||
tokenlen = tsearch_yyleng;
|
|
||||||
s = strdup( tsearch_yytext );
|
s = strdup( tsearch_yytext );
|
||||||
|
tokenlen = tsearch_yyleng;
|
||||||
yyless( 0 );
|
yyless( 0 );
|
||||||
token = s;
|
token = s;
|
||||||
return DEFISLATWORD;
|
return LATHYPHENWORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
({ALNUM}+-)+{ALPHA}+ /* composite-word */ {
|
({ALNUM}+-)+{ALNUM}+ /* composite-word */ {
|
||||||
BEGIN DELIM;
|
BEGIN DELIM;
|
||||||
if (s) { free(s); s=NULL; }
|
if (s) { free(s); s=NULL; }
|
||||||
s = strdup( tsearch_yytext );
|
s = strdup( tsearch_yytext );
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
yyless( 0 );
|
yyless( 0 );
|
||||||
token = s;
|
token = s;
|
||||||
return DEFISWORD;
|
return HYPHENWORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
<DELIM>{NONLATINALNUM}+ /* one word in composite-word */ {
|
<DELIM>\+?[0-9]+\.[0-9]+ {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return NONLATINPARTWORD;
|
return DECIMAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
<DELIM>[[:alnum:]]+ /* one word in composite-word */ {
|
<DELIM>{CYRALPHA}+ /* one word in composite-word */ {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return LATPARTWORD;
|
return CYRPARTHYPHENWORD;
|
||||||
|
}
|
||||||
|
|
||||||
|
<DELIM>[[:alpha:]]+ /* one word in composite-word */ {
|
||||||
|
token = tsearch_yytext;
|
||||||
|
tokenlen = tsearch_yyleng;
|
||||||
|
return LATPARTHYPHENWORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
<DELIM>{ALNUM}+ /* one word in composite-word */ {
|
<DELIM>{ALNUM}+ /* one word in composite-word */ {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return PARTWORD;
|
return PARTHYPHENWORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
<DELIM>- {
|
<DELIM>- {
|
||||||
@ -264,17 +274,16 @@ ftp"://" {
|
|||||||
|
|
||||||
<DELIM,SERVER,URL>.|\n /* return in basic state */ {
|
<DELIM,SERVER,URL>.|\n /* return in basic state */ {
|
||||||
BEGIN INITIAL;
|
BEGIN INITIAL;
|
||||||
tokenlen = tsearch_yyleng;
|
|
||||||
yyless( 0 );
|
yyless( 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
{NONLATINALNUM}+ /* normal word */ {
|
{CYRALPHA}+ /* normal word */ {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return NONLATINWORD;
|
return CYRWORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[:alnum:]]+ /* normal word */ {
|
[[:alpha:]]+ /* normal word */ {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return LATWORD;
|
return LATWORD;
|
||||||
@ -286,7 +295,13 @@ ftp"://" {
|
|||||||
return UWORD;
|
return UWORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
.|\n {
|
[ \r\n\t]+ {
|
||||||
|
token = tsearch_yytext;
|
||||||
|
tokenlen = tsearch_yyleng;
|
||||||
|
return SPACE;
|
||||||
|
}
|
||||||
|
|
||||||
|
. {
|
||||||
token = tsearch_yytext;
|
token = tsearch_yytext;
|
||||||
tokenlen = tsearch_yyleng;
|
tokenlen = tsearch_yyleng;
|
||||||
return SPACE;
|
return SPACE;
|
||||||
|
Reference in New Issue
Block a user