1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

1 Eliminate duplicate field HLWORD->skip

2 Rework support for html tags in parser
3 Add a HighlightAll option to the headline function that generates the
  whole text highlighted, with HTML tags preserved
This commit is contained in:
Teodor Sigaev
2004-06-28 16:19:09 +00:00
parent e48cfacb84
commit bb89237531
6 changed files with 234 additions and 133 deletions

View File

@ -78,6 +78,7 @@ prsd_end(PG_FUNCTION_ARGS)
#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
#define HTMLHLIDIGNORE(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) )
#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
@ -196,6 +197,7 @@ prsd_headline(PG_FUNCTION_ARGS)
curlen;
int i;
int highlight=0;
/* config */
prs->startsel = NULL;
@ -220,6 +222,15 @@ prsd_headline(PG_FUNCTION_ARGS)
prs->startsel = pstrdup(mptr->value);
else if (pg_strcasecmp(mptr->key, "StopSel") == 0)
prs->stopsel = pstrdup(mptr->value);
else if (pg_strcasecmp(mptr->key, "HighlightAll") == 0)
highlight = (
pg_strcasecmp(mptr->value, "1")==0 ||
pg_strcasecmp(mptr->value, "on")==0 ||
pg_strcasecmp(mptr->value, "true")==0 ||
pg_strcasecmp(mptr->value, "t")==0 ||
pg_strcasecmp(mptr->value, "y")==0 ||
pg_strcasecmp(mptr->value, "yes")==0 ) ?
1 : 0;
pfree(mptr->key);
pfree(mptr->value);
@ -228,124 +239,133 @@ prsd_headline(PG_FUNCTION_ARGS)
}
pfree(map);
if (min_words >= max_words)
ereport(ERROR,
if (highlight==0) {
if (min_words >= max_words)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("MinWords should be less than MaxWords")));
if (min_words <= 0)
ereport(ERROR,
if (min_words <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("MinWords should be positive")));
if (shortword < 0)
ereport(ERROR,
if (shortword < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("ShortWord should be >= 0")));
}
}
while (hlCover(prs, query, &p, &q))
{
/* find cover len in words */
curlen = 0;
poslen = 0;
for (i = p; i <= q && curlen < max_words; i++)
if (highlight==0) {
while (hlCover(prs, query, &p, &q))
{
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
if (prs->words[i].item && !prs->words[i].repeated)
poslen++;
pose = i;
}
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
{
/* best already found, so try one more cover */
p++;
continue;
}
posb=p;
if (curlen < max_words)
{ /* find good end */
for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
/* find cover len in words */
curlen = 0;
poslen = 0;
for (i = p; i <= q && curlen < max_words; i++)
{
if (i != q)
{
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
if (prs->words[i].item && !prs->words[i].repeated)
poslen++;
}
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
if (prs->words[i].item && !prs->words[i].repeated)
poslen++;
pose = i;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
continue;
if (curlen >= min_words)
break;
}
if ( curlen < min_words && i>=prs->curwords ) { /* got end of text and our cover is shoter than min_words */
for(i=p; i>= 0; i--) {
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
if (prs->words[i].item && !prs->words[i].repeated)
poslen++;
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
{
/* best already found, so try one more cover */
p++;
continue;
}
posb=p;
if (curlen < max_words)
{ /* find good end */
for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
{
if (i != q)
{
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
if (prs->words[i].item && !prs->words[i].repeated)
poslen++;
}
pose = i;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
continue;
if (curlen >= min_words)
break;
}
posb=(i>=0) ? i : 0;
if ( curlen < min_words && i>=prs->curwords ) { /* got end of text and our cover is shoter than min_words */
for(i=p; i>= 0; i--) {
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
if (prs->words[i].item && !prs->words[i].repeated)
poslen++;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
continue;
if (curlen >= min_words)
break;
}
posb=(i>=0) ? i : 0;
}
}
else
{ /* shorter cover :((( */
for (; curlen > min_words; i--)
{
if (!NONWORDTOKEN(prs->words[i].type))
curlen--;
if (prs->words[i].item && !prs->words[i].repeated)
poslen--;
pose = i;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
continue;
break;
}
}
if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
{
bestb = posb;
beste = pose;
bestlen = poslen;
}
p++;
}
else
{ /* shorter cover :((( */
for (; curlen > min_words; i--)
if (bestlen < 0)
{
curlen = 0;
for (i = 0; i < prs->curwords && curlen < min_words; i++)
{
if (!NONWORDTOKEN(prs->words[i].type))
curlen--;
if (prs->words[i].item && !prs->words[i].repeated)
poslen--;
curlen++;
pose = i;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
continue;
break;
}
}
if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
{
bestb = posb;
bestb = 0;
beste = pose;
bestlen = poslen;
}
p++;
}
if (bestlen < 0)
{
curlen = 0;
poslen = 0;
for (i = 0; i < prs->curwords && curlen < min_words; i++)
{
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
pose = i;
}
bestb = 0;
beste = pose;
} else {
bestb=0;
beste=prs->curwords-1;
}
for (i = bestb; i <= beste; i++)
{
if (prs->words[i].item)
prs->words[i].selected = 1;
if (prs->words[i].repeated)
prs->words[i].skip = 1;
if (HLIDIGNORE(prs->words[i].type))
prs->words[i].replace = 1;
if ( highlight==0 ) {
if (HLIDIGNORE(prs->words[i].type))
prs->words[i].replace = 1;
} else {
if (HTMLHLIDIGNORE(prs->words[i].type))
prs->words[i].replace = 1;
}
prs->words[i].in = 1;
prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
}
if (!prs->startsel)