mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
1 Eliminate duplicate field HLWORD->skip
2 Rework support for HTML tags in the parser
3 Add a HighlightAll option to the headline function, which generates a highlighted copy of the whole text with its HTML tags preserved
This commit is contained in:
@ -78,6 +78,7 @@ prsd_end(PG_FUNCTION_ARGS)
|
||||
|
||||
/*
 * IDIGNORE(x): nonzero when token type id x is one of 12, 13, 14 or 23.
 * The ids are numeric parser token types; their symbolic meaning is not
 * visible here (NOTE(review): confirm against the parser's token table).
 * The argument is evaluated more than once, so pass a side-effect-free
 * expression.
 */
#define IDIGNORE(x) \
	((x) == 12 || (x) == 13 || (x) == 14 || (x) == 23)
|
||||
/*
 * HLIDIGNORE(x): nonzero when token type id x is 5, 13, 15, 16 or 17.
 * The headline code sets the "replace" flag on words whose type matches
 * this test when HighlightAll is off.  The argument is evaluated more than
 * once, so pass a side-effect-free expression.
 */
#define HLIDIGNORE(x) \
	((x) == 5 || (x) == 13 || (x) == 15 || (x) == 16 || (x) == 17)
|
||||
/*
 * HTMLHLIDIGNORE(x): nonzero when token type id x is 5, 15, 16 or 17.
 * This is the same set as HLIDIGNORE minus type 13; the headline code uses
 * it in place of HLIDIGNORE when HighlightAll is on.
 * NOTE(review): type 13 is presumably the HTML tag token, which
 * HighlightAll keeps in the output -- confirm against the parser.
 * The argument is evaluated more than once.
 */
#define HTMLHLIDIGNORE(x) \
	((x) == 5 || (x) == 15 || (x) == 16 || (x) == 17)
|
||||
/*
 * NONWORDTOKEN(x): nonzero when token type id x is 12 or satisfies
 * HLIDIGNORE.  The headline code skips such tokens when counting words
 * (curlen).  The argument is evaluated more than once.
 */
#define NONWORDTOKEN(x) \
	(HLIDIGNORE(x) || (x) == 12)
|
||||
/*
 * NOENDTOKEN(x): nonzero when token type id x satisfies NONWORDTOKEN or
 * IDIGNORE, or is one of 7, 8, 20, 21 or 22.  The headline code treats a
 * word of such a type as unsuitable for ending the selected fragment.
 * The argument is evaluated more than once.
 */
#define NOENDTOKEN(x) \
	(NONWORDTOKEN(x) || IDIGNORE(x) || \
	 (x) == 7 || (x) == 8 || (x) == 20 || (x) == 21 || (x) == 22)
|
||||
|
||||
@ -196,6 +197,7 @@ prsd_headline(PG_FUNCTION_ARGS)
|
||||
curlen;
|
||||
|
||||
int i;
|
||||
int highlight=0;
|
||||
|
||||
/* config */
|
||||
prs->startsel = NULL;
|
||||
@ -220,6 +222,15 @@ prsd_headline(PG_FUNCTION_ARGS)
|
||||
prs->startsel = pstrdup(mptr->value);
|
||||
else if (pg_strcasecmp(mptr->key, "StopSel") == 0)
|
||||
prs->stopsel = pstrdup(mptr->value);
|
||||
else if (pg_strcasecmp(mptr->key, "HighlightAll") == 0)
|
||||
highlight = (
|
||||
pg_strcasecmp(mptr->value, "1")==0 ||
|
||||
pg_strcasecmp(mptr->value, "on")==0 ||
|
||||
pg_strcasecmp(mptr->value, "true")==0 ||
|
||||
pg_strcasecmp(mptr->value, "t")==0 ||
|
||||
pg_strcasecmp(mptr->value, "y")==0 ||
|
||||
pg_strcasecmp(mptr->value, "yes")==0 ) ?
|
||||
1 : 0;
|
||||
|
||||
pfree(mptr->key);
|
||||
pfree(mptr->value);
|
||||
@ -228,124 +239,133 @@ prsd_headline(PG_FUNCTION_ARGS)
|
||||
}
|
||||
pfree(map);
|
||||
|
||||
if (min_words >= max_words)
|
||||
ereport(ERROR,
|
||||
if (highlight==0) {
|
||||
if (min_words >= max_words)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("MinWords should be less than MaxWords")));
|
||||
if (min_words <= 0)
|
||||
ereport(ERROR,
|
||||
if (min_words <= 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("MinWords should be positive")));
|
||||
if (shortword < 0)
|
||||
ereport(ERROR,
|
||||
if (shortword < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("ShortWord should be >= 0")));
|
||||
}
|
||||
}
|
||||
|
||||
while (hlCover(prs, query, &p, &q))
|
||||
{
|
||||
/* find cover len in words */
|
||||
curlen = 0;
|
||||
poslen = 0;
|
||||
for (i = p; i <= q && curlen < max_words; i++)
|
||||
if (highlight==0) {
|
||||
while (hlCover(prs, query, &p, &q))
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
pose = i;
|
||||
}
|
||||
|
||||
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
|
||||
{
|
||||
/* best cover already found, so try one more cover */
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
|
||||
posb=p;
|
||||
if (curlen < max_words)
|
||||
{ /* find good end */
|
||||
for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
|
||||
/* find cover len in words */
|
||||
curlen = 0;
|
||||
poslen = 0;
|
||||
for (i = p; i <= q && curlen < max_words; i++)
|
||||
{
|
||||
if (i != q)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
}
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
pose = i;
|
||||
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
|
||||
continue;
|
||||
if (curlen >= min_words)
|
||||
break;
|
||||
}
|
||||
if ( curlen < min_words && i>=prs->curwords ) { /* got end of text and our cover is shoter than min_words */
|
||||
for(i=p; i>= 0; i--) {
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
|
||||
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
|
||||
{
|
||||
/* best cover already found, so try one more cover */
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
|
||||
posb=p;
|
||||
if (curlen < max_words)
|
||||
{ /* find good end */
|
||||
for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
|
||||
{
|
||||
if (i != q)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
}
|
||||
pose = i;
|
||||
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
|
||||
continue;
|
||||
if (curlen >= min_words)
|
||||
break;
|
||||
}
|
||||
posb=(i>=0) ? i : 0;
|
||||
if ( curlen < min_words && i>=prs->curwords ) { /* got end of text and our cover is shoter than min_words */
|
||||
for(i=p; i>= 0; i--) {
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
|
||||
continue;
|
||||
if (curlen >= min_words)
|
||||
break;
|
||||
}
|
||||
posb=(i>=0) ? i : 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* shorter cover :((( */
|
||||
for (; curlen > min_words; i--)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen--;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen--;
|
||||
pose = i;
|
||||
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
|
||||
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
|
||||
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
|
||||
{
|
||||
bestb = posb;
|
||||
beste = pose;
|
||||
bestlen = poslen;
|
||||
}
|
||||
|
||||
p++;
|
||||
}
|
||||
else
|
||||
{ /* shorter cover :((( */
|
||||
for (; curlen > min_words; i--)
|
||||
|
||||
if (bestlen < 0)
|
||||
{
|
||||
curlen = 0;
|
||||
for (i = 0; i < prs->curwords && curlen < min_words; i++)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen--;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen--;
|
||||
curlen++;
|
||||
pose = i;
|
||||
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
|
||||
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
|
||||
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
|
||||
{
|
||||
bestb = posb;
|
||||
bestb = 0;
|
||||
beste = pose;
|
||||
bestlen = poslen;
|
||||
}
|
||||
|
||||
p++;
|
||||
}
|
||||
|
||||
if (bestlen < 0)
|
||||
{
|
||||
curlen = 0;
|
||||
poslen = 0;
|
||||
for (i = 0; i < prs->curwords && curlen < min_words; i++)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
pose = i;
|
||||
}
|
||||
bestb = 0;
|
||||
beste = pose;
|
||||
} else {
|
||||
bestb=0;
|
||||
beste=prs->curwords-1;
|
||||
}
|
||||
|
||||
for (i = bestb; i <= beste; i++)
|
||||
{
|
||||
if (prs->words[i].item)
|
||||
prs->words[i].selected = 1;
|
||||
if (prs->words[i].repeated)
|
||||
prs->words[i].skip = 1;
|
||||
if (HLIDIGNORE(prs->words[i].type))
|
||||
prs->words[i].replace = 1;
|
||||
if ( highlight==0 ) {
|
||||
if (HLIDIGNORE(prs->words[i].type))
|
||||
prs->words[i].replace = 1;
|
||||
} else {
|
||||
if (HTMLHLIDIGNORE(prs->words[i].type))
|
||||
prs->words[i].replace = 1;
|
||||
}
|
||||
|
||||
prs->words[i].in = 1;
|
||||
prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
|
||||
}
|
||||
|
||||
if (!prs->startsel)
|
||||
|
Reference in New Issue
Block a user