mirror of
https://github.com/postgres/postgres.git
synced 2025-06-14 18:42:34 +03:00
Postgres95 1.01 Distribution - Virgin Sources
This commit is contained in:
343
src/backend/utils/adt/regexp.c
Normal file
343
src/backend/utils/adt/regexp.c
Normal file
@ -0,0 +1,343 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* regexp.c--
|
||||
* regular expression handling code.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.1.1.1 1996/07/09 06:22:05 scrappy Exp $
|
||||
*
|
||||
* Alistair Crooks added the code for the regex caching
|
||||
* agc - cached the regular expressions used - there's a good chance
|
||||
* that we'll get a hit, so this saves a compile step for every
|
||||
* attempted match. I haven't actually measured the speed improvement,
|
||||
* but it `looks' a lot quicker visually when watching regression
|
||||
* test output.
|
||||
*
|
||||
* agc - incorporated Keith Bostic's Berkeley regex code into
|
||||
* the tree for all ports. To distinguish this regex code from any that
|
||||
* is existent on a platform, I've prepended the string "pg95_" to
|
||||
* the functions regcomp, regerror, regexec and regfree.
|
||||
* Fixed a bug that was originally a typo by me, where `i' was used
|
||||
* instead of `oldest' when compiling regular expressions - benign
|
||||
* results mostly, although occasionally it bit you...
|
||||
*
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <string.h>
|
||||
#include "postgres.h" /* postgres system include file */
|
||||
#include "utils/elog.h" /* for logging postgres errors */
|
||||
#include "utils/palloc.h"
|
||||
#include "utils/builtins.h" /* where the function declarations go */
|
||||
|
||||
#if defined(DISABLE_XOPEN_NLS)
|
||||
#undef _XOPEN_SOURCE
|
||||
#endif /* DISABLE_XOPEN_NLS */
|
||||
|
||||
#ifndef WIN32
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <regex.h>
|
||||
|
||||
#endif /* WIN32 why is this necessary? */
|
||||
|
||||
/* this is the number of cached regular expressions held. */
|
||||
#ifndef MAX_CACHED_RES
|
||||
#define MAX_CACHED_RES 32
|
||||
#endif
|
||||
|
||||
/*
 * One slot of the compiled-regular-expression cache.
 *
 * A slot whose cre_s is NULL holds no usable compiled expression.
 */
struct cached_re_str {
    struct varlena *cre_text;   /* the pattern in its text* form */
    char           *cre_s;      /* the pattern as a null-terminated C string */
    int             cre_type;   /* cflags used to compile it (extended, icase, ...) */
    regex_t         cre_re;     /* the compiled regular expression itself */
    unsigned long   cre_lru;    /* lru tag; a larger value means used more recently */
};
|
||||
|
||||
static int rec = 0; /* # of cached re's */
|
||||
static struct cached_re_str rev[MAX_CACHED_RES]; /* cached re's */
|
||||
static unsigned long lru; /* system lru tag */
|
||||
|
||||
/* attempt to compile `re' as an re, then match it against text */
|
||||
/* cflags - flag to regcomp indicates case sensitivity */
|
||||
static int
|
||||
RE_compile_and_execute(struct varlena *text_re, char *text, int cflags)
|
||||
{
|
||||
int oldest;
|
||||
int n;
|
||||
int i;
|
||||
char *re;
|
||||
int regcomp_result;
|
||||
|
||||
re = textout(text_re);
|
||||
/* find a previously compiled regular expression */
|
||||
for (i = 0 ; i < rec ; i++) {
|
||||
if (rev[i].cre_s) {
|
||||
if (strcmp(rev[i].cre_s, re) == 0) {
|
||||
if (rev[i].cre_type == cflags) {
|
||||
rev[i].cre_lru = ++lru;
|
||||
pfree(re);
|
||||
return(pg95_regexec(&rev[i].cre_re,
|
||||
text, 0,
|
||||
(regmatch_t *) NULL, 0) == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* we didn't find it - make room in the cache for it */
|
||||
if (rec == MAX_CACHED_RES) {
|
||||
/* cache is full - find the oldest entry */
|
||||
for (oldest = 0, i = 1 ; i < rec ; i++) {
|
||||
if (rev[i].cre_lru < rev[oldest].cre_lru) {
|
||||
oldest = i;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
oldest = rec++;
|
||||
}
|
||||
|
||||
/* if there was an old re, then de-allocate the space it used */
|
||||
if (rev[oldest].cre_s != (char *) NULL) {
|
||||
for (lru = i = 0 ; i < rec ; i++) {
|
||||
rev[i].cre_lru =
|
||||
(rev[i].cre_lru - rev[oldest].cre_lru) / 2;
|
||||
if (rev[i].cre_lru > lru) {
|
||||
lru = rev[i].cre_lru;
|
||||
}
|
||||
}
|
||||
pg95_regfree(&rev[oldest].cre_re);
|
||||
/* use malloc/free for the cre_s field because the storage
|
||||
has to persist across transactions */
|
||||
free(rev[oldest].cre_s);
|
||||
}
|
||||
|
||||
/* compile the re */
|
||||
regcomp_result = pg95_regcomp(&rev[oldest].cre_re, re, cflags);
|
||||
if ( regcomp_result == 0) {
|
||||
n = strlen(re);
|
||||
/* use malloc/free for the cre_s field because the storage
|
||||
has to persist across transactions */
|
||||
rev[oldest].cre_s = (char *) malloc(n + 1);
|
||||
(void) memmove(rev[oldest].cre_s, re, n);
|
||||
rev[oldest].cre_s[n] = 0;
|
||||
rev[oldest].cre_text = text_re;
|
||||
rev[oldest].cre_lru = ++lru;
|
||||
rev[oldest].cre_type = cflags;
|
||||
pfree(re);
|
||||
/* agc - fixed an old typo here */
|
||||
return(pg95_regexec(&rev[oldest].cre_re, text, 0,
|
||||
(regmatch_t *) NULL, 0) == 0);
|
||||
} else {
|
||||
char errMsg[1000];
|
||||
/* re didn't compile */
|
||||
rev[oldest].cre_s = (char *) NULL;
|
||||
pg95_regerror(regcomp_result, &rev[oldest].cre_re, errMsg,
|
||||
sizeof(errMsg));
|
||||
elog(WARN,"regcomp failed with error %s",errMsg);
|
||||
}
|
||||
|
||||
/* not reached */
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* interface routines called by the function manager
|
||||
*/
|
||||
|
||||
/*
|
||||
fixedlen_regexeq:
|
||||
|
||||
a generic fixed length regexp routine
|
||||
s - the string to match against (not necessarily null-terminated)
|
||||
p - the pattern
|
||||
charlen - the length of the string
|
||||
*/
|
||||
static bool
|
||||
fixedlen_regexeq(char *s, struct varlena* p, int charlen, int cflags)
|
||||
{
|
||||
char *sterm;
|
||||
int result;
|
||||
|
||||
if (!s || !p)
|
||||
return FALSE;
|
||||
|
||||
/* be sure sterm is null-terminated */
|
||||
sterm = (char *) palloc(charlen + 1);
|
||||
memset(sterm, 0, charlen + 1);
|
||||
strncpy(sterm, s, charlen);
|
||||
|
||||
result = RE_compile_and_execute(p, sterm, cflags);
|
||||
|
||||
pfree(sterm);
|
||||
|
||||
return ((bool) result);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* routines that use the regexp stuff
|
||||
*/
|
||||
bool
|
||||
char2regexeq(uint16 arg1, struct varlena *p)
|
||||
{
|
||||
char *s = (char *) &arg1;
|
||||
return (fixedlen_regexeq(s, p, 2, REG_EXTENDED));
|
||||
}
|
||||
|
||||
bool
|
||||
char2regexne(uint16 arg1, struct varlena *p)
|
||||
{
|
||||
return (!char2regexeq(arg1, p));
|
||||
}
|
||||
|
||||
bool
|
||||
char4regexeq(uint32 arg1, struct varlena *p)
|
||||
{
|
||||
char *s = (char *) &arg1;
|
||||
return (fixedlen_regexeq(s, p, 4, REG_EXTENDED));
|
||||
}
|
||||
|
||||
bool
|
||||
char4regexne(uint32 arg1, struct varlena *p)
|
||||
{
|
||||
return (!char4regexeq(arg1, p));
|
||||
}
|
||||
|
||||
bool
|
||||
char8regexeq(char *s, struct varlena *p)
|
||||
{
|
||||
return (fixedlen_regexeq(s, p, 8, REG_EXTENDED));
|
||||
}
|
||||
|
||||
bool
|
||||
char8regexne(char *s, struct varlena *p)
|
||||
{
|
||||
return (!char8regexeq(s, p));
|
||||
}
|
||||
|
||||
bool
|
||||
char16regexeq(char *s, struct varlena *p)
|
||||
{
|
||||
return (fixedlen_regexeq(s, p, 16, REG_EXTENDED));
|
||||
}
|
||||
|
||||
bool
|
||||
char16regexne(char *s, struct varlena *p)
|
||||
{
|
||||
return (!char16regexeq(s, p));
|
||||
}
|
||||
|
||||
bool
|
||||
nameregexeq(NameData *n, struct varlena *p)
|
||||
{
|
||||
return (fixedlen_regexeq(n->data, p, NAMEDATALEN, REG_EXTENDED));
|
||||
}
|
||||
bool
|
||||
nameregexne(NameData *s, struct varlena *p)
|
||||
{
|
||||
return (!nameregexeq(s, p));
|
||||
}
|
||||
|
||||
bool
|
||||
textregexeq(struct varlena *s, struct varlena *p)
|
||||
{
|
||||
return (fixedlen_regexeq(VARDATA(s), p, VARSIZE(s) - VARHDRSZ, REG_EXTENDED));
|
||||
}
|
||||
|
||||
bool
|
||||
textregexne(struct varlena *s, struct varlena *p)
|
||||
{
|
||||
return (!textregexeq(s, p));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* routines that use the regexp stuff, but ignore the case.
|
||||
* for this, we use the REG_ICASE flag to pg95_regcomp
|
||||
*/
|
||||
bool
|
||||
char2icregexeq(uint16 arg1, struct varlena *p)
|
||||
{
|
||||
char *s = (char *) &arg1;
|
||||
return (fixedlen_regexeq(s, p, 2, REG_ICASE | REG_EXTENDED));
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
char2icregexne(uint16 arg1, struct varlena *p)
|
||||
{
|
||||
return (!char2icregexeq(arg1, p));
|
||||
}
|
||||
|
||||
bool
|
||||
char4icregexeq(uint32 arg1, struct varlena *p)
|
||||
{
|
||||
char *s = (char *) &arg1;
|
||||
return (fixedlen_regexeq(s, p, 4, REG_ICASE | REG_EXTENDED ));
|
||||
}
|
||||
|
||||
bool
|
||||
char4icregexne(uint32 arg1, struct varlena *p)
|
||||
{
|
||||
return (!char4icregexeq(arg1, p));
|
||||
}
|
||||
|
||||
bool
|
||||
char8icregexeq(char *s, struct varlena *p)
|
||||
{
|
||||
return (fixedlen_regexeq(s, p, 8, REG_ICASE | REG_EXTENDED));
|
||||
}
|
||||
|
||||
bool
|
||||
char8icregexne(char *s, struct varlena *p)
|
||||
{
|
||||
return (!char8icregexeq(s, p));
|
||||
}
|
||||
|
||||
bool
|
||||
char16icregexeq(char *s, struct varlena *p)
|
||||
{
|
||||
return (fixedlen_regexeq(s, p, 16, REG_ICASE | REG_EXTENDED));
|
||||
}
|
||||
|
||||
bool
|
||||
char16icregexne(char *s, struct varlena *p)
|
||||
{
|
||||
return (!char16icregexeq(s, p));
|
||||
}
|
||||
|
||||
bool
|
||||
texticregexeq(struct varlena *s, struct varlena *p)
|
||||
{
|
||||
return (fixedlen_regexeq(VARDATA(s), p, VARSIZE(s) - VARHDRSZ,
|
||||
REG_ICASE | REG_EXTENDED));
|
||||
}
|
||||
|
||||
bool
|
||||
texticregexne(struct varlena *s, struct varlena *p)
|
||||
{
|
||||
return (!texticregexeq(s, p));
|
||||
}
|
||||
|
||||
bool
|
||||
nameicregexeq(NameData *n, struct varlena *p)
|
||||
{
|
||||
return (fixedlen_regexeq(n->data, p, NAMEDATALEN,
|
||||
REG_ICASE | REG_EXTENDED));
|
||||
}
|
||||
bool
|
||||
nameicregexne(NameData *s, struct varlena *p)
|
||||
{
|
||||
return (!nameicregexeq(s, p));
|
||||
}
|
||||
|
Reference in New Issue
Block a user