summary | refs | log | tree | commit | diff
path: root/contrib/tsearch2/wparser_def.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tsearch2/wparser_def.c')
-rw-r--r-- contrib/tsearch2/wparser_def.c | 356
1 files changed, 201 insertions, 155 deletions
diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c
index c3b03067600..4680d746b3e 100644
--- a/contrib/tsearch2/wparser_def.c
+++ b/contrib/tsearch2/wparser_def.c
@@ -1,5 +1,5 @@
-/*
- * default word parser
+/*
+ * default word parser
* Teodor Sigaev <[email protected]>
*/
#include <errno.h>
@@ -17,40 +17,44 @@
#include "wordparser/deflex.h"
PG_FUNCTION_INFO_V1(prsd_lextype);
-Datum prsd_lextype(PG_FUNCTION_ARGS);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+Datum
+prsd_lextype(PG_FUNCTION_ARGS)
+{
+ LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
+ int i;
+
+ for (i = 1; i <= LASTNUM; i++)
+ {
+ descr[i - 1].lexid = i;
+ descr[i - 1].alias = pstrdup(tok_alias[i]);
+ descr[i - 1].descr = pstrdup(lex_descr[i]);
+ }
-Datum
-prsd_lextype(PG_FUNCTION_ARGS) {
- LexDescr *descr=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
- int i;
+ descr[LASTNUM].lexid = 0;
- for(i=1;i<=LASTNUM;i++) {
- descr[i-1].lexid = i;
- descr[i-1].alias = pstrdup(tok_alias[i]);
- descr[i-1].descr = pstrdup(lex_descr[i]);
- }
-
- descr[LASTNUM].lexid=0;
-
PG_RETURN_POINTER(descr);
}
PG_FUNCTION_INFO_V1(prsd_start);
-Datum prsd_start(PG_FUNCTION_ARGS);
-Datum
-prsd_start(PG_FUNCTION_ARGS) {
- start_parse_str( (char*)PG_GETARG_POINTER(0), PG_GETARG_INT32(1) );
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum
+prsd_start(PG_FUNCTION_ARGS)
+{
+ start_parse_str((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1));
PG_RETURN_POINTER(NULL);
}
PG_FUNCTION_INFO_V1(prsd_getlexeme);
-Datum prsd_getlexeme(PG_FUNCTION_ARGS);
-Datum
-prsd_getlexeme(PG_FUNCTION_ARGS) {
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum
+prsd_getlexeme(PG_FUNCTION_ARGS)
+{
/* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
- char **t=(char**)PG_GETARG_POINTER(1);
- int *tlen=(int*)PG_GETARG_POINTER(2);
- int type=tsearch2_yylex();
+ char **t = (char **) PG_GETARG_POINTER(1);
+ int *tlen = (int *) PG_GETARG_POINTER(2);
+ int type = tsearch2_yylex();
*t = token;
*tlen = tokenlen;
@@ -58,34 +62,39 @@ prsd_getlexeme(PG_FUNCTION_ARGS) {
}
PG_FUNCTION_INFO_V1(prsd_end);
-Datum prsd_end(PG_FUNCTION_ARGS);
-Datum
-prsd_end(PG_FUNCTION_ARGS) {
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum
+prsd_end(PG_FUNCTION_ARGS)
+{
/* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
end_parse();
PG_RETURN_VOID();
}
#define LEAVETOKEN(x) ( (x)==12 )
-#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
-#define ENDPUNCTOKEN(x) ( (x)==12 )
+#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x) ( (x)==12 )
#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
-#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) )
+#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) )
#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
-typedef struct {
- HLWORD *words;
- int len;
-} hlCheck;
+typedef struct
+{
+ HLWORD *words;
+ int len;
+} hlCheck;
static bool
-checkcondition_HL(void *checkval, ITEM *val) {
- int i;
- for(i=0;i<((hlCheck*)checkval)->len;i++) {
- if ( ((hlCheck*)checkval)->words[i].item==val )
+checkcondition_HL(void *checkval, ITEM * val)
+{
+ int i;
+
+ for (i = 0; i < ((hlCheck *) checkval)->len; i++)
+ {
+ if (((hlCheck *) checkval)->words[i].item == val)
return true;
}
return false;
@@ -93,21 +102,28 @@ checkcondition_HL(void *checkval, ITEM *val) {
static bool
-hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
- int i,j;
- ITEM *item=GETQUERY(query);
- int pos=*p;
- *q=0;
- *p=0x7fffffff;
-
- for(j=0;j<query->size;j++) {
- if ( item->type != VAL ) {
+hlCover(HLPRSTEXT * prs, QUERYTYPE * query, int *p, int *q)
+{
+ int i,
+ j;
+ ITEM *item = GETQUERY(query);
+ int pos = *p;
+
+ *q = 0;
+ *p = 0x7fffffff;
+
+ for (j = 0; j < query->size; j++)
+ {
+ if (item->type != VAL)
+ {
item++;
continue;
}
- for(i=pos;i<prs->curwords;i++) {
- if ( prs->words[i].item == item ) {
- if ( i>*q)
+ for (i = pos; i < prs->curwords; i++)
+ {
+ if (prs->words[i].item == item)
+ {
+ if (i > *q)
*q = i;
break;
}
@@ -115,32 +131,39 @@ hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
item++;
}
- if ( *q==0 )
+ if (*q == 0)
return false;
- item=GETQUERY(query);
- for(j=0;j<query->size;j++) {
- if ( item->type != VAL ) {
+ item = GETQUERY(query);
+ for (j = 0; j < query->size; j++)
+ {
+ if (item->type != VAL)
+ {
item++;
continue;
}
- for(i=*q;i>=pos;i--) {
- if ( prs->words[i].item == item ) {
- if ( i<*p )
- *p=i;
+ for (i = *q; i >= pos; i--)
+ {
+ if (prs->words[i].item == item)
+ {
+ if (i < *p)
+ *p = i;
break;
}
}
item++;
- }
+ }
+
+ if (*p <= *q)
+ {
+ hlCheck ch = {&(prs->words[*p]), *q - *p + 1};
- if ( *p<=*q ) {
- hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
- if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) {
+ if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL))
return true;
- } else {
+ else
+ {
(*p)++;
- return hlCover(prs,query,p,q);
+ return hlCover(prs, query, p, q);
}
}
@@ -148,45 +171,54 @@ hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
}
PG_FUNCTION_INFO_V1(prsd_headline);
-Datum prsd_headline(PG_FUNCTION_ARGS);
-Datum
-prsd_headline(PG_FUNCTION_ARGS) {
- HLPRSTEXT *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
- text *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
- QUERYTYPE *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum
+prsd_headline(PG_FUNCTION_ARGS)
+{
+ HLPRSTEXT *prs = (HLPRSTEXT *) PG_GETARG_POINTER(0);
+ text *opt = (text *) PG_GETARG_POINTER(1); /* can't be toasted */
+ QUERYTYPE *query = (QUERYTYPE *) PG_GETARG_POINTER(2); /* can't be toasted */
+
/* from opt + start and and tag */
- int min_words=15;
- int max_words=35;
- int shortword=3;
-
- int p=0,q=0;
- int bestb=-1,beste=-1;
- int bestlen=-1;
- int pose=0, poslen, curlen;
-
- int i;
-
- /*config*/
- prs->startsel=NULL;
- prs->stopsel=NULL;
- if ( opt ) {
- Map *map,*mptr;
-
- parse_cfgdict(opt,&map);
- mptr=map;
-
- while(mptr && mptr->key) {
- if ( strcasecmp(mptr->key,"MaxWords")==0 )
- max_words=pg_atoi(mptr->value,4,1);
- else if ( strcasecmp(mptr->key,"MinWords")==0 )
- min_words=pg_atoi(mptr->value,4,1);
- else if ( strcasecmp(mptr->key,"ShortWord")==0 )
- shortword=pg_atoi(mptr->value,4,1);
- else if ( strcasecmp(mptr->key,"StartSel")==0 )
- prs->startsel=pstrdup(mptr->value);
- else if ( strcasecmp(mptr->key,"StopSel")==0 )
- prs->stopsel=pstrdup(mptr->value);
-
+ int min_words = 15;
+ int max_words = 35;
+ int shortword = 3;
+
+ int p = 0,
+ q = 0;
+ int bestb = -1,
+ beste = -1;
+ int bestlen = -1;
+ int pose = 0,
+ poslen,
+ curlen;
+
+ int i;
+
+ /* config */
+ prs->startsel = NULL;
+ prs->stopsel = NULL;
+ if (opt)
+ {
+ Map *map,
+ *mptr;
+
+ parse_cfgdict(opt, &map);
+ mptr = map;
+
+ while (mptr && mptr->key)
+ {
+ if (strcasecmp(mptr->key, "MaxWords") == 0)
+ max_words = pg_atoi(mptr->value, 4, 1);
+ else if (strcasecmp(mptr->key, "MinWords") == 0)
+ min_words = pg_atoi(mptr->value, 4, 1);
+ else if (strcasecmp(mptr->key, "ShortWord") == 0)
+ shortword = pg_atoi(mptr->value, 4, 1);
+ else if (strcasecmp(mptr->key, "StartSel") == 0)
+ prs->startsel = pstrdup(mptr->value);
+ else if (strcasecmp(mptr->key, "StopSel") == 0)
+ prs->stopsel = pstrdup(mptr->value);
+
pfree(mptr->key);
pfree(mptr->value);
@@ -194,104 +226,118 @@ prsd_headline(PG_FUNCTION_ARGS) {
}
pfree(map);
- if ( min_words >= max_words )
+ if (min_words >= max_words)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("must be MinWords < MaxWords")));
- if ( min_words<=0 )
+ if (min_words <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("must be MinWords > 0")));
- if ( shortword<0 )
+ if (shortword < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("must be ShortWord >= 0")));
}
- while( hlCover(prs,query,&p,&q) ) {
+ while (hlCover(prs, query, &p, &q))
+ {
/* find cover len in words */
- curlen=0;
- poslen=0;
- for(i=p;i<=q && curlen < max_words ; i++) {
- if ( !NONWORDTOKEN(prs->words[i].type) )
+ curlen = 0;
+ poslen = 0;
+ for (i = p; i <= q && curlen < max_words; i++)
+ {
+ if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
- if ( prs->words[i].item && !prs->words[i].repeated )
- poslen++;
- pose=i;
+ if (prs->words[i].item && !prs->words[i].repeated)
+ poslen++;
+ pose = i;
}
- if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) {
+ if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
+ {
/* best already finded, so try one more cover */
p++;
continue;
}
- if ( curlen < max_words ) { /* find good end */
- for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
- if ( i!=q ) {
- if ( !NONWORDTOKEN(prs->words[i].type) )
+ if (curlen < max_words)
+ { /* find good end */
+ for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
+ {
+ if (i != q)
+ {
+ if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
- if ( prs->words[i].item && !prs->words[i].repeated )
+ if (prs->words[i].item && !prs->words[i].repeated)
poslen++;
}
- pose=i;
- if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
+ pose = i;
+ if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
continue;
- if ( curlen>=min_words )
+ if (curlen >= min_words)
break;
}
- } else { /* shorter cover :((( */
- for(;curlen>min_words;i--) {
- if ( !NONWORDTOKEN(prs->words[i].type) )
+ }
+ else
+ { /* shorter cover :((( */
+ for (; curlen > min_words; i--)
+ {
+ if (!NONWORDTOKEN(prs->words[i].type))
curlen--;
- if ( prs->words[i].item && !prs->words[i].repeated )
+ if (prs->words[i].item && !prs->words[i].repeated)
poslen--;
- pose=i;
- if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
+ pose = i;
+ if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
continue;
break;
}
}
- if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
- ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
- (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
- bestb=p; beste=pose;
- bestlen=poslen;
- }
+ if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
+ (bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
+ (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
+ {
+ bestb = p;
+ beste = pose;
+ bestlen = poslen;
+ }
p++;
}
- if ( bestlen<0 ) {
- curlen=0;
- poslen=0;
- for(i=0;i<prs->curwords && curlen<min_words ; i++) {
- if ( !NONWORDTOKEN(prs->words[i].type) )
+ if (bestlen < 0)
+ {
+ curlen = 0;
+ poslen = 0;
+ for (i = 0; i < prs->curwords && curlen < min_words; i++)
+ {
+ if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
- pose=i;
+ pose = i;
}
- bestb=0; beste=pose;
+ bestb = 0;
+ beste = pose;
}
- for(i=bestb;i<=beste;i++) {
- if ( prs->words[i].item )
- prs->words[i].selected=1;
- if ( prs->words[i].repeated )
- prs->words[i].skip=1;
- if ( HLIDIGNORE(prs->words[i].type) )
- prs->words[i].replace=1;
+ for (i = bestb; i <= beste; i++)
+ {
+ if (prs->words[i].item)
+ prs->words[i].selected = 1;
+ if (prs->words[i].repeated)
+ prs->words[i].skip = 1;
+ if (HLIDIGNORE(prs->words[i].type))
+ prs->words[i].replace = 1;
- prs->words[i].in=1;
+ prs->words[i].in = 1;
}
if (!prs->startsel)
- prs->startsel=pstrdup("<b>");
+ prs->startsel = pstrdup("<b>");
if (!prs->stopsel)
- prs->stopsel=pstrdup("</b>");
- prs->startsellen=strlen(prs->startsel);
- prs->stopsellen=strlen(prs->stopsel);
+ prs->stopsel = pstrdup("</b>");
+ prs->startsellen = strlen(prs->startsel);
+ prs->stopsellen = strlen(prs->stopsel);
PG_RETURN_POINTER(prs);
}
-