summaryrefslogtreecommitdiff
path: root/contrib/tsearch2/wparser_def.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tsearch2/wparser_def.c')
-rw-r--r--contrib/tsearch2/wparser_def.c390
1 files changed, 0 insertions, 390 deletions
diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c
deleted file mode 100644
index b20909ce5bc..00000000000
--- a/contrib/tsearch2/wparser_def.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * default word parser
- * Teodor Sigaev <[email protected]>
- */
-#include "postgres.h"
-
-#include "utils/builtins.h"
-
-#include "dict.h"
-#include "wparser.h"
-#include "common.h"
-#include "ts_cfg.h"
-#include "wordparser/parser.h"
-#include "wordparser/deflex.h"
-
-PG_FUNCTION_INFO_V1(prsd_lextype);
-Datum prsd_lextype(PG_FUNCTION_ARGS);
-/* prsd_lextype: return a palloc'd LexDescr array describing token types 1..LASTNUM. */
-Datum
-prsd_lextype(PG_FUNCTION_ARGS)
-{
- LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1)); /* one extra slot for the end marker */
- int i;
- /* token ids start at 1, array slots at 0: token i is stored in slot i - 1 */
- for (i = 1; i <= LASTNUM; i++)
- {
- descr[i - 1].lexid = i;
- descr[i - 1].alias = pstrdup(tok_alias[i]);
- descr[i - 1].descr = pstrdup(lex_descr[i]);
- }
- /* the last entry has lexid 0; its alias and descr are left unset */
- descr[LASTNUM].lexid = 0;
-
- PG_RETURN_POINTER(descr);
-}
-/* prsd_start: return the result of TParserInit(argument 0 as char *, argument 1 as int32). */
-PG_FUNCTION_INFO_V1(prsd_start);
-Datum prsd_start(PG_FUNCTION_ARGS);
-Datum
-prsd_start(PG_FUNCTION_ARGS)
-{
- PG_RETURN_POINTER(TParserInit((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1))); /* presumably text buffer and its length - confirm against TParserInit */
-}
-/* prsd_getlexeme: advance the parser (argument 0) and report the token through arguments 1 and 2. */
-PG_FUNCTION_INFO_V1(prsd_getlexeme);
-Datum prsd_getlexeme(PG_FUNCTION_ARGS);
-Datum
-prsd_getlexeme(PG_FUNCTION_ARGS)
-{
- TParser *p = (TParser *) PG_GETARG_POINTER(0);
- char **t = (char **) PG_GETARG_POINTER(1); /* out: receives p->lexeme */
- int *tlen = (int *) PG_GETARG_POINTER(2); /* out: receives p->lenbytelexeme */
- /* TParserGet returning false ends the scan: return 0 and leave *t and *tlen untouched */
- if (!TParserGet(p))
- PG_RETURN_INT32(0);
- /* the pointer stored in *t is the parser's own p->lexeme, not a copy */
- *t = p->lexeme;
- *tlen = p->lenbytelexeme;
- /* the return value is the token type of the lexeme just stored */
- PG_RETURN_INT32(p->type);
-}
-/* prsd_end: pass the parser given as argument 0 to TParserClose; returns void. */
-PG_FUNCTION_INFO_V1(prsd_end);
-Datum prsd_end(PG_FUNCTION_ARGS);
-Datum
-prsd_end(PG_FUNCTION_ARGS)
-{
- TParser *p = (TParser *) PG_GETARG_POINTER(0);
-
- TParserClose(p);
- PG_RETURN_VOID();
-}
-/* Predicates on numeric token type ids; presumably the ids from wordparser/deflex.h - TODO confirm. */
-#define LEAVETOKEN(x) ( (x)==12 ) /* not referenced elsewhere in this file */
-#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) /* not referenced elsewhere in this file */
-#define ENDPUNCTOKEN(x) ( (x)==12 ) /* not referenced elsewhere in this file */
-/* TS_IDIGNORE is used only inside NOENDTOKEN below. */
-/* HLIDIGNORE and HTMLHLIDIGNORE differ only in type 13; both decide which words get replace = 1 in prsd_headline. */
-#define TS_IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
-#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
-#define HTMLHLIDIGNORE(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
-#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) ) /* tokens that are not counted as words in prsd_headline */
-#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || TS_IDIGNORE(x) ) /* tokens prsd_headline avoids ending a fragment on */
-/* hlCheck: a window of len headline words starting at words; scanned by checkcondition_HL. */
-typedef struct
-{
- HLWORD *words;
- int len;
-} hlCheck;
-/* checkcondition_HL: callback given to TS_execute by hlCover; checkval must point to an hlCheck. */
-static bool
-checkcondition_HL(void *checkval, ITEM * val)
-{
- int i;
- /* true as soon as one word in the window refers to exactly this query item (pointer comparison) */
- for (i = 0; i < ((hlCheck *) checkval)->len; i++)
- {
- if (((hlCheck *) checkval)->words[i].item == val)
- return true;
- }
- return false;
-}
-/* hlCover: starting at word index *p, look for a word range [*p, *q] on which the query evaluates to true. */
-/* Returns true with *p and *q set to that range; returns false when no such range is found. */
-static bool
-hlCover(HLPRSTEXT * prs, QUERYTYPE * query, int *p, int *q)
-{
- int i,
- j;
- ITEM *item = GETQUERY(query);
- int pos = *p; /* remember where the search starts; *p is reset below */
- /* *q becomes the largest, *p the smallest word index found, so start from the opposite extremes */
- *q = 0;
- *p = 0x7fffffff;
- /* pass 1: for each VAL item take its first occurrence at or after pos; *q is the furthest of those */
- for (j = 0; j < query->size; j++)
- {
- if (item->type != VAL)
- {
- item++;
- continue;
- }
- for (i = pos; i < prs->curwords; i++)
- {
- if (prs->words[i].item == item)
- {
- if (i > *q)
- *q = i;
- break;
- }
- }
- item++;
- }
- /* NOTE(review): 0 doubles as "nothing found", so occurrences only at word index 0 also end the search here */
- if (*q == 0)
- return false;
- /* pass 2: for each VAL item take its last occurrence in [pos, *q]; *p is the earliest of those */
- item = GETQUERY(query);
- for (j = 0; j < query->size; j++)
- {
- if (item->type != VAL)
- {
- item++;
- continue;
- }
- for (i = *q; i >= pos; i--)
- {
- if (prs->words[i].item == item)
- {
- if (i < *p)
- *p = i;
- break;
- }
- }
- item++;
- }
- /* a candidate range exists: evaluate the whole query against just the words in [*p, *q] */
- if (*p <= *q)
- {
- hlCheck ch;
- /* window handed to checkcondition_HL through TS_execute */
- ch.words = &(prs->words[*p]);
- ch.len = *q - *p + 1;
- if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL))
- return true;
- else
- {
- (*p)++; /* query not satisfied on this range: retry one word further on */
- return hlCover(prs, query, p, q);
- }
- }
-
- return false;
-}
-/* prsd_headline: choose the word range of prs->words to show as headline and flag the words inside it. */
-PG_FUNCTION_INFO_V1(prsd_headline);
-Datum prsd_headline(PG_FUNCTION_ARGS); /* arguments: 0 = HLPRSTEXT to fill in, 1 = option text or NULL, 2 = query */
-Datum
-prsd_headline(PG_FUNCTION_ARGS) /* returns prs (argument 0) */
-{
- HLPRSTEXT *prs = (HLPRSTEXT *) PG_GETARG_POINTER(0);
- text *opt = (text *) PG_GETARG_POINTER(1); /* can't be toasted */
- QUERYTYPE *query = (QUERYTYPE *) PG_GETARG_POINTER(2); /* can't be toasted */
- /* defaults; each can be overridden by the matching key in opt */
- /* from opt + start and end tag */
- int min_words = 15;
- int max_words = 35;
- int shortword = 3;
- /* p..q: current cover from hlCover; bestb..beste: best range so far; bestlen: its poslen (-1 = none yet) */
- int p = 0,
- q = 0;
- int bestb = -1,
- beste = -1;
- int bestlen = -1;
- int pose = 0,
- posb,
- poslen,
- curlen;
- /* posb..pose: candidate range; curlen counts tokens that are not NONWORDTOKEN, poslen counts non-repeated words with a query item */
- int i;
- int highlight = 0; /* set from the HighlightAll option; non-zero selects the whole text */
-
- /* config */
- prs->startsel = NULL;
- prs->stopsel = NULL;
- if (opt)
- {
- Map *map,
- *mptr;
- /* parse_cfgdict fills map; it is walked below until an entry with a NULL key */
- parse_cfgdict(opt, &map);
- mptr = map;
- /* option keys are compared case-insensitively; unknown keys are ignored */
- while (mptr && mptr->key)
- {
- if (pg_strcasecmp(mptr->key, "MaxWords") == 0)
- max_words = pg_atoi(mptr->value, 4, 1);
- else if (pg_strcasecmp(mptr->key, "MinWords") == 0)
- min_words = pg_atoi(mptr->value, 4, 1);
- else if (pg_strcasecmp(mptr->key, "ShortWord") == 0)
- shortword = pg_atoi(mptr->value, 4, 1);
- else if (pg_strcasecmp(mptr->key, "StartSel") == 0)
- prs->startsel = pstrdup(mptr->value);
- else if (pg_strcasecmp(mptr->key, "StopSel") == 0)
- prs->stopsel = pstrdup(mptr->value);
- else if (pg_strcasecmp(mptr->key, "HighlightAll") == 0)
- highlight = (
- pg_strcasecmp(mptr->value, "1") == 0 ||
- pg_strcasecmp(mptr->value, "on") == 0 ||
- pg_strcasecmp(mptr->value, "true") == 0 ||
- pg_strcasecmp(mptr->value, "t") == 0 ||
- pg_strcasecmp(mptr->value, "y") == 0 ||
- pg_strcasecmp(mptr->value, "yes") == 0) ?
- 1 : 0;
- /* values that must outlive the map were copied with pstrdup above */
- pfree(mptr->key);
- pfree(mptr->value);
-
- mptr++;
- }
- pfree(map);
- /* the range checks run only when options were given and a fragment (not the whole text) is wanted */
- if (highlight == 0)
- {
- if (min_words >= max_words)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("MinWords should be less than MaxWords")));
- if (min_words <= 0)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("MinWords should be positive")));
- if (shortword < 0)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("ShortWord should be >= 0")));
- }
- }
- /* fragment mode: examine every cover hlCover reports and keep the best candidate range */
- if (highlight == 0)
- {
- while (hlCover(prs, query, &p, &q))
- {
- /* find cover len in words */
- curlen = 0;
- poslen = 0;
- for (i = p; i <= q && curlen < max_words; i++)
- {
- if (!NONWORDTOKEN(prs->words[i].type))
- curlen++;
- if (prs->words[i].item && !prs->words[i].repeated)
- poslen++;
- pose = i;
- }
- /* skip this cover when it has fewer hits than the best one and the best one already ends on an acceptable token */
- if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
- {
- /* best already found, so try one more cover */
- p++;
- continue;
- }
- /* candidate starts at the cover start unless it gets stretched backwards below */
- posb = p;
- if (curlen < max_words)
- { /* find good end */
- for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
- {
- if (i != q)
- {
- if (!NONWORDTOKEN(prs->words[i].type))
- curlen++;
- if (prs->words[i].item && !prs->words[i].repeated)
- poslen++;
- }
- pose = i;
- if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
- continue;
- if (curlen >= min_words)
- break;
- }
- if (curlen < min_words && i >= prs->curwords)
- { /* got end of text and our cover is shorter
- * than min_words */
- for (i = p; i >= 0; i--)
- {
- if (!NONWORDTOKEN(prs->words[i].type))
- curlen++;
- if (prs->words[i].item && !prs->words[i].repeated)
- poslen++;
- if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
- continue;
- if (curlen >= min_words)
- break;
- }
- posb = (i >= 0) ? i : 0;
- }
- }
- else
- { /* cover has at least max_words words: trim it back */
- for (; curlen > min_words; i--)
- {
- if (!NONWORDTOKEN(prs->words[i].type))
- curlen--;
- if (prs->words[i].item && !prs->words[i].repeated)
- poslen--;
- pose = i;
- if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
- continue;
- break;
- }
- }
- /* take the candidate if it is the first one, has more hits and an acceptable end, or has an acceptable end while the best one does not */
- if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
- (bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
- (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
- {
- bestb = posb;
- beste = pose;
- bestlen = poslen;
- }
- /* continue the search for further covers one word after this cover's start */
- p++;
- }
- /* no cover was found: fall back to the start of the text, up to min_words counted words */
- if (bestlen < 0)
- {
- curlen = 0;
- for (i = 0; i < prs->curwords && curlen < min_words; i++)
- {
- if (!NONWORDTOKEN(prs->words[i].type))
- curlen++;
- pose = i;
- }
- bestb = 0;
- beste = pose;
- }
- }
- else
- {
- bestb = 0;
- beste = prs->curwords - 1;
- }
- /* flag every word of the chosen range bestb..beste */
- for (i = bestb; i <= beste; i++)
- {
- if (prs->words[i].item)
- prs->words[i].selected = 1;
- if (highlight == 0)
- {
- if (HLIDIGNORE(prs->words[i].type))
- prs->words[i].replace = 1;
- }
- else
- {
- if (HTMLHLIDIGNORE(prs->words[i].type))
- prs->words[i].replace = 1;
- }
- /* repeated words get in = 0, all other words in the range get in = 1 */
- prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
- }
- /* selection markers default to "<b>" and "</b>" when the options did not set them */
- if (!prs->startsel)
- prs->startsel = pstrdup("<b>");
- if (!prs->stopsel)
- prs->stopsel = pstrdup("</b>");
- prs->startsellen = strlen(prs->startsel);
- prs->stopsellen = strlen(prs->stopsel);
-
- PG_RETURN_POINTER(prs);
-}