summaryrefslogtreecommitdiff
path: root/contrib/tsearch2/ts_lexize.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tsearch2/ts_lexize.c')
-rw-r--r--contrib/tsearch2/ts_lexize.c297
1 files changed, 0 insertions, 297 deletions
diff --git a/contrib/tsearch2/ts_lexize.c b/contrib/tsearch2/ts_lexize.c
deleted file mode 100644
index f2e4904eb7b..00000000000
--- a/contrib/tsearch2/ts_lexize.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * lexize stream of lexemes
- * Teodor Sigaev <[email protected]>
- */
-#include "postgres.h"
-
-#include <ctype.h>
-#include <locale.h>
-
-#include "ts_cfg.h"
-#include "dict.h"
-
-/*
- * LexizeInit
- *      Put *ld into its initial, empty state and attach it to cfg.
- *
- * Both lexeme lists (towork and waste) are marked empty, no dictionary is
- * selected for multiword processing, and no temporary result is remembered.
- * Nothing that *ld may have pointed to before is freed here.
- */
-void
-LexizeInit(LexizeData * ld, TSCfgInfo * cfg)
-{
-    /* no pending temporary result */
-    ld->tmpRes = NULL;
-    ld->lastRes = NULL;
-
-    /* both lists start out empty */
-    ld->waste.head = NULL;
-    ld->waste.tail = NULL;
-    ld->towork.head = NULL;
-    ld->towork.tail = NULL;
-    ld->curSub = NULL;
-
-    /* usual (single word) mode, scanning dictionaries from position 0 */
-    ld->posDict = 0;
-    ld->curDictId = InvalidOid;
-    ld->cfg = cfg;
-}
-
-/*
- * Append newpl to the end of list.  newpl becomes the new tail and its
- * next link is cleared.
- */
-static void
-LPLAddTail(ListParsedLex * list, ParsedLex * newpl)
-{
-    if (list->tail == NULL)
-        list->head = newpl;         /* list was empty */
-    else
-        list->tail->next = newpl;   /* chain after the old tail */
-
-    list->tail = newpl;
-    newpl->next = NULL;
-}
-
-/*
- * Unlink and return the first entry of list, or NULL if the list is empty.
- * The returned entry's next link is left as it was.
- */
-static ParsedLex *
-LPLRemoveHead(ListParsedLex * list)
-{
-    ParsedLex  *first = list->head;
-
-    if (first != NULL)
-        list->head = first->next;
-
-    /* the list is (now) empty, so there is no tail either */
-    if (list->head == NULL)
-        list->tail = NULL;
-
-    return first;
-}
-
-
-/*
- * LexizeAddLemm
- *      Queue one parsed token for later processing by LexizeExec.
- *
- * type is the token type; lemm and lenlemm are the token text and its
- * length.  The lemm pointer is stored as given, the text is not copied.
- * The new entry is appended to ld->towork and ld->curSub is set to it.
- */
-void
-LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm)
-{
-    /* allocate the list node exactly once */
-    ParsedLex  *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
-
-    newpl->type = type;
-    newpl->lemm = lemm;
-    newpl->lenlemm = lenlemm;
-    LPLAddTail(&ld->towork, newpl);
-    ld->curSub = ld->towork.tail;
-}
-
-/*
- * Move the first entry of ld->towork to the end of ld->waste and restart
- * dictionary scanning at position 0.  The callers in this file only invoke
- * this while ld->towork is non-empty.
- */
-static void
-RemoveHead(LexizeData * ld)
-{
-    ParsedLex  *done = LPLRemoveHead(&ld->towork);
-
-    LPLAddTail(&ld->waste, done);
-    ld->posDict = 0;
-}
-
-/*
- * Dispose of the waste list.
- *
- * If correspondLexem is NULL every entry of the list is pfree'd; otherwise
- * the head of the list is stored into *correspondLexem and nothing is freed.
- * In both cases ld->waste is left empty.
- */
-static void
-setCorrLex(LexizeData * ld, ParsedLex ** correspondLexem)
-{
-    if (correspondLexem == NULL)
-    {
-        ParsedLex  *cur;
-        ParsedLex  *following;
-
-        /* nobody wants the consumed entries: release them one by one */
-        for (cur = ld->waste.head; cur != NULL; cur = following)
-        {
-            following = cur->next;  /* fetch the link before freeing */
-            pfree(cur);
-        }
-    }
-    else
-    {
-        /* pass the whole chain to the caller */
-        *correspondLexem = ld->waste.head;
-    }
-
-    ld->waste.head = NULL;
-    ld->waste.tail = NULL;
-}
-
-/*
- * Move entries from the front of ld->towork to ld->waste, up to and
- * including stop.  When stop is reached, ld->curSub is set to the entry
- * that followed it.  If stop is never met, the whole towork list is moved.
- */
-static void
-moveToWaste(LexizeData * ld, ParsedLex * stop)
-{
-    while (ld->towork.head != NULL)
-    {
-        bool        reached = (ld->towork.head == stop);
-
-        /*
-         * Remember the successor before moving the entry: appending it to
-         * the waste list clears its next link.
-         */
-        if (reached)
-            ld->curSub = stop->next;
-
-        RemoveHead(ld);
-
-        if (reached)
-            break;
-    }
-}
-
-/*
- * Remember res as the temporary result and lex as the entry that produced
- * it.  A previously stored temporary result is freed first: each lexeme
- * string up to the terminating entry (lexeme == NULL), then the array.
- */
-static void
-setNewTmpRes(LexizeData * ld, ParsedLex * lex, TSLexeme * res)
-{
-    TSLexeme   *old = ld->tmpRes;
-
-    if (old != NULL)
-    {
-        TSLexeme   *item = old;
-
-        while (item->lexeme != NULL)
-        {
-            pfree(item->lexeme);
-            item++;
-        }
-        pfree(old);
-    }
-
-    ld->lastRes = lex;
-    ld->tmpRes = res;
-}
-
-/*
- * LexizeExec
- *      Feed the queued tokens to the configured dictionaries and return the
- *      next result.
- *
- * Returns the TSLexeme array produced by a dictionary, or NULL when the
- * queue has been worked through without any dictionary producing a result.
- *
- * Before returning, the entries consumed so far are handed to setCorrLex():
- * if correspondLexem is not NULL it receives the head of that list and the
- * entries are not freed here; otherwise they are pfree'd.
- *
- * The function works in one of two modes:
- *
- * - usual mode (ld->curDictId == InvalidOid): the entry at the head of
- *   ld->towork is offered to the dictionaries mapped to its type, in order,
- *   starting at ld->posDict;
- *
- * - multiword mode (ld->curDictId is set): a dictionary has set getnext, so
- *   the entries starting at ld->curSub are fed to that one dictionary.
- */
-TSLexeme *
-LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
-{
-    int         i;
-    ListDictionary *map;
-    DictInfo   *dict;
-    TSLexeme   *res;
-
-    if (ld->curDictId == InvalidOid)
-    {
-        /*
-         * usual mode: dictionary wants only one word, but we should keep in
-         * mind that we should go through all stack
-         */
-
-        while (ld->towork.head)
-        {
-            ParsedLex  *curVal = ld->towork.head;
-
-            if (curVal->type == 0 || curVal->type >= ld->cfg->len)
-            {
-                /* skip this type of lexeme */
-                RemoveHead(ld);
-                continue;
-            }
-
-            /*
-             * Look up the map entry only after the type has been range
-             * checked, so that no out-of-range pointer is ever formed.
-             */
-            map = ld->cfg->map + curVal->type;
-
-            if (map->len == 0)
-            {
-                /* no dictionaries for this type of lexeme: skip it */
-                RemoveHead(ld);
-                continue;
-            }
-
-            for (i = ld->posDict; i < map->len; i++)
-            {
-                dict = finddict(DatumGetObjectId(map->dict_id[i]));
-
-                ld->dictState.isend = ld->dictState.getnext = false;
-                ld->dictState.private = NULL;
-                res = (TSLexeme *) DatumGetPointer(FunctionCall4(
-                                                    &(dict->lexize_info),
-                                                    PointerGetDatum(dict->dictionary),
-                                                    PointerGetDatum(curVal->lemm),
-                                                    Int32GetDatum(curVal->lenlemm),
-                                                    PointerGetDatum(&ld->dictState)
-                                                    ));
-
-                if (ld->dictState.getnext)
-                {
-                    /*
-                     * dictionary wants next word, so setup and store current
-                     * position and go to multiword mode
-                     */
-
-                    ld->curDictId = DatumGetObjectId(map->dict_id[i]);
-                    ld->posDict = i + 1;
-                    ld->curSub = curVal->next;
-                    if (res)
-                        setNewTmpRes(ld, curVal, res);
-                    return LexizeExec(ld, correspondLexem);
-                }
-
-                if (!res)       /* dictionary doesn't know this lexeme */
-                    continue;
-
-                RemoveHead(ld);
-                setCorrLex(ld, correspondLexem);
-                return res;
-            }
-
-            /* no dictionary recognized the lexeme: drop it */
-            RemoveHead(ld);
-        }
-    }
-    else
-    {                           /* curDictId is valid */
-        dict = finddict(ld->curDictId);
-
-        /*
-         * Dictionary ld->curDictId asks us about following words
-         */
-
-        while (ld->curSub)
-        {
-            ParsedLex  *curVal = ld->curSub;
-
-            if (curVal->type != 0)
-            {
-                bool        dictExists = false;
-
-                if (curVal->type >= ld->cfg->len)
-                {
-                    /* skip this type of lexeme */
-                    ld->curSub = curVal->next;
-                    continue;
-                }
-
-                /* type is in range, the map entry can be looked up now */
-                map = ld->cfg->map + curVal->type;
-
-                if (map->len == 0)
-                {
-                    /* no dictionaries for this type of lexeme: skip it */
-                    ld->curSub = curVal->next;
-                    continue;
-                }
-
-                /*
-                 * We should be sure that the current type of lexeme is
-                 * recognized by our dictionary: we just check whether it
-                 * exists in the list of dictionaries.
-                 */
-                for (i = 0; i < map->len && !dictExists; i++)
-                    if (ld->curDictId == DatumGetObjectId(map->dict_id[i]))
-                        dictExists = true;
-
-                if (!dictExists)
-                {
-                    /*
-                     * Dictionary can't work with current type of lexeme,
-                     * return to basic mode and redo all stored lexemes
-                     */
-                    ld->curDictId = InvalidOid;
-                    return LexizeExec(ld, correspondLexem);
-                }
-            }
-
-            /* an entry of type 0 is reported to the dictionary as the end */
-            ld->dictState.isend = (curVal->type == 0) ? true : false;
-            ld->dictState.getnext = false;
-
-            res = (TSLexeme *) DatumGetPointer(FunctionCall4(
-                                                &(dict->lexize_info),
-                                                PointerGetDatum(dict->dictionary),
-                                                PointerGetDatum(curVal->lemm),
-                                                Int32GetDatum(curVal->lenlemm),
-                                                PointerGetDatum(&ld->dictState)
-                                                ));
-
-            if (ld->dictState.getnext)
-            {
-                /* Dictionary wants one more */
-                ld->curSub = curVal->next;
-                if (res)
-                    setNewTmpRes(ld, curVal, res);
-                continue;
-            }
-
-            if (res || ld->tmpRes)
-            {
-                /*
-                 * Dictionary normalizes lexemes, so we remove from stack all
-                 * used lexemes, return to basic mode and redo end of stack
-                 * (if it exists)
-                 */
-                if (res)
-                {
-                    moveToWaste(ld, ld->curSub);
-                }
-                else
-                {
-                    /* fall back to the last remembered temporary result */
-                    res = ld->tmpRes;
-                    moveToWaste(ld, ld->lastRes);
-                }
-
-                /* reset to initial state */
-                ld->curDictId = InvalidOid;
-                ld->posDict = 0;
-                ld->lastRes = NULL;
-                ld->tmpRes = NULL;
-                setCorrLex(ld, correspondLexem);
-                return res;
-            }
-
-            /*
-             * Dictionary doesn't want the next lexeme and didn't recognize
-             * anything, redo from ld->towork.head
-             */
-            ld->curDictId = InvalidOid;
-            return LexizeExec(ld, correspondLexem);
-        }
-    }
-
-    setCorrLex(ld, correspondLexem);
-    return NULL;
-}