diff options
Diffstat (limited to 'contrib/tsearch2/ts_lexize.c')
-rw-r--r-- | contrib/tsearch2/ts_lexize.c | 297 |
1 files changed, 0 insertions, 297 deletions
diff --git a/contrib/tsearch2/ts_lexize.c b/contrib/tsearch2/ts_lexize.c deleted file mode 100644 index f2e4904eb7b..00000000000 --- a/contrib/tsearch2/ts_lexize.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * lexize stream of lexemes - * Teodor Sigaev <[email protected]> - */ -#include "postgres.h" - -#include <ctype.h> -#include <locale.h> - -#include "ts_cfg.h" -#include "dict.h" - -void -LexizeInit(LexizeData * ld, TSCfgInfo * cfg) -{ - ld->cfg = cfg; - ld->curDictId = InvalidOid; - ld->posDict = 0; - ld->towork.head = ld->towork.tail = ld->curSub = NULL; - ld->waste.head = ld->waste.tail = NULL; - ld->lastRes = NULL; - ld->tmpRes = NULL; -} - -static void -LPLAddTail(ListParsedLex * list, ParsedLex * newpl) -{ - if (list->tail) - { - list->tail->next = newpl; - list->tail = newpl; - } - else - list->head = list->tail = newpl; - newpl->next = NULL; -} - -static ParsedLex * -LPLRemoveHead(ListParsedLex * list) -{ - ParsedLex *res = list->head; - - if (list->head) - list->head = list->head->next; - - if (list->head == NULL) - list->tail = NULL; - - return res; -} - - -void -LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm) -{ - ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex)); - - newpl = (ParsedLex *) palloc(sizeof(ParsedLex)); - newpl->type = type; - newpl->lemm = lemm; - newpl->lenlemm = lenlemm; - LPLAddTail(&ld->towork, newpl); - ld->curSub = ld->towork.tail; -} - -static void -RemoveHead(LexizeData * ld) -{ - LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork)); - - ld->posDict = 0; -} - -static void -setCorrLex(LexizeData * ld, ParsedLex ** correspondLexem) -{ - if (correspondLexem) - { - *correspondLexem = ld->waste.head; - } - else - { - ParsedLex *tmp, - *ptr = ld->waste.head; - - while (ptr) - { - tmp = ptr->next; - pfree(ptr); - ptr = tmp; - } - } - ld->waste.head = ld->waste.tail = NULL; -} - -static void -moveToWaste(LexizeData * ld, ParsedLex * stop) -{ - bool go = true; - - while (ld->towork.head && go) - { - if (ld->towork.head == stop) - { - ld->curSub = stop->next; - go = false; - } - RemoveHead(ld); - } -} - -static void -setNewTmpRes(LexizeData * ld, ParsedLex * lex, TSLexeme * res) -{ - if (ld->tmpRes) - { - TSLexeme *ptr; - - for (ptr = ld->tmpRes; ptr->lexeme; ptr++) - pfree(ptr->lexeme); - pfree(ld->tmpRes); - } - ld->tmpRes = res; - ld->lastRes = lex; -} - -TSLexeme * -LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem) -{ - int i; - ListDictionary *map; - DictInfo *dict; - TSLexeme *res; - - if (ld->curDictId == InvalidOid) - { - /* - * usial mode: dictionary wants only one word, but we should keep in - * mind that we should go through all stack - */ - - while (ld->towork.head) - { - ParsedLex *curVal = ld->towork.head; - - map = ld->cfg->map + curVal->type; - - if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0) - { - /* skip this type of lexeme */ - RemoveHead(ld); - continue; - } - - for (i = ld->posDict; i < map->len; i++) - { - dict = finddict(DatumGetObjectId(map->dict_id[i])); - - ld->dictState.isend = ld->dictState.getnext = false; - ld->dictState.private = NULL; - res = (TSLexeme *) DatumGetPointer(FunctionCall4( - &(dict->lexize_info), - PointerGetDatum(dict->dictionary), - PointerGetDatum(curVal->lemm), - Int32GetDatum(curVal->lenlemm), - PointerGetDatum(&ld->dictState) - )); - - if (ld->dictState.getnext) - { - /* - * dictinary wants next word, so setup and store current - * position and go to multiword mode - */ - - ld->curDictId = DatumGetObjectId(map->dict_id[i]); - ld->posDict = i + 1; - ld->curSub = curVal->next; - if (res) - setNewTmpRes(ld, curVal, res); - return LexizeExec(ld, correspondLexem); - } - - if (!res) /* dictionary doesn't know this lexeme */ - continue; - - RemoveHead(ld); - setCorrLex(ld, correspondLexem); - return res; - } - - RemoveHead(ld); - } - } - else - { /* curDictId is valid */ - dict = finddict(ld->curDictId); - - /* - * Dictionary ld->curDictId asks us about following words - */ - - while (ld->curSub) - { - ParsedLex *curVal = ld->curSub; - - map = ld->cfg->map + curVal->type; - - if (curVal->type != 0) - { - bool dictExists = false; - - if (curVal->type >= ld->cfg->len || map->len == 0) - { - /* skip this type of lexeme */ - ld->curSub = curVal->next; - continue; - } - - /* - * We should be sure that current type of lexeme is recognized - * by our dictinonary: we just check is it exist in list of - * dictionaries ? - */ - for (i = 0; i < map->len && !dictExists; i++) - if (ld->curDictId == DatumGetObjectId(map->dict_id[i])) - dictExists = true; - - if (!dictExists) - { - /* - * Dictionary can't work with current tpe of lexeme, - * return to basic mode and redo all stored lexemes - */ - ld->curDictId = InvalidOid; - return LexizeExec(ld, correspondLexem); - } - } - - ld->dictState.isend = (curVal->type == 0) ? true : false; - ld->dictState.getnext = false; - - res = (TSLexeme *) DatumGetPointer(FunctionCall4( - &(dict->lexize_info), - PointerGetDatum(dict->dictionary), - PointerGetDatum(curVal->lemm), - Int32GetDatum(curVal->lenlemm), - PointerGetDatum(&ld->dictState) - )); - - if (ld->dictState.getnext) - { - /* Dictionary wants one more */ - ld->curSub = curVal->next; - if (res) - setNewTmpRes(ld, curVal, res); - continue; - } - - if (res || ld->tmpRes) - { - /* - * Dictionary normalizes lexemes, so we remove from stack all - * used lexemes , return to basic mode and redo end of stack - * (if it exists) - */ - if (res) - { - moveToWaste(ld, ld->curSub); - } - else - { - res = ld->tmpRes; - moveToWaste(ld, ld->lastRes); - } - - /* reset to initial state */ - ld->curDictId = InvalidOid; - ld->posDict = 0; - ld->lastRes = NULL; - ld->tmpRes = NULL; - setCorrLex(ld, correspondLexem); - return res; - } - - /* - * Dict don't want next lexem and didn't recognize anything, redo - * from ld->towork.head - */ - ld->curDictId = InvalidOid; - return LexizeExec(ld, correspondLexem); - } - } - - setCorrLex(ld, correspondLexem); - return NULL; -} |