diff options
Diffstat (limited to 'contrib/tsearch/morph.c')
-rw-r--r-- | contrib/tsearch/morph.c | 210 |
1 files changed, 119 insertions, 91 deletions
diff --git a/contrib/tsearch/morph.c b/contrib/tsearch/morph.c index a31946ac868..3a66ecbbc68 100644 --- a/contrib/tsearch/morph.c +++ b/contrib/tsearch/morph.c @@ -3,7 +3,7 @@ * New dictionary is include in dict.h. For languages which * use latin charset it may be need to modify mapdict table. * Teodor Sigaev <[email protected]> - */ + */ #include "postgres.h" #include "utils/elog.h" @@ -20,157 +20,183 @@ * All of this methods are optional, but * if all methods are NULL, then dictionary does nothing :) * Return value of lemmatize must be palloced or the same. - * Return value of init must be malloced in other case - * it will be free in end of transaction! + * Return value of init must be malloced in other case + * it will be free in end of transaction! */ -typedef struct { - char localename[LOCALE_NAME_BUFLEN]; +typedef struct +{ + char localename[LOCALE_NAME_BUFLEN]; /* init dictionary */ - void* (*init)(void); + void *(*init) (void); /* close dictionary */ - void (*close)(void*); + void (*close) (void *); /* find in dictionary */ - char* (*lemmatize)(void*,char*,int*); - int (*is_stoplemm)(void*,char*,int); - int (*is_stemstoplemm)(void*,char*,int); -} DICT; + char *(*lemmatize) (void *, char *, int *); + int (*is_stoplemm) (void *, char *, int); + int (*is_stemstoplemm) (void *, char *, int); +} DICT; /* insert all dictionaries */ #define DICT_BODY #include "dict.h" -#undef DICT_BODY +#undef DICT_BODY -/* fill dictionary's structure */ +/* fill dictionary's structure */ #define DICT_TABLE -DICT dicts[] = { +DICT dicts[] = { { - "C",NULL,NULL,NULL,NULL,NULL /* fake dictionary */ + "C", NULL, NULL, NULL, NULL, NULL /* fake dictionary */ } #include "dict.h" }; + #undef DICT_TABLE /* array for storing dictinary's objects (if needed) */ -void* dictobjs[ lengthof(dicts) ]; +void *dictobjs[ + lengthof(dicts)]; #define STOPLEXEM -2 #define BYLOCALE -1 -#define NODICT 0 -#define DEFAULTDICT 1 - +#define NODICT 0 +#define DEFAULTDICT 1 + #define MAXNDICT 2 -typedef int2 MAPDICT[MAXNDICT]; +typedef int2 MAPDICT[MAXNDICT]; + #define GETDICT(x,i) *( ((int2*)(x)) + (i) ) /* map dictionaries for lexem type */ static MAPDICT mapdict[] = { - {NODICT, NODICT}, /* not used */ - {DEFAULTDICT, NODICT}, /* LATWORD */ - {BYLOCALE, NODICT}, /* NONLATINWORD */ - {BYLOCALE, DEFAULTDICT}, /* UWORD */ - {NODICT, NODICT}, /* EMAIL */ - {NODICT, NODICT}, /* FURL */ - {NODICT, NODICT}, /* HOST */ - {NODICT, NODICT}, /* FLOAT */ - {NODICT, NODICT}, /* FINT */ - {BYLOCALE, DEFAULTDICT}, /* PARTWORD */ - {BYLOCALE, NODICT}, /* NONLATINPARTWORD */ - {DEFAULTDICT, NODICT}, /* LATPARTWORD */ - {STOPLEXEM, NODICT}, /* SPACE */ - {STOPLEXEM, NODICT}, /* SYMTAG */ - {STOPLEXEM, NODICT}, /* HTTP */ - {BYLOCALE, DEFAULTDICT}, /* DEFISWORD */ - {DEFAULTDICT, NODICT}, /* DEFISLATWORD */ - {BYLOCALE, NODICT}, /* DEFISNONLATINWORD */ - {NODICT, NODICT}, /* URI */ - {NODICT, NODICT} /* FILEPATH */ + {NODICT, NODICT}, /* not used */ + {DEFAULTDICT, NODICT}, /* LATWORD */ + {BYLOCALE, NODICT}, /* NONLATINWORD */ + {BYLOCALE, DEFAULTDICT}, /* UWORD */ + {NODICT, NODICT}, /* EMAIL */ + {NODICT, NODICT}, /* FURL */ + {NODICT, NODICT}, /* HOST */ + {NODICT, NODICT}, /* FLOAT */ + {NODICT, NODICT}, /* FINT */ + {BYLOCALE, DEFAULTDICT}, /* PARTWORD */ + {BYLOCALE, NODICT}, /* NONLATINPARTWORD */ + {DEFAULTDICT, NODICT}, /* LATPARTWORD */ + {STOPLEXEM, NODICT}, /* SPACE */ + {STOPLEXEM, NODICT}, /* SYMTAG */ + {STOPLEXEM, NODICT}, /* HTTP */ + {BYLOCALE, DEFAULTDICT}, /* DEFISWORD */ + {DEFAULTDICT, NODICT}, /* DEFISLATWORD */ + {BYLOCALE, NODICT}, /* DEFISNONLATINWORD */ + {NODICT, NODICT}, /* URI */ + {NODICT, NODICT} /* FILEPATH */ }; -static bool inited=false; +static bool inited = false; + +void +initmorph(void) +{ + int i, + j, + k; + MAPDICT *md; + bool needinit[lengthof(dicts)]; -void initmorph(void) { - int i,j,k; - MAPDICT *md; - bool needinit[ lengthof(dicts) ]; #ifdef USE_LOCALE - PG_LocaleCategories lc; + PG_LocaleCategories lc; - int bylocaledict = NODICT; + int bylocaledict = NODICT; #endif - if ( inited ) return; - for(i=1; i<lengthof(dicts);i++) + if (inited) + return; + for (i = 1; i < lengthof(dicts); i++) needinit[i] = false; - + #ifdef USE_LOCALE PGLC_current(&lc); - for(i=1;i<lengthof(dicts);i++) - if (strcmp( dicts[i].localename, lc.lang ) == 0) { + for (i = 1; i < lengthof(dicts); i++) + if (strcmp(dicts[i].localename, lc.lang) == 0) + { bylocaledict = i; break; } PGLC_free_categories(&lc); #endif - for(i=1; i<lengthof(mapdict);i++) { - k=0; + for (i = 1; i < lengthof(mapdict); i++) + { + k = 0; md = &mapdict[i]; - for(j=0;j<MAXNDICT;j++) { - GETDICT(md,k) = GETDICT(md,j); - if ( GETDICT(md,k) == NODICT ) { + for (j = 0; j < MAXNDICT; j++) + { + GETDICT(md, k) = GETDICT(md, j); + if (GETDICT(md, k) == NODICT) break; - } else if ( GETDICT(md,k) == BYLOCALE ) { + else if (GETDICT(md, k) == BYLOCALE) + { #ifdef USE_LOCALE - if ( bylocaledict == NODICT ) + if (bylocaledict == NODICT) continue; - GETDICT(md,k) = bylocaledict; + GETDICT(md, k) = bylocaledict; #else continue; #endif } - if ( GETDICT(md,k) >= (int2)lengthof(dicts) ) + if (GETDICT(md, k) >= (int2) lengthof(dicts)) continue; - needinit[ GETDICT(md,k) ] = true; - k++; + needinit[GETDICT(md, k)] = true; + k++; } - for(;k<MAXNDICT;k++) - if ( GETDICT(md,k) != STOPLEXEM ) - GETDICT(md,k) = NODICT; + for (; k < MAXNDICT; k++) + if (GETDICT(md, k) != STOPLEXEM) + GETDICT(md, k) = NODICT; } - for(i=1; i<lengthof(dicts);i++) - if ( needinit[i] && dicts[i].init ) - dictobjs[i] = (*(dicts[i].init))(); - + for (i = 1; i < lengthof(dicts); i++) + if (needinit[i] && dicts[i].init) + dictobjs[i] = (*(dicts[i].init)) (); + inited = true; return; } -char* lemmatize( char* word, int *len, int type ) { - int2 nd; - int i; - DICT *dict; +char * +lemmatize(char *word, int *len, int type) +{ + int2 nd; + int i; + DICT *dict; - for(i=0;i<MAXNDICT;i++) { - nd = GETDICT( &mapdict[type], i ); - if ( nd == NODICT ) { - /* there is no dictionary */ + for (i = 0; i < MAXNDICT; i++) + { + nd = GETDICT(&mapdict[type], i); + if (nd == NODICT) + { + /* there is no dictionary */ return word; - } else if ( nd == STOPLEXEM ) { + } + else if (nd == STOPLEXEM) + { /* word is stopword */ return NULL; - } else { - dict = &dicts[ nd ]; - if ( dict->is_stoplemm && (*(dict->is_stoplemm))(dictobjs[nd], word, *len) ) + } + else + { + dict = &dicts[nd]; + if (dict->is_stoplemm && (*(dict->is_stoplemm)) (dictobjs[nd], word, *len)) return NULL; - if ( dict->lemmatize ) { - int oldlen = *len; - char *newword = (*(dict->lemmatize))(dictobjs[nd], word, len); + if (dict->lemmatize) + { + int oldlen = *len; + char *newword = (*(dict->lemmatize)) (dictobjs[nd], word, len); + /* word is recognized by distionary */ - if ( newword != word || *len != oldlen ) { - if ( dict->is_stemstoplemm && - (*(dict->is_stemstoplemm))(dictobjs[nd], word, *len) ) { - if ( newword != word && newword) + if (newword != word || *len != oldlen) + { + if (dict->is_stemstoplemm && + (*(dict->is_stemstoplemm)) (dictobjs[nd], word, *len)) + { + if (newword != word && newword) pfree(newword); return NULL; } @@ -183,6 +209,8 @@ char* lemmatize( char* word, int *len, int type ) { return word; } -bool is_stoptype(int type) { - return ( GETDICT( &mapdict[type], 0 ) == STOPLEXEM ) ? true : false; +bool +is_stoptype(int type) +{ + return (GETDICT(&mapdict[type], 0) == STOPLEXEM) ? true : false; } |