summaryrefslogtreecommitdiff
path: root/contrib/tsearch/morph.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tsearch/morph.c')
-rw-r--r--contrib/tsearch/morph.c210
1 files changed, 119 insertions, 91 deletions
diff --git a/contrib/tsearch/morph.c b/contrib/tsearch/morph.c
index a31946ac868..3a66ecbbc68 100644
--- a/contrib/tsearch/morph.c
+++ b/contrib/tsearch/morph.c
@@ -3,7 +3,7 @@
* New dictionary is include in dict.h. For languages which
* use latin charset it may be need to modify mapdict table.
* Teodor Sigaev <[email protected]>
- */
+ */
#include "postgres.h"
#include "utils/elog.h"
@@ -20,157 +20,183 @@
* All of this methods are optional, but
* if all methods are NULL, then dictionary does nothing :)
* Return value of lemmatize must be palloced or the same.
- * Return value of init must be malloced in other case
- * it will be free in end of transaction!
+ * Return value of init must be malloced in other case
+ * it will be free in end of transaction!
*/
-typedef struct {
- char localename[LOCALE_NAME_BUFLEN];
+typedef struct
+{
+ char localename[LOCALE_NAME_BUFLEN];
/* init dictionary */
- void* (*init)(void);
+ void *(*init) (void);
/* close dictionary */
- void (*close)(void*);
+ void (*close) (void *);
/* find in dictionary */
- char* (*lemmatize)(void*,char*,int*);
- int (*is_stoplemm)(void*,char*,int);
- int (*is_stemstoplemm)(void*,char*,int);
-} DICT;
+ char *(*lemmatize) (void *, char *, int *);
+ int (*is_stoplemm) (void *, char *, int);
+ int (*is_stemstoplemm) (void *, char *, int);
+} DICT;
/* insert all dictionaries */
#define DICT_BODY
#include "dict.h"
-#undef DICT_BODY
+#undef DICT_BODY
-/* fill dictionary's structure */
+/* fill dictionary's structure */
#define DICT_TABLE
-DICT dicts[] = {
+DICT dicts[] = {
{
- "C",NULL,NULL,NULL,NULL,NULL /* fake dictionary */
+ "C", NULL, NULL, NULL, NULL, NULL /* fake dictionary */
}
#include "dict.h"
};
+
#undef DICT_TABLE
/* array for storing dictinary's objects (if needed) */
-void* dictobjs[ lengthof(dicts) ];
+void *dictobjs[
+ lengthof(dicts)];
#define STOPLEXEM -2
#define BYLOCALE -1
-#define NODICT 0
-#define DEFAULTDICT 1
-
+#define NODICT 0
+#define DEFAULTDICT 1
+
#define MAXNDICT 2
-typedef int2 MAPDICT[MAXNDICT];
+typedef int2 MAPDICT[MAXNDICT];
+
#define GETDICT(x,i) *( ((int2*)(x)) + (i) )
/* map dictionaries for lexem type */
static MAPDICT mapdict[] = {
- {NODICT, NODICT}, /* not used */
- {DEFAULTDICT, NODICT}, /* LATWORD */
- {BYLOCALE, NODICT}, /* NONLATINWORD */
- {BYLOCALE, DEFAULTDICT}, /* UWORD */
- {NODICT, NODICT}, /* EMAIL */
- {NODICT, NODICT}, /* FURL */
- {NODICT, NODICT}, /* HOST */
- {NODICT, NODICT}, /* FLOAT */
- {NODICT, NODICT}, /* FINT */
- {BYLOCALE, DEFAULTDICT}, /* PARTWORD */
- {BYLOCALE, NODICT}, /* NONLATINPARTWORD */
- {DEFAULTDICT, NODICT}, /* LATPARTWORD */
- {STOPLEXEM, NODICT}, /* SPACE */
- {STOPLEXEM, NODICT}, /* SYMTAG */
- {STOPLEXEM, NODICT}, /* HTTP */
- {BYLOCALE, DEFAULTDICT}, /* DEFISWORD */
- {DEFAULTDICT, NODICT}, /* DEFISLATWORD */
- {BYLOCALE, NODICT}, /* DEFISNONLATINWORD */
- {NODICT, NODICT}, /* URI */
- {NODICT, NODICT} /* FILEPATH */
+ {NODICT, NODICT}, /* not used */
+ {DEFAULTDICT, NODICT}, /* LATWORD */
+ {BYLOCALE, NODICT}, /* NONLATINWORD */
+ {BYLOCALE, DEFAULTDICT}, /* UWORD */
+ {NODICT, NODICT}, /* EMAIL */
+ {NODICT, NODICT}, /* FURL */
+ {NODICT, NODICT}, /* HOST */
+ {NODICT, NODICT}, /* FLOAT */
+ {NODICT, NODICT}, /* FINT */
+ {BYLOCALE, DEFAULTDICT}, /* PARTWORD */
+ {BYLOCALE, NODICT}, /* NONLATINPARTWORD */
+ {DEFAULTDICT, NODICT}, /* LATPARTWORD */
+ {STOPLEXEM, NODICT}, /* SPACE */
+ {STOPLEXEM, NODICT}, /* SYMTAG */
+ {STOPLEXEM, NODICT}, /* HTTP */
+ {BYLOCALE, DEFAULTDICT}, /* DEFISWORD */
+ {DEFAULTDICT, NODICT}, /* DEFISLATWORD */
+ {BYLOCALE, NODICT}, /* DEFISNONLATINWORD */
+ {NODICT, NODICT}, /* URI */
+ {NODICT, NODICT} /* FILEPATH */
};
-static bool inited=false;
+static bool inited = false;
+
+void
+initmorph(void)
+{
+ int i,
+ j,
+ k;
+ MAPDICT *md;
+ bool needinit[lengthof(dicts)];
-void initmorph(void) {
- int i,j,k;
- MAPDICT *md;
- bool needinit[ lengthof(dicts) ];
#ifdef USE_LOCALE
- PG_LocaleCategories lc;
+ PG_LocaleCategories lc;
- int bylocaledict = NODICT;
+ int bylocaledict = NODICT;
#endif
- if ( inited ) return;
- for(i=1; i<lengthof(dicts);i++)
+ if (inited)
+ return;
+ for (i = 1; i < lengthof(dicts); i++)
needinit[i] = false;
-
+
#ifdef USE_LOCALE
PGLC_current(&lc);
- for(i=1;i<lengthof(dicts);i++)
- if (strcmp( dicts[i].localename, lc.lang ) == 0) {
+ for (i = 1; i < lengthof(dicts); i++)
+ if (strcmp(dicts[i].localename, lc.lang) == 0)
+ {
bylocaledict = i;
break;
}
PGLC_free_categories(&lc);
#endif
- for(i=1; i<lengthof(mapdict);i++) {
- k=0;
+ for (i = 1; i < lengthof(mapdict); i++)
+ {
+ k = 0;
md = &mapdict[i];
- for(j=0;j<MAXNDICT;j++) {
- GETDICT(md,k) = GETDICT(md,j);
- if ( GETDICT(md,k) == NODICT ) {
+ for (j = 0; j < MAXNDICT; j++)
+ {
+ GETDICT(md, k) = GETDICT(md, j);
+ if (GETDICT(md, k) == NODICT)
break;
- } else if ( GETDICT(md,k) == BYLOCALE ) {
+ else if (GETDICT(md, k) == BYLOCALE)
+ {
#ifdef USE_LOCALE
- if ( bylocaledict == NODICT )
+ if (bylocaledict == NODICT)
continue;
- GETDICT(md,k) = bylocaledict;
+ GETDICT(md, k) = bylocaledict;
#else
continue;
#endif
}
- if ( GETDICT(md,k) >= (int2)lengthof(dicts) )
+ if (GETDICT(md, k) >= (int2) lengthof(dicts))
continue;
- needinit[ GETDICT(md,k) ] = true;
- k++;
+ needinit[GETDICT(md, k)] = true;
+ k++;
}
- for(;k<MAXNDICT;k++)
- if ( GETDICT(md,k) != STOPLEXEM )
- GETDICT(md,k) = NODICT;
+ for (; k < MAXNDICT; k++)
+ if (GETDICT(md, k) != STOPLEXEM)
+ GETDICT(md, k) = NODICT;
}
- for(i=1; i<lengthof(dicts);i++)
- if ( needinit[i] && dicts[i].init )
- dictobjs[i] = (*(dicts[i].init))();
-
+ for (i = 1; i < lengthof(dicts); i++)
+ if (needinit[i] && dicts[i].init)
+ dictobjs[i] = (*(dicts[i].init)) ();
+
inited = true;
return;
}
-char* lemmatize( char* word, int *len, int type ) {
- int2 nd;
- int i;
- DICT *dict;
+char *
+lemmatize(char *word, int *len, int type)
+{
+ int2 nd;
+ int i;
+ DICT *dict;
- for(i=0;i<MAXNDICT;i++) {
- nd = GETDICT( &mapdict[type], i );
- if ( nd == NODICT ) {
- /* there is no dictionary */
+ for (i = 0; i < MAXNDICT; i++)
+ {
+ nd = GETDICT(&mapdict[type], i);
+ if (nd == NODICT)
+ {
+ /* there is no dictionary */
return word;
- } else if ( nd == STOPLEXEM ) {
+ }
+ else if (nd == STOPLEXEM)
+ {
/* word is stopword */
return NULL;
- } else {
- dict = &dicts[ nd ];
- if ( dict->is_stoplemm && (*(dict->is_stoplemm))(dictobjs[nd], word, *len) )
+ }
+ else
+ {
+ dict = &dicts[nd];
+ if (dict->is_stoplemm && (*(dict->is_stoplemm)) (dictobjs[nd], word, *len))
return NULL;
- if ( dict->lemmatize ) {
- int oldlen = *len;
- char *newword = (*(dict->lemmatize))(dictobjs[nd], word, len);
+ if (dict->lemmatize)
+ {
+ int oldlen = *len;
+ char *newword = (*(dict->lemmatize)) (dictobjs[nd], word, len);
+
/* word is recognized by distionary */
- if ( newword != word || *len != oldlen ) {
- if ( dict->is_stemstoplemm &&
- (*(dict->is_stemstoplemm))(dictobjs[nd], word, *len) ) {
- if ( newword != word && newword)
+ if (newword != word || *len != oldlen)
+ {
+ if (dict->is_stemstoplemm &&
+ (*(dict->is_stemstoplemm)) (dictobjs[nd], word, *len))
+ {
+ if (newword != word && newword)
pfree(newword);
return NULL;
}
@@ -183,6 +209,8 @@ char* lemmatize( char* word, int *len, int type ) {
return word;
}
-bool is_stoptype(int type) {
- return ( GETDICT( &mapdict[type], 0 ) == STOPLEXEM ) ? true : false;
+bool
+is_stoptype(int type)
+{
+ return (GETDICT(&mapdict[type], 0) == STOPLEXEM) ? true : false;
}