diff options
Diffstat (limited to 'contrib/pg_trgm/trgm_op.c')
-rw-r--r-- | contrib/pg_trgm/trgm_op.c | 283 |
1 files changed, 161 insertions, 122 deletions
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c index 01ece90cdf5..57fb944a569 100644 --- a/contrib/pg_trgm/trgm_op.c +++ b/contrib/pg_trgm/trgm_op.c @@ -3,136 +3,162 @@ #include "utils/array.h" #include "catalog/pg_type.h" -float4 trgm_limit = 0.3; +float4 trgm_limit = 0.3; PG_FUNCTION_INFO_V1(set_limit); -Datum set_limit(PG_FUNCTION_ARGS); +Datum set_limit(PG_FUNCTION_ARGS); Datum -set_limit(PG_FUNCTION_ARGS) { - float4 nlimit = PG_GETARG_FLOAT4(0); - if ( nlimit < 0 || nlimit > 1.0 ) - elog(ERROR,"Wrong limit, should be between 0 and 1"); +set_limit(PG_FUNCTION_ARGS) +{ + float4 nlimit = PG_GETARG_FLOAT4(0); + + if (nlimit < 0 || nlimit > 1.0) + elog(ERROR, "Wrong limit, should be between 0 and 1"); trgm_limit = nlimit; PG_RETURN_FLOAT4(trgm_limit); } PG_FUNCTION_INFO_V1(show_limit); -Datum show_limit(PG_FUNCTION_ARGS); +Datum show_limit(PG_FUNCTION_ARGS); Datum -show_limit(PG_FUNCTION_ARGS) { +show_limit(PG_FUNCTION_ARGS) +{ PG_RETURN_FLOAT4(trgm_limit); } -#define WORDWAIT 0 -#define INWORD 1 +#define WORDWAIT 0 +#define INWORD 1 static int -comp_trgm(const void *a, const void *b) { - return CMPTRGM(a,b); +comp_trgm(const void *a, const void *b) +{ + return CMPTRGM(a, b); } static int -unique_array (trgm *a, int len) { - trgm *curend, *tmp; +unique_array(trgm * a, int len) +{ + trgm *curend, + *tmp; curend = tmp = a; while (tmp - a < len) - if ( CMPTRGM(tmp, curend) ) { + if (CMPTRGM(tmp, curend)) + { curend++; - CPTRGM(curend,tmp); + CPTRGM(curend, tmp); tmp++; - } else + } + else tmp++; - + return curend + 1 - a; } -TRGM* -generate_trgm(char *str, int slen) { - TRGM* trg; - char *buf,*sptr,*bufptr; - trgm *tptr; - int state=WORDWAIT; - int wl,len; +TRGM * +generate_trgm(char *str, int slen) +{ + TRGM *trg; + char *buf, + *sptr, + *bufptr; + trgm *tptr; + int state = WORDWAIT; + int wl, + len; - trg = (TRGM*) palloc(TRGMHRDSIZE+sizeof(trgm) * (slen/2 + 1) * 3); + trg = (TRGM *) palloc(TRGMHRDSIZE + sizeof(trgm) * (slen / 2 + 1) * 3); trg->flag = ARRKEY; trg->len = TRGMHRDSIZE; - if ( slen+LPADDING+RPADDING<3 || slen == 0 ) + if (slen + LPADDING + RPADDING < 3 || slen == 0) return trg; tptr = GETARR(trg); - buf = palloc(sizeof(char) * (slen+4)); + buf = palloc(sizeof(char) * (slen + 4)); sptr = str; - if ( LPADDING > 0 ) { + if (LPADDING > 0) + { *buf = ' '; - if ( LPADDING > 1 ) - *(buf+1) = ' '; + if (LPADDING > 1) + *(buf + 1) = ' '; } - bufptr = buf+LPADDING; - while( sptr-str < slen ) { - if ( state == WORDWAIT ) { - if ( + bufptr = buf + LPADDING; + while (sptr - str < slen) + { + if (state == WORDWAIT) + { + if ( #ifdef KEEPONLYALNUM - isalnum((unsigned char)*sptr) + isalnum((unsigned char) *sptr) #else - !isspace( (unsigned char)*sptr ) + !isspace((unsigned char) *sptr) #endif - ) { - *bufptr = *sptr; /* start put word in buffer */ + ) + { + *bufptr = *sptr; /* start put word in buffer */ bufptr++; state = INWORD; - if ( sptr-str == slen-1 /* last char */ ) + if (sptr - str == slen - 1 /* last char */ ) goto gettrg; } - } else { + } + else + { if ( #ifdef KEEPONLYALNUM - !isalnum((unsigned char)*sptr) + !isalnum((unsigned char) *sptr) #else - isspace( (unsigned char)*sptr ) + isspace((unsigned char) *sptr) #endif - ) { -gettrg: + ) + { + gettrg: /* word in buffer, so count trigrams */ *bufptr = ' '; - *(bufptr+1) = ' '; - wl = bufptr - (buf+LPADDING) - 2 + LPADDING + RPADDING; - if ( wl<=0 ) { - bufptr = buf+LPADDING; + *(bufptr + 1) = ' '; + wl = bufptr - (buf + LPADDING) - 2 + LPADDING + RPADDING; + if (wl <= 0) + { + bufptr = buf + LPADDING; state = WORDWAIT; sptr++; continue; } #ifdef IGNORECASE - do { /* lower word */ - int wwl = bufptr-buf; - bufptr = buf+LPADDING; - while( bufptr-buf < wwl ) { - *bufptr = tolower( (unsigned char) *bufptr ); + do + { /* lower word */ + int wwl = bufptr - buf; + + bufptr = buf + LPADDING; + while (bufptr - buf < wwl) + { + *bufptr = tolower((unsigned char) *bufptr); bufptr++; } - } while(0); + } while (0); #endif bufptr = buf; /* set trigrams */ - while( bufptr-buf < wl ) { + while (bufptr - buf < wl) + { CPTRGM(tptr, bufptr); bufptr++; tptr++; } - bufptr = buf+LPADDING; + bufptr = buf + LPADDING; state = WORDWAIT; - } else { - *bufptr = *sptr; /* put in buffer */ + } + else + { + *bufptr = *sptr; /* put in buffer */ bufptr++; - if ( sptr-str == slen-1 ) + if (sptr - str == slen - 1) goto gettrg; } } @@ -141,13 +167,14 @@ gettrg: pfree(buf); - if ( (len=tptr-GETARR(trg)) == 0 ) + if ((len = tptr - GETARR(trg)) == 0) return trg; - if ( len>0 ) { - qsort( (void*)GETARR(trg), len, sizeof(trgm), comp_trgm ); - len = unique_array( GETARR(trg), len ); - } + if (len > 0) + { + qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm); + len = unique_array(GETARR(trg), len); + } trg->len = CALCGTSIZE(ARRKEY, len); @@ -156,68 +183,78 @@ gettrg: PG_FUNCTION_INFO_V1(show_trgm); -Datum show_trgm(PG_FUNCTION_ARGS); +Datum show_trgm(PG_FUNCTION_ARGS); Datum -show_trgm(PG_FUNCTION_ARGS) { - text *in = PG_GETARG_TEXT_P(0); - TRGM *trg; - Datum *d; - ArrayType *a; - trgm *ptr; +show_trgm(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_P(0); + TRGM *trg; + Datum *d; + ArrayType *a; + trgm *ptr; trg = generate_trgm(VARDATA(in), VARSIZE(in) - VARHDRSZ); - d = (Datum*)palloc( sizeof(Datum)*(1+ARRNELEM(trg)) ); + d = (Datum *) palloc(sizeof(Datum) * (1 + ARRNELEM(trg))); ptr = GETARR(trg); - while( ptr-GETARR(trg) < ARRNELEM(trg) ) { - text *item=(text*)palloc(VARHDRSZ + 3); - VARATT_SIZEP(item) = VARHDRSZ+3; + while (ptr - GETARR(trg) < ARRNELEM(trg)) + { + text *item = (text *) palloc(VARHDRSZ + 3); + + VARATT_SIZEP(item) = VARHDRSZ + 3; CPTRGM(VARDATA(item), ptr); - d[ ptr-GETARR(trg) ] = PointerGetDatum(item); + d[ptr - GETARR(trg)] = PointerGetDatum(item); ptr++; } a = construct_array( - d, - ARRNELEM(trg), - TEXTOID, - -1, - false, - 'i' - ); + d, + ARRNELEM(trg), + TEXTOID, + -1, + false, + 'i' + ); ptr = GETARR(trg); - while( ptr-GETARR(trg) < ARRNELEM(trg) ) { - pfree(DatumGetPointer(d[ ptr-GETARR(trg) ])); + while (ptr - GETARR(trg) < ARRNELEM(trg)) + { + pfree(DatumGetPointer(d[ptr - GETARR(trg)])); ptr++; } - + pfree(d); pfree(trg); - PG_FREE_IF_COPY(in,0); + PG_FREE_IF_COPY(in, 0); PG_RETURN_POINTER(a); } float4 -cnt_sml(TRGM *trg1, TRGM *trg2) { - trgm *ptr1, *ptr2; - int count=0; - int len1, len2; - +cnt_sml(TRGM * trg1, TRGM * trg2) +{ + trgm *ptr1, + *ptr2; + int count = 0; + int len1, + len2; + ptr1 = GETARR(trg1); ptr2 = GETARR(trg2); - + len1 = ARRNELEM(trg1); len2 = ARRNELEM(trg2); - while( ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2 ) { - int res = CMPTRGM(ptr1,ptr2); - if ( res < 0 ) { + while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2) + { + int res = CMPTRGM(ptr1, ptr2); + + if (res < 0) ptr1++; - } else if ( res > 0 ) { + else if (res > 0) ptr2++; - } else { + else + { ptr1++; ptr2++; count++; @@ -225,45 +262,47 @@ cnt_sml(TRGM *trg1, TRGM *trg2) { } #ifdef DIVUNION - return ( ( ((float4)count) / ((float4)(len1+len2-count)) ) ); + return ((((float4) count) / ((float4) (len1 + len2 - count)))); #else - return ( ((float)count) / ((float)( (len1>len2) ? len1 : len2 )) ); + return (((float) count) / ((float) ((len1 > len2) ? len1 : len2))); #endif } PG_FUNCTION_INFO_V1(similarity); -Datum similarity(PG_FUNCTION_ARGS); +Datum similarity(PG_FUNCTION_ARGS); Datum -similarity(PG_FUNCTION_ARGS) { - text *in1 = PG_GETARG_TEXT_P(0); - text *in2 = PG_GETARG_TEXT_P(1); - TRGM *trg1, *trg2; - float4 res; +similarity(PG_FUNCTION_ARGS) +{ + text *in1 = PG_GETARG_TEXT_P(0); + text *in2 = PG_GETARG_TEXT_P(1); + TRGM *trg1, + *trg2; + float4 res; trg1 = generate_trgm(VARDATA(in1), VARSIZE(in1) - VARHDRSZ); trg2 = generate_trgm(VARDATA(in2), VARSIZE(in2) - VARHDRSZ); - res = cnt_sml(trg1,trg2); - + res = cnt_sml(trg1, trg2); + pfree(trg1); pfree(trg2); - PG_FREE_IF_COPY(in1,0); - PG_FREE_IF_COPY(in2,1); - + PG_FREE_IF_COPY(in1, 0); + PG_FREE_IF_COPY(in2, 1); + PG_RETURN_FLOAT4(res); } PG_FUNCTION_INFO_V1(similarity_op); -Datum similarity_op(PG_FUNCTION_ARGS); +Datum similarity_op(PG_FUNCTION_ARGS); Datum -similarity_op(PG_FUNCTION_ARGS) { - float4 res=DatumGetFloat4( DirectFunctionCall2( - similarity, - PG_GETARG_DATUM(0), - PG_GETARG_DATUM(1) - ) ); - PG_RETURN_BOOL( res >= trgm_limit ); +similarity_op(PG_FUNCTION_ARGS) +{ + float4 res = DatumGetFloat4(DirectFunctionCall2( + similarity, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1) + )); + + PG_RETURN_BOOL(res >= trgm_limit); } - - |