diff options
Diffstat (limited to 'contrib/tsearch2/rank.c')
-rw-r--r-- | contrib/tsearch2/rank.c | 753 |
1 files changed, 431 insertions, 322 deletions
diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c index 0840eb83470..5b62c9810bc 100644 --- a/contrib/tsearch2/rank.c +++ b/contrib/tsearch2/rank.c @@ -37,29 +37,35 @@ Datum rank_cd_def(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(get_covers); Datum get_covers(PG_FUNCTION_ARGS); -static float weights[]={0.1, 0.2, 0.4, 1.0}; +static float weights[] = {0.1, 0.2, 0.4, 1.0}; #define wpos(wep) ( w[ ((WordEntryPos*)(wep))->weight ] ) -#define DEF_NORM_METHOD 0 +#define DEF_NORM_METHOD 0 /* * Returns a weight of a word collocation */ -static float4 word_distance ( int4 w ) { - if ( w>100 ) - return 1e-30; +static float4 +word_distance(int4 w) +{ + if (w > 100) + return 1e-30; - return 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) ); + return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2)); } static int -cnt_length( tsvector *t ) { - WordEntry *ptr=ARRPTR(t), *end=(WordEntry*)STRPTR(t); - int len = 0, clen; - - while(ptr < end) { - if ( (clen=POSDATALEN(t, ptr)) == 0 ) +cnt_length(tsvector * t) +{ + WordEntry *ptr = ARRPTR(t), + *end = (WordEntry *) STRPTR(t); + int len = 0, + clen; + + while (ptr < end) + { + if ((clen = POSDATALEN(t, ptr)) == 0) len += 1; else len += clen; @@ -70,191 +76,225 @@ cnt_length( tsvector *t ) { } static int4 -WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) { - if (ptr->len == item->length) - return strncmp( - eval + ptr->pos, - qval + item->distance, - item->length); - - return (ptr->len > item->length) ? 1 : -1; +WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) +{ + if (ptr->len == item->length) + return strncmp( + eval + ptr->pos, + qval + item->distance, + item->length); + + return (ptr->len > item->length) ? 1 : -1; } -static WordEntry* -find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) { - WordEntry *StopLow = ARRPTR(t); - WordEntry *StopHigh = (WordEntry*)STRPTR(t); - WordEntry *StopMiddle; - int difference; - - /* Loop invariant: StopLow <= item < StopHigh */ - - while (StopLow < StopHigh) - { - StopMiddle = StopLow + (StopHigh - StopLow) / 2; - difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item); - if (difference == 0) - return StopMiddle; - else if (difference < 0) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - - return NULL; +static WordEntry * +find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item) +{ + WordEntry *StopLow = ARRPTR(t); + WordEntry *StopHigh = (WordEntry *) STRPTR(t); + WordEntry *StopMiddle; + int difference; + + /* Loop invariant: StopLow <= item < StopHigh */ + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item); + if (difference == 0) + return StopMiddle; + else if (difference < 0) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + return NULL; } -static WordEntryPos POSNULL[]={ - {0,0}, - {0,MAXENTRYPOS-1} +static WordEntryPos POSNULL[] = { + {0, 0}, + {0, MAXENTRYPOS - 1} }; static float -calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) { - uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size); - int i,k,l,p; - WordEntry *entry; - WordEntryPos *post,*ct; - int4 dimt,lenct,dist; - float res=-1.0; - ITEM *item=GETQUERY(q); - - memset(pos,0,sizeof(uint16**) * q->size); - *(uint16*)POSNULL = lengthof(POSNULL)-1; - - for(i=0; i<q->size; i++) { - - if ( item[i].type != VAL ) +calc_rank_and(float *w, tsvector * t, QUERYTYPE * q) +{ + uint16 **pos = (uint16 **) palloc(sizeof(uint16 *) * q->size); + int i, + k, + l, + p; + WordEntry *entry; + WordEntryPos *post, + *ct; + int4 dimt, + lenct, + dist; + float res = -1.0; + ITEM *item = GETQUERY(q); + + memset(pos, 0, sizeof(uint16 **) * q->size); + *(uint16 *) POSNULL = lengthof(POSNULL) - 1; + + for (i = 0; i < q->size; i++) + { + + if (item[i].type != VAL) continue; - entry=find_wordentry(t,q,&(item[i])); - if ( !entry ) + entry = find_wordentry(t, q, &(item[i])); + if (!entry) continue; - if ( entry->haspos ) - pos[i] = (uint16*)_POSDATAPTR(t,entry); + if (entry->haspos) + pos[i] = (uint16 *) _POSDATAPTR(t, entry); else - pos[i] = (uint16*)POSNULL; - - - dimt = *(uint16*)(pos[i]); - post = (WordEntryPos*)(pos[i]+1); - for( k=0; k<i; k++ ) { - if ( !pos[k] ) continue; - lenct = *(uint16*)(pos[k]); - ct = (WordEntryPos*)(pos[k]+1); - for(l=0; l<dimt; l++) { - for(p=0; p<lenct; p++) { - dist = abs( post[l].pos - ct[p].pos ); - if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) { - float curw; - if ( !dist ) dist=MAXENTRYPOS; - curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) ); - res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw ); + pos[i] = (uint16 *) POSNULL; + + + dimt = *(uint16 *) (pos[i]); + post = (WordEntryPos *) (pos[i] + 1); + for (k = 0; k < i; k++) + { + if (!pos[k]) + continue; + lenct = *(uint16 *) (pos[k]); + ct = (WordEntryPos *) (pos[k] + 1); + for (l = 0; l < dimt; l++) + { + for (p = 0; p < lenct; p++) + { + dist = abs(post[l].pos - ct[p].pos); + if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL))) + { + float curw; + + if (!dist) + dist = MAXENTRYPOS; + curw = sqrt(wpos(&(post[l])) * wpos(&(ct[p])) * word_distance(dist)); + res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw); } } } } } pfree(pos); - return res; + return res; } static float -calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) { - WordEntry *entry; - WordEntryPos *post; - int4 dimt,j,i; - float res=-1.0; - ITEM *item=GETQUERY(q); - - *(uint16*)POSNULL = lengthof(POSNULL)-1; - - for(i=0; i<q->size; i++) { - if ( item[i].type != VAL ) +calc_rank_or(float *w, tsvector * t, QUERYTYPE * q) +{ + WordEntry *entry; + WordEntryPos *post; + int4 dimt, + j, + i; + float res = -1.0; + ITEM *item = GETQUERY(q); + + *(uint16 *) POSNULL = lengthof(POSNULL) - 1; + + for (i = 0; i < q->size; i++) + { + if (item[i].type != VAL) continue; - entry=find_wordentry(t,q,&(item[i])); - if ( !entry ) + entry = find_wordentry(t, q, &(item[i])); + if (!entry) continue; - if ( entry->haspos ) { - dimt = POSDATALEN(t,entry); - post = POSDATAPTR(t,entry); - } else { - dimt = *(uint16*)POSNULL; - post = POSNULL+1; + if (entry->haspos) + { + dimt = POSDATALEN(t, entry); + post = POSDATAPTR(t, entry); + } + else + { + dimt = *(uint16 *) POSNULL; + post = POSNULL + 1; } - for(j=0;j<dimt;j++) { - if ( res < 0 ) - res = wpos( &(post[j]) ); + for (j = 0; j < dimt; j++) + { + if (res < 0) + res = wpos(&(post[j])); else - res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) ); + res = 1.0 - (1.0 - res) * (1.0 - wpos(&(post[j]))); } } return res; } static float -calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) { - ITEM *item = GETQUERY(q); - float res=0.0; +calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method) +{ + ITEM *item = GETQUERY(q); + float res = 0.0; if (!t->size || !q->size) return 0.0; - res = ( item->type != VAL && item->val == (int4) '&' ) ? - calc_rank_and(w,t,q) : calc_rank_or(w,t,q); + res = (item->type != VAL && item->val == (int4) '&') ? + calc_rank_and(w, t, q) : calc_rank_or(w, t, q); - if ( res < 0 ) + if (res < 0) res = 1e-20; - switch(method) { - case 0: break; - case 1: res /= log((float)cnt_length(t)); break; - case 2: res /= (float)cnt_length(t); break; + switch (method) + { + case 0: + break; + case 1: + res /= log((float) cnt_length(t)); + break; + case 2: + res /= (float) cnt_length(t); + break; default: - /* internal error */ - elog(ERROR,"unrecognized normalization method: %d", method); - } + /* internal error */ + elog(ERROR, "unrecognized normalization method: %d", method); + } return res; } Datum -rank(PG_FUNCTION_ARGS) { +rank(PG_FUNCTION_ARGS) +{ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2)); - int method=DEF_NORM_METHOD; - float res=0.0; - float ws[ lengthof(weights) ]; - int i; + int method = DEF_NORM_METHOD; + float res = 0.0; + float ws[lengthof(weights)]; + int i; - if ( ARR_NDIM(win) != 1 ) + if (ARR_NDIM(win) != 1) ereport(ERROR, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), errmsg("array of weight must be one-dimensional"))); - if ( ARRNELEMS(win) < lengthof(weights) ) + if (ARRNELEMS(win) < lengthof(weights)) ereport(ERROR, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), errmsg("array of weight is too short"))); - for(i=0;i<lengthof(weights);i++) { - ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i]; - if ( ws[ i ] > 1.0 ) + for (i = 0; i < lengthof(weights); i++) + { + ws[i] = (((float4 *) ARR_DATA_PTR(win))[i] >= 0) ? ((float4 *) ARR_DATA_PTR(win))[i] : weights[i]; + if (ws[i] > 1.0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("weight out of range"))); - } + } + + if (PG_NARGS() == 4) + method = PG_GETARG_INT32(3); - if ( PG_NARGS() == 4 ) - method=PG_GETARG_INT32(3); + res = calc_rank(ws, txt, query, method); - res=calc_rank(ws, txt, query, method); - PG_FREE_IF_COPY(win, 0); PG_FREE_IF_COPY(txt, 1); PG_FREE_IF_COPY(query, 2); @@ -262,108 +302,127 @@ rank(PG_FUNCTION_ARGS) { } Datum -rank_def(PG_FUNCTION_ARGS) { - tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); +rank_def(PG_FUNCTION_ARGS) +{ + tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); - float res=0.0; - int method=DEF_NORM_METHOD; + float res = 0.0; + int method = DEF_NORM_METHOD; - if ( PG_NARGS() == 3 ) - method=PG_GETARG_INT32(2); + if (PG_NARGS() == 3) + method = PG_GETARG_INT32(2); + + res = calc_rank(weights, txt, query, method); - res=calc_rank(weights, txt, query, method); - PG_FREE_IF_COPY(txt, 0); PG_FREE_IF_COPY(query, 1); PG_RETURN_FLOAT4(res); } -typedef struct { - ITEM *item; - int32 pos; -} DocRepresentation; +typedef struct +{ + ITEM *item; + int32 pos; +} DocRepresentation; static int -compareDocR(const void *a, const void *b) { - if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos ) +compareDocR(const void *a, const void *b) +{ + if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos) return 1; - return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1; + return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1; } -typedef struct { +typedef struct +{ DocRepresentation *doc; - int len; + int len; } ChkDocR; static bool -checkcondition_DR(void *checkval, ITEM *val) { - DocRepresentation *ptr = ((ChkDocR*)checkval)->doc; +checkcondition_DR(void *checkval, ITEM * val) +{ + DocRepresentation *ptr = ((ChkDocR *) checkval)->doc; - while( ptr - ((ChkDocR*)checkval)->doc < ((ChkDocR*)checkval)->len ) { - if ( val == ptr->item ) + while (ptr - ((ChkDocR *) checkval)->doc < ((ChkDocR *) checkval)->len) + { + if (val == ptr->item) return true; ptr++; - } + } return false; } static bool -Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) { - int i; - DocRepresentation *ptr,*f=(DocRepresentation*)0xffffffff; - ITEM *item=GETQUERY(query); - int lastpos=*pos; - int oldq=*q; - - *p=0x7fffffff; - *q=0; - - for(i=0; i<query->size; i++) { - if ( item->type != VAL ) { +Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int *q) +{ + int i; + DocRepresentation *ptr, + *f = (DocRepresentation *) 0xffffffff; + ITEM *item = GETQUERY(query); + int lastpos = *pos; + int oldq = *q; + + *p = 0x7fffffff; + *q = 0; + + for (i = 0; i < query->size; i++) + { + if (item->type != VAL) + { item++; continue; } ptr = doc + *pos; - while(ptr-doc<len) { - if ( ptr->item == item ) { - if ( ptr->pos > *q ) { + while (ptr - doc < len) + { + if (ptr->item == item) + { + if (ptr->pos > *q) + { *q = ptr->pos; - lastpos= ptr - doc; - } + lastpos = ptr - doc; + } break; - } + } ptr++; } item++; } - if (*q==0 ) + if (*q == 0) return false; - if (*q==oldq) { /* already check this pos */ + if (*q == oldq) + { /* already check this pos */ (*pos)++; - return Cover(doc, len, query, pos,p,q); - } + return Cover(doc, len, query, pos, p, q); + } - item=GETQUERY(query); - for(i=0; i<query->size; i++) { - if ( item->type != VAL ) { + item = GETQUERY(query); + for (i = 0; i < query->size; i++) + { + if (item->type != VAL) + { item++; continue; } ptr = doc + lastpos; - while(ptr>=doc+*pos) { - if ( ptr->item == item ) { - if ( ptr->pos < *p ) { + while (ptr >= doc + *pos) + { + if (ptr->item == item) + { + if (ptr->pos < *p) + { *p = ptr->pos; - f=ptr; + f = ptr; } break; } @@ -371,106 +430,135 @@ Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int * } item++; } - - if ( *p<=*q ) { - ChkDocR ch = { f, (doc + lastpos)-f+1 }; - *pos = f-doc+1; - if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { - /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ + + if (*p <= *q) + { + ChkDocR ch = {f, (doc + lastpos) - f + 1}; + + *pos = f - doc + 1; + if (TS_execute(GETQUERY(query), &ch, false, checkcondition_DR)) + { + /* + * elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, + * *q); + */ return true; - } else - return Cover(doc, len, query, pos,p,q); + } + else + return Cover(doc, len, query, pos, p, q); } - + return false; } -static DocRepresentation* -get_docrep(tsvector *txt, QUERYTYPE *query, int *doclen) { - ITEM *item=GETQUERY(query); - WordEntry *entry; - WordEntryPos *post; - int4 dimt,j,i; - int len=query->size*4,cur=0; +static DocRepresentation * +get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen) +{ + ITEM *item = GETQUERY(query); + WordEntry *entry; + WordEntryPos *post; + int4 dimt, + j, + i; + int len = query->size * 4, + cur = 0; DocRepresentation *doc; - *(uint16*)POSNULL = lengthof(POSNULL)-1; - doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len); - for(i=0; i<query->size; i++) { - if ( item[i].type != VAL ) + *(uint16 *) POSNULL = lengthof(POSNULL) - 1; + doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len); + for (i = 0; i < query->size; i++) + { + if (item[i].type != VAL) continue; - entry=find_wordentry(txt,query,&(item[i])); - if ( !entry ) + entry = find_wordentry(txt, query, &(item[i])); + if (!entry) continue; - if ( entry->haspos ) { - dimt = POSDATALEN(txt,entry); - post = POSDATAPTR(txt,entry); - } else { - dimt = *(uint16*)POSNULL; - post = POSNULL+1; + if (entry->haspos) + { + dimt = POSDATALEN(txt, entry); + post = POSDATAPTR(txt, entry); + } + else + { + dimt = *(uint16 *) POSNULL; + post = POSNULL + 1; } - while( cur+dimt >= len ) { - len*=2; - doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len); + while (cur + dimt >= len) + { + len *= 2; + doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); } - for(j=0;j<dimt;j++) { - doc[cur].item=&(item[i]); - doc[cur].pos=post[j].pos; + for (j = 0; j < dimt; j++) + { + doc[cur].item = &(item[i]); + doc[cur].pos = post[j].pos; cur++; } } - *doclen=cur; - - if ( cur>0 ) { - if ( cur>1 ) + *doclen = cur; + + if (cur > 0) + { + if (cur > 1) qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR); return doc; } - + pfree(doc); return NULL; } Datum -rank_cd(PG_FUNCTION_ARGS) { - int K = PG_GETARG_INT32(0); - tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); +rank_cd(PG_FUNCTION_ARGS) +{ + int K = PG_GETARG_INT32(0); + tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2)); - int method=DEF_NORM_METHOD; - DocRepresentation *doc; - float res=0.0; - int p=0,q=0,len,cur; + int method = DEF_NORM_METHOD; + DocRepresentation *doc; + float res = 0.0; + int p = 0, + q = 0, + len, + cur; doc = get_docrep(txt, query, &len); - if ( !doc ) { + if (!doc) + { PG_FREE_IF_COPY(txt, 1); PG_FREE_IF_COPY(query, 2); PG_RETURN_FLOAT4(0.0); } - cur=0; - if (K<=0) - K=4; - while( Cover(doc, len, query, &cur, &p, &q) ) - res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0; - - if ( PG_NARGS() == 4 ) - method=PG_GETARG_INT32(3); - - switch(method) { - case 0: break; - case 1: res /= log((float)cnt_length(txt)); break; - case 2: res /= (float)cnt_length(txt); break; + cur = 0; + if (K <= 0) + K = 4; + while (Cover(doc, len, query, &cur, &p, &q)) + res += (q - p + 1 > K) ? ((float) K) / ((float) (q - p + 1)) : 1.0; + + if (PG_NARGS() == 4) + method = PG_GETARG_INT32(3); + + switch (method) + { + case 0: + break; + case 1: + res /= log((float) cnt_length(txt)); + break; + case 2: + res /= (float) cnt_length(txt); + break; default: - /* internal error */ - elog(ERROR,"unrecognized normalization method: %d", method); - } + /* internal error */ + elog(ERROR, "unrecognized normalization method: %d", method); + } pfree(doc); PG_FREE_IF_COPY(txt, 1); @@ -481,120 +569,141 @@ rank_cd(PG_FUNCTION_ARGS) { Datum -rank_cd_def(PG_FUNCTION_ARGS) { - PG_RETURN_DATUM( DirectFunctionCall4( - rank_cd, - Int32GetDatum(-1), - PG_GETARG_DATUM(0), - PG_GETARG_DATUM(1), - ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD) - )); +rank_cd_def(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall4( + rank_cd, + Int32GetDatum(-1), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD) + )); } /**************debug*************/ -typedef struct { - char *w; - int2 len; - int2 pos; - int2 start; - int2 finish; -} DocWord; +typedef struct +{ + char *w; + int2 len; + int2 pos; + int2 start; + int2 finish; +} DocWord; static int -compareDocWord(const void *a, const void *b) { - if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos ) +compareDocWord(const void *a, const void *b) +{ + if (((DocWord *) a)->pos == ((DocWord *) b)->pos) return 1; - return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1; + return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1; } -Datum -get_covers(PG_FUNCTION_ARGS) { - tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); +Datum +get_covers(PG_FUNCTION_ARGS) +{ + tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); - WordEntry *pptr=ARRPTR(txt); - int i,dlen=0,j,cur=0,len=0,rlen; - DocWord *dw,*dwptr; - text *out; - char *cptr; + WordEntry *pptr = ARRPTR(txt); + int i, + dlen = 0, + j, + cur = 0, + len = 0, + rlen; + DocWord *dw, + *dwptr; + text *out; + char *cptr; DocRepresentation *doc; - int pos=0,p,q,olddwpos=0; - int ncover=1; + int pos = 0, + p, + q, + olddwpos = 0; + int ncover = 1; doc = get_docrep(txt, query, &rlen); - if ( !doc ) { - out=palloc(VARHDRSZ); + if (!doc) + { + out = palloc(VARHDRSZ); VARATT_SIZEP(out) = VARHDRSZ; - PG_FREE_IF_COPY(txt,0); - PG_FREE_IF_COPY(query,1); + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); PG_RETURN_POINTER(out); } - for(i=0;i<txt->size;i++) { + for (i = 0; i < txt->size; i++) + { if (!pptr[i].haspos) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("no pos info"))); - dlen += POSDATALEN(txt,&(pptr[i])); + dlen += POSDATALEN(txt, &(pptr[i])); } - dwptr=dw=palloc(sizeof(DocWord)*dlen); - memset(dw,0,sizeof(DocWord)*dlen); + dwptr = dw = palloc(sizeof(DocWord) * dlen); + memset(dw, 0, sizeof(DocWord) * dlen); + + for (i = 0; i < txt->size; i++) + { + WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i])); - for(i=0;i<txt->size;i++) { - WordEntryPos *posdata = POSDATAPTR(txt,&(pptr[i])); - for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) { - dw[cur].w=STRPTR(txt)+pptr[i].pos; - dw[cur].len=pptr[i].len; - dw[cur].pos=posdata[j].pos; + for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++) + { + dw[cur].w = STRPTR(txt) + pptr[i].pos; + dw[cur].len = pptr[i].len; + dw[cur].pos = posdata[j].pos; cur++; } - len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i])); + len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i])); } qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord); - while( Cover(doc, rlen, query, &pos, &p, &q) ) { - dwptr=dw+olddwpos; - while(dwptr->pos < p && dwptr-dw<dlen) + while (Cover(doc, rlen, query, &pos, &p, &q)) + { + dwptr = dw + olddwpos; + while (dwptr->pos < p && dwptr - dw < dlen) dwptr++; - olddwpos=dwptr-dw; - dwptr->start=ncover; - while(dwptr->pos < q+1 && dwptr-dw<dlen) + olddwpos = dwptr - dw; + dwptr->start = ncover; + while (dwptr->pos < q + 1 && dwptr - dw < dlen) dwptr++; - (dwptr-1)->finish=ncover; - len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/; - ncover++; - } - - out=palloc(VARHDRSZ+len); - cptr=((char*)out)+VARHDRSZ; - dwptr=dw; - - while( dwptr-dw < dlen) { - if ( dwptr->start ) { - sprintf(cptr,"{%d ",dwptr->start); - cptr=strchr(cptr,'\0'); + (dwptr - 1)->finish = ncover; + len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ; + ncover++; + } + + out = palloc(VARHDRSZ + len); + cptr = ((char *) out) + VARHDRSZ; + dwptr = dw; + + while (dwptr - dw < dlen) + { + if (dwptr->start) + { + sprintf(cptr, "{%d ", dwptr->start); + cptr = strchr(cptr, '\0'); } - memcpy(cptr,dwptr->w,dwptr->len); - cptr+=dwptr->len; - *cptr=' '; + memcpy(cptr, dwptr->w, dwptr->len); + cptr += dwptr->len; + *cptr = ' '; cptr++; - if ( dwptr->finish ) { - sprintf(cptr,"}%d ",dwptr->finish); - cptr=strchr(cptr,'\0'); + if (dwptr->finish) + { + sprintf(cptr, "}%d ", dwptr->finish); + cptr = strchr(cptr, '\0'); } dwptr++; - } + } + + VARATT_SIZEP(out) = cptr - ((char *) out); - VARATT_SIZEP(out) = cptr - ((char*)out); - pfree(dw); pfree(doc); - PG_FREE_IF_COPY(txt,0); - PG_FREE_IF_COPY(query,1); + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); PG_RETURN_POINTER(out); } - |