summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTeodor Sigaev2006-11-20 14:03:30 +0000
committerTeodor Sigaev2006-11-20 14:03:30 +0000
commit419fe7cd1b2e658dfec236422308a21cab6c6c30 (patch)
tree0bf7b1e96e9734b9553f810c029dfe6d156a71d0
parent1a5c450f3024ac57cd6079186c71b3baf39e84eb (diff)
Fix bug http://archives.postgresql.org/pgsql-bugs/2006-10/msg00258.php.
Fix string's length calculation for recoding, fix strlower() to avoid wrong assumption about length of recoded string (was: recoded string is no greater than source, which may not be true for multibyte encodings). Thanks to Thomas H. <[email protected]> and Magnus Hagander <[email protected]>
-rw-r--r--contrib/tsearch2/dict_ex.c6
-rw-r--r--contrib/tsearch2/dict_snowball.c8
-rw-r--r--contrib/tsearch2/dict_syn.c11
-rw-r--r--contrib/tsearch2/ispell/spell.c75
-rw-r--r--contrib/tsearch2/stopword.c16
-rw-r--r--contrib/tsearch2/ts_locale.c77
6 files changed, 131 insertions, 62 deletions
diff --git a/contrib/tsearch2/dict_ex.c b/contrib/tsearch2/dict_ex.c
index ccb7f3fcbe3..2fd5cbb7009 100644
--- a/contrib/tsearch2/dict_ex.c
+++ b/contrib/tsearch2/dict_ex.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict_ex.c,v 1.8 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict_ex.c,v 1.9 2006/11/20 14:03:30 teodor Exp $ */
/*
* example of dictionary
@@ -52,9 +52,11 @@ dex_lexize(PG_FUNCTION_ARGS)
{
DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
- char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+ char *utxt = pnstrdup(in, PG_GETARG_INT32(2));
TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
+ char *txt = lowerstr(utxt);
+ pfree(utxt);
memset(res, 0, sizeof(TSLexeme) * 2);
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
diff --git a/contrib/tsearch2/dict_snowball.c b/contrib/tsearch2/dict_snowball.c
index f983ae8e13b..66677448249 100644
--- a/contrib/tsearch2/dict_snowball.c
+++ b/contrib/tsearch2/dict_snowball.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict_snowball.c,v 1.12 2006/07/11 16:35:31 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict_snowball.c,v 1.13 2006/11/20 14:03:30 teodor Exp $ */
/*
* example of Snowball dictionary
@@ -142,9 +142,11 @@ snb_lexize(PG_FUNCTION_ARGS)
{
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
- char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+ char *utxt = pnstrdup(in, PG_GETARG_INT32(2));
TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
-
+ char *txt = lowerstr(utxt);
+
+ pfree(utxt);
memset(res, 0, sizeof(TSLexeme) * 2);
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
diff --git a/contrib/tsearch2/dict_syn.c b/contrib/tsearch2/dict_syn.c
index d19686d63e0..cddbd473508 100644
--- a/contrib/tsearch2/dict_syn.c
+++ b/contrib/tsearch2/dict_syn.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict_syn.c,v 1.9 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict_syn.c,v 1.10 2006/11/20 14:03:30 teodor Exp $ */
/*
* ISpell interface
@@ -132,8 +132,8 @@ syn_init(PG_FUNCTION_ARGS)
continue;
*end = '\0';
- d->syn[cur].in = strdup(lowerstr(starti));
- d->syn[cur].out = strdup(lowerstr(starto));
+ d->syn[cur].in = lowerstr(starti);
+ d->syn[cur].out = lowerstr(starto);
if (!(d->syn[cur].in && d->syn[cur].out))
{
fclose(fin);
@@ -163,12 +163,15 @@ syn_lexize(PG_FUNCTION_ARGS)
Syn key,
*found;
TSLexeme *res = NULL;
+ char *wrd;
if (!PG_GETARG_INT32(2))
PG_RETURN_POINTER(NULL);
key.out = NULL;
- key.in = lowerstr(pnstrdup(in, PG_GETARG_INT32(2)));
+ wrd = pnstrdup(in, PG_GETARG_INT32(2));
+ key.in = lowerstr(wrd);
+ pfree(wrd);
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
pfree(key.in);
diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c
index 9e4d689cd41..6eedc7f3426 100644
--- a/contrib/tsearch2/ispell/spell.c
+++ b/contrib/tsearch2/ispell/spell.c
@@ -147,7 +147,7 @@ NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
int
NIImportDictionary(IspellDict * Conf, const char *filename)
{
- char str[BUFSIZ];
+ char str[BUFSIZ], *pstr;
FILE *dict;
if (!(dict = fopen(filename, "r")))
@@ -190,9 +190,10 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
}
s += pg_mblen(s);
}
- lowerstr(str);
+ pstr = lowerstr(str);
- NIAddSpell(Conf, str, flag);
+ NIAddSpell(Conf, pstr, flag);
+ pfree(pstr);
}
fclose(dict);
return (0);
@@ -418,8 +419,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl, int line)
int
NIImportAffixes(IspellDict * Conf, const char *filename)
{
- char str[BUFSIZ];
- char tmpstr[BUFSIZ];
+ char str[BUFSIZ], *pstr = NULL;
char mask[BUFSIZ];
char find[BUFSIZ];
char repl[BUFSIZ];
@@ -439,11 +439,14 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
while (fgets(str, sizeof(str), affix))
{
line++;
+ if ( *str == '#' || *str == '\n' )
+ continue;
+
pg_verifymbstr(str, strlen(str), false);
- memcpy(tmpstr, str, 32); /* compoundwords... */
- tmpstr[32] = '\0';
- lowerstr(tmpstr);
- if (STRNCMP(tmpstr, "compoundwords") == 0)
+ if ( pstr )
+ pfree( pstr );
+ pstr = lowerstr(str);
+ if (STRNCMP(pstr, "compoundwords") == 0)
{
s = findchar(str, 'l');
if (s)
@@ -458,21 +461,21 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
continue;
}
}
- if (STRNCMP(tmpstr, "suffixes") == 0)
+ if (STRNCMP(pstr, "suffixes") == 0)
{
suffixes = 1;
prefixes = 0;
oldformat++;
continue;
}
- if (STRNCMP(tmpstr, "prefixes") == 0)
+ if (STRNCMP(pstr, "prefixes") == 0)
{
suffixes = 0;
prefixes = 1;
oldformat++;
continue;
}
- if (STRNCMP(tmpstr, "flag") == 0)
+ if (STRNCMP(pstr, "flag") == 0)
{
s = str + 4;
flagflags = 0;
@@ -523,14 +526,16 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
if ((!suffixes) && (!prefixes))
continue;
- lowerstr(str);
- if (!parse_affentry(str, mask, find, repl, line))
+ if (!parse_affentry(pstr, mask, find, repl, line))
continue;
NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
}
fclose(affix);
+ if ( pstr )
+ pfree( pstr );
+
return (0);
}
@@ -538,11 +543,11 @@ int
NIImportOOAffixes(IspellDict * Conf, const char *filename)
{
char str[BUFSIZ];
- char type[BUFSIZ];
+ char type[BUFSIZ], *ptype = NULL;
char sflag[BUFSIZ];
- char mask[BUFSIZ];
- char find[BUFSIZ];
- char repl[BUFSIZ];
+ char mask[BUFSIZ], *pmask;
+ char find[BUFSIZ], *pfind;
+ char repl[BUFSIZ], *prepl;
bool isSuffix = false;
int flag = 0;
char flagflags = 0;
@@ -577,8 +582,10 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
scanread = sscanf(str, scanbuf, type, sflag, find, repl, mask);
- lowerstr(type);
- if (scanread < 4 || (STRNCMP(type, "sfx") && STRNCMP(type, "pfx")))
+ if (ptype)
+ pfree(ptype);
+ ptype = lowerstr(type);
+ if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
continue;
if (scanread == 4)
@@ -586,29 +593,35 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
if (strlen(sflag) != 1)
continue;
flag = *sflag;
- isSuffix = (STRNCMP(type, "sfx") == 0) ? true : false;
- lowerstr(find);
+ isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false;
+ pfind = lowerstr(find);
if (t_iseq(find, 'y'))
flagflags |= FF_CROSSPRODUCT;
else
flagflags = 0;
+ pfree(pfind);
}
else
{
if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
continue;
- lowerstr(repl);
- lowerstr(find);
- lowerstr(mask);
+ prepl = lowerstr(repl);
+ pfind = lowerstr(find);
+ pmask = lowerstr(mask);
if (t_iseq(find, '0'))
*find = '\0';
if (t_iseq(repl, '0'))
*repl = '\0';
NIAddAffix(Conf, flag, flagflags, mask, find, repl, isSuffix ? FF_SUFFIX : FF_PREFIX);
+ pfree(prepl);
+ pfree(pfind);
+ pfree(pmask);
}
}
+ if (ptype)
+ pfree(ptype);
fclose(affix);
return 0;
@@ -1053,7 +1066,6 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
if (wrdlen > MAXNORMLEN)
return NULL;
- lowerstr(word);
cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
*cur = NULL;
@@ -1354,13 +1366,17 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
}
TSLexeme *
-NINormalizeWord(IspellDict * Conf, char *word)
+NINormalizeWord(IspellDict * Conf, char *uword)
{
- char **res = NormalizeSubWord(Conf, word, 0);
+ char **res;
+ char *word;
TSLexeme *lcur = NULL,
*lres = NULL;
uint16 NVariant = 1;
+ word = lowerstr(uword);
+ res = NormalizeSubWord(Conf, word, 0);
+
if (res)
{
char **ptr = res;
@@ -1431,6 +1447,9 @@ NINormalizeWord(IspellDict * Conf, char *word)
var = ptr;
}
}
+
+ pfree(word);
+
return lres;
}
diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c
index 73db8abba69..b9b7699594e 100644
--- a/contrib/tsearch2/stopword.c
+++ b/contrib/tsearch2/stopword.c
@@ -36,7 +36,7 @@ readstoplist(text *in, StopList * s)
{
char *filename = to_absfilename(text2char(in));
FILE *hin;
- char buf[STOPBUFLEN];
+ char buf[STOPBUFLEN], *pbuf;
int reallen = 0;
if ((hin = fopen(filename, "r")) == NULL)
@@ -49,7 +49,6 @@ readstoplist(text *in, StopList * s)
{
buf[strlen(buf) - 1] = '\0';
pg_verifymbstr(buf, strlen(buf), false);
- lowerstr(buf);
if (*buf == '\0')
continue;
@@ -70,7 +69,14 @@ readstoplist(text *in, StopList * s)
stop = tmp;
}
- stop[s->len] = strdup(buf);
+ if (s->wordop)
+ {
+ pbuf = s->wordop(buf);
+ stop[s->len] = strdup(pbuf);
+ pfree(pbuf);
+ } else
+ stop[s->len] = strdup(buf);
+
if (!stop[s->len])
{
freestoplist(s);
@@ -79,8 +85,6 @@ readstoplist(text *in, StopList * s)
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
- if (s->wordop)
- stop[s->len] = (s->wordop) (stop[s->len]);
(s->len)++;
}
@@ -106,7 +110,5 @@ sortstoplist(StopList * s)
bool
searchstoplist(StopList * s, char *key)
{
- if (s->wordop)
- key = (*(s->wordop)) (key);
return (s->stop && s->len > 0 && bsearch(&key, s->stop, s->len, sizeof(char *), comparestr)) ? true : false;
}
diff --git a/contrib/tsearch2/ts_locale.c b/contrib/tsearch2/ts_locale.c
index 203c977e4ea..cac5317a105 100644
--- a/contrib/tsearch2/ts_locale.c
+++ b/contrib/tsearch2/ts_locale.c
@@ -14,21 +14,12 @@ wchar2char(char *to, const wchar_t *from, size_t len)
{
if (GetDatabaseEncoding() == PG_UTF8)
{
- int r,
- nbytes;
+ int r;
if (len == 0)
return 0;
- /* in any case, *to should be allocated with enough space */
- nbytes = WideCharToMultiByte(CP_UTF8, 0, from, len, NULL, 0, NULL, NULL);
- if (nbytes == 0)
- ereport(ERROR,
- (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- errmsg("UTF-16 to UTF-8 translation failed: %lu",
- GetLastError())));
-
- r = WideCharToMultiByte(CP_UTF8, 0, from, len, to, nbytes,
+ r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len,
NULL, NULL);
if (r == 0)
@@ -36,6 +27,8 @@ wchar2char(char *to, const wchar_t *from, size_t len)
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("UTF-16 to UTF-8 translation failed: %lu",
GetLastError())));
+ Assert(r <= len);
+
return r;
}
@@ -56,7 +49,7 @@ char2wchar(wchar_t *to, const char *from, size_t len)
if (!r)
{
- pg_verifymbstr(from, len, false);
+ pg_verifymbstr(from, strlen(from), false);
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("invalid multibyte character for locale"),
@@ -97,6 +90,11 @@ char *
lowerstr(char *str)
{
char *ptr = str;
+ char *out;
+ int len = strlen(str);
+
+ if ( len == 0 )
+ return pstrdup("");
#ifdef TS_USE_WIDE
@@ -110,24 +108,67 @@ lowerstr(char *str)
{
wchar_t *wstr,
*wptr;
- int len = strlen(str);
+ int wlen;
+
+ /*
+ * Allocate wchar_t for the worst case: len is the number of bytes,
+ * which is >= the number of characters. Allocate one extra wchar_t
+ * for the terminating 0, because wchar2char (really wcstombs)
+ * wants a zero-terminated string
+ */
+ wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len+1));
+
+ /*
+ * str SHOULD be cstring, so wlen contains number
+ * of converted character
+ */
+ wlen = char2wchar(wstr, str, len);
+ if ( wlen < 0 )
+ ereport(ERROR,
+ (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+ errmsg("transalation failed from server encoding to wchar_t")));
+
+ Assert(wlen<=len);
+ wstr[wlen] = 0;
- wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
- char2wchar(wstr, str, len + 1);
while (*wptr)
{
*wptr = towlower((wint_t) *wptr);
wptr++;
}
- wchar2char(str, wstr, len);
+
+ /*
+ * Alloc result string for worst case + '\0'
+ */
+ len = sizeof(char)*pg_database_encoding_max_length()*(wlen+1);
+ out = (char*)palloc(len);
+
+ /*
+ * wlen now is number of bytes which is always >= number of characters
+ */
+ wlen = wchar2char(out, wstr, len);
pfree(wstr);
+
+ if ( wlen < 0 )
+ ereport(ERROR,
+ (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+ errmsg("transalation failed from wchar_t to server encoding %d", errno)));
+ Assert(wlen<=len);
+ out[wlen]='\0';
}
else
#endif
+ {
+ char *outptr;
+
+ outptr = out = (char*)palloc( sizeof(char) * (len+1) );
while (*ptr)
{
- *ptr = tolower(*(unsigned char *) ptr);
+ *outptr++ = tolower(*(unsigned char *) ptr);
ptr++;
}
- return str;
+ *outptr = '\0';
+ }
+
+ return out;
}