summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Neil Conway 2005-01-26 08:04:04 +0000
committer Neil Conway 2005-01-26 08:04:04 +0000
commit 1ac9f0e9f79c02a48c374dee206d3952e5520f84 (patch)
tree b1f940533ee2e7dc5165c41a8945c1074b6d7990
parent fd5437c78b1d5e91ca034a5d4a820e668af33850 (diff)
Implement the soundex difference() function, which compares two strings'
soundex values for similarity. Patch from Kris Jurka. Also mark the text_soundex() function as STRICT, to avoid crashing on NULL input.
-rw-r--r-- contrib/fuzzystrmatch/README.fuzzystrmatch 4
-rw-r--r-- contrib/fuzzystrmatch/README.soundex 19
-rw-r--r-- contrib/fuzzystrmatch/fuzzystrmatch.c 20
-rw-r--r-- contrib/fuzzystrmatch/fuzzystrmatch.h 1
-rw-r--r-- contrib/fuzzystrmatch/fuzzystrmatch.sql.in 26
5 files changed, 56 insertions, 14 deletions
diff --git a/contrib/fuzzystrmatch/README.fuzzystrmatch b/contrib/fuzzystrmatch/README.fuzzystrmatch
index 876bdef2842..ad2d12c3885 100644
--- a/contrib/fuzzystrmatch/README.fuzzystrmatch
+++ b/contrib/fuzzystrmatch/README.fuzzystrmatch
@@ -33,6 +33,10 @@
* Folded existing soundex contrib into this one. Renamed text_soundex() (C function)
* to soundex() for consistency.
*
+ * difference()
+ * ------------
+ * Return the number of matching positions (0 to 4) in two strings' soundex values. Kris Jurka
+ *
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without a written agreement
* is hereby granted, provided that the above copyright notice and this
diff --git a/contrib/fuzzystrmatch/README.soundex b/contrib/fuzzystrmatch/README.soundex
index ec4f50fd1df..5a58655cdcf 100644
--- a/contrib/fuzzystrmatch/README.soundex
+++ b/contrib/fuzzystrmatch/README.soundex
@@ -7,15 +7,25 @@ United States Census in 1880, 1900, and 1910, but it has little use
beyond English names (or the English pronunciation of names), and
it is not a linguistic tool.
+When comparing two soundex values to determine similarity, the
+difference function reports how close the match is on a scale
+from zero to four, with zero being no match and four being an
+exact match.
+
The following are some usage examples:
SELECT soundex('hello world!');
+SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
+SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
+SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
+
CREATE TABLE s (nm text)\g
insert into s values ('john')\g
insert into s values ('joan')\g
insert into s values ('wobbly')\g
+insert into s values ('jack')\g
select * from s
where soundex(nm) = soundex('john')\g
@@ -58,5 +68,10 @@ FROM s
WHERE text_sx_eq(nm,'john')\g
SELECT *
-from s
-where s.nm #= 'john';
+FROM s
+WHERE s.nm #= 'john';
+
+SELECT *
+FROM s
+WHERE difference(s.nm, 'john') > 2;
+
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index d3627097cfc..90505a8b2c7 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -755,3 +755,23 @@ _soundex(const char *instr, char *outstr)
++count;
}
}
+
+PG_FUNCTION_INFO_V1(difference);
+
+Datum
+difference(PG_FUNCTION_ARGS)
+{
+ char sndx1[SOUNDEX_LEN+1], sndx2[SOUNDEX_LEN+1];
+ int i, result;
+
+ _soundex(_textout(PG_GETARG_TEXT_P(0)), sndx1);
+ _soundex(_textout(PG_GETARG_TEXT_P(1)), sndx2);
+
+ result = 0;
+ for (i=0; i<SOUNDEX_LEN; i++) {
+ if (sndx1[i] == sndx2[i])
+ result++;
+ }
+
+ PG_RETURN_INT32(result);
+}
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.h b/contrib/fuzzystrmatch/fuzzystrmatch.h
index 05c2022aecb..8253e60ac22 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.h
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.h
@@ -60,6 +60,7 @@
extern Datum levenshtein(PG_FUNCTION_ARGS);
extern Datum metaphone(PG_FUNCTION_ARGS);
extern Datum soundex(PG_FUNCTION_ARGS);
+extern Datum difference(PG_FUNCTION_ARGS);
/*
* Soundex
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.sql.in b/contrib/fuzzystrmatch/fuzzystrmatch.sql.in
index 07e6dce7e33..b4e4fadc449 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.sql.in
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.sql.in
@@ -1,28 +1,30 @@
-- Adjust this setting to control where the objects get created.
SET search_path = public;
-CREATE FUNCTION levenshtein (text,text)
-RETURNS int
+CREATE FUNCTION levenshtein (text,text) RETURNS int
AS 'MODULE_PATHNAME','levenshtein'
-LANGUAGE 'C' WITH (iscachable, isstrict);
+LANGUAGE C IMMUTABLE STRICT;
-CREATE FUNCTION metaphone (text,int)
-RETURNS text
+CREATE FUNCTION metaphone (text,int) RETURNS text
AS 'MODULE_PATHNAME','metaphone'
-LANGUAGE 'C' WITH (iscachable, isstrict);
+LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION soundex(text) RETURNS text
AS 'MODULE_PATHNAME', 'soundex'
-LANGUAGE 'C' WITH (iscachable, isstrict);
+LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION text_soundex(text) RETURNS text
AS 'MODULE_PATHNAME', 'soundex'
-LANGUAGE 'C';
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE FUNCTION difference(text,text) RETURNS int
+AS 'MODULE_PATHNAME', 'difference'
+LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION dmetaphone (text) RETURNS text
-LANGUAGE C IMMUTABLE STRICT
-AS 'MODULE_PATHNAME', 'dmetaphone';
+AS 'MODULE_PATHNAME', 'dmetaphone'
+LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION dmetaphone_alt (text) RETURNS text
-LANGUAGE C IMMUTABLE STRICT
-AS 'MODULE_PATHNAME', 'dmetaphone_alt';
+AS 'MODULE_PATHNAME', 'dmetaphone_alt'
+LANGUAGE C IMMUTABLE STRICT;