summaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/varlena.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/varlena.c')
-rw-r--r--src/backend/utils/adt/varlena.c184
1 files changed, 93 insertions, 91 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 5fd2bef617f..779729d724a 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -56,14 +56,15 @@ typedef struct
typedef struct
{
- char *buf1; /* 1st string, or abbreviation original string buf */
- char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
- int buflen1;
- int buflen2;
- bool collate_c;
- hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
- hyperLogLogState full_card; /* Full key cardinality state */
- double prop_card; /* Required cardinality proportion */
+ char *buf1; /* 1st string, or abbreviation original string
+ * buf */
+ char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
+ int buflen1;
+ int buflen2;
+ bool collate_c;
+ hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
+ hyperLogLogState full_card; /* Full key cardinality state */
+ double prop_card; /* Required cardinality proportion */
#ifdef HAVE_LOCALE_T
pg_locale_t locale;
#endif
@@ -82,9 +83,9 @@ typedef struct
#define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
static void btsortsupport_worker(SortSupport ssup, Oid collid);
-static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
-static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
-static int bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup);
+static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
+static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup);
static Datum bttext_abbrev_convert(Datum original, SortSupport ssup);
static bool bttext_abbrev_abort(int memtupcount, SortSupport ssup);
static int32 text_length(Datum str);
@@ -1415,8 +1416,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
}
/*
- * memcmp() can't tell us which of two unequal strings sorts first, but
- * it's a cheap way to tell if they're equal. Testing shows that
+ * memcmp() can't tell us which of two unequal strings sorts first,
+ * but it's a cheap way to tell if they're equal. Testing shows that
* memcmp() followed by strcoll() is only trivially slower than
* strcoll() by itself, so we don't lose much if this doesn't work out
* very often, and if it does - for example, because there are many
@@ -1726,9 +1727,9 @@ bttextcmp(PG_FUNCTION_ARGS)
Datum
bttextsortsupport(PG_FUNCTION_ARGS)
{
- SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
- Oid collid = ssup->ssup_collation;
- MemoryContext oldcontext;
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ Oid collid = ssup->ssup_collation;
+ MemoryContext oldcontext;
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
@@ -1742,30 +1743,30 @@ bttextsortsupport(PG_FUNCTION_ARGS)
static void
btsortsupport_worker(SortSupport ssup, Oid collid)
{
- bool abbreviate = ssup->abbreviate;
- bool collate_c = false;
- TextSortSupport *tss;
+ bool abbreviate = ssup->abbreviate;
+ bool collate_c = false;
+ TextSortSupport *tss;
#ifdef HAVE_LOCALE_T
- pg_locale_t locale = 0;
+ pg_locale_t locale = 0;
#endif
/*
* If possible, set ssup->comparator to a function which can be used to
* directly compare two datums. If we can do this, we'll avoid the
- * overhead of a trip through the fmgr layer for every comparison,
- * which can be substantial.
+ * overhead of a trip through the fmgr layer for every comparison, which
+ * can be substantial.
*
- * Most typically, we'll set the comparator to bttextfastcmp_locale,
- * which uses strcoll() to perform comparisons. However, if LC_COLLATE
- * = C, we can make things quite a bit faster with bttextfastcmp_c,
- * which uses memcmp() rather than strcoll().
+ * Most typically, we'll set the comparator to bttextfastcmp_locale, which
+ * uses strcoll() to perform comparisons. However, if LC_COLLATE = C, we
+ * can make things quite a bit faster with bttextfastcmp_c, which uses
+ * memcmp() rather than strcoll().
*
- * There is a further exception on Windows. When the database encoding
- * is UTF-8 and we are not using the C collation, complex hacks are
- * required. We don't currently have a comparator that handles that case,
- * so we fall back on the slow method of having the sort code invoke
- * bttextcmp() via the fmgr trampoline.
+ * There is a further exception on Windows. When the database encoding is
+ * UTF-8 and we are not using the C collation, complex hacks are required.
+ * We don't currently have a comparator that handles that case, so we fall
+ * back on the slow method of having the sort code invoke bttextcmp() via
+ * the fmgr trampoline.
*/
if (lc_collate_is_c(collid))
{
@@ -1808,13 +1809,13 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
* It's possible that there are platforms where the use of abbreviated
* keys should be disabled at compile time. Having only 4 byte datums
* could make worst-case performance drastically more likely, for example.
- * Moreover, Darwin's strxfrm() implementations is known to not effectively
- * concentrate a significant amount of entropy from the original string in
- * earlier transformed blobs. It's possible that other supported platforms
- * are similarly encumbered. However, even in those cases, the abbreviated
- * keys optimization may win, and if it doesn't, the "abort abbreviation"
- * code may rescue us. So, for now, we don't disable this anywhere on the
- * basis of performance.
+ * Moreover, Darwin's strxfrm() implementations is known to not
+ * effectively concentrate a significant amount of entropy from the
+ * original string in earlier transformed blobs. It's possible that other
+ * supported platforms are similarly encumbered. However, even in those
+ * cases, the abbreviated keys optimization may win, and if it doesn't,
+ * the "abort abbreviation" code may rescue us. So, for now, we don't
+ * disable this anywhere on the basis of performance.
*/
/*
@@ -1893,16 +1894,16 @@ bttextfastcmp_c(Datum x, Datum y, SortSupport ssup)
static int
bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
{
- text *arg1 = DatumGetTextPP(x);
- text *arg2 = DatumGetTextPP(y);
- TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
+ text *arg1 = DatumGetTextPP(x);
+ text *arg2 = DatumGetTextPP(y);
+ TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
/* working state */
- char *a1p,
- *a2p;
- int len1,
- len2,
- result;
+ char *a1p,
+ *a2p;
+ int len1,
+ len2,
+ result;
a1p = VARDATA_ANY(arg1);
a2p = VARDATA_ANY(arg2);
@@ -1943,9 +1944,9 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
result = strcoll(tss->buf1, tss->buf2);
/*
- * In some locales strcoll() can claim that nonidentical strings are equal.
- * Believing that would be bad news for a number of reasons, so we follow
- * Perl's lead and sort "equal" strings according to strcmp().
+ * In some locales strcoll() can claim that nonidentical strings are
+ * equal. Believing that would be bad news for a number of reasons, so we
+ * follow Perl's lead and sort "equal" strings according to strcmp().
*/
if (result == 0)
result = strcmp(tss->buf1, tss->buf2);
@@ -1966,9 +1967,9 @@ done:
static int
bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
{
- char *a = (char *) &x;
- char *b = (char *) &y;
- int result;
+ char *a = (char *) &x;
+ char *b = (char *) &y;
+ int result;
result = memcmp(a, b, sizeof(Datum));
@@ -1989,15 +1990,15 @@ bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
static Datum
bttext_abbrev_convert(Datum original, SortSupport ssup)
{
- TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
- text *authoritative = DatumGetTextPP(original);
- char *authoritative_data = VARDATA_ANY(authoritative);
+ TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
+ text *authoritative = DatumGetTextPP(original);
+ char *authoritative_data = VARDATA_ANY(authoritative);
/* working state */
- Datum res;
- char *pres;
- int len;
- uint32 hash;
+ Datum res;
+ char *pres;
+ int len;
+ uint32 hash;
/*
* Abbreviated key representation is a pass-by-value Datum that is treated
@@ -2009,8 +2010,8 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
len = VARSIZE_ANY_EXHDR(authoritative);
/*
- * If we're using the C collation, use memcmp(), rather than strxfrm(),
- * to abbreviate keys. The full comparator for the C locale is always
+ * If we're using the C collation, use memcmp(), rather than strxfrm(), to
+ * abbreviate keys. The full comparator for the C locale is always
* memcmp(), and we can't risk having this give a different answer.
* Besides, this should be faster, too.
*/
@@ -2018,7 +2019,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
else
{
- Size bsize;
+ Size bsize;
/*
* We're not using the C collation, so fall back on strxfrm.
@@ -2075,8 +2076,8 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
/*
* Maintain approximate cardinality of both abbreviated keys and original,
* authoritative keys using HyperLogLog. Used as cheap insurance against
- * the worst case, where we do many string transformations for no saving in
- * full strcoll()-based comparisons. These statistics are used by
+ * the worst case, where we do many string transformations for no saving
+ * in full strcoll()-based comparisons. These statistics are used by
* bttext_abbrev_abort().
*
* First, Hash key proper, or a significant fraction of it. Mix in length
@@ -2094,8 +2095,8 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
/* Hash abbreviated key */
#if SIZEOF_DATUM == 8
{
- uint32 lohalf,
- hihalf;
+ uint32 lohalf,
+ hihalf;
lohalf = (uint32) res;
hihalf = (uint32) (res >> 32);
@@ -2118,8 +2119,9 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
static bool
bttext_abbrev_abort(int memtupcount, SortSupport ssup)
{
- TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
- double abbrev_distinct, key_distinct;
+ TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
+ double abbrev_distinct,
+ key_distinct;
Assert(ssup->abbreviate);
@@ -2131,9 +2133,9 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
key_distinct = estimateHyperLogLog(&tss->full_card);
/*
- * Clamp cardinality estimates to at least one distinct value. While NULLs
- * are generally disregarded, if only NULL values were seen so far, that
- * might misrepresent costs if we failed to clamp.
+ * Clamp cardinality estimates to at least one distinct value. While
+ * NULLs are generally disregarded, if only NULL values were seen so far,
+ * that might misrepresent costs if we failed to clamp.
*/
if (abbrev_distinct <= 1.0)
abbrev_distinct = 1.0;
@@ -2149,7 +2151,7 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
#ifdef TRACE_SORT
if (trace_sort)
{
- double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
+ double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
elog(LOG, "bttext_abbrev: abbrev_distinct after %d: %f "
"(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
@@ -2180,26 +2182,26 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
* When we have exceeded 10,000 tuples, decay required cardinality
* aggressively for next call.
*
- * This is useful because the number of comparisons required on average
- * increases at a linearithmic rate, and at roughly 10,000 tuples that
- * factor will start to dominate over the linear costs of string
- * transformation (this is a conservative estimate). The decay rate is
- * chosen to be a little less aggressive than halving -- which (since
- * we're called at points at which memtupcount has doubled) would never
- * see the cost model actually abort past the first call following a
- * decay. This decay rate is mostly a precaution against a sudden,
- * violent swing in how well abbreviated cardinality tracks full key
- * cardinality. The decay also serves to prevent a marginal case from
- * being aborted too late, when too much has already been invested in
- * string transformation.
+ * This is useful because the number of comparisons required on
+ * average increases at a linearithmic rate, and at roughly 10,000
+ * tuples that factor will start to dominate over the linear costs of
+ * string transformation (this is a conservative estimate). The decay
+ * rate is chosen to be a little less aggressive than halving -- which
+ * (since we're called at points at which memtupcount has doubled)
+ * would never see the cost model actually abort past the first call
+ * following a decay. This decay rate is mostly a precaution against
+ * a sudden, violent swing in how well abbreviated cardinality tracks
+ * full key cardinality. The decay also serves to prevent a marginal
+ * case from being aborted too late, when too much has already been
+ * invested in string transformation.
*
- * It's possible for sets of several million distinct strings with mere
- * tens of thousands of distinct abbreviated keys to still benefit very
- * significantly. This will generally occur provided each abbreviated
- * key is a proxy for a roughly uniform number of the set's full keys.
- * If it isn't so, we hope to catch that early and abort. If it isn't
- * caught early, by the time the problem is apparent it's probably not
- * worth aborting.
+ * It's possible for sets of several million distinct strings with
+ * mere tens of thousands of distinct abbreviated keys to still
+ * benefit very significantly. This will generally occur provided
+ * each abbreviated key is a proxy for a roughly uniform number of the
+ * set's full keys. If it isn't so, we hope to catch that early and
+ * abort. If it isn't caught early, by the time the problem is
+ * apparent it's probably not worth aborting.
*/
if (memtupcount > 10000)
tss->prop_card *= 0.65;