summary | refs | log | tree | commit | diff
path: root/src/backend/access/brin/brin_tuple.c
diff options
context:
space:
mode:
author: Tomas Vondra, 2020-11-06 23:39:19 +0000
committer: Tomas Vondra, 2020-11-06 23:39:19 +0000
commit: 7577dd84807a808f295dbfb0191540750ff98002 (patch)
tree: 88b836d929d1e07c2f443c3a7adc79f300a463c0 /src/backend/access/brin/brin_tuple.c
parent: eeda7f6338095701cfe1ba3da37070508efe019e (diff)
Properly detoast data in brin_form_tuple
brin_form_tuple failed to consider that the values may be toasted, and so inserted the toast pointer into the index. This may easily result in index corruption, as the toast data may be deleted and cleaned up by vacuum. The cleanup, however, does not care about indexes, leaving invalid toast pointers behind, which triggers errors like this:

    ERROR: missing chunk number 0 for toast value 16433 in pg_toast_16426

A less severe consequence is inconsistent failures due to the index row being too large, depending on whether brin_form_tuple operated on the plain or the toasted version of the row. For example

    CREATE TABLE t (val TEXT);
    INSERT INTO t VALUES ('... long value ...');
    CREATE INDEX idx ON t USING brin (val);

would likely succeed, as the row would likely include a toast pointer. Switching the order of INSERT and CREATE INDEX would likely fail:

    ERROR: index row size 8712 exceeds maximum 8152 for index "idx"

because this happens before the row values are toasted.

The bug has existed since PostgreSQL 9.5, where BRIN indexes were introduced, so backpatch all the way back.

Author: Tomas Vondra
Reviewed-by: Alvaro Herrera
Backpatch-through: 9.5
Discussion: https://postgr.es/m/20201001184133.oq5uq75sb45pu3aw@development
Discussion: https://postgr.es/m/20201104010544.zexj52mlldagzowv%40development
Diffstat (limited to 'src/backend/access/brin/brin_tuple.c')
-rw-r--r-- src/backend/access/brin/brin_tuple.c | 102
1 files changed, 101 insertions, 1 deletions
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
index 46e6b23c874..6774f597a4d 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -32,12 +32,23 @@
#include "postgres.h"
#include "access/brin_tuple.h"
+#include "access/detoast.h"
+#include "access/heaptoast.h"
#include "access/htup_details.h"
+#include "access/toast_internals.h"
#include "access/tupdesc.h"
#include "access/tupmacs.h"
#include "utils/datum.h"
#include "utils/memutils.h"
+
+/*
+ * This enables de-toasting of index entries. Needed until VACUUM is
+ * smart enough to rebuild indexes from scratch.
+ */
+#define TOAST_INDEX_HACK
+
+
static inline void brin_deconstruct_tuple(BrinDesc *brdesc,
char *tp, bits8 *nullbits, bool nulls,
Datum *values, bool *allnulls, bool *hasnulls);
@@ -99,6 +110,12 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
Size len,
hoff,
data_len;
+ int i;
+
+#ifdef TOAST_INDEX_HACK
+ Datum *untoasted_values;
+ int nuntoasted = 0;
+#endif
Assert(brdesc->bd_totalstored > 0);
@@ -107,6 +124,10 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
phony_nullbitmap = (bits8 *)
palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored));
+#ifdef TOAST_INDEX_HACK
+ untoasted_values = (Datum *) palloc(sizeof(Datum) * brdesc->bd_totalstored);
+#endif
+
/*
* Set up the values/nulls arrays for heap_fill_tuple
*/
@@ -138,10 +159,84 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
if (tuple->bt_columns[keyno].bv_hasnulls)
anynulls = true;
+ /*
+ * Now obtain the values of each stored datum. Note that some values
+ * might be toasted, and we cannot rely on the original heap values
+ * sticking around forever, so we must detoast them. Also try to
+ * compress them.
+ */
for (datumno = 0;
datumno < brdesc->bd_info[keyno]->oi_nstored;
datumno++)
- values[idxattno++] = tuple->bt_columns[keyno].bv_values[datumno];
+ {
+ Datum value = tuple->bt_columns[keyno].bv_values[datumno];
+
+#ifdef TOAST_INDEX_HACK
+
+ /* We must look at the stored type, not at the index descriptor. */
+ TypeCacheEntry *atttype = brdesc->bd_info[keyno]->oi_typcache[datumno];
+
+ /* Do we need to free the value at the end? */
+ bool free_value = false;
+
+ /* For non-varlena types we don't need to do anything special */
+ if (atttype->typlen != -1)
+ {
+ values[idxattno++] = value;
+ continue;
+ }
+
+ /*
+ * Do nothing if value is not of varlena type. We don't need to
+ * care about NULL values here, thanks to bv_allnulls above.
+ *
+ * If value is stored EXTERNAL, must fetch it so we are not
+ * depending on outside storage.
+ *
+ * XXX Is this actually true? Could it be that the summary is
+ * NULL even for range with non-NULL data? E.g. degenerate bloom
+ * filter may be thrown away, etc.
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(value)))
+ {
+ value = PointerGetDatum(detoast_external_attr((struct varlena *)
+ DatumGetPointer(value)));
+ free_value = true;
+ }
+
+ /*
+ * If value is above size target, and is of a compressible datatype,
+ * try to compress it in-line.
+ */
+ if (!VARATT_IS_EXTENDED(DatumGetPointer(value)) &&
+ VARSIZE(DatumGetPointer(value)) > TOAST_INDEX_TARGET &&
+ (atttype->typstorage == TYPSTORAGE_EXTENDED ||
+ atttype->typstorage == TYPSTORAGE_MAIN))
+ {
+ Datum cvalue = toast_compress_datum(value);
+
+ if (DatumGetPointer(cvalue) != NULL)
+ {
+ /* successful compression */
+ if (free_value)
+ pfree(DatumGetPointer(value));
+
+ value = cvalue;
+ free_value = true;
+ }
+ }
+
+ /*
+ * If we untoasted / compressed the value, we need to free it
+ * after forming the index tuple.
+ */
+ if (free_value)
+ untoasted_values[nuntoasted++] = value;
+
+#endif
+
+ values[idxattno++] = value;
+ }
}
/* Assert we did not overrun temp arrays */
@@ -193,6 +288,11 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
pfree(nulls);
pfree(phony_nullbitmap);
+#ifdef TOAST_INDEX_HACK
+ for (i = 0; i < nuntoasted; i++)
+ pfree(DatumGetPointer(untoasted_values[i]));
+#endif
+
/*
* Now fill in the real null bitmasks. allnulls first.
*/