summaryrefslogtreecommitdiff
path: root/src/backend/access/common
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/common')
-rw-r--r--src/backend/access/common/Makefile5
-rw-r--r--src/backend/access/common/detoast.c869
-rw-r--r--src/backend/access/common/heaptuple.c4
-rw-r--r--src/backend/access/common/indextuple.c9
-rw-r--r--src/backend/access/common/reloptions.c2
-rw-r--r--src/backend/access/common/toast_internals.c632
6 files changed, 1515 insertions, 6 deletions
diff --git a/src/backend/access/common/Makefile b/src/backend/access/common/Makefile
index d4695043377..9ac19d9f9e5 100644
--- a/src/backend/access/common/Makefile
+++ b/src/backend/access/common/Makefile
@@ -12,7 +12,8 @@ subdir = src/backend/access/common
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = bufmask.o heaptuple.o indextuple.o printsimple.o printtup.o \
- relation.o reloptions.o scankey.o session.o tupconvert.o tupdesc.o
+OBJS = bufmask.o detoast.o heaptuple.o indextuple.o printsimple.o \
+ printtup.o relation.o reloptions.o scankey.o session.o toast_internals.o \
+ tupconvert.o tupdesc.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c
new file mode 100644
index 00000000000..c8b49d6a124
--- /dev/null
+++ b/src/backend/access/common/detoast.c
@@ -0,0 +1,869 @@
+/*-------------------------------------------------------------------------
+ *
+ * detoast.c
+ * Retrieve compressed or external variable size attributes.
+ *
+ * Copyright (c) 2000-2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/access/common/detoast.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "access/genam.h"
+#include "access/heaptoast.h"
+#include "access/table.h"
+#include "access/toast_internals.h"
+#include "common/pg_lzcompress.h"
+#include "utils/expandeddatum.h"
+#include "utils/fmgroids.h"
+#include "utils/rel.h"
+
+static struct varlena *toast_fetch_datum(struct varlena *attr);
+static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
+ int32 sliceoffset, int32 length);
+static struct varlena *toast_decompress_datum(struct varlena *attr);
+static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
+
+/* ----------
+ * heap_tuple_fetch_attr -
+ *
+ * Public entry point to get back a toasted value from
+ * external source (possibly still in compressed format).
+ *
+ * This will return a datum that contains all the data internally, ie, not
+ * relying on external storage or memory, but it can still be compressed or
+ * have a short header. Note some callers assume that if the input is an
+ * EXTERNAL datum, the result will be a pfree'able chunk.
+ * ----------
+ */
+struct varlena *
+heap_tuple_fetch_attr(struct varlena *attr)
+{
+ struct varlena *result;
+
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ {
+ /*
+ * This is an external stored plain value
+ */
+ result = toast_fetch_datum(attr);
+ }
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ /*
+ * This is an indirect pointer --- dereference it
+ */
+ struct varatt_indirect redirect;
+
+ VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+ attr = (struct varlena *) redirect.pointer;
+
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+ /* recurse if value is still external in some other way */
+ if (VARATT_IS_EXTERNAL(attr))
+ return heap_tuple_fetch_attr(attr);
+
+ /*
+ * Copy into the caller's memory context, in case caller tries to
+ * pfree the result.
+ */
+ result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+ memcpy(result, attr, VARSIZE_ANY(attr));
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /*
+ * This is an expanded-object pointer --- get flat format
+ */
+ ExpandedObjectHeader *eoh;
+ Size resultsize;
+
+ eoh = DatumGetEOHP(PointerGetDatum(attr));
+ resultsize = EOH_get_flat_size(eoh);
+ result = (struct varlena *) palloc(resultsize);
+ EOH_flatten_into(eoh, (void *) result, resultsize);
+ }
+ else
+ {
+ /*
+ * This is a plain value inside of the main tuple - why am I called?
+ */
+ result = attr;
+ }
+
+ return result;
+}
+
+
+/* ----------
+ * heap_tuple_untoast_attr -
+ *
+ * Public entry point to get back a toasted value from compression
+ * or external storage. The result is always non-extended varlena form.
+ *
+ * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
+ * datum, the result will be a pfree'able chunk.
+ * ----------
+ */
+struct varlena *
+heap_tuple_untoast_attr(struct varlena *attr)
+{
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ {
+ /*
+ * This is an externally stored datum --- fetch it back from there
+ */
+ attr = toast_fetch_datum(attr);
+ /* If it's compressed, decompress it */
+ if (VARATT_IS_COMPRESSED(attr))
+ {
+ struct varlena *tmp = attr;
+
+ attr = toast_decompress_datum(tmp);
+ pfree(tmp);
+ }
+ }
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ /*
+ * This is an indirect pointer --- dereference it
+ */
+ struct varatt_indirect redirect;
+
+ VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+ attr = (struct varlena *) redirect.pointer;
+
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+ /* recurse in case value is still extended in some other way */
+ attr = heap_tuple_untoast_attr(attr);
+
+ /* if it isn't, we'd better copy it */
+ if (attr == (struct varlena *) redirect.pointer)
+ {
+ struct varlena *result;
+
+ result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+ memcpy(result, attr, VARSIZE_ANY(attr));
+ attr = result;
+ }
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /*
+ * This is an expanded-object pointer --- get flat format
+ */
+ attr = heap_tuple_fetch_attr(attr);
+ /* flatteners are not allowed to produce compressed/short output */
+ Assert(!VARATT_IS_EXTENDED(attr));
+ }
+ else if (VARATT_IS_COMPRESSED(attr))
+ {
+ /*
+ * This is a compressed value inside of the main tuple
+ */
+ attr = toast_decompress_datum(attr);
+ }
+ else if (VARATT_IS_SHORT(attr))
+ {
+ /*
+ * This is a short-header varlena --- convert to 4-byte header format
+ */
+ Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
+ Size new_size = data_size + VARHDRSZ;
+ struct varlena *new_attr;
+
+ new_attr = (struct varlena *) palloc(new_size);
+ SET_VARSIZE(new_attr, new_size);
+ memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
+ attr = new_attr;
+ }
+
+ return attr;
+}
+
+
+/* ----------
+ * heap_tuple_untoast_attr_slice -
+ *
+ * Public entry point to get back part of a toasted value
+ * from compression or external storage.
+ * ----------
+ */
+struct varlena *
+heap_tuple_untoast_attr_slice(struct varlena *attr,
+ int32 sliceoffset, int32 slicelength)
+{
+ struct varlena *preslice;
+ struct varlena *result;
+ char *attrdata;
+ int32 attrsize;
+
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ {
+ struct varatt_external toast_pointer;
+
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+ /* fast path for non-compressed external datums */
+ if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+ return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
+
+ /* fetch it back (compressed marker will get set automatically) */
+ preslice = toast_fetch_datum(attr);
+ }
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ struct varatt_indirect redirect;
+
+ VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
+
+ return heap_tuple_untoast_attr_slice(redirect.pointer,
+ sliceoffset, slicelength);
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /* pass it off to heap_tuple_fetch_attr to flatten */
+ preslice = heap_tuple_fetch_attr(attr);
+ }
+ else
+ preslice = attr;
+
+ Assert(!VARATT_IS_EXTERNAL(preslice));
+
+ if (VARATT_IS_COMPRESSED(preslice))
+ {
+ struct varlena *tmp = preslice;
+
+ /* Decompress enough to encompass the slice and the offset */
+ if (slicelength > 0 && sliceoffset >= 0)
+ preslice = toast_decompress_datum_slice(tmp, slicelength + sliceoffset);
+ else
+ preslice = toast_decompress_datum(tmp);
+
+ if (tmp != attr)
+ pfree(tmp);
+ }
+
+ if (VARATT_IS_SHORT(preslice))
+ {
+ attrdata = VARDATA_SHORT(preslice);
+ attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
+ }
+ else
+ {
+ attrdata = VARDATA(preslice);
+ attrsize = VARSIZE(preslice) - VARHDRSZ;
+ }
+
+ /* slicing of datum for compressed cases and plain value */
+
+ if (sliceoffset >= attrsize)
+ {
+ sliceoffset = 0;
+ slicelength = 0;
+ }
+
+ if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
+ slicelength = attrsize - sliceoffset;
+
+ result = (struct varlena *) palloc(slicelength + VARHDRSZ);
+ SET_VARSIZE(result, slicelength + VARHDRSZ);
+
+ memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
+
+ if (preslice != attr)
+ pfree(preslice);
+
+ return result;
+}
+
+/* ----------
+ * toast_fetch_datum -
+ *
+ * Reconstruct an in memory Datum from the chunks saved
+ * in the toast relation
+ * ----------
+ */
+static struct varlena *
+toast_fetch_datum(struct varlena *attr)
+{
+ Relation toastrel;
+ Relation *toastidxs;
+ ScanKeyData toastkey;
+ SysScanDesc toastscan;
+ HeapTuple ttup;
+ TupleDesc toasttupDesc;
+ struct varlena *result;
+ struct varatt_external toast_pointer;
+ int32 ressize;
+ int32 residx,
+ nextidx;
+ int32 numchunks;
+ Pointer chunk;
+ bool isnull;
+ char *chunkdata;
+ int32 chunksize;
+ int num_indexes;
+ int validIndex;
+ SnapshotData SnapshotToast;
+
+ if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+ elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
+
+ /* Must copy to access aligned fields */
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+ ressize = toast_pointer.va_extsize;
+ numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
+
+ result = (struct varlena *) palloc(ressize + VARHDRSZ);
+
+ if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+ SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ);
+ else
+ SET_VARSIZE(result, ressize + VARHDRSZ);
+
+ /*
+ * Open the toast relation and its indexes
+ */
+ toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
+ toasttupDesc = toastrel->rd_att;
+
+ /* Look for the valid index of the toast relation */
+ validIndex = toast_open_indexes(toastrel,
+ AccessShareLock,
+ &toastidxs,
+ &num_indexes);
+
+ /*
+ * Setup a scan key to fetch from the index by va_valueid
+ */
+ ScanKeyInit(&toastkey,
+ (AttrNumber) 1,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(toast_pointer.va_valueid));
+
+ /*
+ * Read the chunks by index
+ *
+ * Note that because the index is actually on (valueid, chunkidx) we will
+ * see the chunks in chunkidx order, even though we didn't explicitly ask
+ * for it.
+ */
+ nextidx = 0;
+
+ init_toast_snapshot(&SnapshotToast);
+ toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
+ &SnapshotToast, 1, &toastkey);
+ while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
+ {
+ /*
+ * Have a chunk, extract the sequence number and the data
+ */
+ residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
+ Assert(!isnull);
+ chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
+ Assert(!isnull);
+ if (!VARATT_IS_EXTENDED(chunk))
+ {
+ chunksize = VARSIZE(chunk) - VARHDRSZ;
+ chunkdata = VARDATA(chunk);
+ }
+ else if (VARATT_IS_SHORT(chunk))
+ {
+ /* could happen due to heap_form_tuple doing its thing */
+ chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
+ chunkdata = VARDATA_SHORT(chunk);
+ }
+ else
+ {
+ /* should never happen */
+ elog(ERROR, "found toasted toast chunk for toast value %u in %s",
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
+ chunksize = 0; /* keep compiler quiet */
+ chunkdata = NULL;
+ }
+
+ /*
+ * Some checks on the data we've found
+ */
+ if (residx != nextidx)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("unexpected chunk number %d (expected %d) for toast value %u in %s",
+ residx, nextidx,
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel))));
+ if (residx < numchunks - 1)
+ {
+ if (chunksize != TOAST_MAX_CHUNK_SIZE)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
+ chunksize, (int) TOAST_MAX_CHUNK_SIZE,
+ residx, numchunks,
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel))));
+ }
+ else if (residx == numchunks - 1)
+ {
+ if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
+ chunksize,
+ (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
+ residx,
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel))));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
+ residx,
+ 0, numchunks - 1,
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel))));
+
+ /*
+ * Copy the data into proper place in our result
+ */
+ memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE,
+ chunkdata,
+ chunksize);
+
+ nextidx++;
+ }
+
+ /*
+ * Final checks that we successfully fetched the datum
+ */
+ if (nextidx != numchunks)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("missing chunk number %d for toast value %u in %s",
+ nextidx,
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel))));
+
+ /*
+ * End scan and close relations
+ */
+ systable_endscan_ordered(toastscan);
+ toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
+ table_close(toastrel, AccessShareLock);
+
+ return result;
+}
+
+/* ----------
+ * toast_fetch_datum_slice -
+ *
+ * Reconstruct a segment of a Datum from the chunks saved
+ * in the toast relation
+ *
+ * Note that this function only supports non-compressed external datums.
+ * ----------
+ */
+static struct varlena *
+toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
+{
+ Relation toastrel;
+ Relation *toastidxs;
+ ScanKeyData toastkey[3];
+ int nscankeys;
+ SysScanDesc toastscan;
+ HeapTuple ttup;
+ TupleDesc toasttupDesc;
+ struct varlena *result;
+ struct varatt_external toast_pointer;
+ int32 attrsize;
+ int32 residx;
+ int32 nextidx;
+ int numchunks;
+ int startchunk;
+ int endchunk;
+ int32 startoffset;
+ int32 endoffset;
+ int totalchunks;
+ Pointer chunk;
+ bool isnull;
+ char *chunkdata;
+ int32 chunksize;
+ int32 chcpystrt;
+ int32 chcpyend;
+ int num_indexes;
+ int validIndex;
+ SnapshotData SnapshotToast;
+
+ if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+ elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
+
+ /* Must copy to access aligned fields */
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+ /*
+ * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
+ * we can't return a compressed datum which is meaningful to toast later
+ */
+ Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
+
+ attrsize = toast_pointer.va_extsize;
+ totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
+
+ if (sliceoffset >= attrsize)
+ {
+ sliceoffset = 0;
+ length = 0;
+ }
+
+ if (((sliceoffset + length) > attrsize) || length < 0)
+ length = attrsize - sliceoffset;
+
+ result = (struct varlena *) palloc(length + VARHDRSZ);
+
+ SET_VARSIZE(result, length + VARHDRSZ);
+
+ if (length == 0)
+ return result; /* Can save a lot of work at this point! */
+
+ startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
+ endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
+ numchunks = (endchunk - startchunk) + 1;
+
+ startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
+ endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
+
+ /*
+ * Open the toast relation and its indexes
+ */
+ toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
+ toasttupDesc = toastrel->rd_att;
+
+ /* Look for the valid index of toast relation */
+ validIndex = toast_open_indexes(toastrel,
+ AccessShareLock,
+ &toastidxs,
+ &num_indexes);
+
+ /*
+ * Setup a scan key to fetch from the index. This is either two keys or
+ * three depending on the number of chunks.
+ */
+ ScanKeyInit(&toastkey[0],
+ (AttrNumber) 1,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(toast_pointer.va_valueid));
+
+ /*
+ * Use equality condition for one chunk, a range condition otherwise:
+ */
+ if (numchunks == 1)
+ {
+ ScanKeyInit(&toastkey[1],
+ (AttrNumber) 2,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum(startchunk));
+ nscankeys = 2;
+ }
+ else
+ {
+ ScanKeyInit(&toastkey[1],
+ (AttrNumber) 2,
+ BTGreaterEqualStrategyNumber, F_INT4GE,
+ Int32GetDatum(startchunk));
+ ScanKeyInit(&toastkey[2],
+ (AttrNumber) 2,
+ BTLessEqualStrategyNumber, F_INT4LE,
+ Int32GetDatum(endchunk));
+ nscankeys = 3;
+ }
+
+ /*
+ * Read the chunks by index
+ *
+ * The index is on (valueid, chunkidx) so they will come in order
+ */
+ init_toast_snapshot(&SnapshotToast);
+ nextidx = startchunk;
+ toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
+ &SnapshotToast, nscankeys, toastkey);
+ while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
+ {
+ /*
+ * Have a chunk, extract the sequence number and the data
+ */
+ residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
+ Assert(!isnull);
+ chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
+ Assert(!isnull);
+ if (!VARATT_IS_EXTENDED(chunk))
+ {
+ chunksize = VARSIZE(chunk) - VARHDRSZ;
+ chunkdata = VARDATA(chunk);
+ }
+ else if (VARATT_IS_SHORT(chunk))
+ {
+ /* could happen due to heap_form_tuple doing its thing */
+ chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
+ chunkdata = VARDATA_SHORT(chunk);
+ }
+ else
+ {
+ /* should never happen */
+ elog(ERROR, "found toasted toast chunk for toast value %u in %s",
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
+ chunksize = 0; /* keep compiler quiet */
+ chunkdata = NULL;
+ }
+
+ /*
+ * Some checks on the data we've found
+ */
+ if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
+ elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
+ residx, nextidx,
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
+ if (residx < totalchunks - 1)
+ {
+ if (chunksize != TOAST_MAX_CHUNK_SIZE)
+ elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
+ chunksize, (int) TOAST_MAX_CHUNK_SIZE,
+ residx, totalchunks,
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
+ }
+ else if (residx == totalchunks - 1)
+ {
+ if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
+ elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
+ chunksize,
+ (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
+ residx,
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
+ }
+ else
+ elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
+ residx,
+ 0, totalchunks - 1,
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
+
+ /*
+ * Copy the data into proper place in our result
+ */
+ chcpystrt = 0;
+ chcpyend = chunksize - 1;
+ if (residx == startchunk)
+ chcpystrt = startoffset;
+ if (residx == endchunk)
+ chcpyend = endoffset;
+
+ memcpy(VARDATA(result) +
+ (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
+ chunkdata + chcpystrt,
+ (chcpyend - chcpystrt) + 1);
+
+ nextidx++;
+ }
+
+ /*
+ * Final checks that we successfully fetched the datum
+ */
+ if (nextidx != (endchunk + 1))
+ elog(ERROR, "missing chunk number %d for toast value %u in %s",
+ nextidx,
+ toast_pointer.va_valueid,
+ RelationGetRelationName(toastrel));
+
+ /*
+ * End scan and close relations
+ */
+ systable_endscan_ordered(toastscan);
+ toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
+ table_close(toastrel, AccessShareLock);
+
+ return result;
+}
+
+/* ----------
+ * toast_decompress_datum -
+ *
+ * Decompress a compressed version of a varlena datum
+ */
+static struct varlena *
+toast_decompress_datum(struct varlena *attr)
+{
+ struct varlena *result;
+
+ Assert(VARATT_IS_COMPRESSED(attr));
+
+ result = (struct varlena *)
+ palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
+ SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
+
+ if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
+ VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
+ VARDATA(result),
+ TOAST_COMPRESS_RAWSIZE(attr), true) < 0)
+ elog(ERROR, "compressed data is corrupted");
+
+ return result;
+}
+
+
+/* ----------
+ * toast_decompress_datum_slice -
+ *
+ * Decompress the front of a compressed version of a varlena datum.
+ * offset handling happens in heap_tuple_untoast_attr_slice.
+ * Here we just decompress a slice from the front.
+ */
+static struct varlena *
+toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
+{
+ struct varlena *result;
+ int32 rawsize;
+
+ Assert(VARATT_IS_COMPRESSED(attr));
+
+ result = (struct varlena *) palloc(slicelength + VARHDRSZ);
+
+ rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
+ VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
+ VARDATA(result),
+ slicelength, false);
+ if (rawsize < 0)
+ elog(ERROR, "compressed data is corrupted");
+
+ SET_VARSIZE(result, rawsize + VARHDRSZ);
+ return result;
+}
+
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * (including the VARHDRSZ header)
+ * ----------
+ */
+Size
+toast_raw_datum_size(Datum value)
+{
+ struct varlena *attr = (struct varlena *) DatumGetPointer(value);
+ Size result;
+
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ {
+ /* va_rawsize is the size of the original datum -- including header */
+ struct varatt_external toast_pointer;
+
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+ result = toast_pointer.va_rawsize;
+ }
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ struct varatt_indirect toast_pointer;
+
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
+
+ return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ result = EOH_get_flat_size(DatumGetEOHP(value));
+ }
+ else if (VARATT_IS_COMPRESSED(attr))
+ {
+ /* here, va_rawsize is just the payload size */
+ result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
+ }
+ else if (VARATT_IS_SHORT(attr))
+ {
+ /*
+ * we have to normalize the header length to VARHDRSZ or else the
+ * callers of this function will be confused.
+ */
+ result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
+ }
+ else
+ {
+ /* plain untoasted datum */
+ result = VARSIZE(attr);
+ }
+ return result;
+}
+
+/* ----------
+ * toast_datum_size
+ *
+ * Return the physical storage size (possibly compressed) of a varlena datum
+ * ----------
+ */
+Size
+toast_datum_size(Datum value)
+{
+ struct varlena *attr = (struct varlena *) DatumGetPointer(value);
+ Size result;
+
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ {
+ /*
+ * Attribute is stored externally - return the extsize whether
+ * compressed or not. We do not count the size of the toast pointer
+ * ... should we?
+ */
+ struct varatt_external toast_pointer;
+
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+ result = toast_pointer.va_extsize;
+ }
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ struct varatt_indirect toast_pointer;
+
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+ return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ result = EOH_get_flat_size(DatumGetEOHP(value));
+ }
+ else if (VARATT_IS_SHORT(attr))
+ {
+ result = VARSIZE_SHORT(attr);
+ }
+ else
+ {
+ /*
+ * Attribute is stored inline either compressed or not, just calculate
+ * the size of the datum in either case.
+ */
+ result = VARSIZE(attr);
+ }
+ return result;
+}
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index a48a6cd757f..cc948958d7e 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -18,7 +18,7 @@
* (In performance-critical code paths we can use pg_detoast_datum_packed
* and the appropriate access macros to avoid that overhead.) Note that this
* conversion is performed directly in heap_form_tuple, without invoking
- * tuptoaster.c.
+ * heaptoast.c.
*
* This change will break any code that assumes it needn't detoast values
* that have been put into a tuple but never sent to disk. Hopefully there
@@ -57,9 +57,9 @@
#include "postgres.h"
+#include "access/heaptoast.h"
#include "access/sysattr.h"
#include "access/tupdesc_details.h"
-#include "access/tuptoaster.h"
#include "executor/tuptable.h"
#include "utils/expandeddatum.h"
diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c
index cb23be859de..07586201b9d 100644
--- a/src/backend/access/common/indextuple.c
+++ b/src/backend/access/common/indextuple.c
@@ -16,10 +16,17 @@
#include "postgres.h"
+#include "access/detoast.h"
+#include "access/heaptoast.h"
#include "access/htup_details.h"
#include "access/itup.h"
-#include "access/tuptoaster.h"
+#include "access/toast_internals.h"
+/*
+ * This enables de-toasting of index entries. Needed until VACUUM is
+ * smart enough to rebuild indexes from scratch.
+ */
+#define TOAST_INDEX_HACK
/* ----------------------------------------------------------------
* index_ tuple interface routines
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 42647b05265..20f4ed3c386 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -19,11 +19,11 @@
#include "access/gist_private.h"
#include "access/hash.h"
+#include "access/heaptoast.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "access/reloptions.h"
#include "access/spgist.h"
-#include "access/tuptoaster.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "commands/tablespace.h"
diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c
new file mode 100644
index 00000000000..a9712424901
--- /dev/null
+++ b/src/backend/access/common/toast_internals.c
@@ -0,0 +1,632 @@
+/*-------------------------------------------------------------------------
+ *
+ * toast_internals.c
+ * Functions for internal use by the TOAST system.
+ *
+ * Copyright (c) 2000-2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/access/common/toast_internals.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/heaptoast.h"
+#include "access/table.h"
+#include "access/toast_internals.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "common/pg_lzcompress.h"
+#include "miscadmin.h"
+#include "utils/fmgroids.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+
+static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
+static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
+
+/* ----------
+ * toast_compress_datum -
+ *
+ * Create a compressed version of a varlena datum
+ *
+ * If we fail (ie, compressed result is actually bigger than original)
+ * then return NULL. We must not use compressed data if it'd expand
+ * the tuple!
+ *
+ * We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
+ * copying them. But we can't handle external or compressed datums.
+ * ----------
+ */
+Datum
+toast_compress_datum(Datum value)
+{
+ struct varlena *tmp;
+ int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
+ int32 len;
+
+ Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
+ Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
+
+ /*
+ * No point in wasting a palloc cycle if value size is out of the allowed
+ * range for compression
+ */
+ if (valsize < PGLZ_strategy_default->min_input_size ||
+ valsize > PGLZ_strategy_default->max_input_size)
+ return PointerGetDatum(NULL);
+
+ tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
+ TOAST_COMPRESS_HDRSZ);
+
+ /*
+ * We recheck the actual size even if pglz_compress() reports success,
+ * because it might be satisfied with having saved as little as one byte
+ * in the compressed data --- which could turn into a net loss once you
+ * consider header and alignment padding. Worst case, the compressed
+ * format might require three padding bytes (plus header, which is
+ * included in VARSIZE(tmp)), whereas the uncompressed format would take
+ * only one header byte and no padding if the value is short enough. So
+ * we insist on a savings of more than 2 bytes to ensure we have a gain.
+ */
+ len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)),
+ valsize,
+ TOAST_COMPRESS_RAWDATA(tmp),
+ PGLZ_strategy_default);
+ if (len >= 0 &&
+ len + TOAST_COMPRESS_HDRSZ < valsize - 2)
+ {
+ TOAST_COMPRESS_SET_RAWSIZE(tmp, valsize);
+ SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ);
+ /* successful compression */
+ return PointerGetDatum(tmp);
+ }
+ else
+ {
+ /* incompressible data */
+ pfree(tmp);
+ return PointerGetDatum(NULL);
+ }
+}
+
+/* ----------
+ * toast_save_datum -
+ *
+ * Save one single datum into the secondary relation and return
+ * a Datum reference for it.
+ *
+ * rel: the main relation we're working with (not the toast rel!)
+ * value: datum to be pushed to toast storage
+ * oldexternal: if not NULL, toast pointer previously representing the datum
+ * options: options to be passed to heap_insert() for toast rows
+ * ----------
+ */
+Datum
+toast_save_datum(Relation rel, Datum value,
+ struct varlena *oldexternal, int options)
+{
+ Relation toastrel;
+ Relation *toastidxs;
+ HeapTuple toasttup;
+ TupleDesc toasttupDesc;
+ Datum t_values[3];
+ bool t_isnull[3];
+ CommandId mycid = GetCurrentCommandId(true);
+ struct varlena *result;
+ struct varatt_external toast_pointer;
+ union
+ {
+ struct varlena hdr;
+ /* this is to make the union big enough for a chunk: */
+ char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
+ /* ensure union is aligned well enough: */
+ int32 align_it;
+ } chunk_data;
+ int32 chunk_size;
+ int32 chunk_seq = 0;
+ char *data_p;
+ int32 data_todo;
+ Pointer dval = DatumGetPointer(value);
+ int num_indexes;
+ int validIndex;
+
+ Assert(!VARATT_IS_EXTERNAL(value));
+
+ /*
+ * Open the toast relation and its indexes. We can use the index to check
+ * uniqueness of the OID we assign to the toasted item, even though it has
+ * additional columns besides OID.
+ */
+ toastrel = table_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
+ toasttupDesc = toastrel->rd_att;
+
+ /* Open all the toast indexes and look for the valid one */
+ validIndex = toast_open_indexes(toastrel,
+ RowExclusiveLock,
+ &toastidxs,
+ &num_indexes);
+
+ /*
+ * Get the data pointer and length, and compute va_rawsize and va_extsize.
+ *
+ * va_rawsize is the size of the equivalent fully uncompressed datum, so
+ * we have to adjust for short headers.
+ *
+ * va_extsize is the actual size of the data payload in the toast records.
+ */
+ if (VARATT_IS_SHORT(dval))
+ {
+ data_p = VARDATA_SHORT(dval);
+ data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
+ toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */
+ toast_pointer.va_extsize = data_todo;
+ }
+ else if (VARATT_IS_COMPRESSED(dval))
+ {
+ data_p = VARDATA(dval);
+ data_todo = VARSIZE(dval) - VARHDRSZ;
+ /* rawsize in a compressed datum is just the size of the payload */
+ toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ;
+ toast_pointer.va_extsize = data_todo;
+ /* Assert that the numbers look like it's compressed */
+ Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
+ }
+ else
+ {
+ data_p = VARDATA(dval);
+ data_todo = VARSIZE(dval) - VARHDRSZ;
+ toast_pointer.va_rawsize = VARSIZE(dval);
+ toast_pointer.va_extsize = data_todo;
+ }
+
+ /*
+ * Insert the correct table OID into the result TOAST pointer.
+ *
+ * Normally this is the actual OID of the target toast table, but during
+ * table-rewriting operations such as CLUSTER, we have to insert the OID
+ * of the table's real permanent toast table instead. rd_toastoid is set
+ * if we have to substitute such an OID.
+ */
+ if (OidIsValid(rel->rd_toastoid))
+ toast_pointer.va_toastrelid = rel->rd_toastoid;
+ else
+ toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
+
+ /*
+ * Choose an OID to use as the value ID for this toast value.
+ *
+ * Normally we just choose an unused OID within the toast table. But
+ * during table-rewriting operations where we are preserving an existing
+ * toast table OID, we want to preserve toast value OIDs too. So, if
+ * rd_toastoid is set and we had a prior external value from that same
+ * toast table, re-use its value ID. If we didn't have a prior external
+ * value (which is a corner case, but possible if the table's attstorage
+ * options have been changed), we have to pick a value ID that doesn't
+ * conflict with either new or existing toast value OIDs.
+ */
+ if (!OidIsValid(rel->rd_toastoid))
+ {
+ /* normal case: just choose an unused OID */
+ toast_pointer.va_valueid =
+ GetNewOidWithIndex(toastrel,
+ RelationGetRelid(toastidxs[validIndex]),
+ (AttrNumber) 1);
+ }
+ else
+ {
+ /* rewrite case: check to see if value was in old toast table */
+ toast_pointer.va_valueid = InvalidOid;
+ if (oldexternal != NULL)
+ {
+ struct varatt_external old_toast_pointer;
+
+ Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
+ /* Must copy to access aligned fields */
+ VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
+ if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
+ {
+ /* This value came from the old toast table; reuse its OID */
+ toast_pointer.va_valueid = old_toast_pointer.va_valueid;
+
+ /*
+ * There is a corner case here: the table rewrite might have
+ * to copy both live and recently-dead versions of a row, and
+ * those versions could easily reference the same toast value.
+ * When we copy the second or later version of such a row,
+ * reusing the OID will mean we select an OID that's already
+ * in the new toast table. Check for that, and if so, just
+ * fall through without writing the data again.
+ *
+ * While annoying and ugly-looking, this is a good thing
+ * because it ensures that we wind up with only one copy of
+ * the toast value when there is only one copy in the old
+ * toast table. Before we detected this case, we'd have made
+ * multiple copies, wasting space; and what's worse, the
+ * copies belonging to already-deleted heap tuples would not
+ * be reclaimed by VACUUM.
+ */
+ if (toastrel_valueid_exists(toastrel,
+ toast_pointer.va_valueid))
+ {
+ /* Match, so short-circuit the data storage loop below */
+ data_todo = 0;
+ }
+ }
+ }
+ if (toast_pointer.va_valueid == InvalidOid)
+ {
+ /*
+ * new value; must choose an OID that doesn't conflict in either
+ * old or new toast table
+ */
+ do
+ {
+ toast_pointer.va_valueid =
+ GetNewOidWithIndex(toastrel,
+ RelationGetRelid(toastidxs[validIndex]),
+ (AttrNumber) 1);
+ } while (toastid_valueid_exists(rel->rd_toastoid,
+ toast_pointer.va_valueid));
+ }
+ }
+
+ /*
+ * Initialize constant parts of the tuple data
+ */
+ t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
+ t_values[2] = PointerGetDatum(&chunk_data);
+ t_isnull[0] = false;
+ t_isnull[1] = false;
+ t_isnull[2] = false;
+
+ /*
+ * Split up the item into chunks
+ */
+ while (data_todo > 0)
+ {
+ int i;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * Calculate the size of this chunk
+ */
+ chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
+
+ /*
+ * Build a tuple and store it
+ */
+ t_values[1] = Int32GetDatum(chunk_seq++);
+ SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
+ memcpy(VARDATA(&chunk_data), data_p, chunk_size);
+ toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
+
+ heap_insert(toastrel, toasttup, mycid, options, NULL);
+
+ /*
+ * Create the index entry. We cheat a little here by not using
+ * FormIndexDatum: this relies on the knowledge that the index columns
+ * are the same as the initial columns of the table for all the
+ * indexes. We also cheat by not providing an IndexInfo: this is okay
+ * for now because btree doesn't need one, but we might have to be
+ * more honest someday.
+ *
+ * Note also that there had better not be any user-created index on
+ * the TOAST table, since we don't bother to update anything else.
+ */
+ for (i = 0; i < num_indexes; i++)
+ {
+ /* Only index relations marked as ready can be updated */
+ if (toastidxs[i]->rd_index->indisready)
+ index_insert(toastidxs[i], t_values, t_isnull,
+ &(toasttup->t_self),
+ toastrel,
+ toastidxs[i]->rd_index->indisunique ?
+ UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
+ NULL);
+ }
+
+ /*
+ * Free memory
+ */
+ heap_freetuple(toasttup);
+
+ /*
+ * Move on to next chunk
+ */
+ data_todo -= chunk_size;
+ data_p += chunk_size;
+ }
+
+ /*
+ * Done - close toast relation and its indexes
+ */
+ toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
+ table_close(toastrel, RowExclusiveLock);
+
+ /*
+ * Create the TOAST pointer value that we'll return
+ */
+ result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
+ SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
+ memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
+
+ return PointerGetDatum(result);
+}
+
+/* ----------
+ * toast_delete_datum -
+ *
+ * Delete a single external stored value.
+ * ----------
+ */
+void
+toast_delete_datum(Relation rel, Datum value, bool is_speculative)
+{
+ struct varlena *attr = (struct varlena *) DatumGetPointer(value);
+ struct varatt_external toast_pointer;
+ Relation toastrel;
+ Relation *toastidxs;
+ ScanKeyData toastkey;
+ SysScanDesc toastscan;
+ HeapTuple toasttup;
+ int num_indexes;
+ int validIndex;
+ SnapshotData SnapshotToast;
+
+ if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+ return;
+
+ /* Must copy to access aligned fields */
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+ /*
+ * Open the toast relation and its indexes
+ */
+ toastrel = table_open(toast_pointer.va_toastrelid, RowExclusiveLock);
+
+ /* Fetch valid relation used for process */
+ validIndex = toast_open_indexes(toastrel,
+ RowExclusiveLock,
+ &toastidxs,
+ &num_indexes);
+
+ /*
+ * Setup a scan key to find chunks with matching va_valueid
+ */
+ ScanKeyInit(&toastkey,
+ (AttrNumber) 1,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(toast_pointer.va_valueid));
+
+ /*
+ * Find all the chunks. (We don't actually care whether we see them in
+ * sequence or not, but since we've already locked the index we might as
+ * well use systable_beginscan_ordered.)
+ */
+ init_toast_snapshot(&SnapshotToast);
+ toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
+ &SnapshotToast, 1, &toastkey);
+ while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
+ {
+ /*
+ * Have a chunk, delete it
+ */
+ if (is_speculative)
+ heap_abort_speculative(toastrel, &toasttup->t_self);
+ else
+ simple_heap_delete(toastrel, &toasttup->t_self);
+ }
+
+ /*
+ * End scan and close relations
+ */
+ systable_endscan_ordered(toastscan);
+ toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
+ table_close(toastrel, RowExclusiveLock);
+}
+
+/* ----------
+ * toastrel_valueid_exists -
+ *
+ * Test whether a toast value with the given ID exists in the toast relation.
+ * For safety, we consider a value to exist if there are either live or dead
+ * toast rows with that ID; see notes for GetNewOidWithIndex().
+ * ----------
+ */
+static bool
+toastrel_valueid_exists(Relation toastrel, Oid valueid)
+{
+ bool result = false;
+ ScanKeyData toastkey;
+ SysScanDesc toastscan;
+ int num_indexes;
+ int validIndex;
+ Relation *toastidxs;
+
+ /* Fetch a valid index relation */
+ validIndex = toast_open_indexes(toastrel,
+ RowExclusiveLock,
+ &toastidxs,
+ &num_indexes);
+
+ /*
+ * Setup a scan key to find chunks with matching va_valueid
+ */
+ ScanKeyInit(&toastkey,
+ (AttrNumber) 1,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(valueid));
+
+ /*
+ * Is there any such chunk?
+ */
+ toastscan = systable_beginscan(toastrel,
+ RelationGetRelid(toastidxs[validIndex]),
+ true, SnapshotAny, 1, &toastkey);
+
+ if (systable_getnext(toastscan) != NULL)
+ result = true;
+
+ systable_endscan(toastscan);
+
+ /* Clean up */
+ toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
+
+ return result;
+}
+
+/* ----------
+ * toastid_valueid_exists -
+ *
+ * As above, but work from toast rel's OID not an open relation
+ * ----------
+ */
+static bool
+toastid_valueid_exists(Oid toastrelid, Oid valueid)
+{
+ bool result;
+ Relation toastrel;
+
+ toastrel = table_open(toastrelid, AccessShareLock);
+
+ result = toastrel_valueid_exists(toastrel, valueid);
+
+ table_close(toastrel, AccessShareLock);
+
+ return result;
+}
+
+/* ----------
+ * toast_get_valid_index
+ *
+ * Get OID of valid index associated to given toast relation. A toast
+ * relation can have only one valid index at the same time.
+ */
+Oid
+toast_get_valid_index(Oid toastoid, LOCKMODE lock)
+{
+ int num_indexes;
+ int validIndex;
+ Oid validIndexOid;
+ Relation *toastidxs;
+ Relation toastrel;
+
+ /* Open the toast relation */
+ toastrel = table_open(toastoid, lock);
+
+ /* Look for the valid index of the toast relation */
+ validIndex = toast_open_indexes(toastrel,
+ lock,
+ &toastidxs,
+ &num_indexes);
+ validIndexOid = RelationGetRelid(toastidxs[validIndex]);
+
+ /* Close the toast relation and all its indexes */
+ toast_close_indexes(toastidxs, num_indexes, lock);
+ table_close(toastrel, lock);
+
+ return validIndexOid;
+}
+
+/* ----------
+ * toast_open_indexes
+ *
+ * Get an array of the indexes associated to the given toast relation
+ * and return as well the position of the valid index used by the toast
+ * relation in this array. It is the responsibility of the caller of this
+ * function to close the indexes as well as free them.
+ */
+int
+toast_open_indexes(Relation toastrel,
+ LOCKMODE lock,
+ Relation **toastidxs,
+ int *num_indexes)
+{
+ int i = 0;
+ int res = 0;
+ bool found = false;
+ List *indexlist;
+ ListCell *lc;
+
+ /* Get index list of the toast relation */
+ indexlist = RelationGetIndexList(toastrel);
+ Assert(indexlist != NIL);
+
+ *num_indexes = list_length(indexlist);
+
+ /* Open all the index relations */
+ *toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation));
+ foreach(lc, indexlist)
+ (*toastidxs)[i++] = index_open(lfirst_oid(lc), lock);
+
+ /* Fetch the first valid index in list */
+ for (i = 0; i < *num_indexes; i++)
+ {
+ Relation toastidx = (*toastidxs)[i];
+
+ if (toastidx->rd_index->indisvalid)
+ {
+ res = i;
+ found = true;
+ break;
+ }
+ }
+
+ /*
+ * Free index list, not necessary anymore as relations are opened and a
+ * valid index has been found.
+ */
+ list_free(indexlist);
+
+ /*
+ * The toast relation should have one valid index, so something is going
+ * wrong if there is nothing.
+ */
+ if (!found)
+ elog(ERROR, "no valid index found for toast relation with Oid %u",
+ RelationGetRelid(toastrel));
+
+ return res;
+}
+
+/* ----------
+ * toast_close_indexes
+ *
+ * Close an array of indexes for a toast relation and free it. This should
+ * be called for a set of indexes opened previously with toast_open_indexes.
+ */
+void
+toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
+{
+ int i;
+
+ /* Close relations and clean up things */
+ for (i = 0; i < num_indexes; i++)
+ index_close(toastidxs[i], lock);
+ pfree(toastidxs);
+}
+
+/* ----------
+ * init_toast_snapshot
+ *
+ * Initialize an appropriate TOAST snapshot. We must use an MVCC snapshot
+ * to initialize the TOAST snapshot; since we don't know which one to use,
+ * just use the oldest one. This is safe: at worst, we will get a "snapshot
+ * too old" error that might have been avoided otherwise.
+ */
+void
+init_toast_snapshot(Snapshot toast_snapshot)
+{
+ Snapshot snapshot = GetOldestSnapshot();
+
+ if (snapshot == NULL)
+ elog(ERROR, "no known snapshots");
+
+ InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken);
+}