path: root/src/include/access/relscan.h
author    Andres Freund  2019-03-11 19:46:41 +0000
committer Andres Freund  2019-03-11 19:46:41 +0000
commit    c2fe139c201c48f1133e9fbea2dd99b8efe2fadd (patch)
tree      ab0a6261b412b8284b6c91af158f72af97e02a35 /src/include/access/relscan.h
parent    a47841528107921f02c280e0c5f91c5a1d86adb0 (diff)
tableam: Add and use scan APIs.
To allow table accesses to not be directly dependent on heap, several new abstractions are needed. Specifically:

1) Heap scans need to be generalized into table scans. Do this by introducing TableScanDesc, which will be the "base class" for individual AMs. This contains the AM-independent fields from HeapScanDesc.

The previous heap_{beginscan,rescan,endscan} et al. have been replaced with table_ versions.

There's no direct replacement for heap_getnext(), as that returned a HeapTuple, which is undesirable for other AMs. Instead there's table_scan_getnextslot(). But note that heap_getnext() lives on; it's still widely used to access catalog tables.

This is achieved by new scan_begin, scan_end, scan_rescan and scan_getnextslot callbacks.

2) The portion of a parallel scan's state that is shared between backends needs to be set up without the caller doing per-AM work. To achieve that, new parallelscan_{estimate, initialize, reinitialize} callbacks are introduced, which operate on a new ParallelTableScanDesc, which again can be subclassed by AMs.

As it is likely that several AMs are going to be block oriented, block-oriented callbacks that can be shared between such AMs are provided and used by heap: table_block_parallelscan_{estimate, initialize, reinitialize} as callbacks, and table_block_parallelscan_{nextpage, init} for use inside AMs. These operate on a ParallelBlockTableScanDesc.

3) Index scans need to be able to access tables to return a tuple, and there needs to be state that persists across individual accesses to the heap, such as buffers. That's now handled by introducing a sort-of-scan, IndexFetchTable, which again is intended to be subclassed by individual AMs (for heap, IndexFetchHeap).

The relevant callbacks for an AM are index_fetch_{begin, reset, end} to manage the necessary state, and index_fetch_tuple to retrieve an indexed tuple. Note that index_fetch_tuple implementations need to be smarter than blindly fetching tuples for AMs that have optimizations similar to heap's HOT: the currently alive tuple in the update chain needs to be fetched if appropriate.

Similar to table_scan_getnextslot(), it's undesirable to continue to return HeapTuples. Thus index_fetch_heap (which might be renamed later) now accepts a slot as an argument. Core code doesn't have many call sites performing index scans without going through the systable_* API (in contrast to the many heap_getnext() calls working directly with HeapTuples).

Index scans now store the result of a search in IndexScanDesc->xs_heaptid rather than xs_ctup->t_self. As the target is not generally a HeapTuple anymore, that seems cleaner.

To be able to sensibly adapt code to use the above, two further callbacks have been introduced:

a) slot_callbacks returns a TupleTableSlotOps* suitable for creating slots capable of holding a tuple of the AM's type. table_slot_callbacks() and table_slot_create() are based upon that, but have additional logic to deal with views, foreign tables, etc.

While this change could have been done separately, nearly all the call sites that needed to be adapted for the rest of this commit would also have needed to be adapted for table_slot_callbacks(), making separation not worthwhile.

b) tuple_satisfies_snapshot checks whether the tuple in a slot is currently visible according to a snapshot. That's required as a few places now don't have a buffer + HeapTuple around, but only a slot (which in heap's case internally has that information).
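As a concrete illustration (a minimal caller-side sketch, not code from this commit), a sequential scan written against the new API might look like the following; it assumes the relation has already been opened and locked and that an active snapshot has been pushed:

#include "postgres.h"

#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/snapmgr.h"

/*
 * Count visible tuples in an already-opened, locked relation using the
 * AM-independent scan API introduced by this commit.
 */
static uint64
count_tuples(Relation rel)
{
	/* table_slot_create() builds a slot suited to rel's access method */
	TupleTableSlot *slot = table_slot_create(rel, NULL);
	/* table_beginscan() takes over from heap_beginscan() for generic callers */
	TableScanDesc scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
	uint64		ntuples = 0;

	/* table_scan_getnextslot() replaces heap_getnext(); no HeapTuple involved */
	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
		ntuples++;

	table_endscan(scan);
	ExecDropSingleTupleTableSlot(slot);

	return ntuples;
}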
Additionally, a few infrastructure changes were needed:

I) SysScanDesc, as used by systable_{beginscan, getnext} et al., now internally uses a slot to keep track of tuples. While systable_getnext() still returns HeapTuples, and will for the foreseeable future, the index API (see 1) above) now only deals with slots.

The remainder, and largest part, of this commit is then adjusting all scans in postgres to use the new APIs.

Author: Andres Freund, Haribabu Kommi, Alvaro Herrera
Discussion: https://postgr.es/m/[email protected]
            https://postgr.es/m/[email protected]
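For the systable side, a minimal sketch (again not from the commit; the catalog and log message are only illustrative) of an unchanged call site: callers keep receiving HeapTuples from systable_getnext(), while the slot added to SysScanDesc remains an internal detail of the index-scan path:

#include "postgres.h"

#include "access/genam.h"
#include "access/htup_details.h"
#include "access/table.h"
#include "catalog/pg_class.h"

/*
 * Walk pg_class via the systable API.  The TupleTableSlot now carried in
 * SysScanDesc is used internally when an index scan is chosen; the caller
 * still sees plain HeapTuples.
 */
static void
walk_pg_class(void)
{
	Relation	rel = table_open(RelationRelationId, AccessShareLock);
	/* indexOK = false: a plain table scan is enough for this example */
	SysScanDesc scan = systable_beginscan(rel, InvalidOid, false,
										  NULL, 0, NULL);
	HeapTuple	tuple;

	while ((tuple = systable_getnext(scan)) != NULL)
	{
		Form_pg_class classform = (Form_pg_class) GETSTRUCT(tuple);

		elog(DEBUG1, "saw relation \"%s\"", NameStr(classform->relname));
	}

	systable_endscan(scan);
	table_close(rel, AccessShareLock);
}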
Diffstat (limited to 'src/include/access/relscan.h')
-rw-r--r--   src/include/access/relscan.h   122
1 file changed, 68 insertions, 54 deletions
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index b78ef2f47d0..82de4cdcf2c 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -21,63 +21,76 @@
#include "storage/spin.h"
#include "utils/relcache.h"
+
+struct ParallelTableScanDescData;
+
/*
- * Shared state for parallel heap scan.
- *
- * Each backend participating in a parallel heap scan has its own
- * HeapScanDesc in backend-private memory, and those objects all contain
- * a pointer to this structure. The information here must be sufficient
- * to properly initialize each new HeapScanDesc as workers join the scan,
- * and it must act as a font of block numbers for those workers.
+ * Generic descriptor for table scans. This is the base-class for table scans,
+ * which needs to be embedded in the scans of individual AMs.
*/
-typedef struct ParallelHeapScanDescData
-{
- Oid phs_relid; /* OID of relation to scan */
- bool phs_syncscan; /* report location to syncscan logic? */
- BlockNumber phs_nblocks; /* # blocks in relation at start of scan */
- slock_t phs_mutex; /* mutual exclusion for setting startblock */
- BlockNumber phs_startblock; /* starting block number */
- pg_atomic_uint64 phs_nallocated; /* number of blocks allocated to
- * workers so far. */
- bool phs_snapshot_any; /* SnapshotAny, not phs_snapshot_data? */
- char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
-} ParallelHeapScanDescData;
-
-typedef struct HeapScanDescData
+typedef struct TableScanDescData
{
/* scan parameters */
Relation rs_rd; /* heap relation descriptor */
struct SnapshotData *rs_snapshot; /* snapshot to see */
int rs_nkeys; /* number of scan keys */
- struct ScanKeyData *rs_key; /* array of scan key descriptors */
+ struct ScanKeyData *rs_key; /* array of scan key descriptors */
bool rs_bitmapscan; /* true if this is really a bitmap scan */
bool rs_samplescan; /* true if this is really a sample scan */
bool rs_pageatatime; /* verify visibility page-at-a-time? */
bool rs_allow_strat; /* allow or disallow use of access strategy */
bool rs_allow_sync; /* allow or disallow use of syncscan */
bool rs_temp_snap; /* unregister snapshot at scan end? */
-
- /* state set up at initscan time */
- BlockNumber rs_nblocks; /* total number of blocks in rel */
- BlockNumber rs_startblock; /* block # to start at */
- BlockNumber rs_numblocks; /* max number of blocks to scan */
- /* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */
- BufferAccessStrategy rs_strategy; /* access strategy for reads */
bool rs_syncscan; /* report location to syncscan logic? */
- /* scan current state */
- bool rs_inited; /* false = scan not init'd yet */
- HeapTupleData rs_ctup; /* current tuple in scan, if any */
- BlockNumber rs_cblock; /* current block # in scan, if any */
- Buffer rs_cbuf; /* current buffer in scan, if any */
- /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
- struct ParallelHeapScanDescData *rs_parallel; /* parallel scan information */
+ struct ParallelTableScanDescData *rs_parallel; /* parallel scan
+ * information */
- /* these fields only used in page-at-a-time mode and for bitmap scans */
- int rs_cindex; /* current tuple's index in vistuples */
- int rs_ntuples; /* number of visible tuples on page */
- OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */
-} HeapScanDescData;
+} TableScanDescData;
+typedef struct TableScanDescData *TableScanDesc;
+
+/*
+ * Shared state for parallel table scan.
+ *
+ * Each backend participating in a parallel table scan has its own
+ * TableScanDesc in backend-private memory, and those objects all contain a
+ * pointer to this structure. The information here must be sufficient to
+ * properly initialize each new TableScanDesc as workers join the scan, and it
+ * must act as a source of information on what to scan for those workers.
+ */
+typedef struct ParallelTableScanDescData
+{
+ Oid phs_relid; /* OID of relation to scan */
+ bool phs_syncscan; /* report location to syncscan logic? */
+ bool phs_snapshot_any; /* SnapshotAny, not phs_snapshot_data? */
+ Size phs_snapshot_off; /* data for snapshot */
+} ParallelTableScanDescData;
+typedef struct ParallelTableScanDescData *ParallelTableScanDesc;
+
+/*
+ * Shared state for parallel table scans, for block oriented storage.
+ */
+typedef struct ParallelBlockTableScanDescData
+{
+ ParallelTableScanDescData base;
+
+ BlockNumber phs_nblocks; /* # blocks in relation at start of scan */
+ slock_t phs_mutex; /* mutual exclusion for setting startblock */
+ BlockNumber phs_startblock; /* starting block number */
+ pg_atomic_uint64 phs_nallocated; /* number of blocks allocated to
+ * workers so far. */
+} ParallelBlockTableScanDescData;
+typedef struct ParallelBlockTableScanDescData *ParallelBlockTableScanDesc;
+
+/*
+ * Base class for fetches from a table via an index. This is the base-class
+ * for such scans, which needs to be embedded in the respective struct for
+ * individual AMs.
+ */
+typedef struct IndexFetchTableData
+{
+ Relation rel;
+} IndexFetchTableData;
/*
* We use the same IndexScanDescData structure for both amgettuple-based
@@ -92,7 +105,7 @@ typedef struct IndexScanDescData
struct SnapshotData *xs_snapshot; /* snapshot to see */
int numberOfKeys; /* number of index qualifier conditions */
int numberOfOrderBys; /* number of ordering operators */
- struct ScanKeyData *keyData; /* array of index qualifier descriptors */
+ struct ScanKeyData *keyData; /* array of index qualifier descriptors */
struct ScanKeyData *orderByData; /* array of ordering op descriptors */
bool xs_want_itup; /* caller requests index tuples */
bool xs_temp_snap; /* unregister snapshot at scan end? */
@@ -115,12 +128,13 @@ typedef struct IndexScanDescData
IndexTuple xs_itup; /* index tuple returned by AM */
struct TupleDescData *xs_itupdesc; /* rowtype descriptor of xs_itup */
HeapTuple xs_hitup; /* index data returned by AM, as HeapTuple */
- struct TupleDescData *xs_hitupdesc; /* rowtype descriptor of xs_hitup */
+ struct TupleDescData *xs_hitupdesc; /* rowtype descriptor of xs_hitup */
+
+ ItemPointerData xs_heaptid; /* result */
+ bool xs_heap_continue; /* T if must keep walking, potential
+ * further results */
+ IndexFetchTableData *xs_heapfetch;
- /* xs_ctup/xs_cbuf/xs_recheck are valid after a successful index_getnext */
- HeapTupleData xs_ctup; /* current heap tuple, if any */
- Buffer xs_cbuf; /* current heap buffer in scan, if any */
- /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
bool xs_recheck; /* T means scan keys must be rechecked */
/*
@@ -134,9 +148,6 @@ typedef struct IndexScanDescData
bool *xs_orderbynulls;
bool xs_recheckorderby;
- /* state data for traversing HOT chains in index_getnext */
- bool xs_continue_hot; /* T if must keep walking HOT chain */
-
/* parallel index scan information, in shared memory */
struct ParallelIndexScanDescData *parallel_scan;
} IndexScanDescData;
@@ -150,14 +161,17 @@ typedef struct ParallelIndexScanDescData
char ps_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
} ParallelIndexScanDescData;
-/* Struct for heap-or-index scans of system tables */
+struct TupleTableSlot;
+
+/* Struct for storage-or-index scans of system tables */
typedef struct SysScanDescData
{
Relation heap_rel; /* catalog being scanned */
Relation irel; /* NULL if doing heap scan */
- struct HeapScanDescData *scan; /* only valid in heap-scan case */
- struct IndexScanDescData *iscan; /* only valid in index-scan case */
- struct SnapshotData *snapshot; /* snapshot to unregister at end of scan */
+ struct TableScanDescData *scan; /* only valid in storage-scan case */
+ struct IndexScanDescData *iscan; /* only valid in index-scan case */
+ struct SnapshotData *snapshot; /* snapshot to unregister at end of scan */
+ struct TupleTableSlot *slot;
} SysScanDescData;
#endif /* RELSCAN_H */
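To make the "embedded base class" pattern in the structs above concrete, here is a hypothetical sketch (the Demo* names are invented for illustration; heap's real descriptor lives in heapam.h) of how an AM-specific scan descriptor would wrap TableScanDescData:

/* in a hypothetical AM's private header */
#include "access/relscan.h"
#include "storage/block.h"
#include "storage/buf.h"

typedef struct DemoScanDescData
{
	/* AM-independent part; must be first so a cast to TableScanDesc works */
	TableScanDescData rs_base;

	/* AM-specific scan state */
	BlockNumber rs_cblock;		/* current block in scan, if any */
	Buffer		rs_cbuf;		/* currently pinned buffer, if any */
} DemoScanDescData;
typedef struct DemoScanDescData *DemoScanDesc;

The AM's scan_begin callback would allocate a DemoScanDescData, fill in rs_base, and return &scan->rs_base as the TableScanDesc; core code only ever touches the base struct, and the AM casts back to DemoScanDesc inside its own callbacks. ParallelBlockTableScanDescData embeds ParallelTableScanDescData in the same way.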