summaryrefslogtreecommitdiff
path: root/src/include/access/nbtree.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/access/nbtree.h')
-rw-r--r--src/include/access/nbtree.h62
1 files changed, 27 insertions, 35 deletions
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 90a68375a2e..5fb523ecec3 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -925,13 +925,13 @@ typedef BTVacuumPostingData *BTVacuumPosting;
* Index scans work a page at a time: we pin and read-lock the page, identify
* all the matching items on the page and save them in BTScanPosData, then
* release the read-lock while returning the items to the caller for
- * processing. This approach minimizes lock/unlock traffic. Note that we
- * keep the pin on the index page until the caller is done with all the items
- * (this is needed for VACUUM synchronization, see nbtree/README). When we
- * are ready to step to the next page, if the caller has told us any of the
- * items were killed, we re-lock the page to mark them killed, then unlock.
- * Finally we drop the pin and step to the next page in the appropriate
- * direction.
+ * processing. This approach minimizes lock/unlock traffic. We must always
+ * drop the lock to make it okay for caller to process the returned items.
+ * Whether or not we can also release the pin during this window will vary.
+ * We drop the pin eagerly (when safe) to avoid blocking progress by VACUUM
+ * (see nbtree/README section about making concurrent TID recycling safe).
+ * We'll always release both the lock and the pin on the current page before
+ * moving on to its sibling page.
*
* If we are doing an index-only scan, we save the entire IndexTuple for each
* matched item, otherwise only its heap TID and offset. The IndexTuples go
@@ -950,28 +950,15 @@ typedef struct BTScanPosItem /* what we remember about each match */
typedef struct BTScanPosData
{
- Buffer buf; /* if valid, the buffer is pinned */
+ Buffer buf; /* currPage buf (invalid means unpinned) */
- XLogRecPtr lsn; /* pos in the WAL stream when page was read */
+ /* page details as of the saved position's call to _bt_readpage */
BlockNumber currPage; /* page referenced by items array */
- BlockNumber nextPage; /* page's right link when we scanned it */
+ BlockNumber prevPage; /* currPage's left link */
+ BlockNumber nextPage; /* currPage's right link */
+ XLogRecPtr lsn; /* currPage's LSN */
- /*
- * moreLeft and moreRight track whether we think there may be matching
- * index entries to the left and right of the current page, respectively.
- * We can clear the appropriate one of these flags when _bt_checkkeys()
- * sets BTReadPageState.continuescan = false.
- */
- bool moreLeft;
- bool moreRight;
-
- /*
- * Direction of the scan at the time that _bt_readpage was called.
- *
- * Used by btrestrpos to "restore" the scan's array keys by resetting each
- * array to its first element's value (first in this scan direction). This
- * avoids the need to directly track the array keys in btmarkpos.
- */
+ /* scan direction for the saved position's call to _bt_readpage */
ScanDirection dir;
/*
@@ -981,6 +968,13 @@ typedef struct BTScanPosData
int nextTupleOffset;
/*
+ * moreLeft and moreRight track whether we think there may be matching
+ * index entries to the left and right of the current page, respectively.
+ */
+ bool moreLeft;
+ bool moreRight;
+
+ /*
* The items array is always ordered in index order (ie, increasing
* indexoffset). When scanning backwards it is convenient to fill the
* array back-to-front, so we start at the last slot and fill downwards.
@@ -1021,11 +1015,8 @@ typedef BTScanPosData *BTScanPos;
)
#define BTScanPosInvalidate(scanpos) \
do { \
- (scanpos).currPage = InvalidBlockNumber; \
- (scanpos).nextPage = InvalidBlockNumber; \
(scanpos).buf = InvalidBuffer; \
- (scanpos).lsn = InvalidXLogRecPtr; \
- (scanpos).nextTupleOffset = 0; \
+ (scanpos).currPage = InvalidBlockNumber; \
} while (0)
/* We need one of these for each equality-type SK_SEARCHARRAY scan key */
@@ -1091,7 +1082,6 @@ typedef struct BTReadPageState
OffsetNumber minoff; /* Lowest non-pivot tuple's offset */
OffsetNumber maxoff; /* Highest non-pivot tuple's offset */
IndexTuple finaltup; /* Needed by scans with array keys */
- BlockNumber prev_scan_page; /* previous _bt_parallel_release block */
Page page; /* Page being read */
/* Per-tuple input parameters, set by _bt_readpage for _bt_checkkeys */
@@ -1192,12 +1182,14 @@ extern int btgettreeheight(Relation rel);
/*
* prototypes for internal functions in nbtree.c
*/
-extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno,
- bool first);
-extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page);
+extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
+ BlockNumber *last_curr_page, bool first);
+extern void _bt_parallel_release(IndexScanDesc scan,
+ BlockNumber next_scan_page,
+ BlockNumber curr_page);
extern void _bt_parallel_done(IndexScanDesc scan);
extern void _bt_parallel_primscan_schedule(IndexScanDesc scan,
- BlockNumber prev_scan_page);
+ BlockNumber curr_page);
/*
* prototypes for functions in nbtdedup.c