summaryrefslogtreecommitdiff
path: root/src/backend/access/nbtree/nbtsearch.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/nbtree/nbtsearch.c')
-rw-r--r--src/backend/access/nbtree/nbtsearch.c2617
1 files changed, 1349 insertions, 1268 deletions
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 1d1c8072b93..8b1f75b7533 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -1,13 +1,13 @@
/*-------------------------------------------------------------------------
*
* btsearch.c--
- * search code for postgres btrees.
+ * search code for postgres btrees.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.23 1997/08/19 21:29:42 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.24 1997/09/07 04:38:58 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -22,1435 +22,1516 @@
#include <catalog/pg_proc.h>
#ifndef HAVE_MEMMOVE
-# include <regex/utils.h>
+#include <regex/utils.h>
#else
-# include <string.h>
+#include <string.h>
#endif
-static BTStack
-_bt_searchr(Relation rel, int keysz, ScanKey scankey,
- Buffer *bufP, BTStack stack_in);
-static OffsetNumber
-_bt_firsteq(Relation rel, TupleDesc itupdesc, Page page,
- Size keysz, ScanKey scankey, OffsetNumber offnum);
-static int
-_bt_compare(Relation rel, TupleDesc itupdesc, Page page,
- int keysz, ScanKey scankey, OffsetNumber offnum);
-static bool
-_bt_twostep(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
-static RetrieveIndexResult
-_bt_endpoint(IndexScanDesc scan, ScanDirection dir);
+static BTStack
+_bt_searchr(Relation rel, int keysz, ScanKey scankey,
+ Buffer * bufP, BTStack stack_in);
+static OffsetNumber
+_bt_firsteq(Relation rel, TupleDesc itupdesc, Page page,
+ Size keysz, ScanKey scankey, OffsetNumber offnum);
+static int
+_bt_compare(Relation rel, TupleDesc itupdesc, Page page,
+ int keysz, ScanKey scankey, OffsetNumber offnum);
+static bool
+ _bt_twostep(IndexScanDesc scan, Buffer * bufP, ScanDirection dir);
+static RetrieveIndexResult
+ _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
/*
- * _bt_search() -- Search for a scan key in the index.
+ * _bt_search() -- Search for a scan key in the index.
*
- * This routine is actually just a helper that sets things up and
- * calls a recursive-descent search routine on the tree.
+ * This routine is actually just a helper that sets things up and
+ * calls a recursive-descent search routine on the tree.
*/
BTStack
-_bt_search(Relation rel, int keysz, ScanKey scankey, Buffer *bufP)
+_bt_search(Relation rel, int keysz, ScanKey scankey, Buffer * bufP)
{
- *bufP = _bt_getroot(rel, BT_READ);
- return (_bt_searchr(rel, keysz, scankey, bufP, (BTStack) NULL));
+ *bufP = _bt_getroot(rel, BT_READ);
+ return (_bt_searchr(rel, keysz, scankey, bufP, (BTStack) NULL));
}
/*
- * _bt_searchr() -- Search the tree recursively for a particular scankey.
+ * _bt_searchr() -- Search the tree recursively for a particular scankey.
*/
-static BTStack
+static BTStack
_bt_searchr(Relation rel,
- int keysz,
- ScanKey scankey,
- Buffer *bufP,
- BTStack stack_in)
+ int keysz,
+ ScanKey scankey,
+ Buffer * bufP,
+ BTStack stack_in)
{
- BTStack stack;
- OffsetNumber offnum;
- Page page;
- BTPageOpaque opaque;
- BlockNumber par_blkno;
- BlockNumber blkno;
- ItemId itemid;
- BTItem btitem;
- BTItem item_save;
- int item_nbytes;
- IndexTuple itup;
-
- /* if this is a leaf page, we're done */
- page = BufferGetPage(*bufP);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- if (opaque->btpo_flags & BTP_LEAF)
- return (stack_in);
-
- /*
- * Find the appropriate item on the internal page, and get the child
- * page that it points to.
- */
-
- par_blkno = BufferGetBlockNumber(*bufP);
- offnum = _bt_binsrch(rel, *bufP, keysz, scankey, BT_DESCENT);
- itemid = PageGetItemId(page, offnum);
- btitem = (BTItem) PageGetItem(page, itemid);
- itup = &(btitem->bti_itup);
- blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
-
- /*
- * We need to save the bit image of the index entry we chose in the
- * parent page on a stack. In case we split the tree, we'll use this
- * bit image to figure out what our real parent page is, in case the
- * parent splits while we're working lower in the tree. See the paper
- * by Lehman and Yao for how this is detected and handled. (We use
- * unique OIDs to disambiguate duplicate keys in the index -- Lehman
- * and Yao disallow duplicate keys).
- */
-
- item_nbytes = ItemIdGetLength(itemid);
- item_save = (BTItem) palloc(item_nbytes);
- memmove((char *) item_save, (char *) btitem, item_nbytes);
- stack = (BTStack) palloc(sizeof(BTStackData));
- stack->bts_blkno = par_blkno;
- stack->bts_offset = offnum;
- stack->bts_btitem = item_save;
- stack->bts_parent = stack_in;
-
- /* drop the read lock on the parent page and acquire one on the child */
- _bt_relbuf(rel, *bufP, BT_READ);
- *bufP = _bt_getbuf(rel, blkno, BT_READ);
-
- /*
- * Race -- the page we just grabbed may have split since we read its
- * pointer in the parent. If it has, we may need to move right to its
- * new sibling. Do that.
- */
-
- *bufP = _bt_moveright(rel, *bufP, keysz, scankey, BT_READ);
-
- /* okay, all set to move down a level */
- return (_bt_searchr(rel, keysz, scankey, bufP, stack));
+ BTStack stack;
+ OffsetNumber offnum;
+ Page page;
+ BTPageOpaque opaque;
+ BlockNumber par_blkno;
+ BlockNumber blkno;
+ ItemId itemid;
+ BTItem btitem;
+ BTItem item_save;
+ int item_nbytes;
+ IndexTuple itup;
+
+ /* if this is a leaf page, we're done */
+ page = BufferGetPage(*bufP);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ if (opaque->btpo_flags & BTP_LEAF)
+ return (stack_in);
+
+ /*
+ * Find the appropriate item on the internal page, and get the child
+ * page that it points to.
+ */
+
+ par_blkno = BufferGetBlockNumber(*bufP);
+ offnum = _bt_binsrch(rel, *bufP, keysz, scankey, BT_DESCENT);
+ itemid = PageGetItemId(page, offnum);
+ btitem = (BTItem) PageGetItem(page, itemid);
+ itup = &(btitem->bti_itup);
+ blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
+
+ /*
+ * We need to save the bit image of the index entry we chose in the
+ * parent page on a stack. In case we split the tree, we'll use this
+ * bit image to figure out what our real parent page is, in case the
+ * parent splits while we're working lower in the tree. See the paper
+ * by Lehman and Yao for how this is detected and handled. (We use
+ * unique OIDs to disambiguate duplicate keys in the index -- Lehman
+ * and Yao disallow duplicate keys).
+ */
+
+ item_nbytes = ItemIdGetLength(itemid);
+ item_save = (BTItem) palloc(item_nbytes);
+ memmove((char *) item_save, (char *) btitem, item_nbytes);
+ stack = (BTStack) palloc(sizeof(BTStackData));
+ stack->bts_blkno = par_blkno;
+ stack->bts_offset = offnum;
+ stack->bts_btitem = item_save;
+ stack->bts_parent = stack_in;
+
+ /* drop the read lock on the parent page and acquire one on the child */
+ _bt_relbuf(rel, *bufP, BT_READ);
+ *bufP = _bt_getbuf(rel, blkno, BT_READ);
+
+ /*
+ * Race -- the page we just grabbed may have split since we read its
+ * pointer in the parent. If it has, we may need to move right to its
+ * new sibling. Do that.
+ */
+
+ *bufP = _bt_moveright(rel, *bufP, keysz, scankey, BT_READ);
+
+ /* okay, all set to move down a level */
+ return (_bt_searchr(rel, keysz, scankey, bufP, stack));
}
/*
- * _bt_moveright() -- move right in the btree if necessary.
+ * _bt_moveright() -- move right in the btree if necessary.
*
- * When we drop and reacquire a pointer to a page, it is possible that
- * the page has changed in the meanwhile. If this happens, we're
- * guaranteed that the page has "split right" -- that is, that any
- * data that appeared on the page originally is either on the page
- * or strictly to the right of it.
+ * When we drop and reacquire a pointer to a page, it is possible that
+ * the page has changed in the meanwhile. If this happens, we're
+ * guaranteed that the page has "split right" -- that is, that any
+ * data that appeared on the page originally is either on the page
+ * or strictly to the right of it.
*
- * This routine decides whether or not we need to move right in the
- * tree by examining the high key entry on the page. If that entry
- * is strictly less than one we expect to be on the page, then our
- * picture of the page is incorrect and we need to move right.
+ * This routine decides whether or not we need to move right in the
+ * tree by examining the high key entry on the page. If that entry
+ * is strictly less than one we expect to be on the page, then our
+ * picture of the page is incorrect and we need to move right.
*
- * On entry, we have the buffer pinned and a lock of the proper type.
- * If we move right, we release the buffer and lock and acquire the
- * same on the right sibling.
+ * On entry, we have the buffer pinned and a lock of the proper type.
+ * If we move right, we release the buffer and lock and acquire the
+ * same on the right sibling.
*/
Buffer
_bt_moveright(Relation rel,
- Buffer buf,
- int keysz,
- ScanKey scankey,
- int access)
+ Buffer buf,
+ int keysz,
+ ScanKey scankey,
+ int access)
{
- Page page;
- BTPageOpaque opaque;
- ItemId hikey;
- BlockNumber rblkno;
- int natts = rel->rd_rel->relnatts;
-
- page = BufferGetPage(buf);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
-
- /* if we're on a rightmost page, we don't need to move right */
- if (P_RIGHTMOST(opaque))
- return (buf);
-
- /* by convention, item 0 on non-rightmost pages is the high key */
- hikey = PageGetItemId(page, P_HIKEY);
-
- /*
- * If the scan key that brought us to this page is >= the high key
- * stored on the page, then the page has split and we need to move
- * right.
- */
-
- if (_bt_skeycmp(rel, keysz, scankey, page, hikey,
- BTGreaterEqualStrategyNumber))
- {
- /* move right as long as we need to */
- do
+ Page page;
+ BTPageOpaque opaque;
+ ItemId hikey;
+ BlockNumber rblkno;
+ int natts = rel->rd_rel->relnatts;
+
+ page = BufferGetPage(buf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ /* if we're on a rightmost page, we don't need to move right */
+ if (P_RIGHTMOST(opaque))
+ return (buf);
+
+ /* by convention, item 0 on non-rightmost pages is the high key */
+ hikey = PageGetItemId(page, P_HIKEY);
+
+ /*
+ * If the scan key that brought us to this page is >= the high key
+ * stored on the page, then the page has split and we need to move
+ * right.
+ */
+
+ if (_bt_skeycmp(rel, keysz, scankey, page, hikey,
+ BTGreaterEqualStrategyNumber))
{
- OffsetNumber offmax = PageGetMaxOffsetNumber(page);
- /*
- * If this page consists of all duplicate keys (hikey and first
- * key on the page have the same value), then we don't need to
- * step right.
- *
- * NOTE for multi-column indices: we may do scan using
- * keys not for all attrs. But we handle duplicates
- * using all attrs in _bt_insert/_bt_spool code.
- * And so we've to compare scankey with _last_ item
- * on this page to do not lose "good" tuples if number
- * of attrs > keysize. Example: (2,0) - last items on
- * this page, (2,1) - first item on next page (hikey),
- * our scankey is x = 2. Scankey == (2,1) because of
- * we compare first attrs only, but we shouldn't to move
- * right of here. - vadim 04/15/97
- */
-
- if ( _bt_skeycmp (rel, keysz, scankey, page, hikey,
- BTEqualStrategyNumber) )
- {
- if ( opaque->btpo_flags & BTP_CHAIN )
- {
- Assert ( ( opaque->btpo_flags & BTP_LEAF ) || offmax > P_HIKEY );
- break;
- }
- if ( offmax > P_HIKEY )
- {
- if ( natts == keysz ) /* sanity checks */
- {
- if ( _bt_skeycmp (rel, keysz, scankey, page,
- PageGetItemId (page, P_FIRSTKEY),
- BTEqualStrategyNumber) )
- elog (FATAL, "btree: BTP_CHAIN flag was expected");
- if ( _bt_skeycmp (rel, keysz, scankey, page,
- PageGetItemId (page, offmax),
- BTEqualStrategyNumber) )
- elog (FATAL, "btree: unexpected equal last item");
- if ( _bt_skeycmp (rel, keysz, scankey, page,
- PageGetItemId (page, offmax),
- BTLessStrategyNumber) )
- elog (FATAL, "btree: unexpected greater last item");
- /* move right */
- }
- else if ( _bt_skeycmp (rel, keysz, scankey, page,
- PageGetItemId (page, offmax),
- BTLessEqualStrategyNumber) )
- break;
- }
- }
-
- /* step right one page */
- rblkno = opaque->btpo_next;
- _bt_relbuf(rel, buf, access);
- buf = _bt_getbuf(rel, rblkno, access);
- page = BufferGetPage(buf);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- hikey = PageGetItemId(page, P_HIKEY);
-
- } while (! P_RIGHTMOST(opaque)
- && _bt_skeycmp(rel, keysz, scankey, page, hikey,
- BTGreaterEqualStrategyNumber));
- }
- return (buf);
+ /* move right as long as we need to */
+ do
+ {
+ OffsetNumber offmax = PageGetMaxOffsetNumber(page);
+
+ /*
+ * If this page consists of all duplicate keys (hikey and
+ * first key on the page have the same value), then we don't
+ * need to step right.
+ *
+ * NOTE for multi-column indices: we may do scan using keys not
+ * for all attrs. But we handle duplicates using all attrs in
+ * _bt_insert/_bt_spool code. And so we've to compare scankey
+ * with _last_ item on this page to do not lose "good" tuples
+ * if number of attrs > keysize. Example: (2,0) - last items
+ * on this page, (2,1) - first item on next page (hikey), our
+ * scankey is x = 2. Scankey == (2,1) because of we compare
+ * first attrs only, but we shouldn't to move right of here.
+ * - vadim 04/15/97
+ */
+
+ if (_bt_skeycmp(rel, keysz, scankey, page, hikey,
+ BTEqualStrategyNumber))
+ {
+ if (opaque->btpo_flags & BTP_CHAIN)
+ {
+ Assert((opaque->btpo_flags & BTP_LEAF) || offmax > P_HIKEY);
+ break;
+ }
+ if (offmax > P_HIKEY)
+ {
+ if (natts == keysz) /* sanity checks */
+ {
+ if (_bt_skeycmp(rel, keysz, scankey, page,
+ PageGetItemId(page, P_FIRSTKEY),
+ BTEqualStrategyNumber))
+ elog(FATAL, "btree: BTP_CHAIN flag was expected");
+ if (_bt_skeycmp(rel, keysz, scankey, page,
+ PageGetItemId(page, offmax),
+ BTEqualStrategyNumber))
+ elog(FATAL, "btree: unexpected equal last item");
+ if (_bt_skeycmp(rel, keysz, scankey, page,
+ PageGetItemId(page, offmax),
+ BTLessStrategyNumber))
+ elog(FATAL, "btree: unexpected greater last item");
+ /* move right */
+ }
+ else if (_bt_skeycmp(rel, keysz, scankey, page,
+ PageGetItemId(page, offmax),
+ BTLessEqualStrategyNumber))
+ break;
+ }
+ }
+
+ /* step right one page */
+ rblkno = opaque->btpo_next;
+ _bt_relbuf(rel, buf, access);
+ buf = _bt_getbuf(rel, rblkno, access);
+ page = BufferGetPage(buf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ hikey = PageGetItemId(page, P_HIKEY);
+
+ } while (!P_RIGHTMOST(opaque)
+ && _bt_skeycmp(rel, keysz, scankey, page, hikey,
+ BTGreaterEqualStrategyNumber));
+ }
+ return (buf);
}
/*
- * _bt_skeycmp() -- compare a scan key to a particular item on a page using
- * a requested strategy (<, <=, =, >=, >).
+ * _bt_skeycmp() -- compare a scan key to a particular item on a page using
+ * a requested strategy (<, <=, =, >=, >).
*
- * We ignore the unique OIDs stored in the btree item here. Those
- * numbers are intended for use internally only, in repositioning a
- * scan after a page split. They do not impose any meaningful ordering.
+ * We ignore the unique OIDs stored in the btree item here. Those
+ * numbers are intended for use internally only, in repositioning a
+ * scan after a page split. They do not impose any meaningful ordering.
*
- * The comparison is A <op> B, where A is the scan key and B is the
- * tuple pointed at by itemid on page.
+ * The comparison is A <op> B, where A is the scan key and B is the
+ * tuple pointed at by itemid on page.
*/
bool
_bt_skeycmp(Relation rel,
- Size keysz,
- ScanKey scankey,
- Page page,
- ItemId itemid,
- StrategyNumber strat)
+ Size keysz,
+ ScanKey scankey,
+ Page page,
+ ItemId itemid,
+ StrategyNumber strat)
{
- BTItem item;
- IndexTuple indexTuple;
- TupleDesc tupDes;
- ScanKey entry;
- int i;
- Datum attrDatum;
- Datum keyDatum;
- bool compare;
- bool isNull;
- bool useEqual = false;
- bool keyNull;
-
- if ( strat == BTLessEqualStrategyNumber )
- {
- useEqual = true;
- strat = BTLessStrategyNumber;
- }
- else if ( strat == BTGreaterEqualStrategyNumber )
- {
- useEqual = true;
- strat = BTGreaterStrategyNumber;
- }
-
- item = (BTItem) PageGetItem(page, itemid);
- indexTuple = &(item->bti_itup);
-
- tupDes = RelationGetTupleDescriptor(rel);
-
- /* see if the comparison is true for all of the key attributes */
- for (i=1; i <= keysz; i++) {
-
- entry = &scankey[i-1];
- Assert ( entry->sk_attno == i );
- attrDatum = index_getattr(indexTuple,
- entry->sk_attno,
- tupDes,
- &isNull);
- keyDatum = entry->sk_argument;
-
- /* see comments about NULLs handling in btbuild */
- if ( entry->sk_flags & SK_ISNULL ) /* key is NULL */
+ BTItem item;
+ IndexTuple indexTuple;
+ TupleDesc tupDes;
+ ScanKey entry;
+ int i;
+ Datum attrDatum;
+ Datum keyDatum;
+ bool compare;
+ bool isNull;
+ bool useEqual = false;
+ bool keyNull;
+
+ if (strat == BTLessEqualStrategyNumber)
{
- Assert ( entry->sk_procedure == NullValueRegProcedure );
- keyNull = true;
- if ( isNull )
- compare = ( strat == BTEqualStrategyNumber ) ? true : false;
- else
- compare = ( strat == BTGreaterStrategyNumber ) ? true : false;
- }
- else if ( isNull ) /* key is NOT_NULL and item is NULL */
- {
- keyNull = false;
- compare = ( strat == BTLessStrategyNumber ) ? true : false;
- }
- else
- {
- keyNull = false;
- compare = _bt_invokestrat(rel, i, strat, keyDatum, attrDatum);
+ useEqual = true;
+ strat = BTLessStrategyNumber;
}
-
- if ( compare ) /* true for one of ">, <, =" */
+ else if (strat == BTGreaterEqualStrategyNumber)
{
- if ( strat != BTEqualStrategyNumber )
- return (true);
+ useEqual = true;
+ strat = BTGreaterStrategyNumber;
}
- else /* false for one of ">, <, =" */
+
+ item = (BTItem) PageGetItem(page, itemid);
+ indexTuple = &(item->bti_itup);
+
+ tupDes = RelationGetTupleDescriptor(rel);
+
+ /* see if the comparison is true for all of the key attributes */
+ for (i = 1; i <= keysz; i++)
{
- if ( strat == BTEqualStrategyNumber )
- return (false);
- /*
- * if original strat was "<=, >=" OR
- * "<, >" but some attribute(s) left
- * - need to test for Equality
- */
- if ( useEqual || i < keysz )
- {
- if ( keyNull || isNull )
- compare = ( keyNull && isNull ) ? true : false;
- else
- compare = _bt_invokestrat(rel, i, BTEqualStrategyNumber,
- keyDatum, attrDatum);
- if ( compare ) /* key' and item' attributes are equal */
- continue; /* - try to compare next attributes */
- }
- return (false);
+
+ entry = &scankey[i - 1];
+ Assert(entry->sk_attno == i);
+ attrDatum = index_getattr(indexTuple,
+ entry->sk_attno,
+ tupDes,
+ &isNull);
+ keyDatum = entry->sk_argument;
+
+ /* see comments about NULLs handling in btbuild */
+ if (entry->sk_flags & SK_ISNULL) /* key is NULL */
+ {
+ Assert(entry->sk_procedure == NullValueRegProcedure);
+ keyNull = true;
+ if (isNull)
+ compare = (strat == BTEqualStrategyNumber) ? true : false;
+ else
+ compare = (strat == BTGreaterStrategyNumber) ? true : false;
+ }
+ else if (isNull) /* key is NOT_NULL and item is NULL */
+ {
+ keyNull = false;
+ compare = (strat == BTLessStrategyNumber) ? true : false;
+ }
+ else
+ {
+ keyNull = false;
+ compare = _bt_invokestrat(rel, i, strat, keyDatum, attrDatum);
+ }
+
+ if (compare) /* true for one of ">, <, =" */
+ {
+ if (strat != BTEqualStrategyNumber)
+ return (true);
+ }
+ else
+/* false for one of ">, <, =" */
+ {
+ if (strat == BTEqualStrategyNumber)
+ return (false);
+
+ /*
+ * if original strat was "<=, >=" OR "<, >" but some
+ * attribute(s) left - need to test for Equality
+ */
+ if (useEqual || i < keysz)
+ {
+ if (keyNull || isNull)
+ compare = (keyNull && isNull) ? true : false;
+ else
+ compare = _bt_invokestrat(rel, i, BTEqualStrategyNumber,
+ keyDatum, attrDatum);
+ if (compare) /* key' and item' attributes are equal */
+ continue; /* - try to compare next attributes */
+ }
+ return (false);
+ }
}
- }
-
- return (true);
+
+ return (true);
}
/*
- * _bt_binsrch() -- Do a binary search for a key on a particular page.
+ * _bt_binsrch() -- Do a binary search for a key on a particular page.
*
- * The scankey we get has the compare function stored in the procedure
- * entry of each data struct. We invoke this regproc to do the
- * comparison for every key in the scankey. _bt_binsrch() returns
- * the OffsetNumber of the first matching key on the page, or the
- * OffsetNumber at which the matching key would appear if it were
- * on this page.
+ * The scankey we get has the compare function stored in the procedure
+ * entry of each data struct. We invoke this regproc to do the
+ * comparison for every key in the scankey. _bt_binsrch() returns
+ * the OffsetNumber of the first matching key on the page, or the
+ * OffsetNumber at which the matching key would appear if it were
+ * on this page.
*
- * By the time this procedure is called, we're sure we're looking
- * at the right page -- don't need to walk right. _bt_binsrch() has
- * no lock or refcount side effects on the buffer.
+ * By the time this procedure is called, we're sure we're looking
+ * at the right page -- don't need to walk right. _bt_binsrch() has
+ * no lock or refcount side effects on the buffer.
*/
OffsetNumber
_bt_binsrch(Relation rel,
- Buffer buf,
- int keysz,
- ScanKey scankey,
- int srchtype)
+ Buffer buf,
+ int keysz,
+ ScanKey scankey,
+ int srchtype)
{
- TupleDesc itupdesc;
- Page page;
- BTPageOpaque opaque;
- OffsetNumber low, mid, high;
- int natts = rel->rd_rel->relnatts;
- int result;
-
- itupdesc = RelationGetTupleDescriptor(rel);
- page = BufferGetPage(buf);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
-
- /* by convention, item 1 on any non-rightmost page is the high key */
- low = mid = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
-
- high = PageGetMaxOffsetNumber(page);
-
- /*
- * Since for non-rightmost pages, the first item on the page is the
- * high key, there are two notions of emptiness. One is if nothing
- * appears on the page. The other is if nothing but the high key does.
- * The reason we test high <= low, rather than high == low, is that
- * after vacuuming there may be nothing *but* the high key on a page.
- * In that case, given the scheme above, low = 2 and high = 1.
- */
-
- if ( PageIsEmpty (page) )
- return (low);
- if ( (! P_RIGHTMOST(opaque) && high <= low))
- {
- if ( high < low ||
- (srchtype == BT_DESCENT && !(opaque->btpo_flags & BTP_LEAF)) )
- return (low);
- /* It's insertion and high == low == 2 */
- result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
- if ( result > 0 )
- return ( OffsetNumberNext (low) );
- return (low);
- }
-
- while ((high - low) > 1) {
- mid = low + ((high - low) / 2);
- result = _bt_compare(rel, itupdesc, page, keysz, scankey, mid);
-
- if (result > 0)
- low = mid;
- else if (result < 0)
- high = mid - 1;
- else
+ TupleDesc itupdesc;
+ Page page;
+ BTPageOpaque opaque;
+ OffsetNumber low,
+ mid,
+ high;
+ int natts = rel->rd_rel->relnatts;
+ int result;
+
+ itupdesc = RelationGetTupleDescriptor(rel);
+ page = BufferGetPage(buf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ /* by convention, item 1 on any non-rightmost page is the high key */
+ low = mid = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+
+ high = PageGetMaxOffsetNumber(page);
+
+ /*
+ * Since for non-rightmost pages, the first item on the page is the
+ * high key, there are two notions of emptiness. One is if nothing
+ * appears on the page. The other is if nothing but the high key
+ * does. The reason we test high <= low, rather than high == low, is
+ * that after vacuuming there may be nothing *but* the high key on a
+ * page. In that case, given the scheme above, low = 2 and high = 1.
+ */
+
+ if (PageIsEmpty(page))
+ return (low);
+ if ((!P_RIGHTMOST(opaque) && high <= low))
{
- mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, mid);
- /*
- * NOTE for multi-column indices: we may do scan using
- * keys not for all attrs. But we handle duplicates using
- * all attrs in _bt_insert/_bt_spool code. And so while
- * searching on internal pages having number of attrs > keysize
- * we want to point at the last item < the scankey, not at the
- * first item = the scankey (!!!), and let _bt_moveright
- * decide later whether to move right or not (see comments and
- * example there). Note also that INSERTions are not affected
- * by this code (natts == keysz). - vadim 04/15/97
- */
- if ( natts == keysz || opaque->btpo_flags & BTP_LEAF )
- return (mid);
- low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
- if ( mid == low )
- return (mid);
- return (OffsetNumberPrev(mid));
+ if (high < low ||
+ (srchtype == BT_DESCENT && !(opaque->btpo_flags & BTP_LEAF)))
+ return (low);
+ /* It's insertion and high == low == 2 */
+ result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
+ if (result > 0)
+ return (OffsetNumberNext(low));
+ return (low);
}
- }
-
- /*
- * We terminated because the endpoints got too close together. There
- * are two cases to take care of.
- *
- * For non-insertion searches on internal pages, we want to point at
- * the last key <, or first key =, the scankey on the page. This
- * guarantees that we'll descend the tree correctly.
- * (NOTE comments above for multi-column indices).
- *
- * For all other cases, we want to point at the first key >=
- * the scankey on the page. This guarantees that scans and
- * insertions will happen correctly.
- */
-
- if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT)
- { /*
- * We want the last key <, or first key ==, the scan key.
- */
- result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
-
- if (result == 0)
+
+ while ((high - low) > 1)
{
- mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, high);
- /*
- * If natts > keysz we want last item < the scan key.
- * See comments above for multi-column indices.
- */
- if ( natts == keysz )
- return (mid);
- low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
- if ( mid == low )
- return (mid);
- return (OffsetNumberPrev(mid));
+ mid = low + ((high - low) / 2);
+ result = _bt_compare(rel, itupdesc, page, keysz, scankey, mid);
+
+ if (result > 0)
+ low = mid;
+ else if (result < 0)
+ high = mid - 1;
+ else
+ {
+ mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, mid);
+
+ /*
+ * NOTE for multi-column indices: we may do scan using keys
+ * not for all attrs. But we handle duplicates using all attrs
+ * in _bt_insert/_bt_spool code. And so while searching on
+ * internal pages having number of attrs > keysize we want to
+ * point at the last item < the scankey, not at the first item
+ * = the scankey (!!!), and let _bt_moveright decide later
+ * whether to move right or not (see comments and example
+ * there). Note also that INSERTions are not affected by this
+ * code (natts == keysz). - vadim 04/15/97
+ */
+ if (natts == keysz || opaque->btpo_flags & BTP_LEAF)
+ return (mid);
+ low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+ if (mid == low)
+ return (mid);
+ return (OffsetNumberPrev(mid));
+ }
+ }
+
+ /*
+ * We terminated because the endpoints got too close together. There
+ * are two cases to take care of.
+ *
+ * For non-insertion searches on internal pages, we want to point at the
+ * last key <, or first key =, the scankey on the page. This
+ * guarantees that we'll descend the tree correctly. (NOTE comments
+ * above for multi-column indices).
+ *
+ * For all other cases, we want to point at the first key >= the scankey
+ * on the page. This guarantees that scans and insertions will happen
+ * correctly.
+ */
+
+ if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT)
+ { /* We want the last key <, or first key
+ * ==, the scan key. */
+ result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
+
+ if (result == 0)
+ {
+ mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, high);
+
+ /*
+ * If natts > keysz we want last item < the scan key. See
+ * comments above for multi-column indices.
+ */
+ if (natts == keysz)
+ return (mid);
+ low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+ if (mid == low)
+ return (mid);
+ return (OffsetNumberPrev(mid));
+ }
+ else if (result > 0)
+ return (high);
+ else
+ return (low);
}
- else if (result > 0)
- return (high);
- else
- return (low);
- }
- else /* we want the first key >= the scan key */
- {
- result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
- if (result <= 0)
- return (low);
else
+/* we want the first key >= the scan key */
{
- if (low == high)
- return (OffsetNumberNext(low));
-
- result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
- if (result <= 0)
- return (high);
- else
- return (OffsetNumberNext(high));
+ result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
+ if (result <= 0)
+ return (low);
+ else
+ {
+ if (low == high)
+ return (OffsetNumberNext(low));
+
+ result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
+ if (result <= 0)
+ return (high);
+ else
+ return (OffsetNumberNext(high));
+ }
}
- }
}
-static OffsetNumber
+static OffsetNumber
_bt_firsteq(Relation rel,
- TupleDesc itupdesc,
- Page page,
- Size keysz,
- ScanKey scankey,
- OffsetNumber offnum)
+ TupleDesc itupdesc,
+ Page page,
+ Size keysz,
+ ScanKey scankey,
+ OffsetNumber offnum)
{
- BTPageOpaque opaque;
- OffsetNumber limit;
-
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
-
- /* skip the high key, if any */
- limit = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
-
- /* walk backwards looking for the first key in the chain of duplicates */
- while (offnum > limit
- && _bt_compare(rel, itupdesc, page,
- keysz, scankey, OffsetNumberPrev(offnum)) == 0) {
- offnum = OffsetNumberPrev(offnum);
- }
-
- return (offnum);
+ BTPageOpaque opaque;
+ OffsetNumber limit;
+
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ /* skip the high key, if any */
+ limit = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+
+ /* walk backwards looking for the first key in the chain of duplicates */
+ while (offnum > limit
+ && _bt_compare(rel, itupdesc, page,
+ keysz, scankey, OffsetNumberPrev(offnum)) == 0)
+ {
+ offnum = OffsetNumberPrev(offnum);
+ }
+
+ return (offnum);
}
/*
- * _bt_compare() -- Compare scankey to a particular tuple on the page.
+ * _bt_compare() -- Compare scankey to a particular tuple on the page.
*
- * This routine returns:
- * -1 if scankey < tuple at offnum;
- * 0 if scankey == tuple at offnum;
- * +1 if scankey > tuple at offnum.
+ * This routine returns:
+ * -1 if scankey < tuple at offnum;
+ * 0 if scankey == tuple at offnum;
+ * +1 if scankey > tuple at offnum.
*
- * -- Old comments:
- * In order to avoid having to propagate changes up the tree any time
- * a new minimal key is inserted, the leftmost entry on the leftmost
- * page is less than all possible keys, by definition.
+ * -- Old comments:
+ * In order to avoid having to propagate changes up the tree any time
+ * a new minimal key is inserted, the leftmost entry on the leftmost
+ * page is less than all possible keys, by definition.
*
- * -- New ones:
- * New insertion code (fix against updating _in_place_ if new minimal
- * key has bigger size than old one) may delete P_HIKEY entry on the
- * root page in order to insert new minimal key - and so this definition
- * does not work properly in this case and breaks key' order on root
- * page. BTW, this propagation occures only while page' splitting,
- * but not "any time a new min key is inserted" (see _bt_insertonpg).
- * - vadim 12/05/96
+ * -- New ones:
+ * New insertion code (fix against updating _in_place_ if new minimal
+ * key has bigger size than old one) may delete P_HIKEY entry on the
+ * root page in order to insert new minimal key - and so this definition
+ * does not work properly in this case and breaks key' order on root
+ * page. BTW, this propagation occures only while page' splitting,
+ * but not "any time a new min key is inserted" (see _bt_insertonpg).
+ * - vadim 12/05/96
*/
static int
_bt_compare(Relation rel,
- TupleDesc itupdesc,
- Page page,
- int keysz,
- ScanKey scankey,
- OffsetNumber offnum)
+ TupleDesc itupdesc,
+ Page page,
+ int keysz,
+ ScanKey scankey,
+ OffsetNumber offnum)
{
- Datum datum;
- BTItem btitem;
- ItemId itemid;
- IndexTuple itup;
- BTPageOpaque opaque;
- ScanKey entry;
- AttrNumber attno;
- int result;
- int i;
- bool null;
-
- /*
- * If this is a leftmost internal page, and if our comparison is
- * with the first key on the page, then the item at that position is
- * by definition less than the scan key.
- *
- * - see new comments above...
- */
-
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- if (!(opaque->btpo_flags & BTP_LEAF)
- && P_LEFTMOST(opaque)
- && offnum == P_HIKEY) {
- itemid = PageGetItemId(page, offnum);
-
+ Datum datum;
+ BTItem btitem;
+ ItemId itemid;
+ IndexTuple itup;
+ BTPageOpaque opaque;
+ ScanKey entry;
+ AttrNumber attno;
+ int result;
+ int i;
+ bool null;
+
/*
- * we just have to believe that this will only be called with
- * offnum == P_HIKEY when P_HIKEY is the OffsetNumber of the
- * first actual data key (i.e., this is also a rightmost
- * page). there doesn't seem to be any code that implies
- * that the leftmost page is normally missing a high key as
- * well as the rightmost page. but that implies that this
- * code path only applies to the root -- which seems
- * unlikely..
+ * If this is a leftmost internal page, and if our comparison is with
+ * the first key on the page, then the item at that position is by
+ * definition less than the scan key.
*
- * - see new comments above...
+ * - see new comments above...
*/
- if (! P_RIGHTMOST(opaque)) {
- elog(WARN, "_bt_compare: invalid comparison to high key");
- }
+
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ if (!(opaque->btpo_flags & BTP_LEAF)
+ && P_LEFTMOST(opaque)
+ && offnum == P_HIKEY)
+ {
+ itemid = PageGetItemId(page, offnum);
+
+ /*
+ * we just have to believe that this will only be called with
+ * offnum == P_HIKEY when P_HIKEY is the OffsetNumber of the first
+ * actual data key (i.e., this is also a rightmost page). there
+ * doesn't seem to be any code that implies that the leftmost page
+ * is normally missing a high key as well as the rightmost page.
+ * but that implies that this code path only applies to the root
+ * -- which seems unlikely..
+ *
+ * - see new comments above...
+ */
+ if (!P_RIGHTMOST(opaque))
+ {
+ elog(WARN, "_bt_compare: invalid comparison to high key");
+ }
#if 0
+
+ /*
+ * We just have to belive that right answer will not break
+ * anything. I've checked code and all seems to be ok. See new
+ * comments above...
+ *
+ * -- Old comments If the item on the page is equal to the scankey,
+ * that's okay to admit. We just can't claim that the first key
+ * on the page is greater than anything.
+ */
+
+ if (_bt_skeycmp(rel, keysz, scankey, page, itemid,
+ BTEqualStrategyNumber))
+ {
+ return (0);
+ }
+ return (1);
+#endif
+ }
+
+ btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
+ itup = &(btitem->bti_itup);
+
/*
- * We just have to belive that right answer will not
- * break anything. I've checked code and all seems to be ok.
- * See new comments above...
+ * The scan key is set up with the attribute number associated with
+ * each term in the key. It is important that, if the index is
+ * multi-key, the scan contain the first k key attributes, and that
+ * they be in order. If you think about how multi-key ordering works,
+ * you'll understand why this is.
*
- * -- Old comments
- * If the item on the page is equal to the scankey, that's
- * okay to admit. We just can't claim that the first key on
- * the page is greater than anything.
+ * We don't test for violation of this condition here.
*/
-
- if (_bt_skeycmp(rel, keysz, scankey, page, itemid,
- BTEqualStrategyNumber)) {
- return (0);
- }
- return (1);
-#endif
- }
-
- btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
- itup = &(btitem->bti_itup);
-
- /*
- * The scan key is set up with the attribute number associated with each
- * term in the key. It is important that, if the index is multi-key,
- * the scan contain the first k key attributes, and that they be in
- * order. If you think about how multi-key ordering works, you'll
- * understand why this is.
- *
- * We don't test for violation of this condition here.
- */
-
- for (i = 1; i <= keysz; i++) {
- long tmpres;
-
- entry = &scankey[i - 1];
- attno = entry->sk_attno;
- datum = index_getattr(itup, attno, itupdesc, &null);
-
- /* see comments about NULLs handling in btbuild */
- if ( entry->sk_flags & SK_ISNULL ) /* key is NULL */
+
+ for (i = 1; i <= keysz; i++)
{
- Assert ( entry->sk_procedure == NullValueRegProcedure );
- if ( null )
- tmpres = (long) 0; /* NULL "=" NULL */
- else
- tmpres = (long) 1; /* NULL ">" NOT_NULL */
- }
- else if ( null ) /* key is NOT_NULL and item is NULL */
- {
- tmpres = (long) -1; /* NOT_NULL "<" NULL */
- }
- else
- {
- tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure,
- entry->sk_argument, datum);
+ long tmpres;
+
+ entry = &scankey[i - 1];
+ attno = entry->sk_attno;
+ datum = index_getattr(itup, attno, itupdesc, &null);
+
+ /* see comments about NULLs handling in btbuild */
+ if (entry->sk_flags & SK_ISNULL) /* key is NULL */
+ {
+ Assert(entry->sk_procedure == NullValueRegProcedure);
+ if (null)
+ tmpres = (long) 0; /* NULL "=" NULL */
+ else
+ tmpres = (long) 1; /* NULL ">" NOT_NULL */
+ }
+ else if (null) /* key is NOT_NULL and item is NULL */
+ {
+ tmpres = (long) -1; /* NOT_NULL "<" NULL */
+ }
+ else
+ {
+ tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure,
+ entry->sk_argument, datum);
+ }
+ result = tmpres;
+
+ /* if the keys are unequal, return the difference */
+ if (result != 0)
+ return (result);
}
- result = tmpres;
-
- /* if the keys are unequal, return the difference */
- if (result != 0)
- return (result);
- }
-
- /* by here, the keys are equal */
- return (0);
+
+ /* by here, the keys are equal */
+ return (0);
}
/*
- * _bt_next() -- Get the next item in a scan.
+ * _bt_next() -- Get the next item in a scan.
*
- * On entry, we have a valid currentItemData in the scan, and a
- * read lock on the page that contains that item. We do not have
- * the page pinned. We return the next item in the scan. On
- * exit, we have the page containing the next item locked but not
- * pinned.
+ * On entry, we have a valid currentItemData in the scan, and a
+ * read lock on the page that contains that item. We do not have
+ * the page pinned. We return the next item in the scan. On
+ * exit, we have the page containing the next item locked but not
+ * pinned.
*/
RetrieveIndexResult
_bt_next(IndexScanDesc scan, ScanDirection dir)
{
- Relation rel;
- Buffer buf;
- Page page;
- OffsetNumber offnum;
- RetrieveIndexResult res;
- ItemPointer current;
- BTItem btitem;
- IndexTuple itup;
- BTScanOpaque so;
- Size keysok;
-
- rel = scan->relation;
- so = (BTScanOpaque) scan->opaque;
- current = &(scan->currentItemData);
-
- /*
- * XXX 10 may 91: somewhere there's a bug in our management of the
- * cached buffer for this scan. wei discovered it. the following
- * is a workaround so he can work until i figure out what's going on.
- */
-
- if (!BufferIsValid(so->btso_curbuf))
- so->btso_curbuf = _bt_getbuf(rel, ItemPointerGetBlockNumber(current),
- BT_READ);
-
- /* we still have the buffer pinned and locked */
- buf = so->btso_curbuf;
-
- do
- {
- /* step one tuple in the appropriate direction */
- if (!_bt_step(scan, &buf, dir))
- return ((RetrieveIndexResult) NULL);
-
- /* by here, current is the tuple we want to return */
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
- itup = &btitem->bti_itup;
-
- if ( _bt_checkkeys (scan, itup, &keysok) )
- {
- Assert (keysok == so->numberOfKeys);
- res = FormRetrieveIndexResult(current, &(itup->t_tid));
-
- /* remember which buffer we have pinned and locked */
- so->btso_curbuf = buf;
- return (res);
- }
+ Relation rel;
+ Buffer buf;
+ Page page;
+ OffsetNumber offnum;
+ RetrieveIndexResult res;
+ ItemPointer current;
+ BTItem btitem;
+ IndexTuple itup;
+ BTScanOpaque so;
+ Size keysok;
+
+ rel = scan->relation;
+ so = (BTScanOpaque) scan->opaque;
+ current = &(scan->currentItemData);
+
+ /*
+ * XXX 10 may 91: somewhere there's a bug in our management of the
+ * cached buffer for this scan. wei discovered it. the following is
+ * a workaround so he can work until i figure out what's going on.
+ */
+
+ if (!BufferIsValid(so->btso_curbuf))
+ so->btso_curbuf = _bt_getbuf(rel, ItemPointerGetBlockNumber(current),
+ BT_READ);
+
+ /* we still have the buffer pinned and locked */
+ buf = so->btso_curbuf;
+
+ do
+ {
+ /* step one tuple in the appropriate direction */
+ if (!_bt_step(scan, &buf, dir))
+ return ((RetrieveIndexResult) NULL);
- } while ( keysok >= so->numberOfFirstKeys );
+ /* by here, current is the tuple we want to return */
+ offnum = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
+ itup = &btitem->bti_itup;
+
+ if (_bt_checkkeys(scan, itup, &keysok))
+ {
+ Assert(keysok == so->numberOfKeys);
+ res = FormRetrieveIndexResult(current, &(itup->t_tid));
+
+ /* remember which buffer we have pinned and locked */
+ so->btso_curbuf = buf;
+ return (res);
+ }
+
+ } while (keysok >= so->numberOfFirstKeys);
- ItemPointerSetInvalid(current);
- so->btso_curbuf = InvalidBuffer;
- _bt_relbuf(rel, buf, BT_READ);
-
- return ((RetrieveIndexResult) NULL);
+ ItemPointerSetInvalid(current);
+ so->btso_curbuf = InvalidBuffer;
+ _bt_relbuf(rel, buf, BT_READ);
+
+ return ((RetrieveIndexResult) NULL);
}
/*
- * _bt_first() -- Find the first item in a scan.
+ * _bt_first() -- Find the first item in a scan.
*
- * We need to be clever about the type of scan, the operation it's
- * performing, and the tree ordering. We return the RetrieveIndexResult
- * of the first item in the tree that satisfies the qualification
- * associated with the scan descriptor. On exit, the page containing
- * the current index tuple is read locked and pinned, and the scan's
- * opaque data entry is updated to include the buffer.
+ * We need to be clever about the type of scan, the operation it's
+ * performing, and the tree ordering. We return the RetrieveIndexResult
+ * of the first item in the tree that satisfies the qualification
+ * associated with the scan descriptor. On exit, the page containing
+ * the current index tuple is read locked and pinned, and the scan's
+ * opaque data entry is updated to include the buffer.
*/
RetrieveIndexResult
_bt_first(IndexScanDesc scan, ScanDirection dir)
{
- Relation rel;
- TupleDesc itupdesc;
- Buffer buf;
- Page page;
- BTPageOpaque pop;
- BTStack stack;
- OffsetNumber offnum, maxoff;
- bool offGmax = false;
- BTItem btitem;
- IndexTuple itup;
- ItemPointer current;
- BlockNumber blkno;
- StrategyNumber strat;
- RetrieveIndexResult res;
- RegProcedure proc;
- int result;
- BTScanOpaque so;
- ScanKeyData skdata;
- Size keysok;
-
- rel = scan->relation;
- so = (BTScanOpaque) scan->opaque;
-
- /*
- * Order the keys in the qualification and be sure
- * that the scan exploits the tree order.
- */
- so->numberOfFirstKeys = 0; /* may be changed by _bt_orderkeys */
- so->qual_ok = 1; /* may be changed by _bt_orderkeys */
- scan->scanFromEnd = false;
- if ( so->numberOfKeys > 0 )
- {
- _bt_orderkeys(rel, so);
-
- strat = _bt_getstrat(rel, 1, so->keyData[0].sk_procedure);
+ Relation rel;
+ TupleDesc itupdesc;
+ Buffer buf;
+ Page page;
+ BTPageOpaque pop;
+ BTStack stack;
+ OffsetNumber offnum,
+ maxoff;
+ bool offGmax = false;
+ BTItem btitem;
+ IndexTuple itup;
+ ItemPointer current;
+ BlockNumber blkno;
+ StrategyNumber strat;
+ RetrieveIndexResult res;
+ RegProcedure proc;
+ int result;
+ BTScanOpaque so;
+ ScanKeyData skdata;
+ Size keysok;
- /* NOTE: it assumes ForwardScanDirection */
- if ( strat == BTLessStrategyNumber ||
- strat == BTLessEqualStrategyNumber )
- scan->scanFromEnd = true;
- }
- else
- scan->scanFromEnd = true;
-
- if ( so->qual_ok == 0 )
- return ((RetrieveIndexResult) NULL);
-
- /* if we just need to walk down one edge of the tree, do that */
- if (scan->scanFromEnd)
- return (_bt_endpoint(scan, dir));
-
- itupdesc = RelationGetTupleDescriptor(rel);
- current = &(scan->currentItemData);
-
- /*
- * Okay, we want something more complicated. What we'll do is use
- * the first item in the scan key passed in (which has been correctly
- * ordered to take advantage of index ordering) to position ourselves
- * at the right place in the scan.
- */
- /* _bt_orderkeys disallows it, but it's place to add some code latter */
- if ( so->keyData[0].sk_flags & SK_ISNULL )
- {
- elog (WARN, "_bt_first: btree doesn't support is(not)null, yet");
- return ((RetrieveIndexResult) NULL);
- }
- proc = index_getprocid(rel, 1, BTORDER_PROC);
- ScanKeyEntryInitialize(&skdata, so->keyData[0].sk_flags, 1, proc,
- so->keyData[0].sk_argument);
-
- stack = _bt_search(rel, 1, &skdata, &buf);
- _bt_freestack(stack);
-
- blkno = BufferGetBlockNumber(buf);
- page = BufferGetPage(buf);
-
- /*
- * This will happen if the tree we're searching is entirely empty,
- * or if we're doing a search for a key that would appear on an
- * entirely empty internal page. In either case, there are no
- * matching tuples in the index.
- */
-
- if (PageIsEmpty(page)) {
- ItemPointerSetInvalid(current);
- so->btso_curbuf = InvalidBuffer;
- _bt_relbuf(rel, buf, BT_READ);
- return ((RetrieveIndexResult) NULL);
- }
- maxoff = PageGetMaxOffsetNumber(page);
- pop = (BTPageOpaque) PageGetSpecialPointer(page);
-
- /*
- * Now _bt_moveright doesn't move from non-rightmost leaf page
- * if scankey == hikey and there is only hikey there. It's
- * good for insertion, but we need to do work for scan here.
- * - vadim 05/27/97
- */
-
- while ( maxoff == P_HIKEY && !P_RIGHTMOST(pop) &&
- _bt_skeycmp(rel, 1, &skdata, page,
- PageGetItemId(page, P_HIKEY),
- BTGreaterEqualStrategyNumber) )
- {
- /* step right one page */
- blkno = pop->btpo_next;
- _bt_relbuf(rel, buf, BT_READ);
- buf = _bt_getbuf(rel, blkno, BT_READ);
+ rel = scan->relation;
+ so = (BTScanOpaque) scan->opaque;
+
+ /*
+ * Order the keys in the qualification and be sure that the scan
+ * exploits the tree order.
+ */
+ so->numberOfFirstKeys = 0; /* may be changed by _bt_orderkeys */
+ so->qual_ok = 1; /* may be changed by _bt_orderkeys */
+ scan->scanFromEnd = false;
+ if (so->numberOfKeys > 0)
+ {
+ _bt_orderkeys(rel, so);
+
+ strat = _bt_getstrat(rel, 1, so->keyData[0].sk_procedure);
+
+ /* NOTE: it assumes ForwardScanDirection */
+ if (strat == BTLessStrategyNumber ||
+ strat == BTLessEqualStrategyNumber)
+ scan->scanFromEnd = true;
+ }
+ else
+ scan->scanFromEnd = true;
+
+ if (so->qual_ok == 0)
+ return ((RetrieveIndexResult) NULL);
+
+ /* if we just need to walk down one edge of the tree, do that */
+ if (scan->scanFromEnd)
+ return (_bt_endpoint(scan, dir));
+
+ itupdesc = RelationGetTupleDescriptor(rel);
+ current = &(scan->currentItemData);
+
+ /*
+ * Okay, we want something more complicated. What we'll do is use the
+ * first item in the scan key passed in (which has been correctly
+ * ordered to take advantage of index ordering) to position ourselves
+ * at the right place in the scan.
+ */
+ /* _bt_orderkeys disallows it, but it's place to add some code latter */
+ if (so->keyData[0].sk_flags & SK_ISNULL)
+ {
+ elog(WARN, "_bt_first: btree doesn't support is(not)null, yet");
+ return ((RetrieveIndexResult) NULL);
+ }
+ proc = index_getprocid(rel, 1, BTORDER_PROC);
+ ScanKeyEntryInitialize(&skdata, so->keyData[0].sk_flags, 1, proc,
+ so->keyData[0].sk_argument);
+
+ stack = _bt_search(rel, 1, &skdata, &buf);
+ _bt_freestack(stack);
+
+ blkno = BufferGetBlockNumber(buf);
page = BufferGetPage(buf);
- if (PageIsEmpty(page)) {
- ItemPointerSetInvalid(current);
- so->btso_curbuf = InvalidBuffer;
- _bt_relbuf(rel, buf, BT_READ);
- return ((RetrieveIndexResult) NULL);
+
+ /*
+ * This will happen if the tree we're searching is entirely empty, or
+ * if we're doing a search for a key that would appear on an entirely
+ * empty internal page. In either case, there are no matching tuples
+ * in the index.
+ */
+
+ if (PageIsEmpty(page))
+ {
+ ItemPointerSetInvalid(current);
+ so->btso_curbuf = InvalidBuffer;
+ _bt_relbuf(rel, buf, BT_READ);
+ return ((RetrieveIndexResult) NULL);
}
- maxoff = PageGetMaxOffsetNumber(page);
+ maxoff = PageGetMaxOffsetNumber(page);
pop = (BTPageOpaque) PageGetSpecialPointer(page);
- }
-
-
- /* find the nearest match to the manufactured scan key on the page */
- offnum = _bt_binsrch(rel, buf, 1, &skdata, BT_DESCENT);
-
- if (offnum > maxoff)
- {
- offnum = maxoff;
- offGmax = true;
- }
-
- ItemPointerSet(current, blkno, offnum);
-
- /*
- * Now find the right place to start the scan. Result is the
- * value we're looking for minus the value we're looking at
- * in the index.
- */
-
- result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
-
- /* it's yet other place to add some code latter for is(not)null */
-
- strat = _bt_getstrat(rel, 1, so->keyData[0].sk_procedure);
-
- switch (strat) {
- case BTLessStrategyNumber:
- if (result <= 0) {
- do {
- if (!_bt_twostep(scan, &buf, BackwardScanDirection))
- break;
-
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
- } while (result <= 0);
-
- /* if this is true, the key we just looked at is gone */
- if (result > 0)
- _bt_twostep(scan, &buf, ForwardScanDirection);
- }
- break;
-
- case BTLessEqualStrategyNumber:
- if (result >= 0) {
- do {
- if (!_bt_twostep(scan, &buf, ForwardScanDirection))
- break;
-
- offnum = ItemPointerGetOffsetNumber(current);
+
+ /*
+ * Now _bt_moveright doesn't move from non-rightmost leaf page if
+ * scankey == hikey and there is only hikey there. It's good for
+ * insertion, but we need to do work for scan here. - vadim 05/27/97
+ */
+
+ while (maxoff == P_HIKEY && !P_RIGHTMOST(pop) &&
+ _bt_skeycmp(rel, 1, &skdata, page,
+ PageGetItemId(page, P_HIKEY),
+ BTGreaterEqualStrategyNumber))
+ {
+ /* step right one page */
+ blkno = pop->btpo_next;
+ _bt_relbuf(rel, buf, BT_READ);
+ buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
- result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
- } while (result >= 0);
-
- if (result < 0)
- _bt_twostep(scan, &buf, BackwardScanDirection);
+ if (PageIsEmpty(page))
+ {
+ ItemPointerSetInvalid(current);
+ so->btso_curbuf = InvalidBuffer;
+ _bt_relbuf(rel, buf, BT_READ);
+ return ((RetrieveIndexResult) NULL);
+ }
+ maxoff = PageGetMaxOffsetNumber(page);
+ pop = (BTPageOpaque) PageGetSpecialPointer(page);
}
- break;
-
- case BTEqualStrategyNumber:
- if (result != 0) {
- _bt_relbuf(scan->relation, buf, BT_READ);
- so->btso_curbuf = InvalidBuffer;
- ItemPointerSetInvalid(&(scan->currentItemData));
- return ((RetrieveIndexResult) NULL);
+
+
+ /* find the nearest match to the manufactured scan key on the page */
+ offnum = _bt_binsrch(rel, buf, 1, &skdata, BT_DESCENT);
+
+ if (offnum > maxoff)
+ {
+ offnum = maxoff;
+ offGmax = true;
}
- break;
-
- case BTGreaterEqualStrategyNumber:
- if ( offGmax )
+
+ ItemPointerSet(current, blkno, offnum);
+
+ /*
+ * Now find the right place to start the scan. Result is the value
+ * we're looking for minus the value we're looking at in the index.
+ */
+
+ result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
+
+ /* it's yet other place to add some code latter for is(not)null */
+
+ strat = _bt_getstrat(rel, 1, so->keyData[0].sk_procedure);
+
+ switch (strat)
{
- if (result < 0)
- {
- Assert ( !P_RIGHTMOST(pop) && maxoff == P_HIKEY );
- if ( !_bt_step(scan, &buf, ForwardScanDirection) )
- {
- _bt_relbuf(scan->relation, buf, BT_READ);
- so->btso_curbuf = InvalidBuffer;
- ItemPointerSetInvalid(&(scan->currentItemData));
- return ((RetrieveIndexResult) NULL);
+ case BTLessStrategyNumber:
+ if (result <= 0)
+ {
+ do
+ {
+ if (!_bt_twostep(scan, &buf, BackwardScanDirection))
+ break;
+
+ offnum = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
+ } while (result <= 0);
+
+ /* if this is true, the key we just looked at is gone */
+ if (result > 0)
+ _bt_twostep(scan, &buf, ForwardScanDirection);
}
- }
- else if (result > 0)
- { /*
- * Just remember: _bt_binsrch() returns the OffsetNumber of
- * the first matching key on the page, or the OffsetNumber at
- * which the matching key WOULD APPEAR IF IT WERE on this page.
- * No key on this page, but offnum from _bt_binsrch() greater
- * maxoff - have to move right. - vadim 12/06/96
- */
- _bt_twostep(scan, &buf, ForwardScanDirection);
- }
+ break;
+
+ case BTLessEqualStrategyNumber:
+ if (result >= 0)
+ {
+ do
+ {
+ if (!_bt_twostep(scan, &buf, ForwardScanDirection))
+ break;
+
+ offnum = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
+ } while (result >= 0);
+
+ if (result < 0)
+ _bt_twostep(scan, &buf, BackwardScanDirection);
+ }
+ break;
+
+ case BTEqualStrategyNumber:
+ if (result != 0)
+ {
+ _bt_relbuf(scan->relation, buf, BT_READ);
+ so->btso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(&(scan->currentItemData));
+ return ((RetrieveIndexResult) NULL);
+ }
+ break;
+
+ case BTGreaterEqualStrategyNumber:
+ if (offGmax)
+ {
+ if (result < 0)
+ {
+ Assert(!P_RIGHTMOST(pop) && maxoff == P_HIKEY);
+ if (!_bt_step(scan, &buf, ForwardScanDirection))
+ {
+ _bt_relbuf(scan->relation, buf, BT_READ);
+ so->btso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(&(scan->currentItemData));
+ return ((RetrieveIndexResult) NULL);
+ }
+ }
+ else if (result > 0)
+ { /* Just remember: _bt_binsrch() returns
+ * the OffsetNumber of the first matching
+ * key on the page, or the OffsetNumber at
+ * which the matching key WOULD APPEAR IF
+ * IT WERE on this page. No key on this
+ * page, but offnum from _bt_binsrch()
+ * greater maxoff - have to move right. -
+ * vadim 12/06/96 */
+ _bt_twostep(scan, &buf, ForwardScanDirection);
+ }
+ }
+ else if (result < 0)
+ {
+ do
+ {
+ if (!_bt_twostep(scan, &buf, BackwardScanDirection))
+ break;
+
+ page = BufferGetPage(buf);
+ offnum = ItemPointerGetOffsetNumber(current);
+ result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
+ } while (result < 0);
+
+ if (result > 0)
+ _bt_twostep(scan, &buf, ForwardScanDirection);
+ }
+ break;
+
+ case BTGreaterStrategyNumber:
+ /* offGmax helps as above */
+ if (result >= 0 || offGmax)
+ {
+ do
+ {
+ if (!_bt_twostep(scan, &buf, ForwardScanDirection))
+ break;
+
+ offnum = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
+ } while (result >= 0);
+ }
+ break;
}
- else if (result < 0)
+
+ /* okay, current item pointer for the scan is right */
+ offnum = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
+ itup = &btitem->bti_itup;
+
+ if (_bt_checkkeys(scan, itup, &keysok))
{
- do {
- if (!_bt_twostep(scan, &buf, BackwardScanDirection))
- break;
-
- page = BufferGetPage(buf);
- offnum = ItemPointerGetOffsetNumber(current);
- result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
- } while (result < 0);
-
- if (result > 0)
- _bt_twostep(scan, &buf, ForwardScanDirection);
+ res = FormRetrieveIndexResult(current, &(itup->t_tid));
+
+ /* remember which buffer we have pinned */
+ so->btso_curbuf = buf;
}
- break;
-
- case BTGreaterStrategyNumber:
- /* offGmax helps as above */
- if (result >= 0 || offGmax) {
- do {
- if (!_bt_twostep(scan, &buf, ForwardScanDirection))
- break;
-
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
- } while (result >= 0);
+ else if (keysok >= so->numberOfFirstKeys)
+ {
+ so->btso_curbuf = buf;
+ return (_bt_next(scan, dir));
}
- break;
- }
-
- /* okay, current item pointer for the scan is right */
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
- itup = &btitem->bti_itup;
-
- if ( _bt_checkkeys (scan, itup, &keysok) )
- {
- res = FormRetrieveIndexResult(current, &(itup->t_tid));
-
- /* remember which buffer we have pinned */
- so->btso_curbuf = buf;
- }
- else if ( keysok >= so->numberOfFirstKeys )
- {
- so->btso_curbuf = buf;
- return (_bt_next (scan, dir));
- }
- else
- {
- ItemPointerSetInvalid(current);
- so->btso_curbuf = InvalidBuffer;
- _bt_relbuf(rel, buf, BT_READ);
- res = (RetrieveIndexResult) NULL;
- }
-
- return (res);
+ else
+ {
+ ItemPointerSetInvalid(current);
+ so->btso_curbuf = InvalidBuffer;
+ _bt_relbuf(rel, buf, BT_READ);
+ res = (RetrieveIndexResult) NULL;
+ }
+
+ return (res);
}
/*
- * _bt_step() -- Step one item in the requested direction in a scan on
- * the tree.
+ * _bt_step() -- Step one item in the requested direction in a scan on
+ * the tree.
*
- * If no adjacent record exists in the requested direction, return
- * false. Else, return true and set the currentItemData for the
- * scan to the right thing.
+ * If no adjacent record exists in the requested direction, return
+ * false. Else, return true and set the currentItemData for the
+ * scan to the right thing.
*/
bool
-_bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
+_bt_step(IndexScanDesc scan, Buffer * bufP, ScanDirection dir)
{
- Page page;
- BTPageOpaque opaque;
- OffsetNumber offnum, maxoff;
- OffsetNumber start;
- BlockNumber blkno;
- BlockNumber obknum;
- BTScanOpaque so;
- ItemPointer current;
- Relation rel;
-
- rel = scan->relation;
- current = &(scan->currentItemData);
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(*bufP);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- so = (BTScanOpaque) scan->opaque;
- maxoff = PageGetMaxOffsetNumber(page);
-
- /* get the next tuple */
- if (ScanDirectionIsForward(dir)) {
- if (!PageIsEmpty(page) && offnum < maxoff) {
- offnum = OffsetNumberNext(offnum);
- } else {
-
- /* if we're at end of scan, release the buffer and return */
- blkno = opaque->btpo_next;
- if (P_RIGHTMOST(opaque)) {
- _bt_relbuf(rel, *bufP, BT_READ);
- ItemPointerSetInvalid(current);
- *bufP = so->btso_curbuf = InvalidBuffer;
- return (false);
- } else {
-
- /* walk right to the next page with data */
- _bt_relbuf(rel, *bufP, BT_READ);
- for (;;) {
- *bufP = _bt_getbuf(rel, blkno, BT_READ);
- page = BufferGetPage(*bufP);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- maxoff = PageGetMaxOffsetNumber(page);
- start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
-
- if (!PageIsEmpty(page) && start <= maxoff) {
- break;
- } else {
+ Page page;
+ BTPageOpaque opaque;
+ OffsetNumber offnum,
+ maxoff;
+ OffsetNumber start;
+ BlockNumber blkno;
+ BlockNumber obknum;
+ BTScanOpaque so;
+ ItemPointer current;
+ Relation rel;
+
+ rel = scan->relation;
+ current = &(scan->currentItemData);
+ offnum = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(*bufP);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ so = (BTScanOpaque) scan->opaque;
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ /* get the next tuple */
+ if (ScanDirectionIsForward(dir))
+ {
+ if (!PageIsEmpty(page) && offnum < maxoff)
+ {
+ offnum = OffsetNumberNext(offnum);
+ }
+ else
+ {
+
+ /* if we're at end of scan, release the buffer and return */
blkno = opaque->btpo_next;
- _bt_relbuf(rel, *bufP, BT_READ);
- if (blkno == P_NONE) {
- *bufP = so->btso_curbuf = InvalidBuffer;
- ItemPointerSetInvalid(current);
- return (false);
+ if (P_RIGHTMOST(opaque))
+ {
+ _bt_relbuf(rel, *bufP, BT_READ);
+ ItemPointerSetInvalid(current);
+ *bufP = so->btso_curbuf = InvalidBuffer;
+ return (false);
+ }
+ else
+ {
+
+ /* walk right to the next page with data */
+ _bt_relbuf(rel, *bufP, BT_READ);
+ for (;;)
+ {
+ *bufP = _bt_getbuf(rel, blkno, BT_READ);
+ page = BufferGetPage(*bufP);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ maxoff = PageGetMaxOffsetNumber(page);
+ start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+
+ if (!PageIsEmpty(page) && start <= maxoff)
+ {
+ break;
+ }
+ else
+ {
+ blkno = opaque->btpo_next;
+ _bt_relbuf(rel, *bufP, BT_READ);
+ if (blkno == P_NONE)
+ {
+ *bufP = so->btso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(current);
+ return (false);
+ }
+ }
+ }
+ offnum = start;
}
- }
}
- offnum = start;
- }
}
- } else if (ScanDirectionIsBackward(dir)) {
-
- /* remember that high key is item zero on non-rightmost pages */
- start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+ else if (ScanDirectionIsBackward(dir))
+ {
- if (offnum > start) {
- offnum = OffsetNumberPrev(offnum);
- } else {
-
- /* if we're at end of scan, release the buffer and return */
- blkno = opaque->btpo_prev;
- if (P_LEFTMOST(opaque)) {
- _bt_relbuf(rel, *bufP, BT_READ);
- *bufP = so->btso_curbuf = InvalidBuffer;
- ItemPointerSetInvalid(current);
- return (false);
- } else {
-
- obknum = BufferGetBlockNumber(*bufP);
-
- /* walk right to the next page with data */
- _bt_relbuf(rel, *bufP, BT_READ);
- for (;;) {
- *bufP = _bt_getbuf(rel, blkno, BT_READ);
- page = BufferGetPage(*bufP);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- maxoff = PageGetMaxOffsetNumber(page);
-
- /*
- * If the adjacent page just split, then we may have the
- * wrong block. Handle this case. Because pages only
- * split right, we don't have to worry about this failing
- * to terminate.
- */
-
- while (opaque->btpo_next != obknum) {
- blkno = opaque->btpo_next;
- _bt_relbuf(rel, *bufP, BT_READ);
- *bufP = _bt_getbuf(rel, blkno, BT_READ);
- page = BufferGetPage(*bufP);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- maxoff = PageGetMaxOffsetNumber(page);
- }
-
- /* don't consider the high key */
- start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
-
- /* anything to look at here? */
- if (!PageIsEmpty(page) && maxoff >= start) {
- break;
- } else {
+ /* remember that high key is item zero on non-rightmost pages */
+ start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+
+ if (offnum > start)
+ {
+ offnum = OffsetNumberPrev(offnum);
+ }
+ else
+ {
+
+ /* if we're at end of scan, release the buffer and return */
blkno = opaque->btpo_prev;
- obknum = BufferGetBlockNumber(*bufP);
- _bt_relbuf(rel, *bufP, BT_READ);
- if (blkno == P_NONE) {
- *bufP = so->btso_curbuf = InvalidBuffer;
- ItemPointerSetInvalid(current);
- return (false);
+ if (P_LEFTMOST(opaque))
+ {
+ _bt_relbuf(rel, *bufP, BT_READ);
+ *bufP = so->btso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(current);
+ return (false);
+ }
+ else
+ {
+
+ obknum = BufferGetBlockNumber(*bufP);
+
+ /* walk right to the next page with data */
+ _bt_relbuf(rel, *bufP, BT_READ);
+ for (;;)
+ {
+ *bufP = _bt_getbuf(rel, blkno, BT_READ);
+ page = BufferGetPage(*bufP);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ /*
+ * If the adjacent page just split, then we may have
+ * the wrong block. Handle this case. Because pages
+ * only split right, we don't have to worry about this
+ * failing to terminate.
+ */
+
+ while (opaque->btpo_next != obknum)
+ {
+ blkno = opaque->btpo_next;
+ _bt_relbuf(rel, *bufP, BT_READ);
+ *bufP = _bt_getbuf(rel, blkno, BT_READ);
+ page = BufferGetPage(*bufP);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ maxoff = PageGetMaxOffsetNumber(page);
+ }
+
+ /* don't consider the high key */
+ start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+
+ /* anything to look at here? */
+ if (!PageIsEmpty(page) && maxoff >= start)
+ {
+ break;
+ }
+ else
+ {
+ blkno = opaque->btpo_prev;
+ obknum = BufferGetBlockNumber(*bufP);
+ _bt_relbuf(rel, *bufP, BT_READ);
+ if (blkno == P_NONE)
+ {
+ *bufP = so->btso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(current);
+ return (false);
+ }
+ }
+ }
+ offnum = maxoff;/* XXX PageIsEmpty? */
}
- }
}
- offnum = maxoff; /* XXX PageIsEmpty? */
- }
}
- }
- blkno = BufferGetBlockNumber(*bufP);
- so->btso_curbuf = *bufP;
- ItemPointerSet(current, blkno, offnum);
-
- return (true);
+ blkno = BufferGetBlockNumber(*bufP);
+ so->btso_curbuf = *bufP;
+ ItemPointerSet(current, blkno, offnum);
+
+ return (true);
}
/*
- * _bt_twostep() -- Move to an adjacent record in a scan on the tree,
- * if an adjacent record exists.
+ * _bt_twostep() -- Move to an adjacent record in a scan on the tree,
+ * if an adjacent record exists.
*
- * This is like _bt_step, except that if no adjacent record exists
- * it restores us to where we were before trying the step. This is
- * only hairy when you cross page boundaries, since the page you cross
- * from could have records inserted or deleted, or could even split.
- * This is unlikely, but we try to handle it correctly here anyway.
+ * This is like _bt_step, except that if no adjacent record exists
+ * it restores us to where we were before trying the step. This is
+ * only hairy when you cross page boundaries, since the page you cross
+ * from could have records inserted or deleted, or could even split.
+ * This is unlikely, but we try to handle it correctly here anyway.
*
- * This routine contains the only case in which our changes to Lehman
- * and Yao's algorithm.
+ * This routine contains the only case in which our changes to Lehman
+ * and Yao's algorithm.
*
- * Like step, this routine leaves the scan's currentItemData in the
- * proper state and acquires a lock and pin on *bufP. If the twostep
- * succeeded, we return true; otherwise, we return false.
+ * Like step, this routine leaves the scan's currentItemData in the
+ * proper state and acquires a lock and pin on *bufP. If the twostep
+ * succeeded, we return true; otherwise, we return false.
*/
-static bool
-_bt_twostep(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
+static bool
+_bt_twostep(IndexScanDesc scan, Buffer * bufP, ScanDirection dir)
{
- Page page;
- BTPageOpaque opaque;
- OffsetNumber offnum, maxoff;
- OffsetNumber start;
- ItemPointer current;
- ItemId itemid;
- int itemsz;
- BTItem btitem;
- BTItem svitem;
- BlockNumber blkno;
-
- blkno = BufferGetBlockNumber(*bufP);
- page = BufferGetPage(*bufP);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- maxoff = PageGetMaxOffsetNumber(page);
- current = &(scan->currentItemData);
- offnum = ItemPointerGetOffsetNumber(current);
-
- start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
-
- /* if we're safe, just do it */
- if (ScanDirectionIsForward(dir) && offnum < maxoff) { /* XXX PageIsEmpty? */
- ItemPointerSet(current, blkno, OffsetNumberNext(offnum));
- return (true);
- } else if (ScanDirectionIsBackward(dir) && offnum > start) {
- ItemPointerSet(current, blkno, OffsetNumberPrev(offnum));
- return (true);
- }
-
- /* if we've hit end of scan we don't have to do any work */
- if (ScanDirectionIsForward(dir) && P_RIGHTMOST(opaque)) {
- return (false);
- } else if (ScanDirectionIsBackward(dir) && P_LEFTMOST(opaque)) {
- return (false);
- }
-
- /*
- * Okay, it's off the page; let _bt_step() do the hard work, and we'll
- * try to remember where we were. This is not guaranteed to work; this
- * is the only place in the code where concurrency can screw us up,
- * and it's because we want to be able to move in two directions in
- * the scan.
- */
-
- itemid = PageGetItemId(page, offnum);
- itemsz = ItemIdGetLength(itemid);
- btitem = (BTItem) PageGetItem(page, itemid);
- svitem = (BTItem) palloc(itemsz);
- memmove((char *) svitem, (char *) btitem, itemsz);
-
- if (_bt_step(scan, bufP, dir)) {
- pfree(svitem);
- return (true);
- }
-
- /* try to find our place again */
- *bufP = _bt_getbuf(scan->relation, blkno, BT_READ);
- page = BufferGetPage(*bufP);
- maxoff = PageGetMaxOffsetNumber(page);
-
- while (offnum <= maxoff) {
+ Page page;
+ BTPageOpaque opaque;
+ OffsetNumber offnum,
+ maxoff;
+ OffsetNumber start;
+ ItemPointer current;
+ ItemId itemid;
+ int itemsz;
+ BTItem btitem;
+ BTItem svitem;
+ BlockNumber blkno;
+
+ blkno = BufferGetBlockNumber(*bufP);
+ page = BufferGetPage(*bufP);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ maxoff = PageGetMaxOffsetNumber(page);
+ current = &(scan->currentItemData);
+ offnum = ItemPointerGetOffsetNumber(current);
+
+ start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+
+ /* if we're safe, just do it */
+ if (ScanDirectionIsForward(dir) && offnum < maxoff)
+ { /* XXX PageIsEmpty? */
+ ItemPointerSet(current, blkno, OffsetNumberNext(offnum));
+ return (true);
+ }
+ else if (ScanDirectionIsBackward(dir) && offnum > start)
+ {
+ ItemPointerSet(current, blkno, OffsetNumberPrev(offnum));
+ return (true);
+ }
+
+ /* if we've hit end of scan we don't have to do any work */
+ if (ScanDirectionIsForward(dir) && P_RIGHTMOST(opaque))
+ {
+ return (false);
+ }
+ else if (ScanDirectionIsBackward(dir) && P_LEFTMOST(opaque))
+ {
+ return (false);
+ }
+
+ /*
+ * Okay, it's off the page; let _bt_step() do the hard work, and we'll
+ * try to remember where we were. This is not guaranteed to work;
+ * this is the only place in the code where concurrency can screw us
+ * up, and it's because we want to be able to move in two directions
+ * in the scan.
+ */
+
itemid = PageGetItemId(page, offnum);
+ itemsz = ItemIdGetLength(itemid);
btitem = (BTItem) PageGetItem(page, itemid);
- if ( BTItemSame (btitem, svitem) ) {
- pfree(svitem);
- ItemPointerSet(current, blkno, offnum);
- return (false);
+ svitem = (BTItem) palloc(itemsz);
+ memmove((char *) svitem, (char *) btitem, itemsz);
+
+ if (_bt_step(scan, bufP, dir))
+ {
+ pfree(svitem);
+ return (true);
+ }
+
+ /* try to find our place again */
+ *bufP = _bt_getbuf(scan->relation, blkno, BT_READ);
+ page = BufferGetPage(*bufP);
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ while (offnum <= maxoff)
+ {
+ itemid = PageGetItemId(page, offnum);
+ btitem = (BTItem) PageGetItem(page, itemid);
+ if (BTItemSame(btitem, svitem))
+ {
+ pfree(svitem);
+ ItemPointerSet(current, blkno, offnum);
+ return (false);
+ }
}
- }
-
- /*
- * XXX crash and burn -- can't find our place. We can be a little
- * smarter -- walk to the next page to the right, for example, since
- * that's the only direction that splits happen in. Deletions screw
- * us up less often since they're only done by the vacuum daemon.
- */
-
- elog(WARN, "btree synchronization error: concurrent update botched scan");
-
- return (false);
+
+ /*
+ * XXX crash and burn -- can't find our place. We can be a little
+ * smarter -- walk to the next page to the right, for example, since
+ * that's the only direction that splits happen in. Deletions screw
+ * us up less often since they're only done by the vacuum daemon.
+ */
+
+ elog(WARN, "btree synchronization error: concurrent update botched scan");
+
+ return (false);
}
/*
- * _bt_endpoint() -- Find the first or last key in the index.
+ * _bt_endpoint() -- Find the first or last key in the index.
*/
-static RetrieveIndexResult
+static RetrieveIndexResult
_bt_endpoint(IndexScanDesc scan, ScanDirection dir)
{
- Relation rel;
- Buffer buf;
- Page page;
- BTPageOpaque opaque;
- ItemPointer current;
- OffsetNumber offnum, maxoff;
- OffsetNumber start = 0;
- BlockNumber blkno;
- BTItem btitem;
- IndexTuple itup;
- BTScanOpaque so;
- RetrieveIndexResult res;
- Size keysok;
-
- rel = scan->relation;
- current = &(scan->currentItemData);
- so = (BTScanOpaque) scan->opaque;
-
- buf = _bt_getroot(rel, BT_READ);
- blkno = BufferGetBlockNumber(buf);
- page = BufferGetPage(buf);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
-
- for (;;) {
- if (opaque->btpo_flags & BTP_LEAF)
- break;
-
- if (ScanDirectionIsForward(dir)) {
- offnum = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
- } else {
- offnum = PageGetMaxOffsetNumber(page);
- }
-
- btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
- itup = &(btitem->bti_itup);
-
- blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
-
- _bt_relbuf(rel, buf, BT_READ);
- buf = _bt_getbuf(rel, blkno, BT_READ);
+ Relation rel;
+ Buffer buf;
+ Page page;
+ BTPageOpaque opaque;
+ ItemPointer current;
+ OffsetNumber offnum,
+ maxoff;
+ OffsetNumber start = 0;
+ BlockNumber blkno;
+ BTItem btitem;
+ IndexTuple itup;
+ BTScanOpaque so;
+ RetrieveIndexResult res;
+ Size keysok;
+
+ rel = scan->relation;
+ current = &(scan->currentItemData);
+ so = (BTScanOpaque) scan->opaque;
+
+ buf = _bt_getroot(rel, BT_READ);
+ blkno = BufferGetBlockNumber(buf);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
-
- /*
- * Race condition: If the child page we just stepped onto is
- * in the process of being split, we need to make sure we're
- * all the way at the right edge of the tree. See the paper
- * by Lehman and Yao.
- */
-
- if (ScanDirectionIsBackward(dir) && ! P_RIGHTMOST(opaque)) {
- do {
- blkno = opaque->btpo_next;
+
+ for (;;)
+ {
+ if (opaque->btpo_flags & BTP_LEAF)
+ break;
+
+ if (ScanDirectionIsForward(dir))
+ {
+ offnum = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+ }
+ else
+ {
+ offnum = PageGetMaxOffsetNumber(page);
+ }
+
+ btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
+ itup = &(btitem->bti_itup);
+
+ blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
+
_bt_relbuf(rel, buf, BT_READ);
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- } while (! P_RIGHTMOST(opaque));
+
+ /*
+ * Race condition: If the child page we just stepped onto is in
+ * the process of being split, we need to make sure we're all the
+ * way at the right edge of the tree. See the paper by Lehman and
+ * Yao.
+ */
+
+ if (ScanDirectionIsBackward(dir) && !P_RIGHTMOST(opaque))
+ {
+ do
+ {
+ blkno = opaque->btpo_next;
+ _bt_relbuf(rel, buf, BT_READ);
+ buf = _bt_getbuf(rel, blkno, BT_READ);
+ page = BufferGetPage(buf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ } while (!P_RIGHTMOST(opaque));
+ }
}
- }
-
- /* okay, we've got the {left,right}-most page in the tree */
- maxoff = PageGetMaxOffsetNumber(page);
-
- if (ScanDirectionIsForward(dir)) {
- if ( !P_LEFTMOST(opaque) ) /* non-leftmost page ? */
- elog (WARN, "_bt_endpoint: leftmost page (%u) has not leftmost flag", blkno);
- start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
- /*
- * I don't understand this stuff! It doesn't work for non-rightmost
- * pages with only one element (P_HIKEY) which we have after
- * deletion itups by vacuum (it's case of start > maxoff).
- * Scanning in BackwardScanDirection is not understandable at all.
- * Well - new stuff. - vadim 12/06/96
- */
+
+ /* okay, we've got the {left,right}-most page in the tree */
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ if (ScanDirectionIsForward(dir))
+ {
+ if (!P_LEFTMOST(opaque))/* non-leftmost page ? */
+ elog(WARN, "_bt_endpoint: leftmost page (%u) has not leftmost flag", blkno);
+ start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+
+ /*
+ * I don't understand this stuff! It doesn't work for
+ * non-rightmost pages with only one element (P_HIKEY) which we
+ * have after deletion itups by vacuum (it's case of start >
+ * maxoff). Scanning in BackwardScanDirection is not
+ * understandable at all. Well - new stuff. - vadim 12/06/96
+ */
#if 0
- if (PageIsEmpty(page) || start > maxoff) {
- ItemPointerSet(current, blkno, maxoff);
- if (!_bt_step(scan, &buf, BackwardScanDirection))
- return ((RetrieveIndexResult) NULL);
-
- start = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- }
+ if (PageIsEmpty(page) || start > maxoff)
+ {
+ ItemPointerSet(current, blkno, maxoff);
+ if (!_bt_step(scan, &buf, BackwardScanDirection))
+ return ((RetrieveIndexResult) NULL);
+
+ start = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ }
#endif
- if ( PageIsEmpty (page) )
+ if (PageIsEmpty(page))
+ {
+ if (start != P_HIKEY) /* non-rightmost page */
+ elog(WARN, "_bt_endpoint: non-rightmost page (%u) is empty", blkno);
+
+ /*
+ * It's left- & right- most page - root page, - and it's
+ * empty...
+ */
+ _bt_relbuf(rel, buf, BT_READ);
+ ItemPointerSetInvalid(current);
+ so->btso_curbuf = InvalidBuffer;
+ return ((RetrieveIndexResult) NULL);
+ }
+ if (start > maxoff) /* start == 2 && maxoff == 1 */
+ {
+ ItemPointerSet(current, blkno, maxoff);
+ if (!_bt_step(scan, &buf, ForwardScanDirection))
+ return ((RetrieveIndexResult) NULL);
+
+ start = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ }
+ /* new stuff ends here */
+ else
+ {
+ ItemPointerSet(current, blkno, start);
+ }
+ }
+ else if (ScanDirectionIsBackward(dir))
{
- if ( start != P_HIKEY ) /* non-rightmost page */
- elog (WARN, "_bt_endpoint: non-rightmost page (%u) is empty", blkno);
- /* It's left- & right- most page - root page, - and it's empty... */
- _bt_relbuf(rel, buf, BT_READ);
- ItemPointerSetInvalid(current);
- so->btso_curbuf = InvalidBuffer;
- return ((RetrieveIndexResult) NULL);
+
+ /*
+ * I don't understand this stuff too! If RIGHT-most leaf page is
+ * empty why do scanning in ForwardScanDirection ??? Well - new
+ * stuff. - vadim 12/06/96
+ */
+#if 0
+ if (PageIsEmpty(page))
+ {
+ ItemPointerSet(current, blkno, FirstOffsetNumber);
+ if (!_bt_step(scan, &buf, ForwardScanDirection))
+ return ((RetrieveIndexResult) NULL);
+
+ start = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ }
+#endif
+ if (PageIsEmpty(page))
+ {
+ /* If it's leftmost page too - it's empty root page... */
+ if (P_LEFTMOST(opaque))
+ {
+ _bt_relbuf(rel, buf, BT_READ);
+ ItemPointerSetInvalid(current);
+ so->btso_curbuf = InvalidBuffer;
+ return ((RetrieveIndexResult) NULL);
+ }
+ /* Go back ! */
+ ItemPointerSet(current, blkno, FirstOffsetNumber);
+ if (!_bt_step(scan, &buf, BackwardScanDirection))
+ return ((RetrieveIndexResult) NULL);
+
+ start = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ }
+ /* new stuff ends here */
+ else
+ {
+ start = PageGetMaxOffsetNumber(page);
+ ItemPointerSet(current, blkno, start);
+ }
}
- if ( start > maxoff ) /* start == 2 && maxoff == 1 */
+ else
{
- ItemPointerSet(current, blkno, maxoff);
- if (!_bt_step(scan, &buf, ForwardScanDirection))
- return ((RetrieveIndexResult) NULL);
-
- start = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
+ elog(WARN, "Illegal scan direction %d", dir);
}
- /* new stuff ends here */
- else {
- ItemPointerSet(current, blkno, start);
+
+ btitem = (BTItem) PageGetItem(page, PageGetItemId(page, start));
+ itup = &(btitem->bti_itup);
+
+ /* see if we picked a winner */
+ if (_bt_checkkeys(scan, itup, &keysok))
+ {
+ res = FormRetrieveIndexResult(current, &(itup->t_tid));
+
+ /* remember which buffer we have pinned */
+ so->btso_curbuf = buf;
}
- } else if (ScanDirectionIsBackward(dir)) {
- /*
- * I don't understand this stuff too! If RIGHT-most leaf page is
- * empty why do scanning in ForwardScanDirection ???
- * Well - new stuff. - vadim 12/06/96
- */
-#if 0
- if (PageIsEmpty(page)) {
- ItemPointerSet(current, blkno, FirstOffsetNumber);
- if (!_bt_step(scan, &buf, ForwardScanDirection))
- return ((RetrieveIndexResult) NULL);
-
- start = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
+ else if (keysok >= so->numberOfFirstKeys)
+ {
+ so->btso_curbuf = buf;
+ return (_bt_next(scan, dir));
}
-#endif
- if (PageIsEmpty(page))
+ else
{
- /* If it's leftmost page too - it's empty root page... */
- if ( P_LEFTMOST(opaque) )
- {
- _bt_relbuf(rel, buf, BT_READ);
ItemPointerSetInvalid(current);
so->btso_curbuf = InvalidBuffer;
- return ((RetrieveIndexResult) NULL);
- }
- /* Go back ! */
- ItemPointerSet(current, blkno, FirstOffsetNumber);
- if (!_bt_step(scan, &buf, BackwardScanDirection))
- return ((RetrieveIndexResult) NULL);
-
- start = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- }
- /* new stuff ends here */
- else {
- start = PageGetMaxOffsetNumber(page);
- ItemPointerSet(current, blkno, start);
+ _bt_relbuf(rel, buf, BT_READ);
+ res = (RetrieveIndexResult) NULL;
}
- } else {
- elog(WARN, "Illegal scan direction %d", dir);
- }
-
- btitem = (BTItem) PageGetItem(page, PageGetItemId(page, start));
- itup = &(btitem->bti_itup);
-
- /* see if we picked a winner */
- if ( _bt_checkkeys (scan, itup, &keysok) )
- {
- res = FormRetrieveIndexResult(current, &(itup->t_tid));
-
- /* remember which buffer we have pinned */
- so->btso_curbuf = buf;
- }
- else if ( keysok >= so->numberOfFirstKeys )
- {
- so->btso_curbuf = buf;
- return (_bt_next (scan, dir));
- }
- else
- {
- ItemPointerSetInvalid(current);
- so->btso_curbuf = InvalidBuffer;
- _bt_relbuf(rel, buf, BT_READ);
- res = (RetrieveIndexResult) NULL;
- }
-
- return (res);
+
+ return (res);
}