summary | refs | log | tree | commit | diff
path: root/src/backend/access/nbtree/nbtsearch.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/nbtree/nbtsearch.c')
-rw-r--r-- src/backend/access/nbtree/nbtsearch.c 279
1 files changed, 137 insertions, 142 deletions
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index c029824fa6f..06075dd3dda 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.94 2005/10/06 02:29:12 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.95 2005/10/15 02:49:09 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -69,9 +69,9 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
BTStack new_stack;
/*
- * Race -- the page we just grabbed may have split since we read
- * its pointer in the parent (or metapage). If it has, we may
- * need to move right to its new sibling. Do that.
+ * Race -- the page we just grabbed may have split since we read its
+ * pointer in the parent (or metapage). If it has, we may need to
+ * move right to its new sibling. Do that.
*/
*bufP = _bt_moveright(rel, *bufP, keysz, scankey, nextkey, BT_READ);
@@ -82,8 +82,8 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
break;
/*
- * Find the appropriate item on the internal page, and get the
- * child page that it points to.
+ * Find the appropriate item on the internal page, and get the child
+ * page that it points to.
*/
offnum = _bt_binsrch(rel, *bufP, keysz, scankey, nextkey);
itemid = PageGetItemId(page, offnum);
@@ -94,13 +94,13 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
/*
* We need to save the location of the index entry we chose in the
- * parent page on a stack. In case we split the tree, we'll use
- * the stack to work back up to the parent page. We also save the
- * actual downlink (TID) to uniquely identify the index entry, in
- * case it moves right while we're working lower in the tree. See
- * the paper by Lehman and Yao for how this is detected and
- * handled. (We use the child link to disambiguate duplicate keys
- * in the index -- Lehman and Yao disallow duplicate keys.)
+ * parent page on a stack. In case we split the tree, we'll use the
+ * stack to work back up to the parent page. We also save the actual
+ * downlink (TID) to uniquely identify the index entry, in case it
+ * moves right while we're working lower in the tree. See the paper
+ * by Lehman and Yao for how this is detected and handled. (We use the
+ * child link to disambiguate duplicate keys in the index -- Lehman
+ * and Yao disallow duplicate keys.)
*/
new_stack = (BTStack) palloc(sizeof(BTStackData));
new_stack->bts_blkno = par_blkno;
@@ -156,19 +156,18 @@ _bt_moveright(Relation rel,
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
/*
- * When nextkey = false (normal case): if the scan key that brought us
- * to this page is > the high key stored on the page, then the page
- * has split and we need to move right. (If the scan key is equal to
- * the high key, we might or might not need to move right; have to
- * scan the page first anyway.)
+ * When nextkey = false (normal case): if the scan key that brought us to
+ * this page is > the high key stored on the page, then the page has split
+ * and we need to move right. (If the scan key is equal to the high key,
+ * we might or might not need to move right; have to scan the page first
+ * anyway.)
*
* When nextkey = true: move right if the scan key is >= page's high key.
*
- * The page could even have split more than once, so scan as far as
- * needed.
+ * The page could even have split more than once, so scan as far as needed.
*
- * We also have to move right if we followed a link that brought us to a
- * dead page.
+ * We also have to move right if we followed a link that brought us to a dead
+ * page.
*/
cmpval = nextkey ? 0 : 1;
@@ -242,24 +241,24 @@ _bt_binsrch(Relation rel,
high = PageGetMaxOffsetNumber(page);
/*
- * If there are no keys on the page, return the first available slot.
- * Note this covers two cases: the page is really empty (no keys), or
- * it contains only a high key. The latter case is possible after
- * vacuuming. This can never happen on an internal page, however,
- * since they are never empty (an internal page must have children).
+ * If there are no keys on the page, return the first available slot. Note
+ * this covers two cases: the page is really empty (no keys), or it
+ * contains only a high key. The latter case is possible after vacuuming.
+ * This can never happen on an internal page, however, since they are
+ * never empty (an internal page must have children).
*/
if (high < low)
return low;
/*
- * Binary search to find the first key on the page >= scan key, or
- * first key > scankey when nextkey is true.
+ * Binary search to find the first key on the page >= scan key, or first
+ * key > scankey when nextkey is true.
*
* For nextkey=false (cmpval=1), the loop invariant is: all slots before
* 'low' are < scan key, all slots at or after 'high' are >= scan key.
*
- * For nextkey=true (cmpval=0), the loop invariant is: all slots before
- * 'low' are <= scan key, all slots at or after 'high' are > scan key.
+ * For nextkey=true (cmpval=0), the loop invariant is: all slots before 'low'
+ * are <= scan key, all slots at or after 'high' are > scan key.
*
* We can fall out when high == low.
*/
@@ -285,15 +284,15 @@ _bt_binsrch(Relation rel,
* At this point we have high == low, but be careful: they could point
* past the last slot on the page.
*
- * On a leaf page, we always return the first key >= scan key (resp. >
- * scan key), which could be the last slot + 1.
+ * On a leaf page, we always return the first key >= scan key (resp. > scan
+ * key), which could be the last slot + 1.
*/
if (P_ISLEAF(opaque))
return low;
/*
- * On a non-leaf page, return the last key < scan key (resp. <= scan
- * key). There must be one if _bt_compare() is playing by the rules.
+ * On a non-leaf page, return the last key < scan key (resp. <= scan key).
+ * There must be one if _bt_compare() is playing by the rules.
*/
Assert(low > P_FIRSTDATAKEY(opaque));
@@ -337,8 +336,8 @@ _bt_compare(Relation rel,
int i;
/*
- * Force result ">" if target item is first data item on an internal
- * page --- see NOTE above.
+ * Force result ">" if target item is first data item on an internal page
+ * --- see NOTE above.
*/
if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque))
return 1;
@@ -347,15 +346,15 @@ _bt_compare(Relation rel,
itup = &(btitem->bti_itup);
/*
- * The scan key is set up with the attribute number associated with
- * each term in the key. It is important that, if the index is
- * multi-key, the scan contain the first k key attributes, and that
- * they be in order. If you think about how multi-key ordering works,
- * you'll understand why this is.
+ * The scan key is set up with the attribute number associated with each
+ * term in the key. It is important that, if the index is multi-key, the
+ * scan contain the first k key attributes, and that they be in order. If
+ * you think about how multi-key ordering works, you'll understand why
+ * this is.
*
- * We don't test for violation of this condition here, however. The
- * initial setup for the index scan had better have gotten it right
- * (see _bt_first).
+ * We don't test for violation of this condition here, however. The initial
+ * setup for the index scan had better have gotten it right (see
+ * _bt_first).
*/
for (i = 1; i <= keysz; i++)
@@ -381,15 +380,15 @@ _bt_compare(Relation rel,
else
{
/*
- * The sk_func needs to be passed the index value as left arg
- * and the sk_argument as right arg (they might be of
- * different types). Since it is convenient for callers to
- * think of _bt_compare as comparing the scankey to the index
- * item, we have to flip the sign of the comparison result.
+ * The sk_func needs to be passed the index value as left arg and
+ * the sk_argument as right arg (they might be of different
+ * types). Since it is convenient for callers to think of
+ * _bt_compare as comparing the scankey to the index item, we have
+ * to flip the sign of the comparison result.
*
- * Note: curious-looking coding is to avoid overflow if
- * comparison function returns INT_MIN. There is no risk of
- * overflow for positive results.
+ * Note: curious-looking coding is to avoid overflow if comparison
+ * function returns INT_MIN. There is no risk of overflow for
+ * positive results.
*/
result = DatumGetInt32(FunctionCall2(&scankey->sk_func,
datum,
@@ -497,7 +496,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
bool goback;
bool continuescan;
ScanKey startKeys[INDEX_MAX_KEYS];
- ScanKeyData scankeys[INDEX_MAX_KEYS];
+ ScanKeyData scankeys[INDEX_MAX_KEYS];
int keysCount = 0;
int i;
StrategyNumber strat_total;
@@ -505,8 +504,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
pgstat_count_index_scan(&scan->xs_pgstat_info);
/*
- * Examine the scan keys and eliminate any redundant keys; also
- * discover how many keys must be matched to continue the scan.
+ * Examine the scan keys and eliminate any redundant keys; also discover
+ * how many keys must be matched to continue the scan.
*/
_bt_preprocess_keys(scan);
@@ -556,9 +555,9 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
ScanKey cur;
/*
- * chosen is the so-far-chosen key for the current attribute, if
- * any. We don't cast the decision in stone until we reach keys
- * for the next attribute.
+ * chosen is the so-far-chosen key for the current attribute, if any.
+ * We don't cast the decision in stone until we reach keys for the
+ * next attribute.
*/
curattr = 1;
chosen = NULL;
@@ -595,9 +594,9 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
}
/*
- * Done if that was the last attribute, or if next key
- * is not in sequence (implying no boundary key is available
- * for the next attribute).
+ * Done if that was the last attribute, or if next key is not
+ * in sequence (implying no boundary key is available for the
+ * next attribute).
*/
if (i >= so->numberOfKeys ||
cur->sk_attno != curattr + 1)
@@ -632,17 +631,17 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
}
/*
- * If we found no usable boundary keys, we have to start from one end
- * of the tree. Walk down that edge to the first or last key, and
- * scan from there.
+ * If we found no usable boundary keys, we have to start from one end of
+ * the tree. Walk down that edge to the first or last key, and scan from
+ * there.
*/
if (keysCount == 0)
return _bt_endpoint(scan, dir);
/*
* We want to start the scan somewhere within the index. Set up a
- * 3-way-comparison scankey we can use to search for the boundary
- * point we identified above.
+ * 3-way-comparison scankey we can use to search for the boundary point we
+ * identified above.
*/
Assert(keysCount <= INDEX_MAX_KEYS);
for (i = 0; i < keysCount; i++)
@@ -650,16 +649,15 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
ScanKey cur = startKeys[i];
/*
- * _bt_preprocess_keys disallows it, but it's place to add some
- * code later
+ * _bt_preprocess_keys disallows it, but this is the place to add some
+ * code later
*/
if (cur->sk_flags & SK_ISNULL)
elog(ERROR, "btree doesn't support is(not)null, yet");
/*
- * If scankey operator is of default subtype, we can use the
- * cached comparison procedure; otherwise gotta look it up in the
- * catalogs.
+ * If scankey operator is of default subtype, we can use the cached
+ * comparison procedure; otherwise gotta look it up in the catalogs.
*/
if (cur->sk_subtype == InvalidOid)
{
@@ -692,13 +690,13 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
}
/*
- * Examine the selected initial-positioning strategy to determine
- * exactly where we need to start the scan, and set flag variables to
- * control the code below.
+ * Examine the selected initial-positioning strategy to determine exactly
+ * where we need to start the scan, and set flag variables to control the
+ * code below.
*
- * If nextkey = false, _bt_search and _bt_binsrch will locate the first
- * item >= scan key. If nextkey = true, they will locate the first
- * item > scan key.
+ * If nextkey = false, _bt_search and _bt_binsrch will locate the first item
+ * >= scan key. If nextkey = true, they will locate the first item > scan
+ * key.
*
* If goback = true, we will then step back one item, while if goback =
* false, we will start the scan on the located item.
@@ -710,10 +708,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
case BTLessStrategyNumber:
/*
- * Find first item >= scankey, then back up one to arrive at
- * last item < scankey. (Note: this positioning strategy is
- * only used for a backward scan, so that is always the
- * correct starting position.)
+ * Find first item >= scankey, then back up one to arrive at last
+ * item < scankey. (Note: this positioning strategy is only used
+ * for a backward scan, so that is always the correct starting
+ * position.)
*/
nextkey = false;
goback = true;
@@ -722,10 +720,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
case BTLessEqualStrategyNumber:
/*
- * Find first item > scankey, then back up one to arrive at
- * last item <= scankey. (Note: this positioning strategy is
- * only used for a backward scan, so that is always the
- * correct starting position.)
+ * Find first item > scankey, then back up one to arrive at last
+ * item <= scankey. (Note: this positioning strategy is only used
+ * for a backward scan, so that is always the correct starting
+ * position.)
*/
nextkey = true;
goback = true;
@@ -734,14 +732,14 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
case BTEqualStrategyNumber:
/*
- * If a backward scan was specified, need to start with last
- * equal item not first one.
+ * If a backward scan was specified, need to start with last equal
+ * item not first one.
*/
if (ScanDirectionIsBackward(dir))
{
/*
- * This is the same as the <= strategy. We will check at
- * the end whether the found item is actually =.
+ * This is the same as the <= strategy. We will check at the
+ * end whether the found item is actually =.
*/
nextkey = true;
goback = true;
@@ -749,8 +747,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
else
{
/*
- * This is the same as the >= strategy. We will check at
- * the end whether the found item is actually =.
+ * This is the same as the >= strategy. We will check at the
+ * end whether the found item is actually =.
*/
nextkey = false;
goback = false;
@@ -813,24 +811,24 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
ItemPointerSet(current, blkno, offnum);
/*
- * If nextkey = false, we are positioned at the first item >= scan
- * key, or possibly at the end of a page on which all the existing
- * items are less than the scan key and we know that everything on
- * later pages is greater than or equal to scan key.
+ * If nextkey = false, we are positioned at the first item >= scan key, or
+ * possibly at the end of a page on which all the existing items are less
+ * than the scan key and we know that everything on later pages is greater
+ * than or equal to scan key.
*
* If nextkey = true, we are positioned at the first item > scan key, or
- * possibly at the end of a page on which all the existing items are
- * less than or equal to the scan key and we know that everything on
- * later pages is greater than scan key.
+ * possibly at the end of a page on which all the existing items are less
+ * than or equal to the scan key and we know that everything on later
+ * pages is greater than scan key.
*
- * The actually desired starting point is either this item or the prior
- * one, or in the end-of-page case it's the first item on the next
- * page or the last item on this page. We apply _bt_step if needed to
- * get to the right place.
+ * The actually desired starting point is either this item or the prior one,
+ * or in the end-of-page case it's the first item on the next page or the
+ * last item on this page. We apply _bt_step if needed to get to the
+ * right place.
*
* If _bt_step fails (meaning we fell off the end of the index in one
- * direction or the other), then there are no matches so we just
- * return false.
+ * direction or the other), then there are no matches so we just return
+ * false.
*/
if (goback)
{
@@ -902,8 +900,8 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
BlockNumber blkno;
/*
- * Don't use ItemPointerGetOffsetNumber or you risk to get assertion
- * due to ability of ip_posid to be equal 0.
+ * Don't use ItemPointerGetOffsetNumber here, or you risk an assertion
+ * failure, because ip_posid can be equal to 0.
*/
offnum = current->ip_posid;
@@ -954,9 +952,9 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
/*
* Walk left to the next page with data. This is much more
* complex than the walk-right case because of the possibility
- * that the page to our left splits while we are in flight to
- * it, plus the possibility that the page we were on gets
- * deleted after we leave it. See nbtree/README for details.
+ * that the page to our left splits while we are in flight to it,
+ * plus the possibility that the page we were on gets deleted
+ * after we leave it. See nbtree/README for details.
*/
for (;;)
{
@@ -973,9 +971,9 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
/*
- * Okay, we managed to move left to a non-deleted page.
- * Done if it's not half-dead and not empty. Else loop
- * back and do it all again.
+ * Okay, we managed to move left to a non-deleted page. Done
+ * if it's not half-dead and not empty. Else loop back and do
+ * it all again.
*/
if (!P_IGNORE(opaque))
{
@@ -1043,15 +1041,14 @@ _bt_walk_left(Relation rel, Buffer buf)
/*
* If this isn't the page we want, walk right till we find what we
- * want --- but go no more than four hops (an arbitrary limit). If
- * we don't find the correct page by then, the most likely bet is
- * that the original page got deleted and isn't in the sibling
- * chain at all anymore, not that its left sibling got split more
- * than four times.
+ * want --- but go no more than four hops (an arbitrary limit). If we
+ * don't find the correct page by then, the most likely bet is that
+ * the original page got deleted and isn't in the sibling chain at all
+ * anymore, not that its left sibling got split more than four times.
*
- * Note that it is correct to test P_ISDELETED not P_IGNORE here,
- * because half-dead pages are still in the sibling chain. Caller
- * must reject half-dead pages if wanted.
+ * Note that it is correct to test P_ISDELETED not P_IGNORE here, because
+ * half-dead pages are still in the sibling chain. Caller must reject
+ * half-dead pages if wanted.
*/
tries = 0;
for (;;)
@@ -1077,9 +1074,9 @@ _bt_walk_left(Relation rel, Buffer buf)
{
/*
* It was deleted. Move right to first nondeleted page (there
- * must be one); that is the page that has acquired the
- * deleted one's keyspace, so stepping left from it will take
- * us where we want to be.
+ * must be one); that is the page that has acquired the deleted
+ * one's keyspace, so stepping left from it will take us where we
+ * want to be.
*/
for (;;)
{
@@ -1095,16 +1092,16 @@ _bt_walk_left(Relation rel, Buffer buf)
}
/*
- * Now return to top of loop, resetting obknum to point to
- * this nondeleted page, and try again.
+ * Now return to top of loop, resetting obknum to point to this
+ * nondeleted page, and try again.
*/
}
else
{
/*
- * It wasn't deleted; the explanation had better be that the
- * page to the left got split or deleted. Without this check,
- * we'd go into an infinite loop if there's anything wrong.
+ * It wasn't deleted; the explanation had better be that the page
+ * to the left got split or deleted. Without this check, we'd go
+ * into an infinite loop if there's anything wrong.
*/
if (opaque->btpo_prev == lblkno)
elog(ERROR, "could not find left sibling in \"%s\"",
@@ -1137,8 +1134,8 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
/*
* If we are looking for a leaf page, okay to descend from fast root;
- * otherwise better descend from true root. (There is no point in
- * being smarter about intermediate levels.)
+ * otherwise better descend from true root. (There is no point in being
+ * smarter about intermediate levels.)
*/
if (level == 0)
buf = _bt_getroot(rel, BT_READ);
@@ -1159,8 +1156,8 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
/*
* If we landed on a deleted page, step right to find a live page
* (there must be one). Also, if we want the rightmost page, step
- * right if needed to get to it (this could happen if the page
- * split since we obtained a pointer to it).
+ * right if needed to get to it (this could happen if the page split
+ * since we obtained a pointer to it).
*/
while (P_IGNORE(opaque) ||
(rightmost && !P_RIGHTMOST(opaque)))
@@ -1228,9 +1225,9 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
so = (BTScanOpaque) scan->opaque;
/*
- * Scan down to the leftmost or rightmost leaf page. This is a
- * simplified version of _bt_search(). We don't maintain a stack
- * since we know we won't need it.
+ * Scan down to the leftmost or rightmost leaf page. This is a simplified
+ * version of _bt_search(). We don't maintain a stack since we know we
+ * won't need it.
*/
buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir));
@@ -1261,8 +1258,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
Assert(P_RIGHTMOST(opaque));
start = PageGetMaxOffsetNumber(page);
- if (start < P_FIRSTDATAKEY(opaque)) /* watch out for empty
- * page */
+ if (start < P_FIRSTDATAKEY(opaque)) /* watch out for empty page */
start = P_FIRSTDATAKEY(opaque);
}
else
@@ -1276,8 +1272,8 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
so->btso_curbuf = buf;
/*
- * Left/rightmost page could be empty due to deletions, if so step
- * till we find a nonempty page.
+ * Left/rightmost page could be empty due to deletions, if so step till we
+ * find a nonempty page.
*/
if (start > maxoff)
{
@@ -1291,8 +1287,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
itup = &(btitem->bti_itup);
/*
- * Okay, we are on the first or last tuple. Does it pass all the
- * quals?
+ * Okay, we are on the first or last tuple. Does it pass all the quals?
*/
if (_bt_checkkeys(scan, itup, dir, &continuescan))
{