diff options
Diffstat (limited to 'src/backend/access/nbtree/nbtpage.c')
-rw-r--r-- | src/backend/access/nbtree/nbtpage.c | 902 |
1 files changed, 465 insertions, 437 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 9142c557378..6551af4c17c 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -1,21 +1,21 @@ /*------------------------------------------------------------------------- * * nbtpage.c-- - * BTree-specific page management code for the Postgres btree access - * method. + * BTree-specific page management code for the Postgres btree access + * method. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.9 1997/08/19 21:29:36 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.10 1997/09/07 04:38:52 momjian Exp $ * - * NOTES - * Postgres btree pages look like ordinary relation pages. The opaque - * data at high addresses includes pointers to left and right siblings - * and flag data describing page state. The first page in a btree, page - * zero, is special -- it stores meta-information describing the tree. - * Pages one and higher store the actual tree data. + * NOTES + * Postgres btree pages look like ordinary relation pages. The opaque + * data at high addresses includes pointers to left and right siblings + * and flag data describing page state. The first page in a btree, page + * zero, is special -- it stores meta-information describing the tree. + * Pages one and higher store the actual tree data. * *------------------------------------------------------------------------- */ @@ -31,16 +31,16 @@ #include <storage/lmgr.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static void _bt_setpagelock(Relation rel, BlockNumber blkno, int access); -static void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access); +static void _bt_setpagelock(Relation rel, BlockNumber blkno, int access); +static void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access); #define BTREE_METAPAGE 0 -#define BTREE_MAGIC 0x053162 +#define BTREE_MAGIC 0x053162 #ifdef BTREE_VERSION_1 #define BTREE_VERSION 1 @@ -48,546 +48,574 @@ static void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access); #define BTREE_VERSION 0 #endif -typedef struct BTMetaPageData { - uint32 btm_magic; - uint32 btm_version; - BlockNumber btm_root; +typedef struct BTMetaPageData +{ + uint32 btm_magic; + uint32 btm_version; + BlockNumber btm_root; #ifdef BTREE_VERSION_1 - int32 btm_level; + int32 btm_level; #endif -} BTMetaPageData; +} BTMetaPageData; -#define BTPageGetMeta(p) \ - ((BTMetaPageData *) &((PageHeader) p)->pd_linp[0]) +#define BTPageGetMeta(p) \ + ((BTMetaPageData *) &((PageHeader) p)->pd_linp[0]) -extern bool BuildingBtree; +extern bool BuildingBtree; /* - * We use high-concurrency locking on btrees. There are two cases in - * which we don't do locking. One is when we're building the btree. - * Since the creating transaction has not committed, no one can see - * the index, and there's no reason to share locks. The second case - * is when we're just starting up the database system. We use some - * special-purpose initialization code in the relation cache manager - * (see utils/cache/relcache.c) to allow us to do indexed scans on - * the system catalogs before we'd normally be able to. This happens - * before the lock table is fully initialized, so we can't use it. - * Strictly speaking, this violates 2pl, but we don't do 2pl on the - * system catalogs anyway, so I declare this to be okay. + * We use high-concurrency locking on btrees. There are two cases in + * which we don't do locking. One is when we're building the btree. + * Since the creating transaction has not committed, no one can see + * the index, and there's no reason to share locks. The second case + * is when we're just starting up the database system. We use some + * special-purpose initialization code in the relation cache manager + * (see utils/cache/relcache.c) to allow us to do indexed scans on + * the system catalogs before we'd normally be able to. This happens + * before the lock table is fully initialized, so we can't use it. + * Strictly speaking, this violates 2pl, but we don't do 2pl on the + * system catalogs anyway, so I declare this to be okay. */ -#define USELOCKING (!BuildingBtree && !IsInitProcessingMode()) +#define USELOCKING (!BuildingBtree && !IsInitProcessingMode()) /* - * _bt_metapinit() -- Initialize the metadata page of a btree. + * _bt_metapinit() -- Initialize the metadata page of a btree. */ void _bt_metapinit(Relation rel) { - Buffer buf; - Page pg; - int nblocks; - BTMetaPageData metad; - BTPageOpaque op; - - /* can't be sharing this with anyone, now... */ - if (USELOCKING) - RelationSetLockForWrite(rel); - - if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) { - elog(WARN, "Cannot initialize non-empty btree %s", - RelationGetRelationName(rel)); - } - - buf = ReadBuffer(rel, P_NEW); - pg = BufferGetPage(buf); - _bt_pageinit(pg, BufferGetPageSize(buf)); - - metad.btm_magic = BTREE_MAGIC; - metad.btm_version = BTREE_VERSION; - metad.btm_root = P_NONE; + Buffer buf; + Page pg; + int nblocks; + BTMetaPageData metad; + BTPageOpaque op; + + /* can't be sharing this with anyone, now... */ + if (USELOCKING) + RelationSetLockForWrite(rel); + + if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) + { + elog(WARN, "Cannot initialize non-empty btree %s", + RelationGetRelationName(rel)); + } + + buf = ReadBuffer(rel, P_NEW); + pg = BufferGetPage(buf); + _bt_pageinit(pg, BufferGetPageSize(buf)); + + metad.btm_magic = BTREE_MAGIC; + metad.btm_version = BTREE_VERSION; + metad.btm_root = P_NONE; #ifdef BTREE_VERSION_1 - metad.btm_level = 0; + metad.btm_level = 0; #endif - memmove((char *) BTPageGetMeta(pg), (char *) &metad, sizeof(metad)); - - op = (BTPageOpaque) PageGetSpecialPointer(pg); - op->btpo_flags = BTP_META; - - WriteBuffer(buf); - - /* all done */ - if (USELOCKING) - RelationUnsetLockForWrite(rel); + memmove((char *) BTPageGetMeta(pg), (char *) &metad, sizeof(metad)); + + op = (BTPageOpaque) PageGetSpecialPointer(pg); + op->btpo_flags = BTP_META; + + WriteBuffer(buf); + + /* all done */ + if (USELOCKING) + RelationUnsetLockForWrite(rel); } #ifdef NOT_USED /* - * _bt_checkmeta() -- Verify that the metadata stored in a btree are - * reasonable. + * _bt_checkmeta() -- Verify that the metadata stored in a btree are + * reasonable. */ void _bt_checkmeta(Relation rel) { - Buffer metabuf; - Page metap; - BTMetaPageData *metad; - BTPageOpaque op; - int nblocks; - - /* if the relation is empty, this is init time; don't complain */ - if ((nblocks = RelationGetNumberOfBlocks(rel)) == 0) - return; - - metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); - metap = BufferGetPage(metabuf); - op = (BTPageOpaque) PageGetSpecialPointer(metap); - if (!(op->btpo_flags & BTP_META)) { - elog(WARN, "Invalid metapage for index %s", - RelationGetRelationName(rel)); - } - metad = BTPageGetMeta(metap); - - if (metad->btm_magic != BTREE_MAGIC) { - elog(WARN, "Index %s is not a btree", - RelationGetRelationName(rel)); - } - - if (metad->btm_version != BTREE_VERSION) { - elog(WARN, "Version mismatch on %s: version %d file, version %d code", - RelationGetRelationName(rel), - metad->btm_version, BTREE_VERSION); - } - - _bt_relbuf(rel, metabuf, BT_READ); + Buffer metabuf; + Page metap; + BTMetaPageData *metad; + BTPageOpaque op; + int nblocks; + + /* if the relation is empty, this is init time; don't complain */ + if ((nblocks = RelationGetNumberOfBlocks(rel)) == 0) + return; + + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + metap = BufferGetPage(metabuf); + op = (BTPageOpaque) PageGetSpecialPointer(metap); + if (!(op->btpo_flags & BTP_META)) + { + elog(WARN, "Invalid metapage for index %s", + RelationGetRelationName(rel)); + } + metad = BTPageGetMeta(metap); + + if (metad->btm_magic != BTREE_MAGIC) + { + elog(WARN, "Index %s is not a btree", + RelationGetRelationName(rel)); + } + + if (metad->btm_version != BTREE_VERSION) + { + elog(WARN, "Version mismatch on %s: version %d file, version %d code", + RelationGetRelationName(rel), + metad->btm_version, BTREE_VERSION); + } + + _bt_relbuf(rel, metabuf, BT_READ); } + #endif /* - * _bt_getroot() -- Get the root page of the btree. + * _bt_getroot() -- Get the root page of the btree. * - * Since the root page can move around the btree file, we have to read - * its location from the metadata page, and then read the root page - * itself. If no root page exists yet, we have to create one. The - * standard class of race conditions exists here; I think I covered - * them all in the Hopi Indian rain dance of lock requests below. + * Since the root page can move around the btree file, we have to read + * its location from the metadata page, and then read the root page + * itself. If no root page exists yet, we have to create one. The + * standard class of race conditions exists here; I think I covered + * them all in the Hopi Indian rain dance of lock requests below. * - * We pass in the access type (BT_READ or BT_WRITE), and return the - * root page's buffer with the appropriate lock type set. Reference - * count on the root page gets bumped by ReadBuffer. The metadata - * page is unlocked and unreferenced by this process when this routine - * returns. + * We pass in the access type (BT_READ or BT_WRITE), and return the + * root page's buffer with the appropriate lock type set. Reference + * count on the root page gets bumped by ReadBuffer. The metadata + * page is unlocked and unreferenced by this process when this routine + * returns. */ Buffer _bt_getroot(Relation rel, int access) { - Buffer metabuf; - Page metapg; - BTPageOpaque metaopaque; - Buffer rootbuf; - Page rootpg; - BTPageOpaque rootopaque; - BlockNumber rootblkno; - BTMetaPageData *metad; - - metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); - metapg = BufferGetPage(metabuf); - metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); - Assert(metaopaque->btpo_flags & BTP_META); - metad = BTPageGetMeta(metapg); - - if (metad->btm_magic != BTREE_MAGIC) { - elog(WARN, "Index %s is not a btree", - RelationGetRelationName(rel)); - } - - if (metad->btm_version != BTREE_VERSION) { - elog(WARN, "Version mismatch on %s: version %d file, version %d code", - RelationGetRelationName(rel), - metad->btm_version, BTREE_VERSION); - } - - /* if no root page initialized yet, do it */ - if (metad->btm_root == P_NONE) { - - /* turn our read lock in for a write lock */ - _bt_relbuf(rel, metabuf, BT_READ); - metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + Buffer metabuf; + Page metapg; + BTPageOpaque metaopaque; + Buffer rootbuf; + Page rootpg; + BTPageOpaque rootopaque; + BlockNumber rootblkno; + BTMetaPageData *metad; + + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); metapg = BufferGetPage(metabuf); metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); Assert(metaopaque->btpo_flags & BTP_META); metad = BTPageGetMeta(metapg); - - /* - * Race condition: if someone else initialized the metadata between - * the time we released the read lock and acquired the write lock, - * above, we want to avoid doing it again. - */ - - if (metad->btm_root == P_NONE) { - - /* - * Get, initialize, write, and leave a lock of the appropriate - * type on the new root page. Since this is the first page in - * the tree, it's a leaf. - */ - - rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); - rootblkno = BufferGetBlockNumber(rootbuf); - rootpg = BufferGetPage(rootbuf); - metad->btm_root = rootblkno; + + if (metad->btm_magic != BTREE_MAGIC) + { + elog(WARN, "Index %s is not a btree", + RelationGetRelationName(rel)); + } + + if (metad->btm_version != BTREE_VERSION) + { + elog(WARN, "Version mismatch on %s: version %d file, version %d code", + RelationGetRelationName(rel), + metad->btm_version, BTREE_VERSION); + } + + /* if no root page initialized yet, do it */ + if (metad->btm_root == P_NONE) + { + + /* turn our read lock in for a write lock */ + _bt_relbuf(rel, metabuf, BT_READ); + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + metapg = BufferGetPage(metabuf); + metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); + Assert(metaopaque->btpo_flags & BTP_META); + metad = BTPageGetMeta(metapg); + + /* + * Race condition: if someone else initialized the metadata + * between the time we released the read lock and acquired the + * write lock, above, we want to avoid doing it again. + */ + + if (metad->btm_root == P_NONE) + { + + /* + * Get, initialize, write, and leave a lock of the appropriate + * type on the new root page. Since this is the first page in + * the tree, it's a leaf. + */ + + rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); + rootblkno = BufferGetBlockNumber(rootbuf); + rootpg = BufferGetPage(rootbuf); + metad->btm_root = rootblkno; #ifdef BTREE_VERSION_1 - metad->btm_level = 1; + metad->btm_level = 1; #endif - _bt_pageinit(rootpg, BufferGetPageSize(rootbuf)); - rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); - rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT); - _bt_wrtnorelbuf(rel, rootbuf); - - /* swap write lock for read lock, if appropriate */ - if (access != BT_WRITE) { - _bt_setpagelock(rel, rootblkno, BT_READ); - _bt_unsetpagelock(rel, rootblkno, BT_WRITE); - } - - /* okay, metadata is correct */ - _bt_wrtbuf(rel, metabuf); - } else { - - /* - * Metadata initialized by someone else. In order to guarantee - * no deadlocks, we have to release the metadata page and start - * all over again. - */ - - _bt_relbuf(rel, metabuf, BT_WRITE); - return (_bt_getroot(rel, access)); + _bt_pageinit(rootpg, BufferGetPageSize(rootbuf)); + rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); + rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT); + _bt_wrtnorelbuf(rel, rootbuf); + + /* swap write lock for read lock, if appropriate */ + if (access != BT_WRITE) + { + _bt_setpagelock(rel, rootblkno, BT_READ); + _bt_unsetpagelock(rel, rootblkno, BT_WRITE); + } + + /* okay, metadata is correct */ + _bt_wrtbuf(rel, metabuf); + } + else + { + + /* + * Metadata initialized by someone else. In order to + * guarantee no deadlocks, we have to release the metadata + * page and start all over again. + */ + + _bt_relbuf(rel, metabuf, BT_WRITE); + return (_bt_getroot(rel, access)); + } } - } else { - rootbuf = _bt_getbuf(rel, metad->btm_root, access); - - /* done with the meta page */ - _bt_relbuf(rel, metabuf, BT_READ); - } - - /* - * Race condition: If the root page split between the time we looked - * at the metadata page and got the root buffer, then we got the wrong - * buffer. - */ - - rootpg = BufferGetPage(rootbuf); - rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); - if (!(rootopaque->btpo_flags & BTP_ROOT)) { - - /* it happened, try again */ - _bt_relbuf(rel, rootbuf, access); - return (_bt_getroot(rel, access)); - } - - /* - * By here, we have a correct lock on the root block, its reference - * count is correct, and we have no lock set on the metadata page. - * Return the root block. - */ - - return (rootbuf); + else + { + rootbuf = _bt_getbuf(rel, metad->btm_root, access); + + /* done with the meta page */ + _bt_relbuf(rel, metabuf, BT_READ); + } + + /* + * Race condition: If the root page split between the time we looked + * at the metadata page and got the root buffer, then we got the wrong + * buffer. + */ + + rootpg = BufferGetPage(rootbuf); + rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); + if (!(rootopaque->btpo_flags & BTP_ROOT)) + { + + /* it happened, try again */ + _bt_relbuf(rel, rootbuf, access); + return (_bt_getroot(rel, access)); + } + + /* + * By here, we have a correct lock on the root block, its reference + * count is correct, and we have no lock set on the metadata page. + * Return the root block. + */ + + return (rootbuf); } /* - * _bt_getbuf() -- Get a buffer by block number for read or write. + * _bt_getbuf() -- Get a buffer by block number for read or write. * - * When this routine returns, the appropriate lock is set on the - * requested buffer its reference count is correct. + * When this routine returns, the appropriate lock is set on the + * requested buffer its reference count is correct. */ Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access) { - Buffer buf; - Page page; - - /* - * If we want a new block, we can't set a lock of the appropriate type - * until we've instantiated the buffer. - */ - - if (blkno != P_NEW) { - if (access == BT_WRITE) - _bt_setpagelock(rel, blkno, BT_WRITE); - else - _bt_setpagelock(rel, blkno, BT_READ); - - buf = ReadBuffer(rel, blkno); - } else { - buf = ReadBuffer(rel, blkno); - blkno = BufferGetBlockNumber(buf); - page = BufferGetPage(buf); - _bt_pageinit(page, BufferGetPageSize(buf)); - - if (access == BT_WRITE) - _bt_setpagelock(rel, blkno, BT_WRITE); + Buffer buf; + Page page; + + /* + * If we want a new block, we can't set a lock of the appropriate type + * until we've instantiated the buffer. + */ + + if (blkno != P_NEW) + { + if (access == BT_WRITE) + _bt_setpagelock(rel, blkno, BT_WRITE); + else + _bt_setpagelock(rel, blkno, BT_READ); + + buf = ReadBuffer(rel, blkno); + } else - _bt_setpagelock(rel, blkno, BT_READ); - } - - /* ref count and lock type are correct */ - return (buf); + { + buf = ReadBuffer(rel, blkno); + blkno = BufferGetBlockNumber(buf); + page = BufferGetPage(buf); + _bt_pageinit(page, BufferGetPageSize(buf)); + + if (access == BT_WRITE) + _bt_setpagelock(rel, blkno, BT_WRITE); + else + _bt_setpagelock(rel, blkno, BT_READ); + } + + /* ref count and lock type are correct */ + return (buf); } /* - * _bt_relbuf() -- release a locked buffer. + * _bt_relbuf() -- release a locked buffer. */ void _bt_relbuf(Relation rel, Buffer buf, int access) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - - /* access had better be one of read or write */ - if (access == BT_WRITE) - _bt_unsetpagelock(rel, blkno, BT_WRITE); - else - _bt_unsetpagelock(rel, blkno, BT_READ); - - ReleaseBuffer(buf); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + + /* access had better be one of read or write */ + if (access == BT_WRITE) + _bt_unsetpagelock(rel, blkno, BT_WRITE); + else + _bt_unsetpagelock(rel, blkno, BT_READ); + + ReleaseBuffer(buf); } /* - * _bt_wrtbuf() -- write a btree page to disk. + * _bt_wrtbuf() -- write a btree page to disk. * - * This routine releases the lock held on the buffer and our reference - * to it. It is an error to call _bt_wrtbuf() without a write lock - * or a reference to the buffer. + * This routine releases the lock held on the buffer and our reference + * to it. It is an error to call _bt_wrtbuf() without a write lock + * or a reference to the buffer. */ void _bt_wrtbuf(Relation rel, Buffer buf) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - WriteBuffer(buf); - _bt_unsetpagelock(rel, blkno, BT_WRITE); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + WriteBuffer(buf); + _bt_unsetpagelock(rel, blkno, BT_WRITE); } /* - * _bt_wrtnorelbuf() -- write a btree page to disk, but do not release - * our reference or lock. + * _bt_wrtnorelbuf() -- write a btree page to disk, but do not release + * our reference or lock. * - * It is an error to call _bt_wrtnorelbuf() without a write lock - * or a reference to the buffer. + * It is an error to call _bt_wrtnorelbuf() without a write lock + * or a reference to the buffer. */ void _bt_wrtnorelbuf(Relation rel, Buffer buf) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - WriteNoReleaseBuffer(buf); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + WriteNoReleaseBuffer(buf); } /* - * _bt_pageinit() -- Initialize a new page. + * _bt_pageinit() -- Initialize a new page. */ void _bt_pageinit(Page page, Size size) { - /* - * Cargo-cult programming -- don't really need this to be zero, but - * creating new pages is an infrequent occurrence and it makes me feel - * good when I know they're empty. - */ - - memset(page, 0, size); - - PageInit(page, size, sizeof(BTPageOpaqueData)); + + /* + * Cargo-cult programming -- don't really need this to be zero, but + * creating new pages is an infrequent occurrence and it makes me feel + * good when I know they're empty. + */ + + memset(page, 0, size); + + PageInit(page, size, sizeof(BTPageOpaqueData)); } /* - * _bt_metaproot() -- Change the root page of the btree. + * _bt_metaproot() -- Change the root page of the btree. * - * Lehman and Yao require that the root page move around in order to - * guarantee deadlock-free short-term, fine-granularity locking. When - * we split the root page, we record the new parent in the metadata page - * for the relation. This routine does the work. + * Lehman and Yao require that the root page move around in order to + * guarantee deadlock-free short-term, fine-granularity locking. When + * we split the root page, we record the new parent in the metadata page + * for the relation. This routine does the work. * - * No direct preconditions, but if you don't have the a write lock on - * at least the old root page when you call this, you're making a big - * mistake. On exit, metapage data is correct and we no longer have - * a reference to or lock on the metapage. + * No direct preconditions, but if you don't have the a write lock on + * at least the old root page when you call this, you're making a big + * mistake. On exit, metapage data is correct and we no longer have + * a reference to or lock on the metapage. */ void _bt_metaproot(Relation rel, BlockNumber rootbknum, int level) { - Buffer metabuf; - Page metap; - BTPageOpaque metaopaque; - BTMetaPageData *metad; - - metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); - metap = BufferGetPage(metabuf); - metaopaque = (BTPageOpaque) PageGetSpecialPointer(metap); - Assert(metaopaque->btpo_flags & BTP_META); - metad = BTPageGetMeta(metap); - metad->btm_root = rootbknum; + Buffer metabuf; + Page metap; + BTPageOpaque metaopaque; + BTMetaPageData *metad; + + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + metap = BufferGetPage(metabuf); + metaopaque = (BTPageOpaque) PageGetSpecialPointer(metap); + Assert(metaopaque->btpo_flags & BTP_META); + metad = BTPageGetMeta(metap); + metad->btm_root = rootbknum; #ifdef BTREE_VERSION_1 - if ( level == 0 ) /* called from _do_insert */ - metad->btm_level += 1; - else - metad->btm_level = level; /* called from btsort */ + if (level == 0) /* called from _do_insert */ + metad->btm_level += 1; + else + metad->btm_level = level; /* called from btsort */ #endif - _bt_wrtbuf(rel, metabuf); + _bt_wrtbuf(rel, metabuf); } /* - * _bt_getstackbuf() -- Walk back up the tree one step, and find the item - * we last looked at in the parent. + * _bt_getstackbuf() -- Walk back up the tree one step, and find the item + * we last looked at in the parent. * - * This is possible because we save a bit image of the last item - * we looked at in the parent, and the update algorithm guarantees - * that if items above us in the tree move, they only move right. + * This is possible because we save a bit image of the last item + * we looked at in the parent, and the update algorithm guarantees + * that if items above us in the tree move, they only move right. * - * Also, re-set bts_blkno & bts_offset if changed and - * bts_btitem (it may be changed - see _bt_insertonpg). + * Also, re-set bts_blkno & bts_offset if changed and + * bts_btitem (it may be changed - see _bt_insertonpg). */ Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access) { - Buffer buf; - BlockNumber blkno; - OffsetNumber start, offnum, maxoff; - OffsetNumber i; - Page page; - ItemId itemid; - BTItem item; - BTPageOpaque opaque; - BTItem item_save; - int item_nbytes; - - blkno = stack->bts_blkno; - buf = _bt_getbuf(rel, blkno, access); - page = BufferGetPage(buf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - maxoff = PageGetMaxOffsetNumber(page); - - if (maxoff >= stack->bts_offset) { - itemid = PageGetItemId(page, stack->bts_offset); - item = (BTItem) PageGetItem(page, itemid); - - /* if the item is where we left it, we're done */ - if ( BTItemSame (item, stack->bts_btitem) ) - { - pfree(stack->bts_btitem); - item_nbytes = ItemIdGetLength(itemid); - item_save = (BTItem) palloc(item_nbytes); - memmove((char *) item_save, (char *) item, item_nbytes); - stack->bts_btitem = item_save; - return (buf); - } - - /* if the item has just moved right on this page, we're done */ - for (i = OffsetNumberNext(stack->bts_offset); - i <= maxoff; - i = OffsetNumberNext(i)) { - itemid = PageGetItemId(page, i); - item = (BTItem) PageGetItem(page, itemid); - - /* if the item is where we left it, we're done */ - if ( BTItemSame (item, stack->bts_btitem) ) - { - stack->bts_offset = i; - pfree(stack->bts_btitem); - item_nbytes = ItemIdGetLength(itemid); - item_save = (BTItem) palloc(item_nbytes); - memmove((char *) item_save, (char *) item, item_nbytes); - stack->bts_btitem = item_save; - return (buf); - } - } - } - - /* by here, the item we're looking for moved right at least one page */ - for (;;) { - blkno = opaque->btpo_next; - if (P_RIGHTMOST(opaque)) - elog(FATAL, "my bits moved right off the end of the world!"); - - _bt_relbuf(rel, buf, access); + Buffer buf; + BlockNumber blkno; + OffsetNumber start, + offnum, + maxoff; + OffsetNumber i; + Page page; + ItemId itemid; + BTItem item; + BTPageOpaque opaque; + BTItem item_save; + int item_nbytes; + + blkno = stack->bts_blkno; buf = _bt_getbuf(rel, blkno, access); page = BufferGetPage(buf); - maxoff = PageGetMaxOffsetNumber(page); opaque = (BTPageOpaque) PageGetSpecialPointer(page); - - /* if we have a right sibling, step over the high key */ - start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - - /* see if it's on this page */ - for (offnum = start; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) { - itemid = PageGetItemId(page, offnum); - item = (BTItem) PageGetItem(page, itemid); - if ( BTItemSame (item, stack->bts_btitem) ) - { - stack->bts_offset = offnum; - stack->bts_blkno = blkno; - pfree(stack->bts_btitem); - item_nbytes = ItemIdGetLength(itemid); - item_save = (BTItem) palloc(item_nbytes); - memmove((char *) item_save, (char *) item, item_nbytes); - stack->bts_btitem = item_save; - return (buf); - } + maxoff = PageGetMaxOffsetNumber(page); + + if (maxoff >= stack->bts_offset) + { + itemid = PageGetItemId(page, stack->bts_offset); + item = (BTItem) PageGetItem(page, itemid); + + /* if the item is where we left it, we're done */ + if (BTItemSame(item, stack->bts_btitem)) + { + pfree(stack->bts_btitem); + item_nbytes = ItemIdGetLength(itemid); + item_save = (BTItem) palloc(item_nbytes); + memmove((char *) item_save, (char *) item, item_nbytes); + stack->bts_btitem = item_save; + return (buf); + } + + /* if the item has just moved right on this page, we're done */ + for (i = OffsetNumberNext(stack->bts_offset); + i <= maxoff; + i = OffsetNumberNext(i)) + { + itemid = PageGetItemId(page, i); + item = (BTItem) PageGetItem(page, itemid); + + /* if the item is where we left it, we're done */ + if (BTItemSame(item, stack->bts_btitem)) + { + stack->bts_offset = i; + pfree(stack->bts_btitem); + item_nbytes = ItemIdGetLength(itemid); + item_save = (BTItem) palloc(item_nbytes); + memmove((char *) item_save, (char *) item, item_nbytes); + stack->bts_btitem = item_save; + return (buf); + } + } + } + + /* by here, the item we're looking for moved right at least one page */ + for (;;) + { + blkno = opaque->btpo_next; + if (P_RIGHTMOST(opaque)) + elog(FATAL, "my bits moved right off the end of the world!"); + + _bt_relbuf(rel, buf, access); + buf = _bt_getbuf(rel, blkno, access); + page = BufferGetPage(buf); + maxoff = PageGetMaxOffsetNumber(page); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* if we have a right sibling, step over the high key */ + start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + + /* see if it's on this page */ + for (offnum = start; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + item = (BTItem) PageGetItem(page, itemid); + if (BTItemSame(item, stack->bts_btitem)) + { + stack->bts_offset = offnum; + stack->bts_blkno = blkno; + pfree(stack->bts_btitem); + item_nbytes = ItemIdGetLength(itemid); + item_save = (BTItem) palloc(item_nbytes); + memmove((char *) item_save, (char *) item, item_nbytes); + stack->bts_btitem = item_save; + return (buf); + } + } } - } } static void _bt_setpagelock(Relation rel, BlockNumber blkno, int access) { - ItemPointerData iptr; - - if (USELOCKING) { - ItemPointerSet(&iptr, blkno, P_HIKEY); - - if (access == BT_WRITE) - RelationSetSingleWLockPage(rel, &iptr); - else - RelationSetSingleRLockPage(rel, &iptr); - } + ItemPointerData iptr; + + if (USELOCKING) + { + ItemPointerSet(&iptr, blkno, P_HIKEY); + + if (access == BT_WRITE) + RelationSetSingleWLockPage(rel, &iptr); + else + RelationSetSingleRLockPage(rel, &iptr); + } } static void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access) { - ItemPointerData iptr; - - if (USELOCKING) { - ItemPointerSet(&iptr, blkno, P_HIKEY); - - if (access == BT_WRITE) - RelationUnsetSingleWLockPage(rel, &iptr); - else - RelationUnsetSingleRLockPage(rel, &iptr); - } + ItemPointerData iptr; + + if (USELOCKING) + { + ItemPointerSet(&iptr, blkno, P_HIKEY); + + if (access == BT_WRITE) + RelationUnsetSingleWLockPage(rel, &iptr); + else + RelationUnsetSingleRLockPage(rel, &iptr); + } } void _bt_pagedel(Relation rel, ItemPointer tid) { - Buffer buf; - Page page; - BlockNumber blkno; - OffsetNumber offno; - - blkno = ItemPointerGetBlockNumber(tid); - offno = ItemPointerGetOffsetNumber(tid); - - buf = _bt_getbuf(rel, blkno, BT_WRITE); - page = BufferGetPage(buf); - - PageIndexTupleDelete(page, offno); - - /* write the buffer and release the lock */ - _bt_wrtbuf(rel, buf); + Buffer buf; + Page page; + BlockNumber blkno; + OffsetNumber offno; + + blkno = ItemPointerGetBlockNumber(tid); + offno = ItemPointerGetOffsetNumber(tid); + + buf = _bt_getbuf(rel, blkno, BT_WRITE); + page = BufferGetPage(buf); + + PageIndexTupleDelete(page, offno); + + /* write the buffer and release the lock */ + _bt_wrtbuf(rel, buf); } |