diff options
Diffstat (limited to 'src/include/access/tableam.h')
-rw-r--r-- | src/include/access/tableam.h | 359 |
1 files changed, 359 insertions, 0 deletions
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 50b8ab93539..c7a26d82274 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -27,6 +27,85 @@ extern char *default_table_access_method; extern bool synchronize_seqscans; +struct BulkInsertStateData; + + +/* + * Result codes for table_update, table_delete and table_lock_tuple, and for + * visibility routines inside table AMs. + */ +typedef enum TM_Result +{ + /* + * Signals that the action succeeded (i.e. update/delete performed, lock + * was acquired) + */ + TM_Ok, + + /* The affected tuple wasn't visible to the relevant snapshot */ + TM_Invisible, + + /* The affected tuple was already modified by the calling backend */ + TM_SelfModified, + + /* + * The affected tuple was updated by another transaction. This includes + * the case where the tuple was moved to another partition. + */ + TM_Updated, + + /* The affected tuple was deleted by another transaction */ + TM_Deleted, + + /* + * The affected tuple is currently being modified by another session. This + * will only be returned if (update/delete/lock)_tuple are instructed not + * to wait. + */ + TM_BeingModified, + + /* Lock couldn't be acquired, action skipped. Only used by table_lock_tuple */ + TM_WouldBlock +} TM_Result; + + +/* + * When table_update, table_delete, or table_lock_tuple fail because the target + * tuple is already outdated, they fill in this struct to provide information + * to the caller about what happened. + * ctid is the target's ctid link: it is the same as the target's TID if the + * target was deleted, or the location of the replacement tuple if the target + * was updated. + * xmax is the outdating transaction's XID. If the caller wants to visit the + * replacement tuple, it must check that this matches before believing the + * replacement is really a match. 
+ * cmax is the outdating command's CID, but only when the failure code is + * TM_SelfModified (i.e., something in the current transaction outdated the + * tuple); otherwise cmax is zero. (We make this restriction because + * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other + * transactions.) + */ +typedef struct TM_FailureData +{ + ItemPointerData ctid; + TransactionId xmax; + CommandId cmax; + bool traversed; /* NOTE(review): not described in the comment above */ +} TM_FailureData; + +/* "options" flag bits for table_insert */ +#define TABLE_INSERT_SKIP_WAL 0x0001 +#define TABLE_INSERT_SKIP_FSM 0x0002 +#define TABLE_INSERT_FROZEN 0x0004 +#define TABLE_INSERT_NO_LOGICAL 0x0008 + +/* flag bits for table_lock_tuple */ +/* Follow tuples whose update is in progress if lock modes don't conflict */ +#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0) +/* Follow update chain and lock latest version of tuple */ +#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1) + + /* * API struct for a table AM. Note this must be allocated in a * server-lifetime manner, typically as a static const struct, which then gets @@ -200,6 +279,62 @@ typedef struct TableAmRoutine TupleTableSlot *slot, Snapshot snapshot); + /* ------------------------------------------------------------------------ + * Manipulations of physical tuples. 
+ * ------------------------------------------------------------------------ + */ + + /* see table_insert() for reference about parameters */ + void (*tuple_insert) (Relation rel, TupleTableSlot *slot, CommandId cid, + int options, struct BulkInsertStateData *bistate); + + /* see table_insert_speculative() for reference about parameters */ + void (*tuple_insert_speculative) (Relation rel, + TupleTableSlot *slot, + CommandId cid, + int options, + struct BulkInsertStateData *bistate, + uint32 specToken); + + /* see table_complete_speculative() for reference about parameters */ + void (*tuple_complete_speculative) (Relation rel, + TupleTableSlot *slot, + uint32 specToken, + bool succeeded); + + /* see table_delete() for reference about parameters */ + TM_Result (*tuple_delete) (Relation rel, + ItemPointer tid, + CommandId cid, + Snapshot snapshot, + Snapshot crosscheck, + bool wait, + TM_FailureData *tmfd, + bool changingPart); + + /* see table_update() for reference about parameters */ + TM_Result (*tuple_update) (Relation rel, + ItemPointer otid, + TupleTableSlot *slot, + CommandId cid, + Snapshot snapshot, + Snapshot crosscheck, + bool wait, + TM_FailureData *tmfd, + LockTupleMode *lockmode, + bool *update_indexes); + + /* see table_lock_tuple() for reference about parameters */ + TM_Result (*tuple_lock) (Relation rel, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + CommandId cid, + LockTupleMode mode, + LockWaitPolicy wait_policy, + uint8 flags, + TM_FailureData *tmfd); + } TableAmRoutine; @@ -488,6 +623,230 @@ table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snap /* ---------------------------------------------------------------------------- + * Functions for manipulations of physical tuples. + * ---------------------------------------------------------------------------- + */ + +/* + * Insert a tuple from a slot into the relation, via the relation's table AM. + * + * The options bitmask allows the caller to specify options that change the + * behaviour of the AM. 
Several options might be ignored by AMs not supporting + * them. + * + * If the TABLE_INSERT_SKIP_WAL option is specified, the new tuple will not + * necessarily be logged to WAL, even for a non-temp relation. It is the AM's + * choice whether this optimization is supported. + * + * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse + * free space in the relation. This can save some cycles when we know the + * relation is new and doesn't contain useful amounts of free space. It's + * commonly passed directly to RelationGetBufferForTuple, see there for more info. + * + * TABLE_INSERT_FROZEN should only be specified for inserts into + * relfilenodes created during the current subtransaction and when + * there are no prior snapshots or pre-existing portals open. + * This causes rows to be frozen, which is an MVCC violation and + * requires explicit options chosen by the user. + * + * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding + * information for the tuple. This should solely be used during table rewrites + * where RelationIsLogicallyLogged(relation) is not yet accurate for the new + * relation. + * + * Note that most of these options will be applied when inserting into the + * heap's TOAST table, too, if the tuple requires any out-of-line data. + * + * + * The BulkInsertState object (if any; bistate can be NULL for default + * behavior) is also just passed through to RelationGetBufferForTuple. + * + * On return the slot's tts_tid and tts_tableOid are updated to reflect the + * insertion. But note that any toasting of fields within the slot is NOT + * reflected in the slot's contents. + */ +static inline void +table_insert(Relation rel, TupleTableSlot *slot, CommandId cid, + int options, struct BulkInsertStateData *bistate) +{ + rel->rd_tableam->tuple_insert(rel, slot, cid, options, + bistate); +} + +/* + * Perform a "speculative insertion". These can be backed out afterwards + * without aborting the whole transaction. 
Other sessions can wait for the + * speculative insertion to be confirmed, turning it into a regular tuple, or + * aborted, as if it never existed. Speculatively inserted tuples behave as + * "value locks" of short duration, used to implement INSERT .. ON CONFLICT. + * + * A transaction having performed a speculative insertion has to either abort, + * or finish the speculative insertion with + * table_complete_speculative(succeeded = ...). + */ +static inline void +table_insert_speculative(Relation rel, TupleTableSlot *slot, CommandId cid, + int options, struct BulkInsertStateData *bistate, uint32 specToken) +{ + rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options, + bistate, specToken); +} + +/* + * Complete "speculative insertion" started in the same transaction. If + * succeeded is true, the tuple is fully inserted, if false, it's removed. + */ +static inline void +table_complete_speculative(Relation rel, TupleTableSlot *slot, uint32 specToken, + bool succeeded) +{ + rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken, + succeeded); +} + +/* + * Delete a tuple. + * + * NB: do not call this directly unless prepared to deal with + * concurrent-update conditions. Use simple_table_delete instead. + * + * Input parameters: + * rel - table to be modified (caller must hold suitable lock) + * tid - TID of tuple to be deleted + * cid - delete command ID (used for visibility test, and stored into + * cmax if successful) + * crosscheck - if not InvalidSnapshot, also check tuple against this + * wait - true if should wait for any conflicting update to commit/abort + * changingPart - true iff the tuple is being moved to another partition + * table due to an update of the partition key. Otherwise, false. + * Output parameters: + * tmfd - filled in failure cases (see below) + * + * Normal, successful return value is TM_Ok, which + * actually means we did delete it. 
Failure return codes are + * TM_SelfModified, TM_Updated, or TM_BeingModified + * (the last only possible if wait == false). + * + * In the failure cases, the routine fills *tmfd with the tuple's t_ctid, + * t_xmax, and, if possible, t_cmax. See comments for + * struct TM_FailureData for additional info. + */ +static inline TM_Result +table_delete(Relation rel, ItemPointer tid, CommandId cid, + Snapshot snapshot, Snapshot crosscheck, bool wait, + TM_FailureData *tmfd, bool changingPart) +{ + return rel->rd_tableam->tuple_delete(rel, tid, cid, + snapshot, crosscheck, + wait, tmfd, changingPart); +} + +/* + * Update a tuple. + * + * NB: do not call this directly unless you are prepared to deal with + * concurrent-update conditions. Use simple_table_update instead. + * + * Input parameters: + * rel - table to be modified (caller must hold suitable lock) + * otid - TID of old tuple to be replaced + * slot - newly constructed tuple data to store (presumably "newtup" below) + * cid - update command ID (used for visibility test, and stored into + * cmax/cmin if successful) + * crosscheck - if not InvalidSnapshot, also check old tuple against this + * wait - true if should wait for any conflicting update to commit/abort + * Output parameters: + * tmfd - filled in failure cases (see below) + * lockmode - filled with lock mode acquired on tuple + * update_indexes - in success cases this is set to true if new index entries + * are required for this tuple + * + * Normal, successful return value is TM_Ok, which + * actually means we *did* update it. Failure return codes are + * TM_SelfModified, TM_Updated, or TM_BeingModified + * (the last only possible if wait == false). + * + * On success, the header fields of *newtup are updated to match the new + * stored tuple; in particular, newtup->t_self is set to the TID where the + * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT + * update was done. 
However, any TOAST changes in the new tuple's + * data are not reflected into *newtup. + * + * In the failure cases, the routine fills *tmfd with the tuple's t_ctid, + * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData + * for additional info. + */ +static inline TM_Result +table_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, + CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, + TM_FailureData *tmfd, LockTupleMode *lockmode, + bool *update_indexes) +{ + return rel->rd_tableam->tuple_update(rel, otid, slot, + cid, snapshot, crosscheck, + wait, tmfd, + lockmode, update_indexes); +} + +/* + * Lock a tuple in the specified mode. + * + * Input parameters: + * rel: relation containing tuple (caller must hold suitable lock) + * tid: TID of tuple to lock + * snapshot: snapshot to use for visibility determinations + * cid: current command ID (used for visibility test, and stored into + * tuple's cmax if lock is successful) + * mode: lock mode desired + * wait_policy: what to do if tuple lock is not available + * flags: + * If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to + * also lock descendant tuples if lock modes don't conflict. + * If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock the + * latest version. + * + * Output parameters: + * *slot: contains the target tuple + * *tmfd: filled in failure cases (see below) + * + * Function result may be: + * TM_Ok: lock was successfully acquired + * TM_Invisible: lock failed because tuple was never visible to us + * TM_SelfModified: lock failed because tuple updated by self + * TM_Updated: lock failed because tuple updated by other xact + * TM_Deleted: lock failed because tuple deleted by other xact + * TM_WouldBlock: lock couldn't be acquired and wait_policy is skip + * + * In the failure cases other than TM_Invisible, the routine fills *tmfd with + * the tuple's t_ctid, t_xmax, and, if possible, t_cmax. 
See comments for + * struct TM_FailureData for additional info. + */ +static inline TM_Result +table_lock_tuple(Relation rel, ItemPointer tid, Snapshot snapshot, + TupleTableSlot *slot, CommandId cid, LockTupleMode mode, + LockWaitPolicy wait_policy, uint8 flags, + TM_FailureData *tmfd) +{ + return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot, + cid, mode, wait_policy, + flags, tmfd); +} + + +/* ---------------------------------------------------------------------------- + * Functions to make modifications a bit simpler. + * ---------------------------------------------------------------------------- + */ + +extern void simple_table_insert(Relation rel, TupleTableSlot *slot); +extern void simple_table_delete(Relation rel, ItemPointer tid, + Snapshot snapshot); +extern void simple_table_update(Relation rel, ItemPointer otid, + TupleTableSlot *slot, Snapshot snapshot, + bool *update_indexes); + + +/* ---------------------------------------------------------------------------- * Helper functions to implement parallel scans for block oriented AMs. * ---------------------------------------------------------------------------- */ |