diff options
Diffstat (limited to 'src/backend/utils/activity/pgstat_relation.c')
-rw-r--r-- | src/backend/utils/activity/pgstat_relation.c | 935 |
1 files changed, 935 insertions, 0 deletions
diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c new file mode 100644 index 00000000000..f5c03b54b48 --- /dev/null +++ b/src/backend/utils/activity/pgstat_relation.c @@ -0,0 +1,935 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_relation.c + * Implementation of relation statistics. + * + * This file contains the implementation of relation statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_relation.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/twophase_rmgr.h" +#include "access/xact.h" +#include "postmaster/autovacuum.h" +#include "utils/memutils.h" +#include "utils/pgstat_internal.h" +#include "utils/rel.h" +#include "utils/timestamp.h" + + +/* + * Structures in which backends store per-table info that's waiting to be + * sent to the collector. + * + * NOTE: once allocated, TabStatusArray structures are never moved or deleted + * for the life of the backend. Also, we zero out the t_id fields of the + * contained PgStat_TableStatus structs whenever they are not actively in use. + * This allows relcache pgstat_info pointers to be treated as long-lived data, + * avoiding repeated searches in pgstat_relation_init() when a relation is + * repeatedly opened during a transaction. 
+ */ +#define TABSTAT_QUANTUM 100 /* we alloc this many at a time */ + + +typedef struct TabStatusArray +{ + struct TabStatusArray *tsa_next; /* link to next array, if any */ + int tsa_used; /* # entries currently used */ + PgStat_TableStatus tsa_entries[TABSTAT_QUANTUM]; /* per-table data */ +} TabStatusArray; + +static TabStatusArray *pgStatTabList = NULL; + +/* + * pgStatTabHash entry: map from relation OID to PgStat_TableStatus pointer + */ +typedef struct TabStatHashEntry +{ + Oid t_id; + PgStat_TableStatus *tsa_entry; +} TabStatHashEntry; + +/* Record that's written to 2PC state file when pgstat state is persisted */ +typedef struct TwoPhasePgStatRecord +{ + PgStat_Counter tuples_inserted; /* tuples inserted in xact */ + PgStat_Counter tuples_updated; /* tuples updated in xact */ + PgStat_Counter tuples_deleted; /* tuples deleted in xact */ + /* tuples i/u/d prior to truncate/drop */ + PgStat_Counter inserted_pre_truncdrop; + PgStat_Counter updated_pre_truncdrop; + PgStat_Counter deleted_pre_truncdrop; + Oid t_id; /* table's OID */ + bool t_shared; /* is it a shared catalog? */ + bool t_truncdropped; /* was the relation truncated/dropped? */ +} TwoPhasePgStatRecord; + + +static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared); +static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg, TimestampTz now); +static void add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level); +static void ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info); +static void pgstat_truncdrop_save_counters(PgStat_TableXactStatus *trans, bool is_drop); +static void pgstat_truncdrop_restore_counters(PgStat_TableXactStatus *trans); + + +/* + * Indicates if backend has some relation stats that it hasn't yet + * sent to the collector. 
+ */ +bool have_relation_stats; + + +/* + * Hash table for O(1) t_id -> tsa_entry lookup + */ +static HTAB *pgStatTabHash = NULL; + + +/* ---------- + * pgstat_relation_init() - + * + * Initialize a relcache entry to count access statistics. + * Called whenever a relation is opened. + * + * We assume that a relcache entry's pgstat_info field is zeroed by + * relcache.c when the relcache entry is made; thereafter it is long-lived + * data. We can avoid repeated searches of the TabStatus arrays when the + * same relation is touched repeatedly within a transaction. + * ---------- + */ +void +pgstat_relation_init(Relation rel) +{ + Oid rel_id = rel->rd_id; + char relkind = rel->rd_rel->relkind; + + /* + * We only count stats for relations with storage and partitioned tables + */ + if (!RELKIND_HAS_STORAGE(relkind) && relkind != RELKIND_PARTITIONED_TABLE) + { + rel->pgstat_info = NULL; + return; + } + + if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts) + { + /* We're not counting at all */ + rel->pgstat_info = NULL; + return; + } + + /* + * If we already set up this relation in the current transaction, nothing + * to do. + */ + if (rel->pgstat_info != NULL && + rel->pgstat_info->t_id == rel_id) + return; + + /* Else find or make the PgStat_TableStatus entry, and update link */ + rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared); +} + +/* ---------- + * pgstat_drop_relation() - + * + * Tell the collector that we just dropped a relation. + * (If the message gets lost, we will still clean the dead entry eventually + * via future invocations of pgstat_vacuum_stat().) + * + * Currently not used for lack of any good place to call it; we rely + * entirely on pgstat_vacuum_stat() to clean out stats for dead rels. 
+ * ---------- + */ +#ifdef NOT_USED +void +pgstat_drop_relation(Oid relid) +{ + PgStat_MsgTabpurge msg; + int len; + + if (pgStatSock == PGINVALID_SOCKET) + return; + + msg.m_tableid[0] = relid; + msg.m_nentries = 1; + + len = offsetof(PgStat_MsgTabpurge, m_tableid[0]) + sizeof(Oid); + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE); + msg.m_databaseid = MyDatabaseId; + pgstat_send(&msg, len); +} +#endif /* NOT_USED */ + +/* ---------- + * pgstat_report_autovac() - + * + * Called from autovacuum.c to report startup of an autovacuum process. + * We are called before InitPostgres is done, so can't rely on MyDatabaseId; + * the db OID must be passed in, instead. + * ---------- + */ +void +pgstat_report_autovac(Oid dboid) +{ + PgStat_MsgAutovacStart msg; + + if (pgStatSock == PGINVALID_SOCKET) + return; + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_AUTOVAC_START); + msg.m_databaseid = dboid; + msg.m_start_time = GetCurrentTimestamp(); + + pgstat_send(&msg, sizeof(msg)); +} + +/* --------- + * pgstat_report_vacuum() - + * + * Tell the collector about the table we just vacuumed. + * --------- + */ +void +pgstat_report_vacuum(Oid tableoid, bool shared, + PgStat_Counter livetuples, PgStat_Counter deadtuples) +{ + PgStat_MsgVacuum msg; + + if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts) + return; + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM); + msg.m_databaseid = shared ? InvalidOid : MyDatabaseId; + msg.m_tableoid = tableoid; + msg.m_autovacuum = IsAutoVacuumWorkerProcess(); + msg.m_vacuumtime = GetCurrentTimestamp(); + msg.m_live_tuples = livetuples; + msg.m_dead_tuples = deadtuples; + pgstat_send(&msg, sizeof(msg)); +} + +/* -------- + * pgstat_report_analyze() - + * + * Tell the collector about the table we just analyzed. + * + * Caller must provide new live- and dead-tuples estimates, as well as a + * flag indicating whether to reset the changes_since_analyze counter. 
+ * -------- + */ +void +pgstat_report_analyze(Relation rel, + PgStat_Counter livetuples, PgStat_Counter deadtuples, + bool resetcounter) +{ + PgStat_MsgAnalyze msg; + + if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts) + return; + + /* + * Unlike VACUUM, ANALYZE might be running inside a transaction that has + * already inserted and/or deleted rows in the target table. ANALYZE will + * have counted such rows as live or dead respectively. Because we will + * report our counts of such rows at transaction end, we should subtract + * off these counts from what we send to the collector now, else they'll + * be double-counted after commit. (This approach also ensures that the + * collector ends up with the right numbers if we abort instead of + * committing.) + * + * Waste no time on partitioned tables, though. + */ + if (pgstat_relation_should_count(rel) && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + { + PgStat_TableXactStatus *trans; + + for (trans = rel->pgstat_info->trans; trans; trans = trans->upper) + { + livetuples -= trans->tuples_inserted - trans->tuples_deleted; + deadtuples -= trans->tuples_updated + trans->tuples_deleted; + } + /* count stuff inserted by already-aborted subxacts, too */ + deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples; + /* Since ANALYZE's counts are estimates, we could have underflowed */ + livetuples = Max(livetuples, 0); + deadtuples = Max(deadtuples, 0); + } + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE); + msg.m_databaseid = rel->rd_rel->relisshared ? 
InvalidOid : MyDatabaseId; + msg.m_tableoid = RelationGetRelid(rel); + msg.m_autovacuum = IsAutoVacuumWorkerProcess(); + msg.m_resetcounter = resetcounter; + msg.m_analyzetime = GetCurrentTimestamp(); + msg.m_live_tuples = livetuples; + msg.m_dead_tuples = deadtuples; + pgstat_send(&msg, sizeof(msg)); +} + +/* + * pgstat_count_heap_insert - count a tuple insertion of n tuples + */ +void +pgstat_count_heap_insert(Relation rel, PgStat_Counter n) +{ + if (pgstat_relation_should_count(rel)) + { + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + ensure_tabstat_xact_level(pgstat_info); + pgstat_info->trans->tuples_inserted += n; + } +} + +/* + * pgstat_count_heap_update - count a tuple update + */ +void +pgstat_count_heap_update(Relation rel, bool hot) +{ + if (pgstat_relation_should_count(rel)) + { + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + ensure_tabstat_xact_level(pgstat_info); + pgstat_info->trans->tuples_updated++; + + /* t_tuples_hot_updated is nontransactional, so just advance it */ + if (hot) + pgstat_info->t_counts.t_tuples_hot_updated++; + } +} + +/* + * pgstat_count_heap_delete - count a tuple deletion + */ +void +pgstat_count_heap_delete(Relation rel) +{ + if (pgstat_relation_should_count(rel)) + { + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + ensure_tabstat_xact_level(pgstat_info); + pgstat_info->trans->tuples_deleted++; + } +} + +/* + * pgstat_count_truncate - update tuple counters due to truncate + */ +void +pgstat_count_truncate(Relation rel) +{ + if (pgstat_relation_should_count(rel)) + { + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + ensure_tabstat_xact_level(pgstat_info); + pgstat_truncdrop_save_counters(pgstat_info->trans, false); + pgstat_info->trans->tuples_inserted = 0; + pgstat_info->trans->tuples_updated = 0; + pgstat_info->trans->tuples_deleted = 0; + } +} + +/* + * pgstat_update_heap_dead_tuples - update dead-tuples count + * + * The semantics of this are that we are reporting the nontransactional + 
* recovery of "delta" dead tuples; so t_delta_dead_tuples decreases + * rather than increasing, and the change goes straight into the per-table + * counter, not into transactional state. + */ +void +pgstat_update_heap_dead_tuples(Relation rel, int delta) +{ + if (pgstat_relation_should_count(rel)) + { + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + pgstat_info->t_counts.t_delta_dead_tuples -= delta; + } +} + +/* + * find_tabstat_entry - find any existing PgStat_TableStatus entry for rel + * + * If no entry, return NULL, don't create a new one + * + * Note: if we got an error in the most recent execution of pgstat_report_stat, + * it's possible that an entry exists but there's no hashtable entry for it. + * That's okay, we'll treat this case as "doesn't exist". + */ +PgStat_TableStatus * +find_tabstat_entry(Oid rel_id) +{ + TabStatHashEntry *hash_entry; + + /* If hashtable doesn't exist, there are no entries at all */ + if (!pgStatTabHash) + return NULL; + + hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_FIND, NULL); + if (!hash_entry) + return NULL; + + /* Note that this step could also return NULL, but that's correct */ + return hash_entry->tsa_entry; +} + +/* + * Perform relation stats specific end-of-transaction work. Helper for + * AtEOXact_PgStat. + * + * Transfer transactional insert/update counts into the base tabstat entries. + * We don't bother to free any of the transactional state, since it's all in + * TopTransactionContext and will go away anyway. 
+ */ +void +AtEOXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit) +{ + PgStat_TableXactStatus *trans; + + for (trans = xact_state->first; trans != NULL; trans = trans->next) + { + PgStat_TableStatus *tabstat; + + Assert(trans->nest_level == 1); + Assert(trans->upper == NULL); + tabstat = trans->parent; + Assert(tabstat->trans == trans); + /* restore pre-truncate/drop stats (if any) in case of aborted xact */ + if (!isCommit) + pgstat_truncdrop_restore_counters(trans); + /* count attempted actions regardless of commit/abort */ + tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted; + tabstat->t_counts.t_tuples_updated += trans->tuples_updated; + tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted; + if (isCommit) + { + tabstat->t_counts.t_truncdropped = trans->truncdropped; + if (trans->truncdropped) + { + /* forget live/dead stats seen by backend thus far */ + tabstat->t_counts.t_delta_live_tuples = 0; + tabstat->t_counts.t_delta_dead_tuples = 0; + } + /* insert adds a live tuple, delete removes one */ + tabstat->t_counts.t_delta_live_tuples += + trans->tuples_inserted - trans->tuples_deleted; + /* update and delete each create a dead tuple */ + tabstat->t_counts.t_delta_dead_tuples += + trans->tuples_updated + trans->tuples_deleted; + /* insert, update, delete each count as one change event */ + tabstat->t_counts.t_changed_tuples += + trans->tuples_inserted + trans->tuples_updated + + trans->tuples_deleted; + } + else + { + /* inserted tuples are dead, deleted tuples are unaffected */ + tabstat->t_counts.t_delta_dead_tuples += + trans->tuples_inserted + trans->tuples_updated; + /* an aborted xact generates no changed_tuple events */ + } + tabstat->trans = NULL; + } +} + +/* + * Perform relation stats specific end-of-sub-transaction work. Helper for + * AtEOSubXact_PgStat. + * + * Transfer transactional insert/update counts into the next higher + * subtransaction state. 
+ */ +void +AtEOSubXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit, int nestDepth) +{ + PgStat_TableXactStatus *trans; + PgStat_TableXactStatus *next_trans; + + for (trans = xact_state->first; trans != NULL; trans = next_trans) + { + PgStat_TableStatus *tabstat; + + next_trans = trans->next; + Assert(trans->nest_level == nestDepth); + tabstat = trans->parent; + Assert(tabstat->trans == trans); + + if (isCommit) + { + if (trans->upper && trans->upper->nest_level == nestDepth - 1) + { + if (trans->truncdropped) + { + /* propagate the truncate/drop status one level up */ + pgstat_truncdrop_save_counters(trans->upper, false); + /* replace upper xact stats with ours */ + trans->upper->tuples_inserted = trans->tuples_inserted; + trans->upper->tuples_updated = trans->tuples_updated; + trans->upper->tuples_deleted = trans->tuples_deleted; + } + else + { + trans->upper->tuples_inserted += trans->tuples_inserted; + trans->upper->tuples_updated += trans->tuples_updated; + trans->upper->tuples_deleted += trans->tuples_deleted; + } + tabstat->trans = trans->upper; + pfree(trans); + } + else + { + /* + * When there isn't an immediate parent state, we can just + * reuse the record instead of going through a palloc/pfree + * pushup (this works since it's all in TopTransactionContext + * anyway). We have to re-link it into the parent level, + * though, and that might mean pushing a new entry into the + * pgStatXactStack. 
+ */ + PgStat_SubXactStatus *upper_xact_state; + + upper_xact_state = pgstat_xact_stack_level_get(nestDepth - 1); + trans->next = upper_xact_state->first; + upper_xact_state->first = trans; + trans->nest_level = nestDepth - 1; + } + } + else + { + /* + * On abort, update top-level tabstat counts, then forget the + * subtransaction + */ + + /* first restore values obliterated by truncate/drop */ + pgstat_truncdrop_restore_counters(trans); + /* count attempted actions regardless of commit/abort */ + tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted; + tabstat->t_counts.t_tuples_updated += trans->tuples_updated; + tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted; + /* inserted tuples are dead, deleted tuples are unaffected */ + tabstat->t_counts.t_delta_dead_tuples += + trans->tuples_inserted + trans->tuples_updated; + tabstat->trans = trans->upper; + pfree(trans); + } + } +} + +/* + * Generate 2PC records for all the pending transaction-dependent relation + * stats. 
+ */ +void +AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) +{ + PgStat_TableXactStatus *trans; + + for (trans = xact_state->first; trans != NULL; trans = trans->next) + { + PgStat_TableStatus *tabstat; + TwoPhasePgStatRecord record; + + Assert(trans->nest_level == 1); + Assert(trans->upper == NULL); + tabstat = trans->parent; + Assert(tabstat->trans == trans); + + record.tuples_inserted = trans->tuples_inserted; + record.tuples_updated = trans->tuples_updated; + record.tuples_deleted = trans->tuples_deleted; + record.inserted_pre_truncdrop = trans->inserted_pre_truncdrop; + record.updated_pre_truncdrop = trans->updated_pre_truncdrop; + record.deleted_pre_truncdrop = trans->deleted_pre_truncdrop; + record.t_id = tabstat->t_id; + record.t_shared = tabstat->t_shared; + record.t_truncdropped = trans->truncdropped; + + RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0, + &record, sizeof(TwoPhasePgStatRecord)); + } +} + +/* + * All we need do here is unlink the transaction stats state from the + * nontransactional state. The nontransactional action counts will be + * reported to the stats collector immediately, while the effects on + * live and dead tuple counts are preserved in the 2PC state file. + * + * Note: AtEOXact_PgStat_Relations is not called during PREPARE. + */ +void +PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) +{ + PgStat_TableXactStatus *trans; + + for (trans = xact_state->first; trans != NULL; trans = trans->next) + { + PgStat_TableStatus *tabstat; + + tabstat = trans->parent; + tabstat->trans = NULL; + } +} + +/* + * 2PC processing routine for COMMIT PREPARED case. + * + * Load the saved counts into our local pgstats state. 
+ */ +void +pgstat_twophase_postcommit(TransactionId xid, uint16 info, + void *recdata, uint32 len) +{ + TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; + PgStat_TableStatus *pgstat_info; + + /* Find or create a tabstat entry for the rel */ + pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared); + + /* Same math as in AtEOXact_PgStat, commit case */ + pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted; + pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated; + pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted; + pgstat_info->t_counts.t_truncdropped = rec->t_truncdropped; + if (rec->t_truncdropped) + { + /* forget live/dead stats seen by backend thus far */ + pgstat_info->t_counts.t_delta_live_tuples = 0; + pgstat_info->t_counts.t_delta_dead_tuples = 0; + } + pgstat_info->t_counts.t_delta_live_tuples += + rec->tuples_inserted - rec->tuples_deleted; + pgstat_info->t_counts.t_delta_dead_tuples += + rec->tuples_updated + rec->tuples_deleted; + pgstat_info->t_counts.t_changed_tuples += + rec->tuples_inserted + rec->tuples_updated + + rec->tuples_deleted; +} + +/* + * 2PC processing routine for ROLLBACK PREPARED case. + * + * Load the saved counts into our local pgstats state, but treat them + * as aborted. 
+ */ +void +pgstat_twophase_postabort(TransactionId xid, uint16 info, + void *recdata, uint32 len) +{ + TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; + PgStat_TableStatus *pgstat_info; + + /* Find or create a tabstat entry for the rel */ + pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared); + + /* Same math as in AtEOXact_PgStat, abort case */ + if (rec->t_truncdropped) + { + rec->tuples_inserted = rec->inserted_pre_truncdrop; + rec->tuples_updated = rec->updated_pre_truncdrop; + rec->tuples_deleted = rec->deleted_pre_truncdrop; + } + pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted; + pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated; + pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted; + pgstat_info->t_counts.t_delta_dead_tuples += + rec->tuples_inserted + rec->tuples_updated; +} + +/* + * Subroutine for pgstat_report_stat: Send relation statistics + */ +void +pgstat_send_tabstats(TimestampTz now, bool disconnect) +{ + /* we assume this inits to all zeroes: */ + static const PgStat_TableCounts all_zeroes; + PgStat_MsgTabstat regular_msg; + PgStat_MsgTabstat shared_msg; + TabStatusArray *tsa; + int i; + + /* + * Destroy pgStatTabHash before we start invalidating PgStat_TableEntry + * entries it points to. (Should we fail partway through the loop below, + * it's okay to have removed the hashtable already --- the only + * consequence is we'd get multiple entries for the same table in the + * pgStatTabList, and that's safe.) + */ + if (pgStatTabHash) + hash_destroy(pgStatTabHash); + pgStatTabHash = NULL; + + /* + * Scan through the TabStatusArray struct(s) to find tables that actually + * have counts, and build messages to send. We have to separate shared + * relations from regular ones because the databaseid field in the message + * header has to depend on that. 
+ */ + regular_msg.m_databaseid = MyDatabaseId; + shared_msg.m_databaseid = InvalidOid; + regular_msg.m_nentries = 0; + shared_msg.m_nentries = 0; + + for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next) + { + for (i = 0; i < tsa->tsa_used; i++) + { + PgStat_TableStatus *entry = &tsa->tsa_entries[i]; + PgStat_MsgTabstat *this_msg; + PgStat_TableEntry *this_ent; + + /* Shouldn't have any pending transaction-dependent counts */ + Assert(entry->trans == NULL); + + /* + * Ignore entries that didn't accumulate any actual counts, such + * as indexes that were opened by the planner but not used. + */ + if (memcmp(&entry->t_counts, &all_zeroes, + sizeof(PgStat_TableCounts)) == 0) + continue; + + /* + * OK, insert data into the appropriate message, and send if full. + */ + this_msg = entry->t_shared ? &shared_msg : ®ular_msg; + this_ent = &this_msg->m_entry[this_msg->m_nentries]; + this_ent->t_id = entry->t_id; + memcpy(&this_ent->t_counts, &entry->t_counts, + sizeof(PgStat_TableCounts)); + if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES) + { + pgstat_send_tabstat(this_msg, now); + this_msg->m_nentries = 0; + } + } + /* zero out PgStat_TableStatus structs after use */ + MemSet(tsa->tsa_entries, 0, + tsa->tsa_used * sizeof(PgStat_TableStatus)); + tsa->tsa_used = 0; + } + + /* + * Send partial messages. Make sure that any pending xact commit/abort + * and connection stats get counted, even if there are no table stats to + * send. 
+ */ + if (regular_msg.m_nentries > 0 || + pgStatXactCommit > 0 || pgStatXactRollback > 0 || disconnect) + pgstat_send_tabstat(®ular_msg, now); + if (shared_msg.m_nentries > 0) + pgstat_send_tabstat(&shared_msg, now); + + have_relation_stats = false; +} + +/* + * Subroutine for pgstat_send_tabstats: finish and send one tabstat message + */ +static void +pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg, TimestampTz now) +{ + int n; + int len; + + /* It's unlikely we'd get here with no socket, but maybe not impossible */ + if (pgStatSock == PGINVALID_SOCKET) + return; + + /* + * Report and reset accumulated xact commit/rollback and I/O timings + * whenever we send a normal tabstat message + */ + pgstat_update_dbstats(tsmsg, now); + + n = tsmsg->m_nentries; + len = offsetof(PgStat_MsgTabstat, m_entry[0]) + + n * sizeof(PgStat_TableEntry); + + pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT); + pgstat_send(tsmsg, len); +} + +/* + * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel + */ +static PgStat_TableStatus * +get_tabstat_entry(Oid rel_id, bool isshared) +{ + TabStatHashEntry *hash_entry; + PgStat_TableStatus *entry; + TabStatusArray *tsa; + bool found; + + pgstat_assert_is_up(); + + have_relation_stats = true; + + /* + * Create hash table if we don't have it already. + */ + if (pgStatTabHash == NULL) + { + HASHCTL ctl; + + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(TabStatHashEntry); + + pgStatTabHash = hash_create("pgstat TabStatusArray lookup hash table", + TABSTAT_QUANTUM, + &ctl, + HASH_ELEM | HASH_BLOBS); + } + + /* + * Find an entry or create a new one. + */ + hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_ENTER, &found); + if (!found) + { + /* initialize new entry with null pointer */ + hash_entry->tsa_entry = NULL; + } + + /* + * If entry is already valid, we're done. 
+ */ + if (hash_entry->tsa_entry) + return hash_entry->tsa_entry; + + /* + * Locate the first pgStatTabList entry with free space, making a new list + * entry if needed. Note that we could get an OOM failure here, but if so + * we have left the hashtable and the list in a consistent state. + */ + if (pgStatTabList == NULL) + { + /* Set up first pgStatTabList entry */ + pgStatTabList = (TabStatusArray *) + MemoryContextAllocZero(TopMemoryContext, + sizeof(TabStatusArray)); + } + + tsa = pgStatTabList; + while (tsa->tsa_used >= TABSTAT_QUANTUM) + { + if (tsa->tsa_next == NULL) + tsa->tsa_next = (TabStatusArray *) + MemoryContextAllocZero(TopMemoryContext, + sizeof(TabStatusArray)); + tsa = tsa->tsa_next; + } + + /* + * Allocate a PgStat_TableStatus entry within this list entry. We assume + * the entry was already zeroed, either at creation or after last use. + */ + entry = &tsa->tsa_entries[tsa->tsa_used++]; + entry->t_id = rel_id; + entry->t_shared = isshared; + + /* + * Now we can fill the entry in pgStatTabHash. + */ + hash_entry->tsa_entry = entry; + + return entry; +} + +/* + * add_tabstat_xact_level - add a new (sub)transaction state record + */ +static void +add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level) +{ + PgStat_SubXactStatus *xact_state; + PgStat_TableXactStatus *trans; + + /* + * If this is the first rel to be modified at the current nest level, we + * first have to push a transaction stack entry. + */ + xact_state = pgstat_xact_stack_level_get(nest_level); + + /* Now make a per-table stack entry */ + trans = (PgStat_TableXactStatus *) + MemoryContextAllocZero(TopTransactionContext, + sizeof(PgStat_TableXactStatus)); + trans->nest_level = nest_level; + trans->upper = pgstat_info->trans; + trans->parent = pgstat_info; + trans->next = xact_state->first; + xact_state->first = trans; + pgstat_info->trans = trans; +} + +/* + * Add a new (sub)transaction record if needed. 
+ */ +static void +ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info) +{ + int nest_level = GetCurrentTransactionNestLevel(); + + if (pgstat_info->trans == NULL || + pgstat_info->trans->nest_level != nest_level) + add_tabstat_xact_level(pgstat_info, nest_level); +} + +/* + * pgstat_truncdrop_save_counters + * + * Whenever a table is truncated/dropped, we save its i/u/d counters so that + * they can be cleared, and if the (sub)xact that executed the truncate/drop + * later aborts, the counters can be restored to the saved (pre-truncate/drop) + * values. + * + * Note that for truncate we do this on the first truncate in any particular + * subxact level only. + */ +static void +pgstat_truncdrop_save_counters(PgStat_TableXactStatus *trans, bool is_drop) +{ + if (!trans->truncdropped || is_drop) + { + trans->inserted_pre_truncdrop = trans->tuples_inserted; + trans->updated_pre_truncdrop = trans->tuples_updated; + trans->deleted_pre_truncdrop = trans->tuples_deleted; + trans->truncdropped = true; + } +} + +/* + * pgstat_truncdrop_restore_counters - restore counters when a truncate aborts + */ +static void +pgstat_truncdrop_restore_counters(PgStat_TableXactStatus *trans) +{ + if (trans->truncdropped) + { + trans->tuples_inserted = trans->inserted_pre_truncdrop; + trans->tuples_updated = trans->updated_pre_truncdrop; + trans->tuples_deleted = trans->deleted_pre_truncdrop; + } +} |