Documentation ¶
Index ¶
- type ColumnFamilySnapshot
- type CoreAPI
- type DB
- func (db *DB) Close() error
- func (db *DB) Del(key []byte) error
- func (db *DB) DelCF(cf kv.ColumnFamily, key []byte) error
- func (db *DB) DeleteVersionedEntry(cf kv.ColumnFamily, key []byte, version uint64) error
- func (db *DB) Get(key []byte) (*kv.Entry, error)
- func (db *DB) GetCF(cf kv.ColumnFamily, key []byte) (*kv.Entry, error)
- func (db *DB) GetVersionedEntry(cf kv.ColumnFamily, key []byte, version uint64) (*kv.Entry, error)
- func (db *DB) Info() *Stats
- func (db *DB) IsClosed() bool
- func (db *DB) Manifest() *manifest.Manager
- func (db *DB) NewIterator(opt *utils.Options) utils.Iterator
- func (db *DB) NewTransaction(update bool) *Txn
- func (db *DB) RunValueLogGC(discardRatio float64) error
- func (db *DB) Set(data *kv.Entry) error
- func (db *DB) SetCF(cf kv.ColumnFamily, key, value []byte) error
- func (db *DB) SetRegionMetrics(rm *storepkg.RegionMetrics)
- func (db *DB) SetVersionedEntry(cf kv.ColumnFamily, key []byte, version uint64, value []byte, meta byte) error
- func (db *DB) Update(fn func(txn *Txn) error) error
- func (db *DB) View(fn func(txn *Txn) error) error
- func (db *DB) WAL() *wal.Manager
- type DBIterator
- type HotKeyStat
- type Item
- type IteratorOptions
- type LSMLevelStats
- type Options
- type Stats
- type StatsSnapshot
- type Txn
- func (txn *Txn) Commit() error
- func (txn *Txn) CommitWith(cb func(error))
- func (txn *Txn) Delete(key []byte) error
- func (txn *Txn) Discard()
- func (txn *Txn) Get(key []byte) (item *Item, rerr error)
- func (txn *Txn) NewIterator(opt IteratorOptions) *TxnIterator
- func (txn *Txn) NewKeyIterator(key []byte, opt IteratorOptions) *TxnIterator
- func (txn *Txn) ReadTs() uint64
- func (txn *Txn) Set(key, val []byte) error
- func (txn *Txn) SetEntry(e *kv.Entry) error
- type TxnIterator
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ColumnFamilySnapshot ¶
type CoreAPI ¶
type CoreAPI interface {
Set(data *kv.Entry) error
Get(key []byte) (*kv.Entry, error)
Del(key []byte) error
SetCF(cf kv.ColumnFamily, key, value []byte) error
GetCF(cf kv.ColumnFamily, key []byte) (*kv.Entry, error)
DelCF(cf kv.ColumnFamily, key []byte) error
NewIterator(opt *utils.Options) utils.Iterator
Info() *Stats
Close() error
}
CoreAPI is the set of operations NoKV exposes to callers.
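A minimal usage sketch (not taken from the package docs): it assumes kv.Entry exposes Key and Value fields and that db is an already-opened handle; how that handle is obtained is out of scope here.

    // coreAPIDemo exercises the basic CoreAPI surface.
    // Assumption: kv.Entry has exported Key/Value fields.
    func coreAPIDemo(db *DB) error {
        if err := db.Set(&kv.Entry{Key: []byte("answer"), Value: []byte("42")}); err != nil {
            return err
        }
        entry, err := db.Get([]byte("answer"))
        if err != nil {
            return err
        }
        _ = entry // inspect entry.Value as needed
        return db.Del([]byte("answer"))
    }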
type DB ¶
DB is the externally exposed interface object. It is globally unique and holds handles to the underlying resources.
func (*DB) DelCF ¶
func (db *DB) DelCF(cf kv.ColumnFamily, key []byte) error
DelCF deletes a key from the specified column family.
func (*DB) DeleteVersionedEntry ¶ added in v0.2.0
func (db *DB) DeleteVersionedEntry(cf kv.ColumnFamily, key []byte, version uint64) error
DeleteVersionedEntry marks the specified version as deleted by writing a tombstone record.
func (*DB) GetVersionedEntry ¶ added in v0.2.0
func (db *DB) GetVersionedEntry(cf kv.ColumnFamily, key []byte, version uint64) (*kv.Entry, error)
GetVersionedEntry retrieves the value stored at the provided MVCC version. The caller is responsible for releasing the returned entry via DecrRef.
func (*DB) NewTransaction ¶
func (db *DB) NewTransaction(update bool) *Txn
func (*DB) RunValueLogGC ¶
func (db *DB) RunValueLogGC(discardRatio float64) error
RunValueLogGC triggers a value log garbage collection.
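A hedged sketch of triggering GC by hand; whether the method signals "nothing to rewrite" through a dedicated error is not documented here, so the loop simply stops on the first error.

    // runValueLogGC attempts up to three GC passes, reclaiming value log
    // files whose discardable fraction is at least 50%.
    func runValueLogGC(db *DB) {
        for i := 0; i < 3; i++ {
            if err := db.RunValueLogGC(0.5); err != nil {
                return // nothing eligible, or a genuine failure
            }
        }
    }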
func (*DB) SetCF ¶
func (db *DB) SetCF(cf kv.ColumnFamily, key, value []byte) error
SetCF writes a key/value pair into the specified column family.
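A short column-family sketch; the concrete kv.ColumnFamily value is passed in by the caller because the available families are not enumerated in this index.

    func columnFamilyDemo(db *DB, cf kv.ColumnFamily) error {
        if err := db.SetCF(cf, []byte("k"), []byte("v")); err != nil {
            return err
        }
        entry, err := db.GetCF(cf, []byte("k"))
        if err != nil {
            return err
        }
        _ = entry // inspect the returned entry as needed
        return db.DelCF(cf, []byte("k"))
    }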
func (*DB) SetRegionMetrics ¶
func (db *DB) SetRegionMetrics(rm *storepkg.RegionMetrics)
SetRegionMetrics attaches a region metrics recorder so that Stats snapshots and expvar output include region state counts.
func (*DB) SetVersionedEntry ¶ added in v0.2.0
func (db *DB) SetVersionedEntry(cf kv.ColumnFamily, key []byte, version uint64, value []byte, meta byte) error
SetVersionedEntry writes a value to the specified column family using the provided version. It mirrors SetCF but allows callers to control the MVCC timestamp embedded in the internal key.
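A sketch combining the three versioned helpers; the meta byte value 0 is an assumption (taken to mean a plain value), and the returned entry is released with DecrRef as GetVersionedEntry requires.

    // versionedDemo writes two explicit MVCC versions of one key, reads the
    // older one back, then tombstones the newer one.
    func versionedDemo(db *DB, cf kv.ColumnFamily) error {
        if err := db.SetVersionedEntry(cf, []byte("k"), 10, []byte("v10"), 0); err != nil {
            return err
        }
        if err := db.SetVersionedEntry(cf, []byte("k"), 20, []byte("v20"), 0); err != nil {
            return err
        }
        entry, err := db.GetVersionedEntry(cf, []byte("k"), 10)
        if err != nil {
            return err
        }
        defer entry.DecrRef() // caller releases the versioned entry
        return db.DeleteVersionedEntry(cf, []byte("k"), 20)
    }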
func (*DB) Update ¶
func (db *DB) Update(fn func(txn *Txn) error) error
Update executes a function, creating and managing a read-write transaction for the caller. Any error returned by the function is relayed by Update. Update cannot be used with managed transactions.
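A managed-transaction sketch pairing Update with its read-only counterpart View:

    func managedTxnDemo(db *DB) error {
        // Read-write: the transaction commits only if the closure returns nil.
        if err := db.Update(func(txn *Txn) error {
            return txn.Set([]byte("k"), []byte("v"))
        }); err != nil {
            return err
        }
        // Read-only: View never commits writes.
        return db.View(func(txn *Txn) error {
            item, err := txn.Get([]byte("k"))
            if err != nil {
                return err
            }
            _ = item // inspect via the Item type's accessors
            return nil
        })
    }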
type DBIterator ¶
type DBIterator struct {
// contains filtered or unexported fields
}
func (*DBIterator) Close ¶
func (iter *DBIterator) Close() error
func (*DBIterator) Item ¶
func (iter *DBIterator) Item() utils.Item
func (*DBIterator) Next ¶
func (iter *DBIterator) Next()
func (*DBIterator) Rewind ¶
func (iter *DBIterator) Rewind()
func (*DBIterator) Seek ¶
func (iter *DBIterator) Seek(key []byte)
func (*DBIterator) Valid ¶
func (iter *DBIterator) Valid() bool
type HotKeyStat ¶
type IteratorOptions ¶
type IteratorOptions struct {
Reverse bool // Direction of iteration. False is forward, true is backward.
AllVersions bool // Fetch all valid versions of the same key.
InternalAccess bool // Used to allow internal access to keys.
KeyOnly bool // Avoid eager value materialisation.
Prefix []byte // Only iterate over this given prefix.
SinceTs uint64 // Only read data that has version > SinceTs.
// contains filtered or unexported fields
}
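A key-only prefix scan sketch; because the Item accessors are not listed in this index, the loop only counts matching keys.

    func countWithPrefix(db *DB, prefix []byte) (int, error) {
        n := 0
        err := db.View(func(txn *Txn) error {
            it := txn.NewIterator(IteratorOptions{
                Prefix:  prefix, // restrict iteration to this prefix
                KeyOnly: true,   // avoid eager value materialisation
            })
            defer it.Close()
            for it.Rewind(); it.Valid(); it.Next() {
                n++
            }
            return nil
        })
        return n, err
    }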
type LSMLevelStats ¶ added in v0.4.0
type LSMLevelStats struct {
Level int `json:"level"`
TableCount int `json:"tables"`
SizeBytes int64 `json:"size_bytes"`
ValueBytes int64 `json:"value_bytes"`
StaleBytes int64 `json:"stale_bytes"`
IngestTables int `json:"ingest_tables"`
IngestSizeBytes int64 `json:"ingest_size_bytes"`
IngestValueBytes int64 `json:"ingest_value_bytes"`
ValueDensity float64 `json:"value_density"`
IngestValueDensity float64 `json:"ingest_value_density"`
IngestRuns int64 `json:"ingest_runs"`
IngestMs float64 `json:"ingest_ms"`
IngestTablesCount int64 `json:"ingest_tables_compacted"`
MergeRuns int64 `json:"ingest_merge_runs"`
MergeMs float64 `json:"ingest_merge_ms"`
MergeTables int64 `json:"ingest_merge_tables"`
}
LSMLevelStats captures aggregated metrics per LSM level.
type Options ¶
type Options struct {
ValueThreshold int64
WorkDir string
MemTableSize int64
SSTableMaxSz int64
MaxBatchCount int64
MaxBatchSize int64 // max batch size in bytes
ValueLogFileSize int
ValueLogMaxEntries uint32
// ValueLogGCInterval specifies how frequently to trigger a check for value
// log garbage collection. Zero or negative values disable automatic GC.
ValueLogGCInterval time.Duration
// ValueLogGCDiscardRatio is the discard ratio for a value log file to be
// considered for garbage collection. It must be in the range (0.0, 1.0).
ValueLogGCDiscardRatio float64
// Value log GC sampling parameters. Ratios <= 0 fall back to defaults.
ValueLogGCSampleSizeRatio float64
ValueLogGCSampleCountRatio float64
ValueLogGCSampleFromHead bool
// ValueLogVerbose enables verbose logging across value-log operations.
ValueLogVerbose bool
WriteBatchMaxCount int
WriteBatchMaxSize int64
DetectConflicts bool
HotRingEnabled bool
HotRingBits uint8
HotRingTopK int
// HotRingDecayInterval controls how often HotRing halves its global counters.
// Zero disables periodic decay.
HotRingDecayInterval time.Duration
// HotRingDecayShift determines how aggressively counters decay (count >>= shift).
HotRingDecayShift uint32
// HotRingWindowSlots controls the number of sliding-window buckets tracked per key.
// Zero disables the sliding window.
HotRingWindowSlots int
// HotRingWindowSlotDuration sets the duration of each sliding-window bucket.
HotRingWindowSlotDuration time.Duration
SyncWrites bool
ManifestSync bool
// WriteHotKeyLimit caps how many consecutive writes a single key can issue
// before the DB returns utils.ErrHotKeyWriteThrottle. Zero disables write-path
// throttling.
WriteHotKeyLimit int32
// HotWriteBurstThreshold marks a key as “hot” for batching when its write
// frequency exceeds this count; zero disables hot write batching.
HotWriteBurstThreshold int32
// HotWriteBatchMultiplier scales write batch limits when a hot key is
// detected, allowing short-term coalescing of repeated writes.
HotWriteBatchMultiplier int
// WriteBatchWait adds an optional coalescing delay when the commit queue is
// momentarily empty, letting small bursts share one WAL fsync/apply pass.
// Zero disables the delay.
WriteBatchWait time.Duration
// CommitPipelineDepth controls the buffering between commit queue, value log
// writes, and LSM apply. Values <= 0 fall back to a small default.
CommitPipelineDepth int
// Block cache configuration for read path optimization. Cached blocks
// target L0/L1; colder data relies on the OS page cache.
BlockCacheSize int
BloomCacheSize int
// RaftLagWarnSegments determines how many WAL segments a follower can lag
// behind the active segment before stats surfaces a warning. Zero disables
// the alert.
RaftLagWarnSegments int64
// EnableWALWatchdog enables the background WAL backlog watchdog which
// surfaces typed-record warnings and optionally runs automated segment GC.
EnableWALWatchdog bool
// WALAutoGCInterval controls how frequently the watchdog evaluates WAL
// backlog for automated garbage collection.
WALAutoGCInterval time.Duration
// WALAutoGCMinRemovable is the minimum number of removable WAL segments
// required before an automated GC pass will run.
WALAutoGCMinRemovable int
// WALAutoGCMaxBatch bounds how many WAL segments are removed during a single
// automated GC pass.
WALAutoGCMaxBatch int
// WALTypedRecordWarnRatio triggers a typed-record warning when raft records
// constitute at least this fraction of WAL writes. Zero disables ratio-based
// warnings.
WALTypedRecordWarnRatio float64
// WALTypedRecordWarnSegments triggers a typed-record warning when the number
// of WAL segments containing raft records exceeds this threshold. Zero
// disables segment-count warnings.
WALTypedRecordWarnSegments int64
// DiscardStatsFlushThreshold controls how many discard-stat updates must be
// accumulated before they are flushed back into the LSM. Zero keeps the
// default threshold.
DiscardStatsFlushThreshold int
// NumCompactors controls how many background compaction workers are spawned.
// Zero uses an auto value derived from the host CPU count.
NumCompactors int
// NumLevelZeroTables controls when write throttling kicks in and feeds into
// the compaction priority calculation. Zero falls back to the legacy default.
NumLevelZeroTables int
// IngestCompactBatchSize decides how many L0 tables to promote into the
// ingest buffer per compaction cycle. Zero falls back to the legacy default.
IngestCompactBatchSize int
// IngestBacklogMergeScore triggers an ingest-merge task when the ingest
// backlog score exceeds this threshold. Zero keeps the default (2.0).
IngestBacklogMergeScore float64
// CompactionValueWeight adjusts how aggressively the scheduler prioritises
// levels whose entries reference large value log payloads. Higher values
// make the compaction picker favour levels with high ValuePtr density.
CompactionValueWeight float64
// CompactionValueAlertThreshold triggers stats alerts when a level's
// value-density (value bytes / total bytes) exceeds this ratio.
CompactionValueAlertThreshold float64
// IngestShardParallelism caps how many ingest shards can be compacted in a
// single ingest-only pass. A value <= 0 falls back to 1 (sequential).
IngestShardParallelism int
}
Options is the top-level configuration for NoKV.
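An illustrative configuration sketch; every value here is a placeholder, and fields left at zero fall back to the defaults or disabled behaviour described in the field comments above.

    opt := &Options{
        WorkDir:                "/tmp/nokv-demo",
        MemTableSize:           64 << 20, // 64 MiB memtables
        SSTableMaxSz:           64 << 20,
        ValueLogFileSize:       1 << 30,
        ValueLogGCInterval:     10 * time.Minute, // periodic value log GC check
        ValueLogGCDiscardRatio: 0.5,
        DetectConflicts:        true,
        SyncWrites:             false,
        NumCompactors:          0, // auto value derived from CPU count
    }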
type Stats ¶
type Stats struct {
EntryNum int64 // Mirrors Entries for backwards compatibility.
// contains filtered or unexported fields
}
func (*Stats) SetRegionMetrics ¶
func (s *Stats) SetRegionMetrics(rm *storepkg.RegionMetrics)
SetRegionMetrics attaches the region metrics recorder used in snapshots.
func (*Stats) Snapshot ¶
func (s *Stats) Snapshot() StatsSnapshot
Snapshot returns a point-in-time metrics snapshot without mutating state.
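Because StatsSnapshot carries JSON tags, a snapshot can be serialised directly; a small sketch (encoding/json assumed to be imported):

    func dumpStats(db *DB) ([]byte, error) {
        snap := db.Info().Snapshot() // point-in-time copy, no state mutated
        return json.MarshalIndent(snap, "", "  ")
    }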
func (*Stats) StartStats ¶
func (s *Stats) StartStats()
StartStats runs periodic collection of internal backlog metrics.
type StatsSnapshot ¶
type StatsSnapshot struct {
Entries int64 `json:"entries"`
FlushPending int64 `json:"flush_pending"`
FlushQueueLength int64 `json:"flush_queue_length"`
FlushActive int64 `json:"flush_active"`
FlushWaitMs float64 `json:"flush_wait_ms"`
FlushLastWaitMs float64 `json:"flush_last_wait_ms"`
FlushMaxWaitMs float64 `json:"flush_max_wait_ms"`
FlushBuildMs float64 `json:"flush_build_ms"`
FlushLastBuildMs float64 `json:"flush_last_build_ms"`
FlushMaxBuildMs float64 `json:"flush_max_build_ms"`
FlushReleaseMs float64 `json:"flush_release_ms"`
FlushLastReleaseMs float64 `json:"flush_last_release_ms"`
FlushMaxReleaseMs float64 `json:"flush_max_release_ms"`
FlushCompleted int64 `json:"flush_completed"`
CompactionBacklog int64 `json:"compaction_backlog"`
CompactionMaxScore float64 `json:"compaction_max_score"`
CompactionLastDurationMs float64 `json:"compaction_last_duration_ms"`
CompactionMaxDurationMs float64 `json:"compaction_max_duration_ms"`
CompactionRuns uint64 `json:"compaction_runs"`
CompactionIngestRuns int64 `json:"compaction_ingest_runs"`
CompactionMergeRuns int64 `json:"compaction_ingest_merge_runs"`
CompactionIngestMs float64 `json:"compaction_ingest_ms"`
CompactionMergeMs float64 `json:"compaction_ingest_merge_ms"`
CompactionIngestTables int64 `json:"compaction_ingest_tables"`
CompactionMergeTables int64 `json:"compaction_ingest_merge_tables"`
CompactionValueWeight float64 `json:"compaction_value_weight"`
CompactionValueWeightSuggested float64 `json:"compaction_value_weight_suggested,omitempty"`
ValueLogSegments int `json:"vlog_segments"`
ValueLogPendingDel int `json:"vlog_pending_deletes"`
ValueLogDiscardQueue int `json:"vlog_discard_queue"`
ValueLogHead kv.ValuePtr `json:"vlog_head"`
WALActiveSegment int64 `json:"wal_active_segment"`
WALSegmentCount int64 `json:"wal_segment_count"`
WALActiveSize int64 `json:"wal_active_size"`
WALSegmentsRemoved uint64 `json:"wal_segments_removed"`
WALRecordCounts wal.RecordMetrics `json:"wal_record_counts"`
WALSegmentsWithRaftRecords int `json:"wal_segments_with_raft_records"`
WALRemovableRaftSegments int `json:"wal_removable_raft_segments"`
WALTypedRecordRatio float64 `json:"wal_typed_record_ratio"`
WALTypedRecordWarning bool `json:"wal_typed_record_warning"`
WALTypedRecordReason string `json:"wal_typed_record_reason,omitempty"`
WALAutoGCRuns uint64 `json:"wal_auto_gc_runs"`
WALAutoGCRemoved uint64 `json:"wal_auto_gc_removed"`
WALAutoGCLastUnix int64 `json:"wal_auto_gc_last_unix"`
RaftGroupCount int `json:"raft_group_count"`
RaftLaggingGroups int `json:"raft_lagging_groups"`
RaftMinLogSegment uint32 `json:"raft_min_log_segment"`
RaftMaxLogSegment uint32 `json:"raft_max_log_segment"`
RaftMaxLagSegments int64 `json:"raft_max_lag_segments"`
RaftLagWarnThreshold int64 `json:"raft_lag_warn_threshold"`
RaftLagWarning bool `json:"raft_lag_warning"`
WriteQueueDepth int64 `json:"write_queue_depth"`
WriteQueueEntries int64 `json:"write_queue_entries"`
WriteQueueBytes int64 `json:"write_queue_bytes"`
WriteAvgBatchEntries float64 `json:"write_avg_batch_entries"`
WriteAvgBatchBytes float64 `json:"write_avg_batch_bytes"`
WriteAvgRequestWaitMs float64 `json:"write_avg_request_wait_ms"`
WriteAvgValueLogMs float64 `json:"write_avg_vlog_ms"`
WriteAvgApplyMs float64 `json:"write_avg_apply_ms"`
WriteBatchesTotal int64 `json:"write_batches_total"`
WriteThrottleActive bool `json:"write_throttle_active"`
TxnsActive int64 `json:"txns_active"`
TxnsStarted uint64 `json:"txns_started"`
TxnsCommitted uint64 `json:"txns_committed"`
TxnsConflicts uint64 `json:"txns_conflicts"`
RegionTotal int64 `json:"region_total"`
RegionNew int64 `json:"region_new"`
RegionRunning int64 `json:"region_running"`
RegionRemoving int64 `json:"region_removing"`
RegionTombstone int64 `json:"region_tombstone"`
RegionOther int64 `json:"region_other"`
HotKeys []HotKeyStat `json:"hot_keys,omitempty"`
HotWriteLimited uint64 `json:"hot_write_limited"`
BlockL0HitRate float64 `json:"block_l0_hit_rate"`
BlockL1HitRate float64 `json:"block_l1_hit_rate"`
BloomHitRate float64 `json:"bloom_hit_rate"`
IndexHitRate float64 `json:"index_hit_rate"`
IteratorReused uint64 `json:"iterator_reused"`
ColumnFamilies map[string]ColumnFamilySnapshot `json:"column_families,omitempty"`
LSMLevels []LSMLevelStats `json:"lsm_levels,omitempty"`
LSMValueBytesTotal int64 `json:"lsm_value_bytes_total"`
LSMValueDensityMax float64 `json:"lsm_value_density_max"`
LSMValueDensityAlert bool `json:"lsm_value_density_alert"`
}
StatsSnapshot captures a point-in-time view of internal backlog metrics.
type Txn ¶
type Txn struct {
// contains filtered or unexported fields
}
func (*Txn) Commit ¶
func (txn *Txn) Commit() error
Commit commits the transaction, following these steps:
1. If there are no writes, return immediately.
2. Check if read rows were updated since txn started. If so, return ErrConflict.
3. If no conflict, generate a commit timestamp and update written rows' commit ts.
4. Batch up all writes, write them to value log and LSM tree.
5. If callback is provided, will return immediately after checking for conflicts. Writes to the database will happen in the background. If there is a conflict, an error will be returned and the callback will not run. If there are no conflicts, the callback will be called in the background upon successful completion of writes or any error during write.
If error is nil, the transaction is successfully committed. In case of a non-nil error, the LSM tree won't be updated, so there's no need for any rollback.
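An unmanaged-transaction sketch following the steps above; Discard is deferred immediately, which is safe even after a successful Commit:

    func manualCommit(db *DB) error {
        txn := db.NewTransaction(true) // update=true: read-write transaction
        defer txn.Discard()            // safe to call even after Commit
        if err := txn.Set([]byte("k"), []byte("v")); err != nil {
            return err
        }
        return txn.Commit() // nil means the transaction committed
    }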
func (*Txn) CommitWith ¶
func (txn *Txn) CommitWith(cb func(error))
CommitWith acts like Commit, but takes a callback which is run via a goroutine to avoid blocking this function. The callback is guaranteed to run, so it is safe to increment a sync.WaitGroup before calling CommitWith and decrement it in the callback, in order to block until all callbacks have run.
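A sketch of the WaitGroup pattern described above (the sync package is assumed to be imported):

    func commitAsync(db *DB) error {
        txn := db.NewTransaction(true)
        defer txn.Discard()
        if err := txn.Set([]byte("k"), []byte("v")); err != nil {
            return err
        }
        var (
            wg        sync.WaitGroup
            commitErr error
        )
        wg.Add(1)
        txn.CommitWith(func(err error) { // callback is guaranteed to run
            commitErr = err
            wg.Done()
        })
        wg.Wait() // block until the asynchronous commit finishes
        return commitErr
    }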
func (*Txn) Delete ¶
func (txn *Txn) Delete(key []byte) error
Delete deletes a key.
This is done by adding a delete marker for the key at commit timestamp. Any reads happening before this timestamp would be unaffected. Any reads after this commit would see the deletion.
The current transaction keeps a reference to the key byte slice argument. Users must not modify the key until the end of the transaction.
func (*Txn) Discard ¶
func (txn *Txn) Discard()
Discard discards a created transaction. This method is very important and must be called. The Commit method calls it internally; however, calling it multiple times doesn't cause any issues, so it can safely be deferred right after the transaction is created.
NOTE: If any operations are run on a discarded transaction, ErrDiscardedTxn is returned.
func (*Txn) Get ¶
func (txn *Txn) Get(key []byte) (item *Item, rerr error)
Get looks for key and returns the corresponding Item. If key is not found, ErrKeyNotFound is returned.
func (*Txn) NewIterator ¶
func (txn *Txn) NewIterator(opt IteratorOptions) *TxnIterator
NewIterator creates a new transaction iterator. Through IteratorOptions, iteration can be limited to keys only or cover full key-value pairs.
func (*Txn) NewKeyIterator ¶
func (txn *Txn) NewKeyIterator(key []byte, opt IteratorOptions) *TxnIterator
NewKeyIterator is just like NewIterator, but allows the user to iterate over all versions of a single key. Internally, it sets the Prefix option in provided opt, and uses that prefix to additionally run bloom filter lookups before picking tables from the LSM tree.
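A sketch of walking every version of a single key; AllVersions is set explicitly, and the data behind it.Item() is left untouched because the Item accessors are not listed in this index.

    func countVersions(txn *Txn, key []byte) int {
        it := txn.NewKeyIterator(key, IteratorOptions{AllVersions: true})
        defer it.Close()
        n := 0
        for it.Rewind(); it.Valid(); it.Next() {
            n++ // it.Item() yields each version of key
        }
        return n
    }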
func (*Txn) Set ¶
func (txn *Txn) Set(key, val []byte) error
Set adds a key-value pair to the database. It will return ErrReadOnlyTxn if the update flag was set to false when creating the transaction.
The current transaction keeps a reference to the key and val byte slice arguments. Users must not modify key and val until the end of the transaction.
type TxnIterator ¶
type TxnIterator struct {
// contains filtered or unexported fields
}
TxnIterator helps iterate over the KV pairs in lexicographically sorted order.
func (*TxnIterator) Close ¶
func (it *TxnIterator) Close()
Close would close the iterator. It is important to call this when you're done with iteration.
func (*TxnIterator) Item ¶
func (it *TxnIterator) Item() *Item
Item returns pointer to the current key-value pair. This item is only valid until it.Next() gets called.
func (*TxnIterator) Next ¶
func (it *TxnIterator) Next()
Next would advance the iterator by one. Always check it.Valid() after a Next() to ensure you have access to a valid it.Item().
func (*TxnIterator) Rewind ¶
func (it *TxnIterator) Rewind()
Rewind would rewind the iterator cursor all the way to zero-th position, which would be the smallest key if iterating forward, and largest if iterating backward. It does not keep track of whether the cursor started with a Seek().
func (*TxnIterator) Seek ¶
func (it *TxnIterator) Seek(key []byte) uint64
Seek would seek to the provided key if present. If absent, it would seek to the next smallest key greater than the provided key if iterating in the forward direction. Behavior would be reversed if iterating backwards.
func (*TxnIterator) Valid ¶
func (it *TxnIterator) Valid() bool
Valid returns false when iteration is done.
func (*TxnIterator) ValidForPrefix ¶
func (it *TxnIterator) ValidForPrefix(prefix []byte) bool
ValidForPrefix returns false when iteration is done or when the current key is not prefixed by the specified prefix.