Skip to content

raft: allow retry join block within ha_storage stanza #9254

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 45 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
a01d02d
raft: initial work on raft ha storage support
calvn May 26, 2020
1c0d821
add note on join
calvn May 27, 2020
a4fce98
add todo note
calvn May 29, 2020
17edc2c
raft: add support for bootstrapping and joining existing nodes
calvn Jun 3, 2020
5cf1915
raft: gate bootstrap join by reading leader api address from storage
calvn Jun 3, 2020
d2e9857
raft: properly check for raft-only for certain conditionals
calvn Jun 4, 2020
29a27e5
Merge remote-tracking branch 'origin/master' into feature-raft-ha-sto…
calvn Jun 4, 2020
cb3c8e0
raft: add bootstrap to api and cli
calvn Jun 5, 2020
c80d6ad
raft: fix bootstrap cli command
calvn Jun 5, 2020
83beaf4
raft: add test for setting up new cluster with raft HA
calvn Jun 8, 2020
187af61
raft: extend TestRaft_HA_NewCluster to include inmem and consul backends
calvn Jun 8, 2020
6a2fb4b
raft: add test for updating an existing cluster to use raft HA
calvn Jun 10, 2020
155e67f
raft: remove debug log lines, clean up verifyRaftPeers
calvn Jun 10, 2020
19622c3
Merge remote-tracking branch 'origin/master' into feature-raft-ha-sto…
calvn Jun 10, 2020
317ce9d
raft: minor cleanup
calvn Jun 11, 2020
dcd97ed
raft: minor cleanup
calvn Jun 13, 2020
282e53c
Update physical/raft/raft.go
calvn Jun 13, 2020
be61b48
Update vault/ha.go
calvn Jun 13, 2020
568d9fc
Update vault/ha.go
calvn Jun 13, 2020
8bb5d20
Update vault/logical_system_raft.go
calvn Jun 13, 2020
fe55165
Update vault/raft.go
calvn Jun 13, 2020
415ae01
Update vault/raft.go
calvn Jun 13, 2020
b30369d
address feedback comments
calvn Jun 15, 2020
c2ad602
address feedback comments
calvn Jun 15, 2020
023ee75
raft: refactor tls keyring logic
calvn Jun 15, 2020
a444f8a
address feedback comments
calvn Jun 16, 2020
29325fe
Update vault/raft.go
calvn Jun 16, 2020
f2ce361
Update vault/raft.go
calvn Jun 16, 2020
ca3d360
address feedback comments
calvn Jun 16, 2020
4854a10
testing: fix import ordering
calvn Jun 16, 2020
ccbfa23
Merge branch 'feature-raft-ha-storage' of github.com:hashicorp/vault …
calvn Jun 16, 2020
e1112e7
raft: rename var, cleanup comment line
calvn Jun 16, 2020
f050a67
Merge remote-tracking branch 'origin/master' into feature-raft-ha-sto…
calvn Jun 17, 2020
22d390b
docs: remove ha_storage restriction note on raft
calvn Jun 17, 2020
2839c5f
docs: more raft HA interaction updates with migration and recovery mode
calvn Jun 17, 2020
23606e9
docs: update the raft join command
calvn Jun 17, 2020
beb706e
raft: update comments
calvn Jun 18, 2020
0f20555
raft: add missing isRaftHAOnly check for clearing out state set earlier
calvn Jun 18, 2020
de8c08e
raft: allow retry join block within ha_storage stanza
calvn Jun 18, 2020
47d72e5
raft: update a few ha_storage config checks
calvn Jun 18, 2020
5c0e909
Update command/operator_raft_bootstrap.go
calvn Jun 18, 2020
26321a8
raft: address feedback comments
calvn Jun 18, 2020
41618b9
raft: fix panic when checking for config.HAStorage.Type
calvn Jun 19, 2020
7749cc9
Merge remote-tracking branch 'origin/feature-raft-ha-storage' into ra…
calvn Jun 19, 2020
bf835b7
raft: fix ha_storage parsing for retry_join block
calvn Jun 19, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
address feedback comments
  • Loading branch information
calvn committed Jun 15, 2020
commit c2ad602298a2d75ba1eb2ec502de1a66136e15e7
2 changes: 1 addition & 1 deletion command/operator_raft_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (c *OperatorRaftJoinCommand) Run(args []string) int {
args = f.Args()
switch len(args) {
case 0:
// No-opThis is accepted if we're using raft of HA-only
// No-op: This is acceptable if we're using raft for HA-only
case 1:
leaderAPIAddr = strings.TrimSpace(args[0])
default:
Expand Down
5 changes: 5 additions & 0 deletions physical/raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,11 @@ func (l *RaftLock) Lock(stopCh <-chan struct{}) (<-chan struct{}, error) {

l.b.l.RLock()

// Ensure that we still have a raft instance after grabbing the read lock
if l.b.raft == nil {
return nil, errors.New("attempted to grab a lock on a nil raft backend")
}

// Cache the notifyCh locally
leaderNotifyCh := l.b.raftNotifyCh

Expand Down
12 changes: 6 additions & 6 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -1704,7 +1704,7 @@ func (c *Core) sealInternal() error {
return c.sealInternalWithOptions(true, false, true)
}

func (c *Core) sealInternalWithOptions(grabStateLock, keepHALock, shutdownRaft bool) error {
func (c *Core) sealInternalWithOptions(grabStateLock, keepHALock, performCleanup bool) error {
// Mark sealed, and if already marked return
if swapped := atomic.CompareAndSwapUint32(c.sealed, 0, 1); !swapped {
return nil
Expand Down Expand Up @@ -1785,18 +1785,18 @@ func (c *Core) sealInternalWithOptions(grabStateLock, keepHALock, shutdownRaft b

c.teardownReplicationResolverHandler()

// If the storage backend needs to be sealed
if shutdownRaft {
// Perform additional cleanup upon sealing.
if performCleanup {
if raftBackend := c.getRaftBackend(); raftBackend != nil {
if err := raftBackend.TeardownCluster(c.getClusterListener()); err != nil {
c.logger.Error("error stopping storage cluster", "error", err)
return err
}
}
}

// Stop the cluster listener
c.stopClusterListener()
// Stop the cluster listener
c.stopClusterListener()
}

c.logger.Debug("sealing barrier")
if err := c.barrier.Seal(); err != nil {
Expand Down
16 changes: 12 additions & 4 deletions vault/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ func (c *Core) startPeriodicRaftTLSRotate(ctx context.Context) error {
// If there already exists a pending key update then the update
// hasn't replicated down to all standby nodes yet. Don't allow any
// new keys to be created until all standbys have seen this previous
// rotation. As a backoff strategy another rotation attempt is
// rotation. As a backoff strategy, another rotation attempt is
// scheduled for 5 minutes from now.
logger.Warn("skipping new raft TLS config creation, keys are pending")
return time.Now().Add(time.Minute * 5), nil
Expand All @@ -320,7 +320,7 @@ func (c *Core) startPeriodicRaftTLSRotate(ctx context.Context) error {
}

// Write the keyring again with the new applied index. This allows us to
// track if standby nodes receive the update.
// track if standby nodes received the update.
keyring.Keys[1].AppliedIndex = raftBackend.AppliedIndex()
keyring.AppliedIndex = raftBackend.AppliedIndex()
entry, err = logical.StorageEntryJSON(raftTLSStoragePath, keyring)
Expand All @@ -339,7 +339,13 @@ func (c *Core) startPeriodicRaftTLSRotate(ctx context.Context) error {
// checkCommitted verifies key updates have been applied to all nodes and
// finalizes the rotation by deleting the old keys and updating the raft
// backend.
checkCommitted := func() error {
checkCommitted := func(haOnly bool) error {
// No-op here if we're using raft for HA-only. Since the storage is
// shared, the two phase commit is not done on rotation.
if haOnly {
return nil
}

keyring, err := readKeyring()
if err != nil {
return errwrap.Wrapf("failed to read raft TLS keyring: {{err}}", err)
Expand Down Expand Up @@ -395,6 +401,8 @@ func (c *Core) startPeriodicRaftTLSRotate(ctx context.Context) error {
keyCheckInterval := time.NewTicker(1 * time.Minute)
defer keyCheckInterval.Stop()

isRaftHAOnly := c.isRaftHAOnly()

var backoff bool
for {
// If we encountered an error we should try to create the key
Expand All @@ -406,7 +414,7 @@ func (c *Core) startPeriodicRaftTLSRotate(ctx context.Context) error {

select {
case <-keyCheckInterval.C:
err := checkCommitted()
err := checkCommitted(isRaftHAOnly)
if err != nil {
logger.Error("failed to activate TLS key", "error", err)
}
Expand Down