upgrade test: call validation func during upgrade (#20258)

This commit is contained in:
cskh 2024-01-18 17:04:06 -05:00 committed by GitHub
parent 9897be76ad
commit 539659508a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 37 additions and 6 deletions

View File

@ -27,7 +27,7 @@ func Test_Upgrade_Standard_Basic_Agentless(t *testing.T) {
sp := ct.Sprawl
cfg := sp.Config()
require.NoError(t, ct.Sprawl.LoadKVDataToCluster("dc1", 1, &api.WriteOptions{}))
require.NoError(t, sp.Upgrade(cfg, "dc1", sprawl.UpgradeTypeStandard, utils.TargetImages(), nil))
require.NoError(t, sp.Upgrade(cfg, "dc1", sprawl.UpgradeTypeStandard, utils.TargetImages(), nil, nil))
t.Log("Finished standard upgrade ...")
// verify data is not lost

View File

@ -54,7 +54,7 @@ func TestTrafficManagement_ResolverDefaultSubset_Agentless(t *testing.T) {
sp := ct.Sprawl
cfg := sp.Config()
require.NoError(t, ct.Sprawl.LoadKVDataToCluster("dc1", 1, &api.WriteOptions{}))
require.NoError(t, sp.Upgrade(cfg, "dc1", sprawl.UpgradeTypeStandard, utils.TargetImages(), nil))
require.NoError(t, sp.Upgrade(cfg, "dc1", sprawl.UpgradeTypeStandard, utils.TargetImages(), nil, nil))
t.Log("Finished standard upgrade ...")
// verify data is not lost

View File

@ -246,12 +246,20 @@ func (s *Sprawl) Relaunch(
return s.RelaunchWithPhase(cfg, LaunchPhaseRegular)
}
// Upgrade upgrades the named cluster to the version given by targetImages.
// Parameters:
// - cfg: the topology config that contains the cluster
// - clusterName: the cluster to upgrade
// - upgradeType: the type of upgrade, standard or autopilot
// - targetImages: the target version to upgrade to
// - newServersInTopology: the number of new servers to add to the topology for autopilot upgrade only
// - validationFunc: the validation function to run during upgrade; may be nil to skip validation
func (s *Sprawl) Upgrade(
cfg *topology.Config,
clusterName string,
upgradeType string,
targetImages topology.Images,
newServersInTopology []int,
validationFunc func() error,
) error {
cluster := cfg.Cluster(clusterName)
if cluster == nil {
@ -266,9 +274,9 @@ func (s *Sprawl) Upgrade(
switch upgradeType {
case UpgradeTypeAutopilot:
err = s.autopilotUpgrade(cfg, cluster, newServersInTopology)
err = s.autopilotUpgrade(cfg, cluster, newServersInTopology, validationFunc)
case UpgradeTypeStandard:
err = s.standardUpgrade(cluster, targetImages)
err = s.standardUpgrade(cluster, targetImages, validationFunc)
default:
err = fmt.Errorf("upgrade type unsupported %s", upgradeType)
}
@ -283,7 +291,7 @@ func (s *Sprawl) Upgrade(
// standardUpgrade upgrades the agents in the cluster to the targetImages
// one at a time: server agents first, then client agents
func (s *Sprawl) standardUpgrade(cluster *topology.Cluster,
targetImages topology.Images) error {
targetImages topology.Images, validationFunc func() error) error {
upgradeFn := func(nodeID topology.NodeID) error {
cfgUpgrade := s.Config()
clusterCopy := cfgUpgrade.Cluster(cluster.Name)
@ -311,6 +319,13 @@ func (s *Sprawl) standardUpgrade(cluster *topology.Cluster,
if err := upgradeFn(node.ID()); err != nil {
return fmt.Errorf("error upgrading node %s: %w", node.Name, err)
}
// run the validation function after upgrading each server agent
if validationFunc != nil {
if err := validationFunc(); err != nil {
return fmt.Errorf("error validating cluster: %w", err)
}
}
}
// upgrade client agents one at a time
@ -322,6 +337,13 @@ func (s *Sprawl) standardUpgrade(cluster *topology.Cluster,
if err := upgradeFn(node.ID()); err != nil {
return fmt.Errorf("error upgrading node %s: %w", node.Name, err)
}
// run the validation function after upgrading each client agent
if validationFunc != nil {
if err := validationFunc(); err != nil {
return fmt.Errorf("error validating cluster: %w", err)
}
}
}
return nil
@ -330,7 +352,7 @@ func (s *Sprawl) standardUpgrade(cluster *topology.Cluster,
// autopilotUpgrade upgrades server agents by joining new servers with
// a higher version. After the upgrade completes, the number of server
// agents is doubled.
func (s *Sprawl) autopilotUpgrade(cfg *topology.Config, cluster *topology.Cluster, newServersInTopology []int) error {
func (s *Sprawl) autopilotUpgrade(cfg *topology.Config, cluster *topology.Cluster, newServersInTopology []int, validationFunc func() error) error {
leader, err := s.Leader(cluster.Name)
if err != nil {
return fmt.Errorf("error get leader: %w", err)
@ -393,9 +415,18 @@ func (s *Sprawl) autopilotUpgrade(cfg *topology.Config, cluster *topology.Cluste
node.IsNewServer = false
}
// Run the validation code
if validationFunc != nil {
if err := validationFunc(); err != nil {
return fmt.Errorf("error validating cluster: %w", err)
}
}
return nil
}
// RelaunchWithPhase relaunches the topology with the given phase
// and waits for the cluster to be ready (i.e., leadership is established)
func (s *Sprawl) RelaunchWithPhase(
cfg *topology.Config,
launchPhase LaunchPhase,