agent: remove data race in agent config (#20200)

To fix an issue displaying the current reloaded config in the 
v1/agent/self endpoint #18681 caused the agent's internal 
config struct member to be deepcopied and replaced on reload.

This is not safe because the field is not protected by a lock, nor 
should it be due to how it is accessed by the rest of the system.

This PR does the same deepcopy, but into a new field solely for 
the point of capturing the current reloaded values for display 
purposes. If there has been no reload then the original config is used.
This commit is contained in:
R.B. Boyer 2024-01-12 15:11:21 -06:00 committed by GitHub
parent d4b67677c6
commit 7f9ed032fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 25 deletions

View File

@ -24,6 +24,12 @@ import (
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/rboyer/safeio"
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
"google.golang.org/grpc"
"google.golang.org/grpc/keepalive"
"github.com/hashicorp/go-connlimit"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
@ -31,11 +37,6 @@ import (
"github.com/hashicorp/hcp-scada-provider/capability"
"github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf"
"github.com/rboyer/safeio"
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
"google.golang.org/grpc"
"google.golang.org/grpc/keepalive"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/acl/resolver"
@ -240,6 +241,9 @@ type Agent struct {
// config is the agent configuration.
config *config.RuntimeConfig
displayOnlyConfigCopy *config.RuntimeConfig
displayOnlyConfigCopyLock sync.Mutex
// Used for writing our logs
logger hclog.InterceptLogger
@ -4438,11 +4442,22 @@ func (a *Agent) reloadConfigInternal(newCfg *config.RuntimeConfig) error {
a.config.EnableDebug = newCfg.EnableDebug
// update Agent config with new config
a.config = newCfg.DeepCopy()
a.displayOnlyConfigCopyLock.Lock()
a.displayOnlyConfigCopy = newCfg.DeepCopy()
a.displayOnlyConfigCopyLock.Unlock()
return nil
}
func (a *Agent) getRuntimeConfigForDisplay() *config.RuntimeConfig {
a.displayOnlyConfigCopyLock.Lock()
defer a.displayOnlyConfigCopyLock.Unlock()
if a.displayOnlyConfigCopy != nil {
return a.displayOnlyConfigCopy.DeepCopy()
}
return a.config
}
// LocalBlockingQuery performs a blocking query in a generic way against
// local agent state that has no RPC or raft to back it. It uses `hash` parameter
// instead of an `index`.

View File

@ -11,14 +11,15 @@ import (
"strings"
"time"
"github.com/mitchellh/hashstructure"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/hashicorp/go-bexpr"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
"github.com/hashicorp/serf/coordinate"
"github.com/hashicorp/serf/serf"
"github.com/mitchellh/hashstructure"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/hashicorp/consul/acl"
cachetype "github.com/hashicorp/consul/agent/cache-types"
@ -89,6 +90,8 @@ func (s *HTTPHandlers) AgentSelf(resp http.ResponseWriter, req *http.Request) (i
}
}
displayConfig := s.agent.getRuntimeConfigForDisplay()
var xds *XDSSelf
if s.agent.xdsServer != nil {
xds = &XDSSelf{
@ -96,15 +99,15 @@ func (s *HTTPHandlers) AgentSelf(resp http.ResponseWriter, req *http.Request) (i
"envoy": xdscommon.EnvoyVersions,
},
// Prefer the TLS port. See comment on the XDSSelf struct for details.
Port: s.agent.config.GRPCTLSPort,
Port: displayConfig.GRPCTLSPort,
Ports: GRPCPorts{
Plaintext: s.agent.config.GRPCPort,
TLS: s.agent.config.GRPCTLSPort,
Plaintext: displayConfig.GRPCPort,
TLS: displayConfig.GRPCTLSPort,
},
}
// Fallback to standard port if TLS is not enabled.
if s.agent.config.GRPCTLSPort <= 0 {
xds.Port = s.agent.config.GRPCPort
if displayConfig.GRPCTLSPort <= 0 {
xds.Port = displayConfig.GRPCPort
}
}
@ -119,22 +122,22 @@ func (s *HTTPHandlers) AgentSelf(resp http.ResponseWriter, req *http.Request) (i
Version string
BuildDate string
}{
Datacenter: s.agent.config.Datacenter,
PrimaryDatacenter: s.agent.config.PrimaryDatacenter,
NodeName: s.agent.config.NodeName,
NodeID: string(s.agent.config.NodeID),
Partition: s.agent.config.PartitionOrEmpty(),
Revision: s.agent.config.Revision,
Server: s.agent.config.ServerMode,
Datacenter: displayConfig.Datacenter,
PrimaryDatacenter: displayConfig.PrimaryDatacenter,
NodeName: displayConfig.NodeName,
NodeID: string(displayConfig.NodeID),
Partition: displayConfig.PartitionOrEmpty(),
Revision: displayConfig.Revision,
Server: displayConfig.ServerMode,
// We expect the ent version to be part of the reported version string, and that's now part of the metadata, not the actual version.
Version: s.agent.config.VersionWithMetadata(),
BuildDate: s.agent.config.BuildDate.Format(time.RFC3339),
Version: displayConfig.VersionWithMetadata(),
BuildDate: displayConfig.BuildDate.Format(time.RFC3339),
}
return Self{
Config: config,
DebugConfig: s.agent.config.Sanitized(),
Coord: cs[s.agent.config.SegmentName],
DebugConfig: displayConfig.Sanitized(),
Coord: cs[displayConfig.SegmentName],
Member: s.agent.AgentLocalMember(),
Stats: s.agent.Stats(),
Meta: s.agent.State.Metadata(),