From 20eb0d3e945bcc528b251e551f8290a38fbbc727 Mon Sep 17 00:00:00 2001 From: "R.B. Boyer" Date: Mon, 9 Sep 2019 13:04:30 -0500 Subject: [PATCH] cache: remove data race in agent cache In normal operations there is a read/write race related to request QueryOptions fields. An example race: WARNING: DATA RACE Read at 0x00c000836950 by goroutine 30: github.com/hashicorp/consul/agent/structs.(*ServiceConfigRequest).CacheInfo() /go/src/github.com/hashicorp/consul/agent/structs/config_entry.go:506 +0x109 github.com/hashicorp/consul/agent/cache.(*Cache).getWithIndex() /go/src/github.com/hashicorp/consul/agent/cache/cache.go:262 +0x5c github.com/hashicorp/consul/agent/cache.(*Cache).notifyBlockingQuery() /go/src/github.com/hashicorp/consul/agent/cache/watch.go:89 +0xd7 Previous write at 0x00c000836950 by goroutine 147: github.com/hashicorp/consul/agent/cache-types.(*ResolvedServiceConfig).Fetch() /go/src/github.com/hashicorp/consul/agent/cache-types/resolved_service_config.go:31 +0x219 github.com/hashicorp/consul/agent/cache.(*Cache).fetch.func1() /go/src/github.com/hashicorp/consul/agent/cache/cache.go:495 +0x112 This patch does a lightweight copy of the request struct so that the embedded QueryOptions fields that are mutated during Fetch() are scoped to just that one RPC. --- agent/cache-types/catalog_datacenters.go | 4 ++++ agent/cache-types/catalog_list_services.go | 4 ++++ agent/cache-types/catalog_services.go | 4 ++++ agent/cache-types/config_entry.go | 4 ++++ agent/cache-types/connect_ca_leaf.go | 4 ++++ agent/cache-types/connect_ca_root.go | 4 ++++ agent/cache-types/discovery_chain.go | 4 ++++ agent/cache-types/health_services.go | 4 ++++ agent/cache-types/intention_match.go | 4 ++++ agent/cache-types/node_services.go | 4 ++++ agent/cache-types/prepared_query.go | 4 ++++ agent/cache-types/resolved_service_config.go | 4 ++++ agent/cache-types/service_dump.go | 4 ++++ 13 files changed, 52 insertions(+) diff --git a/agent/cache-types/catalog_datacenters.go b/agent/cache-types/catalog_datacenters.go index 60c2b27b47..c1245e17ed 100644 --- a/agent/cache-types/catalog_datacenters.go +++ b/agent/cache-types/catalog_datacenters.go @@ -25,6 +25,10 @@ func (c *CatalogDatacenters) Fetch(opts cache.FetchOptions, req cache.Request) ( "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Allways allow stale - there's no point in hitting leader if the request is // going to be served from cache and endup arbitrarily stale anyway. This // allows cached service-discover to automatically read scale across all diff --git a/agent/cache-types/catalog_list_services.go b/agent/cache-types/catalog_list_services.go index 0c4c4e43d1..8434d56fc1 100644 --- a/agent/cache-types/catalog_list_services.go +++ b/agent/cache-types/catalog_list_services.go @@ -25,6 +25,10 @@ func (c *CatalogListServices) Fetch(opts cache.FetchOptions, req cache.Request) "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Set the minimum query index to our current index so we block reqReal.QueryOptions.MinQueryIndex = opts.MinIndex reqReal.QueryOptions.MaxQueryTime = opts.Timeout diff --git a/agent/cache-types/catalog_services.go b/agent/cache-types/catalog_services.go index d85a6aadab..2e50e2908e 100644 --- a/agent/cache-types/catalog_services.go +++ b/agent/cache-types/catalog_services.go @@ -26,6 +26,10 @@ func (c *CatalogServices) Fetch(opts cache.FetchOptions, req cache.Request) (cac "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Set the minimum query index to our current index so we block reqReal.QueryOptions.MinQueryIndex = opts.MinIndex reqReal.QueryOptions.MaxQueryTime = opts.Timeout diff --git a/agent/cache-types/config_entry.go b/agent/cache-types/config_entry.go index 2a93971666..d62f9a8a65 100644 --- a/agent/cache-types/config_entry.go +++ b/agent/cache-types/config_entry.go @@ -25,6 +25,10 @@ func (c *ConfigEntries) Fetch(opts cache.FetchOptions, req cache.Request) (cache "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Set the minimum query index to our current index so we block reqReal.QueryOptions.MinQueryIndex = opts.MinIndex reqReal.QueryOptions.MaxQueryTime = opts.Timeout diff --git a/agent/cache-types/connect_ca_leaf.go b/agent/cache-types/connect_ca_leaf.go index 64a9edb7e9..955ce4d33a 100644 --- a/agent/cache-types/connect_ca_leaf.go +++ b/agent/cache-types/connect_ca_leaf.go @@ -298,6 +298,10 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Do we already have a cert in the cache? var existing *structs.IssuedCert // Really important this is not a pointer type since otherwise we would set it diff --git a/agent/cache-types/connect_ca_root.go b/agent/cache-types/connect_ca_root.go index 7bea86d6ae..aa48245a50 100644 --- a/agent/cache-types/connect_ca_root.go +++ b/agent/cache-types/connect_ca_root.go @@ -27,6 +27,10 @@ func (c *ConnectCARoot) Fetch(opts cache.FetchOptions, req cache.Request) (cache "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Set the minimum query index to our current index so we block reqReal.QueryOptions.MinQueryIndex = opts.MinIndex reqReal.QueryOptions.MaxQueryTime = opts.Timeout diff --git a/agent/cache-types/discovery_chain.go b/agent/cache-types/discovery_chain.go index 62e4d04f51..60e386deb3 100644 --- a/agent/cache-types/discovery_chain.go +++ b/agent/cache-types/discovery_chain.go @@ -26,6 +26,10 @@ func (c *CompiledDiscoveryChain) Fetch(opts cache.FetchOptions, req cache.Reques "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Set the minimum query index to our current index so we block reqReal.QueryOptions.MinQueryIndex = opts.MinIndex reqReal.QueryOptions.MaxQueryTime = opts.Timeout diff --git a/agent/cache-types/health_services.go b/agent/cache-types/health_services.go index 4cbe289bea..55ebf3e470 100644 --- a/agent/cache-types/health_services.go +++ b/agent/cache-types/health_services.go @@ -26,6 +26,10 @@ func (c *HealthServices) Fetch(opts cache.FetchOptions, req cache.Request) (cach "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Set the minimum query index to our current index so we block reqReal.QueryOptions.MinQueryIndex = opts.MinIndex reqReal.QueryOptions.MaxQueryTime = opts.Timeout diff --git a/agent/cache-types/intention_match.go b/agent/cache-types/intention_match.go index 80b1e88596..0b960f0de4 100644 --- a/agent/cache-types/intention_match.go +++ b/agent/cache-types/intention_match.go @@ -25,6 +25,10 @@ func (c *IntentionMatch) Fetch(opts cache.FetchOptions, req cache.Request) (cach "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Set the minimum query index to our current index so we block reqReal.MinQueryIndex = opts.MinIndex reqReal.MaxQueryTime = opts.Timeout diff --git a/agent/cache-types/node_services.go b/agent/cache-types/node_services.go index 09491e60b2..33f9314bc5 100644 --- a/agent/cache-types/node_services.go +++ b/agent/cache-types/node_services.go @@ -26,6 +26,10 @@ func (c *NodeServices) Fetch(opts cache.FetchOptions, req cache.Request) (cache. "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Set the minimum query index to our current index so we block reqReal.QueryOptions.MinQueryIndex = opts.MinIndex reqReal.QueryOptions.MaxQueryTime = opts.Timeout diff --git a/agent/cache-types/prepared_query.go b/agent/cache-types/prepared_query.go index 964068038b..7dc67129b3 100644 --- a/agent/cache-types/prepared_query.go +++ b/agent/cache-types/prepared_query.go @@ -26,6 +26,10 @@ func (c *PreparedQuery) Fetch(opts cache.FetchOptions, req cache.Request) (cache "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Allways allow stale - there's no point in hitting leader if the request is // going to be served from cache and endup arbitrarily stale anyway. This // allows cached service-discover to automatically read scale across all diff --git a/agent/cache-types/resolved_service_config.go b/agent/cache-types/resolved_service_config.go index c1bba96068..a78c0ce812 100644 --- a/agent/cache-types/resolved_service_config.go +++ b/agent/cache-types/resolved_service_config.go @@ -26,6 +26,10 @@ func (c *ResolvedServiceConfig) Fetch(opts cache.FetchOptions, req cache.Request "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Set the minimum query index to our current index so we block reqReal.QueryOptions.MinQueryIndex = opts.MinIndex reqReal.QueryOptions.MaxQueryTime = opts.Timeout diff --git a/agent/cache-types/service_dump.go b/agent/cache-types/service_dump.go index 868c534e7a..d4653ee2d0 100644 --- a/agent/cache-types/service_dump.go +++ b/agent/cache-types/service_dump.go @@ -25,6 +25,10 @@ func (c *InternalServiceDump) Fetch(opts cache.FetchOptions, req cache.Request) "Internal cache failure: request wrong type: %T", req) } + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + // Set the minimum query index to our current index so we block reqReal.QueryOptions.MinQueryIndex = opts.MinIndex reqReal.QueryOptions.MaxQueryTime = opts.Timeout