ca: fix a bug that caused a non blocking leaf cert query after a blocking leaf cert query to block (#12820)

Fixes #12048

Fixes #12319

Regression introduced in #11693

Local reproduction steps:
1. `consul agent -dev`
2. `curl -sLiv 'localhost:8500/v1/agent/connect/ca/leaf/web'`
3. make note of the `X-Consul-Index` header returned 
4. `curl -sLi 'localhost:8500/v1/agent/connect/ca/leaf/web?index=<VALUE_FROM_STEP_3>'`
5. Kill the above curl when it hangs with Ctrl-C
6. Repeat (2) and it should not hang.
This commit is contained in:
R.B. Boyer 2022-04-20 12:21:47 -05:00 committed by GitHub
parent 76c03872b7
commit f3ce353a87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 105 additions and 0 deletions

3
.changelog/12820.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
ca: fix a bug that caused a non blocking leaf cert query after a blocking leaf cert query to block
```

View File

@ -6202,6 +6202,101 @@ func TestAgentConnectCALeafCert_goodNotLocal(t *testing.T) {
}
}
func TestAgentConnectCALeafCert_nonBlockingQuery_after_blockingQuery_shouldNotBlock(t *testing.T) {
// see: https://github.com/hashicorp/consul/issues/12048
runStep := func(t *testing.T, name string, fn func(t *testing.T)) {
t.Helper()
if !t.Run(name, fn) {
t.FailNow()
}
}
if testing.Short() {
t.Skip("too slow for testing.Short")
}
t.Parallel()
a := NewTestAgent(t, "")
testrpc.WaitForTestAgent(t, a.RPC, "dc1")
testrpc.WaitForActiveCARoot(t, a.RPC, "dc1", nil)
{
// Register a local service
args := &structs.ServiceDefinition{
ID: "foo",
Name: "test",
Address: "127.0.0.1",
Port: 8000,
Check: structs.CheckType{
TTL: 15 * time.Second,
},
}
req := httptest.NewRequest("PUT", "/v1/agent/service/register", jsonReader(args))
resp := httptest.NewRecorder()
a.srv.h.ServeHTTP(resp, req)
if !assert.Equal(t, 200, resp.Code) {
t.Log("Body: ", resp.Body.String())
}
}
var (
serialNumber string
index string
issued structs.IssuedCert
)
runStep(t, "do initial non-blocking query", func(t *testing.T) {
req := httptest.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil)
resp := httptest.NewRecorder()
a.srv.h.ServeHTTP(resp, req)
dec := json.NewDecoder(resp.Body)
require.NoError(t, dec.Decode(&issued))
serialNumber = issued.SerialNumber
require.Equal(t, "MISS", resp.Header().Get("X-Cache"),
"for the leaf cert cache type these are always MISS")
index = resp.Header().Get("X-Consul-Index")
})
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
// launch goroutine for blocking query
req := httptest.NewRequest("GET", "/v1/agent/connect/ca/leaf/test?index="+index, nil).Clone(ctx)
resp := httptest.NewRecorder()
a.srv.h.ServeHTTP(resp, req)
}()
// We just need to ensure that the above blocking query is in-flight before
// the next step, so do a little sleep.
time.Sleep(50 * time.Millisecond)
// The initial non-blocking query populated the leaf cert cache entry
// implicitly. The agent cache doesn't prune entries very often at all, so
// in between both of these steps the data should still be there, causing
// this to be a HIT that completes in less than 10m (the default inner leaf
// cert blocking query timeout).
runStep(t, "do a non-blocking query that should not block", func(t *testing.T) {
req := httptest.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil)
resp := httptest.NewRecorder()
a.srv.h.ServeHTTP(resp, req)
var issued2 structs.IssuedCert
dec := json.NewDecoder(resp.Body)
require.NoError(t, dec.Decode(&issued2))
require.Equal(t, "HIT", resp.Header().Get("X-Cache"))
// If this is actually returning a cached result, the serial number
// should be unchanged.
require.Equal(t, serialNumber, issued2.SerialNumber)
require.Equal(t, issued, issued2)
})
}
func TestAgentConnectCALeafCert_Vault_doesNotChurnLeafCertsAtIdle(t *testing.T) {
ca.SkipIfVaultNotPresent(t)

View File

@ -376,6 +376,13 @@ func (c *Cache) getEntryLocked(
// Check if re-validate is requested. If so the first time round the
// loop is not a hit but subsequent ones should be treated normally.
if !tEntry.Opts.Refresh && info.MustRevalidate {
if entry.Fetching {
// There is an active blocking query for this data, which has not
// returned. We can logically deduce that the contents of the cache
// are actually current, and we can simply return this while
// leaving the blocking query alone.
return true, true, entry
}
return true, false, entry
}