Merge master

This commit is contained in:
freddygv 2020-09-14 16:17:43 -06:00
commit 7fd518ff1d
1131 changed files with 39931 additions and 15074 deletions

3
.changelog/7628.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
agent: Allow to restrict servers that can join a given Serf Consul cluster.
```

3
.changelog/7899.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
acl: allow auth methods created in the primary datacenter to optionally create global tokens.
```

3
.changelog/7970.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:feature
acl: Added ACL Node Identities for easier creation of Consul Agent tokens.
```

3
.changelog/8158.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
connect: fix crash that would result if a mesh or terminating gateway's upstream has a hostname as an address and no healthy service instances available.
```

3
.changelog/8190.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
connect: Append port number to expected ingress hosts.
```

3
.changelog/8194.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
connect: various changes to make namespaces for intentions work more like for other subsystems.
```

11
.changelog/8211.txt Normal file
View File

@ -0,0 +1,11 @@
```release-note:bug
agent: Fixed a bug where Consul could crash when `verify_outgoing` was set to true but no client certificate was used.
```
```release-note:bug
auto_encrypt: Fixed an issue where auto encrypt certificate signing wasn't using the connect signing rate limiter.
```
```release-note:bug
auto_encrypt: Fixed several issues around retrieving the first TLS certificate where it would have the wrong CN and SANs. This was being masked by a second bug (also fixed) causing that certificate to immediately be discarded with a second certificate request being made afterwards.
```

3
.changelog/8216.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
connect: support Envoy v1.14.4, v1.13.4, v1.12.6.
```

3
.changelog/8218.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
dns: Improve RCODE of response when query targets a non-existent datacenter. [[GH-8102](https://github.com/hashicorp/consul/issues/8102)]
```

3
.changelog/8222.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
xds: version sniff envoy and switch regular expressions from 'regex' to 'safe_regex' on newer envoy versions.
```

3
.changelog/8268.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
version: The `version` CLI subcommand was altered to always show the git revision the binary was built from on the second line of output. Additionally the command gained a `-format` flag with the option now of outputting the version information in JSON form. **NOTE** This change has the potential to break any parsing done by users of the `version` commands output. In many cases nothing will need to be done but it is possible depending on how the output is parsed.
```

3
.changelog/8311.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
auto_encrypt: Fixed an issue that caused auto encrypt certificates to not be updated properly if the agents token was changed and the old token was deleted.
```

3
.changelog/8343.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
gossip: Avoid issue where two unique leave events for the same node could lead to infinite rebroadcast storms.
```

3
.changelog/8371.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
connect: Fixed issue where specifying a prometheus bind address would cause ingress gateways to fail to start up.
```

3
.changelog/8458.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
connect: Add support for http2 and grpc to ingress gateways
```

3
.changelog/8470.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
connect: use stronger validation that ingress gateways have compatible protocols defined for their upstreams
```

7
.changelog/8522.txt Normal file
View File

@ -0,0 +1,7 @@
```release-note:improvement
serf: update to `v0.9.4` which supports primary keys in the ListKeys operation.
```
```release-note:improvement
api: `GET v1/operator/keyring` also lists primary keys.
```

3
.changelog/8537.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
api: Fixed a panic caused by an api request with Connect=null
```

3
.changelog/8545.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:feature
agent: expose the list of supported envoy versions on /v1/agent/self
```

3
.changelog/8547.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
agent: ensure that we normalize bootstrapped config entries
```

3
.changelog/8552.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:feature
cache: Config parameters for cache throttling are now reloaded automatically on agent reload. Restarting the agent is not needed anymore.
```

3
.changelog/8569.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:feature
xds: use envoy's rbac filter to handle intentions entirely within envoy
```

11
.changelog/8575.txt Normal file
View File

@ -0,0 +1,11 @@
```release-note:improvement
api: Added constants for common tag keys and values in the `Tags` field of the `AgentMember` struct.
```
```release-note:improvement
api: Added `IsConsulServer` method to the `AgentMember` type to easily determine whether the agent is a server.
```
```release-note:improvement
api: Added `ACLMode` method to the `AgentMember` type to determine what ACL mode the agent is operating in.
```

3
.changelog/8585.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
connect: add support for specifying load balancing policy in service-resolver
```

3
.changelog/8588.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
connect: fix renewing secondary intermediate certificates
```

3
.changelog/8596.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:feature
connect: all config entries pick up a meta field
```

3
.changelog/8601.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
connect: fix bug in preventing some namespaced config entry modifications
```

3
.changelog/8602.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
api: Allow for the client to use TLS over a Unix domain socket.
```

3
.changelog/8603.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:feature
telemetry: track node and service counts and emit them as metrics
```

3
.changelog/8606.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
connect: `connect envoy` command now respects the `-ca-path` flag
```

3
.changelog/_8621.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
snapshot agent: Deregister critical snapshotting TTL check if leadership is transferred.
```

40
.changelog/changelog.tmpl Normal file
View File

@ -0,0 +1,40 @@
{{- if index .NotesByType "breaking-change" -}}
BREAKING CHANGES:
{{range index .NotesByType "breaking-change" -}}
* {{ template "note" .}}
{{ end -}}
{{- end -}}
{{- if .NotesByType.security }}
SECURITY:
{{range .NotesByType.security -}}
* {{ template "note" . }}
{{ end -}}
{{- end -}}
{{- if .NotesByType.feature -}}
FEATURES:
{{range .NotesByType.feature -}}
* {{ template "note" . }}
{{ end -}}
{{- end -}}
{{- if .NotesByType.improvement }}
IMPROVEMENTS:
{{range .NotesByType.improvement -}}
* {{ template "note" . }}
{{ end -}}
{{- end -}}
{{- if .NotesByType.bug }}
BUG FIXES:
{{range .NotesByType.bug -}}
* {{ template "note" . }}
{{ end -}}
{{- end -}}

3
.changelog/note.tmpl Normal file
View File

@ -0,0 +1,3 @@
{{- define "note" -}}
{{.Body}} [[GH-{{- .Issue -}}](https://github.com/hashicorp/consul/issues/{{- .Issue -}})]
{{- end -}}

View File

@ -3,7 +3,7 @@ version: 2
references:
images:
go: &GOLANG_IMAGE circleci/golang:1.14.6
go: &GOLANG_IMAGE circleci/golang:1.14.7
middleman: &MIDDLEMAN_IMAGE hashicorp/middleman-hashicorp:0.3.40
ember: &EMBER_IMAGE circleci/node:12-browsers
@ -19,7 +19,7 @@ references:
EMAIL: noreply@hashicorp.com
GIT_AUTHOR_NAME: circleci-consul
GIT_COMMITTER_NAME: circleci-consul
S3_ARTIFACT_BUCKET: consul-dev-artifacts
S3_ARTIFACT_BUCKET: consul-dev-artifacts-v2
BASH_ENV: .circleci/bash_env.sh
VAULT_BINARY_VERSION: 1.2.2
@ -33,6 +33,27 @@ steps:
curl -sSL "${url}/v${GOTESTSUM_RELEASE}/gotestsum_${GOTESTSUM_RELEASE}_linux_amd64.tar.gz" | \
sudo tar -xz --overwrite -C /usr/local/bin gotestsum
get-aws-cli: &get-aws-cli
run:
name: download and install AWS CLI
command: |
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
echo -e "${AWS_CLI_GPG_KEY}" | gpg --import
curl -o awscliv2.sig https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip.sig
gpg --verify awscliv2.sig awscliv2.zip
unzip awscliv2.zip
sudo ./aws/install
aws-assume-role: &aws-assume-role
run:
name: assume-role aws creds
command: |
# assume role has duration of 15 min (the minimum allowed)
CREDENTIALS="$(aws sts assume-role --duration-seconds 900 --role-arn ${ROLE_ARN} --role-session-name build-${CIRCLE_SHA1} | jq '.Credentials')"
echo "export AWS_ACCESS_KEY_ID=$(echo $CREDENTIALS | jq -r '.AccessKeyId')" >> $BASH_ENV
echo "export AWS_SECRET_ACCESS_KEY=$(echo $CREDENTIALS | jq -r '.SecretAccessKey')" >> $BASH_ENV
echo "export AWS_SESSION_TOKEN=$(echo $CREDENTIALS | jq -r '.SessionToken')" >> $BASH_ENV
# This step MUST be at the end of any set of steps due to the 'when' condition
notify-slack-failure: &notify-slack-failure
name: notify-slack-failure
@ -115,6 +136,36 @@ jobs:
fi
- run: *notify-slack-failure
check-generated-protobuf:
docker:
- image: *GOLANG_IMAGE
environment:
<<: *ENVIRONMENT
steps:
- checkout
- run:
name: Install protobuf
command: |
wget https://github.com/protocolbuffers/protobuf/releases/download/v3.12.3/protoc-3.12.3-linux-x86_64.zip
sudo unzip -d /usr/local protoc-*.zip
sudo chmod +x /usr/local/bin/protoc
rm protoc-*.zip
- run:
name: Install gogo/protobuf
command: |
gogo_version=$(go list -m github.com/gogo/protobuf | awk '{print $2}')
mkdir -p .gotools; cd .gotools; go mod init consul-tools
go get -v github.com/hashicorp/protoc-gen-go-binary
go get -v github.com/gogo/protobuf/protoc-gen-gofast@${gogo_version}
- run:
command: make --always-make proto
- run: |
if ! git diff --exit-code; then
echo "Generated code was not updated correctly"
exit 1
fi
go-test:
docker:
- image: *GOLANG_IMAGE
@ -359,13 +410,13 @@ jobs:
# upload development build to s3
dev-upload-s3:
docker:
- image: circleci/python:stretch
- image: *GOLANG_IMAGE
environment:
<<: *ENVIRONMENT
steps:
- run:
name: Install awscli
command: sudo pip install awscli
- checkout
- *get-aws-cli
- *aws-assume-role
# get consul binary
- attach_workspace:
at: bin/
@ -640,13 +691,13 @@ jobs:
command: bash <(curl -s https://codecov.io/bash) -v -c -C $CIRCLE_SHA1 -F ui
- run: *notify-slack-failure
envoy-integration-test-1.11.2:
envoy-integration-test-1.12.6:
docker:
# We only really need bash and docker-compose which is installed on all
# Circle images but pick Go since we have to pick one of them.
- image: *GOLANG_IMAGE
environment:
ENVOY_VERSION: "1.11.2"
ENVOY_VERSION: "1.12.6"
steps: &ENVOY_INTEGRATION_TEST_STEPS
- checkout
# Get go binary from workspace
@ -675,13 +726,6 @@ jobs:
path: *TEST_RESULTS_DIR
- run: *notify-slack-failure
envoy-integration-test-1.12.6:
docker:
- image: *GOLANG_IMAGE
environment:
ENVOY_VERSION: "1.12.6"
steps: *ENVOY_INTEGRATION_TEST_STEPS
envoy-integration-test-1.13.4:
docker:
- image: *GOLANG_IMAGE
@ -696,6 +740,13 @@ jobs:
ENVOY_VERSION: "1.14.4"
steps: *ENVOY_INTEGRATION_TEST_STEPS
envoy-integration-test-1.15.0:
docker:
- image: *GOLANG_IMAGE
environment:
ENVOY_VERSION: "1.15.0"
steps: *ENVOY_INTEGRATION_TEST_STEPS
# run integration tests for the connect ca providers
test-connect-ca-providers:
docker:
@ -744,6 +795,7 @@ workflows:
- stable-website
- /^docs\/.*/
- /^ui\/.*/
- check-generated-protobuf: *filter-ignore-non-go-branches
- lint-consul-retry: *filter-ignore-non-go-branches
- lint: *filter-ignore-non-go-branches
- test-connect-ca-providers: *filter-ignore-non-go-branches
@ -807,9 +859,6 @@ workflows:
- nomad-integration-0_8:
requires:
- dev-build
- envoy-integration-test-1.11.2:
requires:
- dev-build
- envoy-integration-test-1.12.6:
requires:
- dev-build
@ -819,6 +868,9 @@ workflows:
- envoy-integration-test-1.14.4:
requires:
- dev-build
- envoy-integration-test-1.15.0:
requires:
- dev-build
website:
jobs:

View File

@ -124,7 +124,7 @@ The underlying script dumps the full Consul log output to `test.log` in
the directory of the target package. In the example above it would be
located at `consul/connect/proxy/test.log`.
Historically, the defaults for `FLAKE_CPUS` (30) and `FLAKE_N` (0.15) have been
Historically, the defaults for `FLAKE_CPUS` (0.15) and `FLAKE_N` (30) have been
sufficient to surface a flaky test. If a test is run in this environment and
it does not fail after 30 iterations, it should be sufficiently stable.

View File

@ -1,17 +1,63 @@
## UNRELEASED
## 1.8.4 (September 11, 2020)
FEATURES:
* agent: expose the list of supported envoy versions on /v1/agent/self [[GH-8545](https://github.com/hashicorp/consul/issues/8545)]
* cache: Config parameters for cache throttling are now reloaded automatically on agent reload. Restarting the agent is not needed anymore. [[GH-8552](https://github.com/hashicorp/consul/issues/8552)]
* connect: all config entries pick up a meta field [[GH-8596](https://github.com/hashicorp/consul/issues/8596)]
IMPROVEMENTS:
* api: Added `ACLMode` method to the `AgentMember` type to determine what ACL mode the agent is operating in. [[GH-8575](https://github.com/hashicorp/consul/issues/8575)]
* api: Added `IsConsulServer` method to the `AgentMember` type to easily determine whether the agent is a server. [[GH-8575](https://github.com/hashicorp/consul/issues/8575)]
* api: Added constants for common tag keys and values in the `Tags` field of the `AgentMember` struct. [[GH-8575](https://github.com/hashicorp/consul/issues/8575)]
* api: Allow for the client to use TLS over a Unix domain socket. [[GH-8602](https://github.com/hashicorp/consul/issues/8602)]
* api: `GET v1/operator/keyring` also lists primary keys. [[GH-8522](https://github.com/hashicorp/consul/issues/8522)]
* connect: Add support for http2 and grpc to ingress gateways [[GH-8458](https://github.com/hashicorp/consul/issues/8458)]
* serf: update to `v0.9.4` which supports primary keys in the ListKeys operation. [[GH-8522](https://github.com/hashicorp/consul/issues/8522)]
BUGFIXES:
* connect: use stronger validation that ingress gateways have compatible protocols defined for their upstreams [[GH-8494](https://github.com/hashicorp/consul/issues/8494)]
* agent: ensure that we normalize bootstrapped config entries [[GH-8547](https://github.com/hashicorp/consul/issues/8547)]
* api: Fixed a panic caused by an api request with Connect=null [[GH-8537](https://github.com/hashicorp/consul/issues/8537)]
* connect: `connect envoy` command now respects the `-ca-path` flag [[GH-8606](https://github.com/hashicorp/consul/issues/8606)]
* connect: fix bug in preventing some namespaced config entry modifications [[GH-8601](https://github.com/hashicorp/consul/issues/8601)]
* connect: fix renewing secondary intermediate certificates [[GH-8588](https://github.com/hashicorp/consul/issues/8588)]
* ui: fixed a bug related to in-folder KV creation [GH-8613](https://github.com/hashicorp/consul/pull/8613)
## 1.8.3 (August 12, 2020)
BUGFIXES:
* catalog: fixed a bug where nodes, services, and checks would not be restored with the correct Create/ModifyIndex when restoring from a snapshot [[GH-8485](https://github.com/hashicorp/consul/pull/8474)]
* vendor: update github.com/armon/go-metrics to v0.3.4 to mitigate a potential panic when emitting Prometheus metrics at an interval longer than the metric expiry time [[GH-8478](https://github.com/hashicorp/consul/pull/8478)]
* connect: **(Consul Enterprise only)** Fixed a regression that prevented mesh gateways from routing to services in their local datacenter that reside outside of the default namespace.
## 1.8.2 (August 07, 2020)
* auto_config: Fixed an issue where auto-config could be enabled in secondary DCs without enabling token replication when ACLs were enabled. [[GH-8451](https://github.com/hashicorp/consul/pull/8451)]
* xds: revert setting set_node_on_first_message_only to true when generating envoy bootstrap config [[GH-8440](https://github.com/hashicorp/consul/issues/8440)]
## 1.8.1 (July 30, 2020)
FEATURES:
* acl: Added ACL Node Identities for easier creation of Consul Agent tokens. [[GH-7970](https://github.com/hashicorp/consul/pull/7970)]
* agent: Added Consul client agent automatic configuration utilizing JWTs for authorizing the request to generate ACL tokens, TLS certificates and retrieval of the gossip encryption key. [[GH-8003](https://github.com/hashicorp/consul/pull/8003)], [[GH-8035](https://github.com/hashicorp/consul/pull/8035)], [[GH-8086](https://github.com/hashicorp/consul/pull/8086)], [[GH-8148](https://github.com/hashicorp/consul/pull/8148)], [[GH-8157](https://github.com/hashicorp/consul/pull/8157)], [[GH-8159](https://github.com/hashicorp/consul/pull/8159)], [[GH-8193](https://github.com/hashicorp/consul/pull/8193)], [[GH-8253](https://github.com/hashicorp/consul/pull/8253)], [[GH-8301](https://github.com/hashicorp/consul/pull/8301)], [[GH-8360](https://github.com/hashicorp/consul/pull/8360)], [[GH-8362](https://github.com/hashicorp/consul/pull/8362)], [[GH-8363](https://github.com/hashicorp/consul/pull/8363)], [[GH-8364](https://github.com/hashicorp/consul/pull/8364)]
* agent: Added Consul client agent automatic configuration utilizing JWTs for authorizing the request to generate ACL tokens, TLS certificates and retrieval of the gossip encryption key. [[GH-8003](https://github.com/hashicorp/consul/pull/8003)], [[GH-8035](https://github.com/hashicorp/consul/pull/8035)], [[GH-8086](https://github.com/hashicorp/consul/pull/8086)], [[GH-8148](https://github.com/hashicorp/consul/pull/8148)], [[GH-8157](https://github.com/hashicorp/consul/pull/8157)], [[GH-8159](https://github.com/hashicorp/consul/pull/8159)], [[GH-8193](https://github.com/hashicorp/consul/pull/8193)], [[GH-8253](https://github.com/hashicorp/consul/pull/8253)], [[GH-8301](https://github.com/hashicorp/consul/pull/8301)], [[GH-8360](https://github.com/hashicorp/consul/pull/8360)], [[GH-8362](https://github.com/hashicorp/consul/pull/8362)], [[GH-8363](https://github.com/hashicorp/consul/pull/8363)], [[GH-8364](https://github.com/hashicorp/consul/pull/8364)], [[GH-8409](https://github.com/hashicorp/consul/pull/8409)]
IMPROVEMENTS:
* acl: allow auth methods created in the primary datacenter to optionally create global tokens [[GH-7899](https://github.com/hashicorp/consul/issues/7899)]
* agent: Allow to restrict servers that can join a given Serf Consul cluster. [[GH-7628](https://github.com/hashicorp/consul/issues/7628)]
* agent: new configuration options allow ratelimiting of the agent-cache: `cache.entry_fetch_rate` and `cache.entry_fetch_max_burst`. [[GH-8226](https://github.com/hashicorp/consul/pull/8226)]
* api: Added methods to allow passing query options to leader and peers endpoints to mirror HTTP API [[GH-8395](https://github.com/hashicorp/consul/pull/8395)]
* auto_config: when configuring auto_config, connect is turned on automatically [[GH-8433](https://github.com/hashicorp/consul/pull/8433)]
* connect: various changes to make namespaces for intentions work more like for other subsystems [[GH-8194](https://github.com/hashicorp/consul/issues/8194)]
* connect: Append port number to expected ingress hosts [[GH-8190](https://github.com/hashicorp/consul/pull/8190)]
* connect: add support for envoy 1.15.0 and drop support for 1.11.x [[GH-8424](https://github.com/hashicorp/consul/issues/8424)]
* connect: support Envoy v1.14.4, v1.13.4, v1.12.6 [[GH-8216](https://github.com/hashicorp/consul/issues/8216)]
* dns: Improve RCODE of response when query targets a non-existent datacenter. [[GH-8102](https://github.com/hashicorp/consul/issues/8102)],[[GH-8218](https://github.com/hashicorp/consul/issues/8218)]
* version: The `version` CLI subcommand was altered to always show the git revision the binary was built from on the second line of output. Additionally the command gained a `-format` flag with the option now of outputting the version information in JSON form. **NOTE** This change has the potential to break any parsing done by users of the `version` commands output. In many cases nothing will need to be done but it is possible depending on how the output is parsed. [[GH-8268](https://github.com/hashicorp/consul/pull/8268)]
@ -19,12 +65,15 @@ IMPROVEMENTS:
BUGFIXES:
* agent: Fixed a bug where Consul could crash when `verify_outgoing` was set to true but no client certificate was used. [[GH-8211](https://github.com/hashicorp/consul/pull/8211)]
* agent: Fixed an issue with lock contention during RPCs when under load while using the Prometheus metrics sink. [[GH-8372](https://github.com/hashicorp/consul/pull/8372)]
* auto_encrypt: Fixed an issue where auto encrypt certificate signing wasn't using the connect signing rate limiter. [[GH-8211](https://github.com/hashicorp/consul/pull/8211)]
* auto_encrypt: Fixed several issues around retrieving the first TLS certificate where it would have the wrong CN and SANs. This was being masked by a second bug (also fixed) causing that certificate to immediately be discarded with a second certificate request being made afterwards. [[GH-8211](https://github.com/hashicorp/consul/pull/8211)]
* auto_encrypt: Fixed an issue that caused auto encrypt certificates to not be updated properly if the agents token was changed and the old token was deleted. [[GH-8311](https://github.com/hashicorp/consul/pull/8311)]
* connect: fix crash that would result if a mesh or terminating gateway's upstream has a hostname as an address and no healthy service instances available. [[GH-8158](https://github.com/hashicorp/consul/issues/8158)]
* connect: Fixed issue where specifying a prometheus bind address would cause ingress gateways to fail to start up [[GH-8371]](https://github.com/hashicorp/consul/pull/8371)
* gossip: Avoid issue where two unique leave events for the same node could lead to infinite rebroadcast storms [[GH-8343](https://github.com/hashicorp/consul/issues/8343)]
* router: Mark its own cluster as healthy when rebalancing. [[GH-8406](https://github.com/hashicorp/consul/pull/8406)]
* snapshot: **(Consul Enterprise only)** Fixed a regression when using Azure blob storage.
* xds: version sniff envoy and switch regular expressions from 'regex' to 'safe_regex' on newer envoy versions [[GH-8222](https://github.com/hashicorp/consul/issues/8222)]
## 1.8.0 (June 18, 2020)
@ -95,6 +144,39 @@ BUGFIXES:
* ui: Miscellaneous amends for Safari and Firefox [[GH-7904](https://github.com/hashicorp/consul/issues/7904)] [[GH-7907](https://github.com/hashicorp/consul/pull/7907)]
* ui: Ensure a value is always passed to CONSUL_SSO_ENABLED [[GH-7913](https://github.com/hashicorp/consul/pull/7913)]
## 1.7.8 (September 11, 2020)
FEATURES:
* agent: expose the list of supported envoy versions on /v1/agent/self [[GH-8545](https://github.com/hashicorp/consul/issues/8545)]
BUG FIXES:
* connect: fix bug in preventing some namespaced config entry modifications [[GH-8601](https://github.com/hashicorp/consul/issues/8601)]
* api: fixed a panic caused by an api request with Connect=null [[GH-8537](https://github.com/hashicorp/consul/pull/8537)]
## 1.7.7 (August 12, 2020)
BUGFIXES:
* catalog: fixed a bug where nodes, services, and checks would not be restored with the correct Create/ModifyIndex when restoring from a snapshot [[GH-8486](https://github.com/hashicorp/consul/pull/8486)]
* vendor: update github.com/armon/go-metrics to v0.3.4 to mitigate a potential panic when emitting Prometheus metrics at an interval longer than the metric expiry time [[GH-8478](https://github.com/hashicorp/consul/pull/8478)]
## 1.7.6 (August 07, 2020)
BUG FIXES:
* xds: revert setting set_node_on_first_message_only to true when generating envoy bootstrap config [[GH-8441](https://github.com/hashicorp/consul/issues/8441)]
## 1.7.5 (July 30, 2020)
BUG FIXES:
* agent: Fixed an issue with lock contention during RPCs when under load while using the Prometheus metrics sink. [[GH-8372](https://github.com/hashicorp/consul/pull/8372)]
* gossip: Avoid issue where two unique leave events for the same node could lead to infinite rebroadcast storms [[GH-8353](https://github.com/hashicorp/consul/issues/8353)]
* snapshot: **(Consul Enterprise only)** Fixed a regression when using Azure blob storage.
* Return a service splitter's weight or a zero [[GH-8355](https://github.com/hashicorp/consul/issues/8355)]
## 1.7.4 (June 10, 2020)
SECURITY:
@ -297,6 +379,25 @@ BUGFIXES:
* ui: Discovery-Chain: Improve parsing of redirects [[GH-7174](https://github.com/hashicorp/consul/pull/7174)]
* ui: Fix styling of duplicate intention error message [[GH6936]](https://github.com/hashicorp/consul/pull/6936)
## 1.6.9 (September 11, 2020)
BUG FIXES:
* api: fixed a panic caused by an api request with Connect=null [[GH-8537](https://github.com/hashicorp/consul/pull/8537)]
## 1.6.8 (August 12, 2020)
BUG FIXES:
* vendor: update github.com/armon/go-metrics to v0.3.4 to mitigate a potential panic when emitting Prometheus metrics at an interval longer than the metric expiry time [[GH-8478](https://github.com/hashicorp/consul/pull/8478)]
## 1.6.7 (July 30, 2020)
BUG FIXES:
* agent: Fixed an issue with lock contention during RPCs when under load while using the Prometheus metrics sink. [[GH-8372](https://github.com/hashicorp/consul/pull/8372)]
* gossip: Avoid issue where two unique leave events for the same node could lead to infinite rebroadcast storms [[GH-8345](https://github.com/hashicorp/consul/issues/8345)]
## 1.6.6 (June 10, 2020)
SECURITY:

View File

@ -363,14 +363,6 @@ else
@go test -v ./agent/connect/ca
endif
proto-delete:
@echo "Removing $(PROTOGOFILES)"
-@rm $(PROTOGOFILES)
@echo "Removing $(PROTOGOBINFILES)"
-@rm $(PROTOGOBINFILES)
proto-rebuild: proto-delete proto
proto: $(PROTOGOFILES) $(PROTOGOBINFILES)
@echo "Generated all protobuf Go files"
@ -387,4 +379,4 @@ module-versions:
.PHONY: all ci bin dev dist cov test test-flake test-internal cover lint ui static-assets tools
.PHONY: docker-images go-build-image ui-build-image static-assets-docker consul-docker ui-docker
.PHONY: version proto proto-rebuild proto-delete test-envoy-integ
.PHONY: version proto test-envoy-integ

View File

@ -1,7 +1,7 @@
# Consul [![CircleCI](https://circleci.com/gh/hashicorp/consul/tree/master.svg?style=svg)](https://circleci.com/gh/hashicorp/consul/tree/master) [![Discuss](https://img.shields.io/badge/discuss-consul-ca2171.svg?style=flat)](https://discuss.hashicorp.com/c/consul)
* Website: https://www.consul.io
* Tutorials: [https://learn.hashicorp.com](https://learn.hashicorp.com/consul)
* Tutorials: [HashiCorp Learn](https://learn.hashicorp.com/consul)
* Forum: [Discuss](https://discuss.hashicorp.com/c/consul)
Consul is a distributed, highly available, and data center aware solution to connect and configure applications across dynamic, distributed infrastructure.
@ -41,9 +41,10 @@ contacting us at security@hashicorp.com.
A few quick start guides are available on the Consul website:
* **Standalone binary install:** https://learn.hashicorp.com/consul/getting-started/install
* **Minikube install:** https://learn.hashicorp.com/consul/kubernetes/minikube
* **Kubernetes install:** https://learn.hashicorp.com/consul/kubernetes/kubernetes-deployment-guide
* **Standalone binary install:** https://learn.hashicorp.com/tutorials/consul/get-started-install
* **Minikube install:** https://learn.hashicorp.com/tutorials/consul/kubernetes-minikube
* **Kind install:** https://learn.hashicorp.com/tutorials/consul/kubernetes-kind
* **Kubernetes install:** https://learn.hashicorp.com/tutorials/consul/kubernetes-deployment-guide
## Documentation

View File

@ -59,7 +59,7 @@ func (a *Agent) aclAccessorID(secretID string) string {
return ident.ID()
}
func (a *Agent) initializeACLs() error {
func initializeACLs(nodeName string) (acl.Authorizer, error) {
// Build a policy for the agent master token.
// The builtin agent master policy allows reading any node information
// and allows writes to the agent with the node name of the running agent
@ -69,7 +69,7 @@ func (a *Agent) initializeACLs() error {
PolicyRules: acl.PolicyRules{
Agents: []*acl.AgentRule{
{
Node: a.config.NodeName,
Node: nodeName,
Policy: acl.PolicyWrite,
},
},
@ -81,12 +81,7 @@ func (a *Agent) initializeACLs() error {
},
},
}
master, err := acl.NewPolicyAuthorizerWithDefaults(acl.DenyAll(), []*acl.Policy{policy}, nil)
if err != nil {
return err
}
a.aclMasterAuthorizer = master
return nil
return acl.NewPolicyAuthorizerWithDefaults(acl.DenyAll(), []*acl.Policy{policy}, nil)
}
// vetServiceRegister makes sure the service registration action is allowed by

View File

@ -25,27 +25,6 @@ type authzResolver func(string) (structs.ACLIdentity, acl.Authorizer, error)
type identResolver func(string) (structs.ACLIdentity, error)
type TestACLAgent struct {
// Name is an optional name of the agent.
Name string
HCL string
// Config is the agent configuration. If Config is nil then
// TestConfig() is used. If Config.DataDir is set then it is
// the callers responsibility to clean up the data directory.
// Otherwise, a temporary data directory is created and removed
// when Shutdown() is called.
Config *config.RuntimeConfig
// LogOutput is the sink for the logs. If nil, logs are written
// to os.Stderr.
LogOutput io.Writer
// DataDir is the data directory which is used when Config.DataDir
// is not set. It is created automatically and removed when
// Shutdown() is called.
DataDir string
resolveAuthzFn authzResolver
resolveIdentFn identResolver
@ -56,39 +35,42 @@ type TestACLAgent struct {
// Basically it needs a local state for some of the vet* functions, a logger and a delegate.
// The key is that we are the delegate so we can control the ResolveToken responses
func NewTestACLAgent(t *testing.T, name string, hcl string, resolveAuthz authzResolver, resolveIdent identResolver) *TestACLAgent {
a := &TestACLAgent{Name: name, HCL: hcl, resolveAuthzFn: resolveAuthz, resolveIdentFn: resolveIdent}
dataDir := `data_dir = "acl-agent"`
t.Helper()
logOutput := testutil.NewLogBuffer(t)
logger := hclog.NewInterceptLogger(&hclog.LoggerOptions{
Name: a.Name,
Level: hclog.Debug,
Output: logOutput,
})
a := &TestACLAgent{resolveAuthzFn: resolveAuthz, resolveIdentFn: resolveIdent}
opts := []AgentOption{
WithLogger(logger),
WithBuilderOpts(config.BuilderOpts{
HCL: []string{
TestConfigHCL(NodeID()),
a.HCL,
dataDir,
},
}),
dataDir := testutil.TempDir(t, "acl-agent")
logBuffer := testutil.NewLogBuffer(t)
loader := func(source config.Source) (*config.RuntimeConfig, []string, error) {
dataDir := fmt.Sprintf(`data_dir = "%s"`, dataDir)
opts := config.BuilderOpts{
HCL: []string{TestConfigHCL(NodeID()), hcl, dataDir},
}
agent, err := New(opts...)
cfg, warnings, err := config.Load(opts, source)
if cfg != nil {
cfg.Telemetry.Disable = true
}
return cfg, warnings, err
}
bd, err := NewBaseDeps(loader, logBuffer)
require.NoError(t, err)
a.Config = agent.GetConfig()
bd.Logger = hclog.NewInterceptLogger(&hclog.LoggerOptions{
Name: name,
Level: hclog.Debug,
Output: logBuffer,
TimeFormat: "04:05.000",
})
bd.MetricsHandler = metrics.NewInmemSink(1*time.Second, time.Minute)
agent, err := New(bd)
require.NoError(t, err)
agent.delegate = a
agent.State = local.NewState(LocalConfig(bd.RuntimeConfig), bd.Logger, bd.Tokens)
agent.State.TriggerSyncChanges = func() {}
a.Agent = agent
agent.LogOutput = logOutput
agent.logger = logger
agent.MemSink = metrics.NewInmemSink(1*time.Second, time.Minute)
a.Agent.delegate = a
a.Agent.State = local.NewState(LocalConfig(a.Config), a.Agent.logger, a.Agent.tokens)
a.Agent.State.TriggerSyncChanges = func() {}
return a
}
@ -202,7 +184,9 @@ func TestACL_AgentMasterToken(t *testing.T) {
t.Parallel()
a := NewTestACLAgent(t, t.Name(), TestACLConfig(), nil, nil)
a.loadTokens(a.config)
err := a.tokens.Load(a.config.ACLTokens, a.logger)
require.NoError(t, err)
authz, err := a.resolveToken("towel")
require.NotNil(t, authz)
require.Nil(t, err)

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,8 @@
package agent
import (
"encoding/json"
"fmt"
"net/http"
"path/filepath"
"strconv"
"strings"
@ -17,10 +15,10 @@ import (
"github.com/hashicorp/consul/agent/debug"
"github.com/hashicorp/consul/agent/structs"
token_store "github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/agent/xds/proxysupport"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/ipaddr"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/lib/file"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/logging/monitor"
"github.com/hashicorp/consul/types"
@ -38,6 +36,11 @@ type Self struct {
Member serf.Member
Stats map[string]map[string]string
Meta map[string]string
XDS *xdsSelf `json:"xDS,omitempty"`
}
type xdsSelf struct {
SupportedProxies map[string][]string
}
func (s *HTTPServer) AgentSelf(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
@ -60,6 +63,15 @@ func (s *HTTPServer) AgentSelf(resp http.ResponseWriter, req *http.Request) (int
}
}
var xds *xdsSelf
if s.agent.grpcServer != nil {
xds = &xdsSelf{
SupportedProxies: map[string][]string{
"envoy": proxysupport.EnvoyVersions,
},
}
}
config := struct {
Datacenter string
NodeName string
@ -82,6 +94,7 @@ func (s *HTTPServer) AgentSelf(resp http.ResponseWriter, req *http.Request) (int
Member: s.agent.LocalMember(),
Stats: s.agent.Stats(),
Meta: s.agent.State.Metadata(),
XDS: xds,
}, nil
}
@ -1217,20 +1230,10 @@ func (s *HTTPServer) AgentToken(resp http.ResponseWriter, req *http.Request) (in
return nil, nil
}
if s.agent.config.ACLEnableTokenPersistence {
// we hold the lock around updating the internal token store
// as well as persisting the tokens because we don't want to write
// into the store to have something else wipe it out before we can
// persist everything (like an agent config reload). The token store
// lock is only held for those operations so other go routines that
// just need to read some token out of the store will not be impacted
// any more than they would be without token persistence.
s.agent.persistedTokensLock.Lock()
defer s.agent.persistedTokensLock.Unlock()
}
// Figure out the target token.
target := strings.TrimPrefix(req.URL.Path, "/v1/agent/token/")
err = s.agent.tokens.WithPersistenceLock(func() error {
triggerAntiEntropySync := false
switch target {
case "acl_token", "default":
@ -1252,44 +1255,17 @@ func (s *HTTPServer) AgentToken(resp http.ResponseWriter, req *http.Request) (in
s.agent.tokens.UpdateReplicationToken(args.Token, token_store.TokenSourceAPI)
default:
resp.WriteHeader(http.StatusNotFound)
fmt.Fprintf(resp, "Token %q is unknown", target)
return nil, nil
return NotFoundError{Reason: fmt.Sprintf("Token %q is unknown", target)}
}
// TODO: is it safe to move this out of WithPersistenceLock?
if triggerAntiEntropySync {
s.agent.sync.SyncFull.Trigger()
}
if s.agent.config.ACLEnableTokenPersistence {
tokens := persistedTokens{}
if tok, source := s.agent.tokens.UserTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI {
tokens.Default = tok
}
if tok, source := s.agent.tokens.AgentTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI {
tokens.Agent = tok
}
if tok, source := s.agent.tokens.AgentMasterTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI {
tokens.AgentMaster = tok
}
if tok, source := s.agent.tokens.ReplicationTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI {
tokens.Replication = tok
}
data, err := json.Marshal(tokens)
return nil
})
if err != nil {
s.agent.logger.Warn("failed to persist tokens", "error", err)
return nil, fmt.Errorf("Failed to marshal tokens for persistence: %v", err)
}
if err := file.WriteAtomicWithPerms(filepath.Join(s.agent.config.DataDir, tokensPath), data, 0700, 0600); err != nil {
s.agent.logger.Warn("failed to persist tokens", "error", err)
return nil, fmt.Errorf("Failed to persist tokens - %v", err)
}
return nil, err
}
s.agent.logger.Info("Updated agent's ACL token", "token", target)

View File

@ -13,7 +13,6 @@ import (
"net/http/httptest"
"net/url"
"os"
"reflect"
"strconv"
"strings"
"testing"
@ -27,6 +26,7 @@ import (
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
tokenStore "github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/agent/xds/proxysupport"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/sdk/testutil"
@ -1209,39 +1209,65 @@ func TestAgent_Checks_ACLFilter(t *testing.T) {
func TestAgent_Self(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, `
cases := map[string]struct {
hcl string
expectXDS bool
}{
"normal": {
hcl: `
node_meta {
somekey = "somevalue"
}
`)
`,
expectXDS: true,
},
"no grpc": {
hcl: `
node_meta {
somekey = "somevalue"
}
ports = {
grpc = -1
}
`,
expectXDS: false,
},
}
for name, tc := range cases {
tc := tc
t.Run(name, func(t *testing.T) {
a := NewTestAgent(t, tc.hcl)
defer a.Shutdown()
testrpc.WaitForTestAgent(t, a.RPC, "dc1")
req, _ := http.NewRequest("GET", "/v1/agent/self", nil)
obj, err := a.srv.AgentSelf(nil, req)
if err != nil {
t.Fatalf("err: %v", err)
}
require.NoError(t, err)
val := obj.(Self)
if int(val.Member.Port) != a.Config.SerfPortLAN {
t.Fatalf("incorrect port: %v", obj)
}
if val.DebugConfig["SerfPortLAN"].(int) != a.Config.SerfPortLAN {
t.Fatalf("incorrect port: %v", obj)
}
require.Equal(t, a.Config.SerfPortLAN, int(val.Member.Port))
require.Equal(t, a.Config.SerfPortLAN, val.DebugConfig["SerfPortLAN"].(int))
cs, err := a.GetLANCoordinate()
if err != nil {
t.Fatalf("err: %v", err)
}
if c := cs[a.config.SegmentName]; !reflect.DeepEqual(c, val.Coord) {
t.Fatalf("coordinates are not equal: %v != %v", c, val.Coord)
}
require.NoError(t, err)
require.Equal(t, cs[a.config.SegmentName], val.Coord)
delete(val.Meta, structs.MetaSegmentKey) // Added later, not in config.
if !reflect.DeepEqual(a.config.NodeMeta, val.Meta) {
t.Fatalf("meta fields are not equal: %v != %v", a.config.NodeMeta, val.Meta)
require.Equal(t, a.config.NodeMeta, val.Meta)
if tc.expectXDS {
require.NotNil(t, val.XDS, "xds component missing when gRPC is enabled")
require.Equal(t,
map[string][]string{"envoy": proxysupport.EnvoyVersions},
val.XDS.SupportedProxies,
)
} else {
require.Nil(t, val.XDS, "xds component should be missing when gRPC is disabled")
}
})
}
}
@ -1332,7 +1358,7 @@ func TestAgent_Reload(t *testing.T) {
t.Fatal("missing redis service")
}
cfg2 := TestConfig(testutil.Logger(t), config.Source{
cfg2 := TestConfig(testutil.Logger(t), config.FileSource{
Name: "reload",
Format: "hcl",
Data: `
@ -1466,7 +1492,7 @@ func TestAgent_ReloadDoesNotTriggerWatch(t *testing.T) {
})
// Let's take almost the same config
cfg2 := TestConfig(testutil.Logger(t), config.Source{
cfg2 := TestConfig(testutil.Logger(t), config.FileSource{
Name: "reload",
Format: "hcl",
Data: `
@ -4542,12 +4568,15 @@ func TestAgent_Monitor(t *testing.T) {
req = req.WithContext(cancelCtx)
resp := httptest.NewRecorder()
errCh := make(chan error)
chErr := make(chan error)
chStarted := make(chan struct{})
go func() {
close(chStarted)
_, err := a.srv.AgentMonitor(resp, req)
errCh <- err
chErr <- err
}()
<-chStarted
require.NoError(t, a.Shutdown())
// Wait until we have received some type of logging output
@ -4556,7 +4585,7 @@ func TestAgent_Monitor(t *testing.T) {
}, 3*time.Second, 100*time.Millisecond)
cancelFunc()
err := <-errCh
err := <-chErr
require.NoError(t, err)
got := resp.Body.String()
@ -4745,13 +4774,14 @@ func TestAgent_Token(t *testing.T) {
init tokens
raw tokens
effective tokens
expectedErr error
}{
{
name: "bad token name",
method: "PUT",
url: "nope?token=root",
body: body("X"),
code: http.StatusNotFound,
expectedErr: NotFoundError{Reason: `Token "nope" is unknown`},
},
{
name: "bad JSON",
@ -4913,7 +4943,12 @@ func TestAgent_Token(t *testing.T) {
url := fmt.Sprintf("/v1/agent/token/%s", tt.url)
resp := httptest.NewRecorder()
req, _ := http.NewRequest(tt.method, url, tt.body)
_, err := a.srv.AgentToken(resp, req)
if tt.expectedErr != nil {
require.Equal(t, tt.expectedErr, err)
return
}
require.NoError(t, err)
require.Equal(t, tt.code, resp.Code)
require.Equal(t, tt.effective.user, a.tokens.UserToken())

View File

@ -23,18 +23,13 @@ func (a *Agent) initEnterprise(consulCfg *consul.Config) error {
return nil
}
// loadEnterpriseTokens is a noop stub for the func defined agent_ent.go
func (a *Agent) loadEnterpriseTokens(conf *config.RuntimeConfig) {
}
// reloadEnterprise is a noop stub for the func defined agent_ent.go
func (a *Agent) reloadEnterprise(conf *config.RuntimeConfig) error {
return nil
}
// enterpriseConsulConfig is a noop stub for the func defined in agent_ent.go
func (a *Agent) enterpriseConsulConfig(base *consul.Config) (*consul.Config, error) {
return base, nil
func enterpriseConsulConfig(_ *consul.Config, _ *config.RuntimeConfig) {
}
// WriteEvent is a noop stub for the func defined agent_ent.go

View File

@ -21,6 +21,7 @@ import (
"testing"
"time"
"github.com/golang/protobuf/jsonpb"
"github.com/google/tcpproxy"
"github.com/hashicorp/consul/agent/cache"
cachetype "github.com/hashicorp/consul/agent/cache-types"
@ -32,16 +33,17 @@ import (
"github.com/hashicorp/consul/internal/go-sso/oidcauth/oidcauthtest"
"github.com/hashicorp/consul/ipaddr"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/proto/pbautoconf"
"github.com/hashicorp/consul/sdk/freeport"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/hashicorp/consul/testrpc"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/go-uuid"
"github.com/hashicorp/serf/coordinate"
"github.com/hashicorp/serf/serf"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/time/rate"
"gopkg.in/square/go-jose.v2/jwt"
)
@ -250,120 +252,6 @@ func TestAgent_ReconnectConfigWanDisabled(t *testing.T) {
require.Nil(t, a.consulConfig().SerfWANConfig)
}
func TestAgent_setupNodeID(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, `
node_id = ""
`)
defer a.Shutdown()
cfg := a.config
// The auto-assigned ID should be valid.
id := a.consulConfig().NodeID
if _, err := uuid.ParseUUID(string(id)); err != nil {
t.Fatalf("err: %v", err)
}
// Running again should get the same ID (persisted in the file).
cfg.NodeID = ""
if err := a.setupNodeID(cfg); err != nil {
t.Fatalf("err: %v", err)
}
if newID := a.consulConfig().NodeID; id != newID {
t.Fatalf("bad: %q vs %q", id, newID)
}
// Set an invalid ID via.Config.
cfg.NodeID = types.NodeID("nope")
err := a.setupNodeID(cfg)
if err == nil || !strings.Contains(err.Error(), "uuid string is wrong length") {
t.Fatalf("err: %v", err)
}
// Set a valid ID via.Config.
newID, err := uuid.GenerateUUID()
if err != nil {
t.Fatalf("err: %v", err)
}
cfg.NodeID = types.NodeID(strings.ToUpper(newID))
if err := a.setupNodeID(cfg); err != nil {
t.Fatalf("err: %v", err)
}
if id := a.consulConfig().NodeID; string(id) != newID {
t.Fatalf("bad: %q vs. %q", id, newID)
}
// Set an invalid ID via the file.
fileID := filepath.Join(cfg.DataDir, "node-id")
if err := ioutil.WriteFile(fileID, []byte("adf4238a!882b!9ddc!4a9d!5b6758e4159e"), 0600); err != nil {
t.Fatalf("err: %v", err)
}
cfg.NodeID = ""
err = a.setupNodeID(cfg)
if err == nil || !strings.Contains(err.Error(), "uuid is improperly formatted") {
t.Fatalf("err: %v", err)
}
// Set a valid ID via the file.
if err := ioutil.WriteFile(fileID, []byte("ADF4238a-882b-9ddc-4a9d-5b6758e4159e"), 0600); err != nil {
t.Fatalf("err: %v", err)
}
cfg.NodeID = ""
if err := a.setupNodeID(cfg); err != nil {
t.Fatalf("err: %v", err)
}
if id := a.consulConfig().NodeID; string(id) != "adf4238a-882b-9ddc-4a9d-5b6758e4159e" {
t.Fatalf("bad: %q vs. %q", id, newID)
}
}
func TestAgent_makeNodeID(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, `
node_id = ""
`)
defer a.Shutdown()
// We should get a valid host-based ID initially.
id, err := a.makeNodeID()
if err != nil {
t.Fatalf("err: %v", err)
}
if _, err := uuid.ParseUUID(id); err != nil {
t.Fatalf("err: %v", err)
}
// Calling again should yield a random ID by default.
another, err := a.makeNodeID()
if err != nil {
t.Fatalf("err: %v", err)
}
if id == another {
t.Fatalf("bad: %s vs %s", id, another)
}
// Turn on host-based IDs and try again. We should get the same ID
// each time (and a different one from the random one above).
a.GetConfig().DisableHostNodeID = false
id, err = a.makeNodeID()
if err != nil {
t.Fatalf("err: %v", err)
}
if id == another {
t.Fatalf("bad: %s vs %s", id, another)
}
// Calling again should yield the host-based ID.
another, err = a.makeNodeID()
if err != nil {
t.Fatalf("err: %v", err)
}
if id != another {
t.Fatalf("bad: %s vs %s", id, another)
}
}
func TestAgent_AddService(t *testing.T) {
t.Run("normal", func(t *testing.T) {
t.Parallel()
@ -878,10 +766,18 @@ func TestCacheRateLimit(test *testing.T) {
test.Run(fmt.Sprintf("rate_limit_at_%v", currentTest.rateLimit), func(t *testing.T) {
tt := currentTest
t.Parallel()
a := NewTestAgent(t, fmt.Sprintf("cache = { entry_fetch_rate = %v, entry_fetch_max_burst = 1 }", tt.rateLimit))
a := NewTestAgent(t, "cache = { entry_fetch_rate = 1, entry_fetch_max_burst = 100 }")
defer a.Shutdown()
testrpc.WaitForTestAgent(t, a.RPC, "dc1")
cfg := a.config
require.Equal(t, rate.Limit(1), a.config.Cache.EntryFetchRate)
require.Equal(t, 100, a.config.Cache.EntryFetchMaxBurst)
cfg.Cache.EntryFetchRate = rate.Limit(tt.rateLimit)
cfg.Cache.EntryFetchMaxBurst = 1
a.reloadConfigInternal(cfg)
require.Equal(t, rate.Limit(tt.rateLimit), a.config.Cache.EntryFetchRate)
require.Equal(t, 1, a.config.Cache.EntryFetchMaxBurst)
var wg sync.WaitGroup
stillProcessing := true
@ -1649,15 +1545,12 @@ func TestAgent_RestoreServiceWithAliasCheck(t *testing.T) {
a.logger.Info("testharness: " + fmt.Sprintf(format, args...))
}
dataDir := testutil.TempDir(t, "agent") // we manage the data dir
cfg := `
server = false
bootstrap = false
enable_central_service_config = false
data_dir = "` + dataDir + `"
`
a := StartTestAgent(t, TestAgent{HCL: cfg, DataDir: dataDir})
defer os.RemoveAll(dataDir)
a := StartTestAgent(t, TestAgent{HCL: cfg})
defer a.Shutdown()
testCtx, testCancel := context.WithCancel(context.Background())
@ -1740,7 +1633,7 @@ node_name = "` + a.Config.NodeName + `"
t.Helper()
// Reload and retain former NodeID and data directory.
a2 := StartTestAgent(t, TestAgent{HCL: futureHCL, DataDir: dataDir})
a2 := StartTestAgent(t, TestAgent{HCL: futureHCL, DataDir: a.DataDir})
defer a2.Shutdown()
a = nil
@ -2024,7 +1917,7 @@ func TestAgent_HTTPCheck_EnableAgentTLSForChecks(t *testing.T) {
Status: api.HealthCritical,
}
url := fmt.Sprintf("https://%s/v1/agent/self", a.srv.ln.Addr().String())
url := fmt.Sprintf("https://%s/v1/agent/self", a.HTTPAddr())
chk := &structs.CheckType{
HTTP: url,
Interval: 20 * time.Millisecond,
@ -2125,15 +2018,11 @@ func TestAgent_PersistService(t *testing.T) {
func testAgent_PersistService(t *testing.T, extraHCL string) {
t.Helper()
dataDir := testutil.TempDir(t, "agent") // we manage the data dir
defer os.RemoveAll(dataDir)
cfg := `
server = false
bootstrap = false
data_dir = "` + dataDir + `"
` + extraHCL
a := StartTestAgent(t, TestAgent{HCL: cfg, DataDir: dataDir})
a := StartTestAgent(t, TestAgent{HCL: cfg})
defer a.Shutdown()
svc := &structs.NodeService{
@ -2199,7 +2088,7 @@ func testAgent_PersistService(t *testing.T, extraHCL string) {
a.Shutdown()
// Should load it back during later start
a2 := StartTestAgent(t, TestAgent{HCL: cfg, DataDir: dataDir})
a2 := StartTestAgent(t, TestAgent{HCL: cfg, DataDir: a.DataDir})
defer a2.Shutdown()
restored := a2.State.ServiceState(structs.NewServiceID(svc.ID, nil))
@ -2337,15 +2226,11 @@ func TestAgent_PurgeServiceOnDuplicate(t *testing.T) {
func testAgent_PurgeServiceOnDuplicate(t *testing.T, extraHCL string) {
t.Helper()
dataDir := testutil.TempDir(t, "agent") // we manage the data dir
defer os.RemoveAll(dataDir)
cfg := `
data_dir = "` + dataDir + `"
server = false
bootstrap = false
` + extraHCL
a := StartTestAgent(t, TestAgent{HCL: cfg, DataDir: dataDir})
a := StartTestAgent(t, TestAgent{HCL: cfg})
defer a.Shutdown()
svc1 := &structs.NodeService{
@ -2368,7 +2253,7 @@ func testAgent_PurgeServiceOnDuplicate(t *testing.T, extraHCL string) {
tags = ["bar"]
port = 9000
}
`, DataDir: dataDir})
`, DataDir: a.DataDir})
defer a2.Shutdown()
sid := svc1.CompoundServiceID()
@ -2382,15 +2267,12 @@ func testAgent_PurgeServiceOnDuplicate(t *testing.T, extraHCL string) {
func TestAgent_PersistCheck(t *testing.T) {
t.Parallel()
dataDir := testutil.TempDir(t, "agent") // we manage the data dir
cfg := `
data_dir = "` + dataDir + `"
server = false
bootstrap = false
enable_script_checks = true
`
a := StartTestAgent(t, TestAgent{HCL: cfg, DataDir: dataDir})
defer os.RemoveAll(dataDir)
a := StartTestAgent(t, TestAgent{HCL: cfg})
defer a.Shutdown()
check := &structs.HealthCheck{
@ -2446,7 +2328,7 @@ func TestAgent_PersistCheck(t *testing.T) {
a.Shutdown()
// Should load it back during later start
a2 := StartTestAgent(t, TestAgent{Name: "Agent2", HCL: cfg, DataDir: dataDir})
a2 := StartTestAgent(t, TestAgent{Name: "Agent2", HCL: cfg, DataDir: a.DataDir})
defer a2.Shutdown()
result := requireCheckExists(t, a2, check.CheckID)
@ -2497,18 +2379,14 @@ func TestAgent_PurgeCheck(t *testing.T) {
func TestAgent_PurgeCheckOnDuplicate(t *testing.T) {
t.Parallel()
nodeID := NodeID()
dataDir := testutil.TempDir(t, "agent")
a := StartTestAgent(t, TestAgent{
DataDir: dataDir,
HCL: `
node_id = "` + nodeID + `"
node_name = "Node ` + nodeID + `"
data_dir = "` + dataDir + `"
server = false
bootstrap = false
enable_script_checks = true
`})
defer os.RemoveAll(dataDir)
defer a.Shutdown()
check1 := &structs.HealthCheck{
@ -2528,11 +2406,10 @@ func TestAgent_PurgeCheckOnDuplicate(t *testing.T) {
// Start again with the check registered in config
a2 := StartTestAgent(t, TestAgent{
Name: "Agent2",
DataDir: dataDir,
DataDir: a.DataDir,
HCL: `
node_id = "` + nodeID + `"
node_name = "Node ` + nodeID + `"
data_dir = "` + dataDir + `"
server = false
bootstrap = false
enable_script_checks = true
@ -2547,7 +2424,7 @@ func TestAgent_PurgeCheckOnDuplicate(t *testing.T) {
defer a2.Shutdown()
cid := check1.CompoundCheckID()
file := filepath.Join(dataDir, checksDir, cid.StringHash())
file := filepath.Join(a.DataDir, checksDir, cid.StringHash())
if _, err := os.Stat(file); err == nil {
t.Fatalf("should have removed persisted check")
}
@ -3468,163 +3345,6 @@ func TestAgent_reloadWatchesHTTPS(t *testing.T) {
}
}
func TestAgent_loadTokens(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, `
acl = {
enabled = true
tokens = {
agent = "alfa"
agent_master = "bravo",
default = "charlie"
replication = "delta"
}
}
`)
defer a.Shutdown()
require := require.New(t)
tokensFullPath := filepath.Join(a.config.DataDir, tokensPath)
t.Run("original-configuration", func(t *testing.T) {
require.Equal("alfa", a.tokens.AgentToken())
require.Equal("bravo", a.tokens.AgentMasterToken())
require.Equal("charlie", a.tokens.UserToken())
require.Equal("delta", a.tokens.ReplicationToken())
})
t.Run("updated-configuration", func(t *testing.T) {
cfg := &config.RuntimeConfig{
ACLToken: "echo",
ACLAgentToken: "foxtrot",
ACLAgentMasterToken: "golf",
ACLReplicationToken: "hotel",
}
// ensures no error for missing persisted tokens file
require.NoError(a.loadTokens(cfg))
require.Equal("echo", a.tokens.UserToken())
require.Equal("foxtrot", a.tokens.AgentToken())
require.Equal("golf", a.tokens.AgentMasterToken())
require.Equal("hotel", a.tokens.ReplicationToken())
})
t.Run("persisted-tokens", func(t *testing.T) {
cfg := &config.RuntimeConfig{
ACLToken: "echo",
ACLAgentToken: "foxtrot",
ACLAgentMasterToken: "golf",
ACLReplicationToken: "hotel",
}
tokens := `{
"agent" : "india",
"agent_master" : "juliett",
"default": "kilo",
"replication" : "lima"
}`
require.NoError(ioutil.WriteFile(tokensFullPath, []byte(tokens), 0600))
require.NoError(a.loadTokens(cfg))
// no updates since token persistence is not enabled
require.Equal("echo", a.tokens.UserToken())
require.Equal("foxtrot", a.tokens.AgentToken())
require.Equal("golf", a.tokens.AgentMasterToken())
require.Equal("hotel", a.tokens.ReplicationToken())
a.config.ACLEnableTokenPersistence = true
require.NoError(a.loadTokens(cfg))
require.Equal("india", a.tokens.AgentToken())
require.Equal("juliett", a.tokens.AgentMasterToken())
require.Equal("kilo", a.tokens.UserToken())
require.Equal("lima", a.tokens.ReplicationToken())
})
t.Run("persisted-tokens-override", func(t *testing.T) {
tokens := `{
"agent" : "mike",
"agent_master" : "november",
"default": "oscar",
"replication" : "papa"
}`
cfg := &config.RuntimeConfig{
ACLToken: "quebec",
ACLAgentToken: "romeo",
ACLAgentMasterToken: "sierra",
ACLReplicationToken: "tango",
}
require.NoError(ioutil.WriteFile(tokensFullPath, []byte(tokens), 0600))
require.NoError(a.loadTokens(cfg))
require.Equal("mike", a.tokens.AgentToken())
require.Equal("november", a.tokens.AgentMasterToken())
require.Equal("oscar", a.tokens.UserToken())
require.Equal("papa", a.tokens.ReplicationToken())
})
t.Run("partial-persisted", func(t *testing.T) {
tokens := `{
"agent" : "uniform",
"agent_master" : "victor"
}`
cfg := &config.RuntimeConfig{
ACLToken: "whiskey",
ACLAgentToken: "xray",
ACLAgentMasterToken: "yankee",
ACLReplicationToken: "zulu",
}
require.NoError(ioutil.WriteFile(tokensFullPath, []byte(tokens), 0600))
require.NoError(a.loadTokens(cfg))
require.Equal("uniform", a.tokens.AgentToken())
require.Equal("victor", a.tokens.AgentMasterToken())
require.Equal("whiskey", a.tokens.UserToken())
require.Equal("zulu", a.tokens.ReplicationToken())
})
t.Run("persistence-error-not-json", func(t *testing.T) {
cfg := &config.RuntimeConfig{
ACLToken: "one",
ACLAgentToken: "two",
ACLAgentMasterToken: "three",
ACLReplicationToken: "four",
}
require.NoError(ioutil.WriteFile(tokensFullPath, []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, 0600))
err := a.loadTokens(cfg)
require.Error(err)
require.Equal("one", a.tokens.UserToken())
require.Equal("two", a.tokens.AgentToken())
require.Equal("three", a.tokens.AgentMasterToken())
require.Equal("four", a.tokens.ReplicationToken())
})
t.Run("persistence-error-wrong-top-level", func(t *testing.T) {
cfg := &config.RuntimeConfig{
ACLToken: "alfa",
ACLAgentToken: "bravo",
ACLAgentMasterToken: "charlie",
ACLReplicationToken: "foxtrot",
}
require.NoError(ioutil.WriteFile(tokensFullPath, []byte("[1,2,3]"), 0600))
err := a.loadTokens(cfg)
require.Error(err)
require.Equal("alfa", a.tokens.UserToken())
require.Equal("bravo", a.tokens.AgentToken())
require.Equal("charlie", a.tokens.AgentMasterToken())
require.Equal("foxtrot", a.tokens.ReplicationToken())
})
}
func TestAgent_SecurityChecks(t *testing.T) {
t.Parallel()
hcl := `
@ -3643,7 +3363,6 @@ func TestAgent_SecurityChecks(t *testing.T) {
func TestAgent_ReloadConfigOutgoingRPCConfig(t *testing.T) {
t.Parallel()
dataDir := testutil.TempDir(t, "agent") // we manage the data dir
defer os.RemoveAll(dataDir)
hcl := `
data_dir = "` + dataDir + `"
verify_outgoing = true
@ -3667,7 +3386,7 @@ func TestAgent_ReloadConfigOutgoingRPCConfig(t *testing.T) {
key_file = "../test/key/ourdomain.key"
verify_server_hostname = true
`
c := TestConfig(testutil.Logger(t), config.Source{Name: t.Name(), Format: "hcl", Data: hcl})
c := TestConfig(testutil.Logger(t), config.FileSource{Name: t.Name(), Format: "hcl", Data: hcl})
require.NoError(t, a.reloadConfigInternal(c))
tlsConf = a.tlsConfigurator.OutgoingRPCConfig()
require.False(t, tlsConf.InsecureSkipVerify)
@ -3678,7 +3397,6 @@ func TestAgent_ReloadConfigOutgoingRPCConfig(t *testing.T) {
func TestAgent_ReloadConfigAndKeepChecksStatus(t *testing.T) {
t.Parallel()
dataDir := testutil.TempDir(t, "agent") // we manage the data dir
defer os.RemoveAll(dataDir)
hcl := `data_dir = "` + dataDir + `"
enable_local_script_checks=true
services=[{
@ -3697,7 +3415,7 @@ func TestAgent_ReloadConfigAndKeepChecksStatus(t *testing.T) {
require.Equal(t, "passing", check.Status, "check %q is wrong", id)
}
c := TestConfig(testutil.Logger(t), config.Source{Name: t.Name(), Format: "hcl", Data: hcl})
c := TestConfig(testutil.Logger(t), config.FileSource{Name: t.Name(), Format: "hcl", Data: hcl})
require.NoError(t, a.reloadConfigInternal(c))
// After reload, should be passing directly (no critical state)
for id, check := range a.State.Checks(nil) {
@ -3708,7 +3426,6 @@ func TestAgent_ReloadConfigAndKeepChecksStatus(t *testing.T) {
func TestAgent_ReloadConfigIncomingRPCConfig(t *testing.T) {
t.Parallel()
dataDir := testutil.TempDir(t, "agent") // we manage the data dir
defer os.RemoveAll(dataDir)
hcl := `
data_dir = "` + dataDir + `"
verify_outgoing = true
@ -3736,7 +3453,7 @@ func TestAgent_ReloadConfigIncomingRPCConfig(t *testing.T) {
key_file = "../test/key/ourdomain.key"
verify_server_hostname = true
`
c := TestConfig(testutil.Logger(t), config.Source{Name: t.Name(), Format: "hcl", Data: hcl})
c := TestConfig(testutil.Logger(t), config.FileSource{Name: t.Name(), Format: "hcl", Data: hcl})
require.NoError(t, a.reloadConfigInternal(c))
tlsConf, err = tlsConf.GetConfigForClient(nil)
require.NoError(t, err)
@ -3748,7 +3465,6 @@ func TestAgent_ReloadConfigIncomingRPCConfig(t *testing.T) {
func TestAgent_ReloadConfigTLSConfigFailure(t *testing.T) {
t.Parallel()
dataDir := testutil.TempDir(t, "agent") // we manage the data dir
defer os.RemoveAll(dataDir)
hcl := `
data_dir = "` + dataDir + `"
verify_outgoing = true
@ -3765,7 +3481,7 @@ func TestAgent_ReloadConfigTLSConfigFailure(t *testing.T) {
data_dir = "` + dataDir + `"
verify_incoming = true
`
c := TestConfig(testutil.Logger(t), config.Source{Name: t.Name(), Format: "hcl", Data: hcl})
c := TestConfig(testutil.Logger(t), config.FileSource{Name: t.Name(), Format: "hcl", Data: hcl})
require.Error(t, a.reloadConfigInternal(c))
tlsConf, err := tlsConf.GetConfigForClient(nil)
require.NoError(t, err)
@ -3777,7 +3493,6 @@ func TestAgent_ReloadConfigTLSConfigFailure(t *testing.T) {
func TestAgent_consulConfig_AutoEncryptAllowTLS(t *testing.T) {
t.Parallel()
dataDir := testutil.TempDir(t, "agent") // we manage the data dir
defer os.RemoveAll(dataDir)
hcl := `
data_dir = "` + dataDir + `"
verify_incoming = true
@ -4722,7 +4437,13 @@ func TestAutoConfig_Integration(t *testing.T) {
})
require.NoError(t, err)
client := StartTestAgent(t, TestAgent{Name: "test-client", HCL: `
client := StartTestAgent(t, TestAgent{Name: "test-client",
Overrides: `
connect {
test_ca_leaf_root_change_spread = "1ns"
}
`,
HCL: `
bootstrap = false
server = false
ca_file = "` + caFile + `"
@ -4736,7 +4457,8 @@ func TestAutoConfig_Integration(t *testing.T) {
enabled = true
intro_token = "` + token + `"
server_addresses = ["` + srv.Config.RPCBindAddr.String() + `"]
}`})
}`,
})
defer client.Shutdown()
@ -4776,6 +4498,21 @@ func TestAutoConfig_Integration(t *testing.T) {
// ensure that a new cert gets generated and pushed into the TLS configurator
retry.Run(t, func(r *retry.R) {
require.NotEqual(r, cert1, client.Agent.tlsConfigurator.Cert())
// check that the on disk certs match expectations
data, err := ioutil.ReadFile(filepath.Join(client.DataDir, "auto-config.json"))
require.NoError(r, err)
rdr := strings.NewReader(string(data))
var resp pbautoconf.AutoConfigResponse
pbUnmarshaler := &jsonpb.Unmarshaler{
AllowUnknownFields: false,
}
require.NoError(r, pbUnmarshaler.Unmarshal(rdr, &resp), "data: %s", data)
actual, err := tls.X509KeyPair([]byte(resp.Certificate.CertPEM), []byte(resp.Certificate.PrivateKeyPEM))
require.NoError(r, err)
require.Equal(r, client.Agent.tlsConfigurator.Cert(), &actual)
})
// spot check that we now have an ACL token
@ -4856,3 +4593,33 @@ func TestAgent_AutoEncrypt(t *testing.T) {
require.Len(t, x509Cert.URIs, 1)
require.Equal(t, id.URI(), x509Cert.URIs[0])
}
func TestSharedRPCRouter(t *testing.T) {
// this test runs both a server and client and ensures that the shared
// router is being used. It would be possible for the Client and Server
// types to create and use their own routers and for RPCs such as the
// ones used in WaitForTestAgent to succeed. However accessing the
// router stored on the agent ensures that Serf information from the
// Client/Server types are being set in the same shared rpc router.
srv := NewTestAgent(t, "")
defer srv.Shutdown()
testrpc.WaitForTestAgent(t, srv.RPC, "dc1")
mgr, server := srv.Agent.router.FindLANRoute()
require.NotNil(t, mgr)
require.NotNil(t, server)
client := NewTestAgent(t, `
server = false
bootstrap = false
retry_join = ["`+srv.Config.SerfBindAddrLAN.String()+`"]
`)
testrpc.WaitForTestAgent(t, client.RPC, "dc1")
mgr, server = client.Agent.router.FindLANRoute()
require.NotNil(t, mgr)
require.NotNil(t, server)
}

94
agent/apiserver.go Normal file
View File

@ -0,0 +1,94 @@
package agent
import (
"context"
"net"
"sync"
"time"
"github.com/hashicorp/go-hclog"
"golang.org/x/sync/errgroup"
)
// apiServers is a wrapper around errgroup.Group for managing go routines for
// long running agent components (ex: http server, dns server). If any of the
// servers fail, the failed channel will be closed, which will cause the agent
// to be shutdown instead of running in a degraded state.
//
// This struct exists as a shim for using errgroup.Group without making major
// changes to Agent. In the future it may be removed and replaced with more
// direct usage of errgroup.Group.
type apiServers struct {
logger hclog.Logger
group *errgroup.Group
servers []apiServer
// failed channel is closed when the first server goroutines exit with a
// non-nil error.
failed <-chan struct{}
}
type apiServer struct {
// Protocol supported by this server. One of: dns, http, https
Protocol string
// Addr the server is listening on
Addr net.Addr
// Run will be called in a goroutine to run the server. When any Run exits
// with a non-nil error, the failed channel will be closed.
Run func() error
// Shutdown function used to stop the server
Shutdown func(context.Context) error
}
// NewAPIServers returns an empty apiServers that is ready to Start servers.
func NewAPIServers(logger hclog.Logger) *apiServers {
group, ctx := errgroup.WithContext(context.TODO())
return &apiServers{
logger: logger,
group: group,
failed: ctx.Done(),
}
}
func (s *apiServers) Start(srv apiServer) {
srv.logger(s.logger).Info("Starting server")
s.servers = append(s.servers, srv)
s.group.Go(srv.Run)
}
func (s apiServer) logger(base hclog.Logger) hclog.Logger {
return base.With(
"protocol", s.Protocol,
"address", s.Addr.String(),
"network", s.Addr.Network())
}
// Shutdown all the servers and log any errors as warning. Each server is given
// 1 second, or until ctx is cancelled, to shutdown gracefully.
func (s *apiServers) Shutdown(ctx context.Context) {
shutdownGroup := new(sync.WaitGroup)
for i := range s.servers {
server := s.servers[i]
shutdownGroup.Add(1)
go func() {
defer shutdownGroup.Done()
logger := server.logger(s.logger)
logger.Info("Stopping server")
ctx, cancel := context.WithTimeout(ctx, time.Second)
defer cancel()
if err := server.Shutdown(ctx); err != nil {
logger.Warn("Failed to stop server")
}
}()
}
s.servers = nil
shutdownGroup.Wait()
}
// WaitForShutdown waits until all server goroutines have exited. Shutdown
// must be called before WaitForShutdown, otherwise it will block forever.
func (s *apiServers) WaitForShutdown() error {
return s.group.Wait()
}

65
agent/apiserver_test.go Normal file
View File

@ -0,0 +1,65 @@
package agent
import (
"context"
"fmt"
"net"
"testing"
"time"
"github.com/hashicorp/go-hclog"
"github.com/stretchr/testify/require"
)
func TestAPIServers_WithServiceRunError(t *testing.T) {
servers := NewAPIServers(hclog.New(nil))
server1, chErr1 := newAPIServerStub()
server2, _ := newAPIServerStub()
t.Run("Start", func(t *testing.T) {
servers.Start(server1)
servers.Start(server2)
select {
case <-servers.failed:
t.Fatalf("expected servers to still be running")
case <-time.After(5 * time.Millisecond):
}
})
err := fmt.Errorf("oops, I broke")
t.Run("server exit non-nil error", func(t *testing.T) {
chErr1 <- err
select {
case <-servers.failed:
case <-time.After(time.Second):
t.Fatalf("expected failed channel to be closed")
}
})
t.Run("shutdown remaining services", func(t *testing.T) {
servers.Shutdown(context.Background())
require.Equal(t, err, servers.WaitForShutdown())
})
}
func newAPIServerStub() (apiServer, chan error) {
chErr := make(chan error)
return apiServer{
Protocol: "http",
Addr: &net.TCPAddr{
IP: net.ParseIP("127.0.0.11"),
Port: 5505,
},
Run: func() error {
return <-chErr
},
Shutdown: func(ctx context.Context) error {
close(chErr)
return nil
},
}, chErr
}

View File

@ -2,79 +2,78 @@ package autoconf
import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"net"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/proto/pbautoconf"
"github.com/hashicorp/go-discover"
discoverk8s "github.com/hashicorp/go-discover/provider/k8s"
"github.com/hashicorp/go-hclog"
"github.com/golang/protobuf/jsonpb"
)
const (
// autoConfigFileName is the name of the file that the agent auto-config settings are
// stored in within the data directory
autoConfigFileName = "auto-config.json"
dummyTrustDomain = "dummytrustdomain"
)
var (
pbMarshaler = &jsonpb.Marshaler{
OrigName: false,
EnumsAsInts: false,
Indent: " ",
EmitDefaults: true,
}
pbUnmarshaler = &jsonpb.Unmarshaler{
AllowUnknownFields: false,
}
)
// AutoConfig is all the state necessary for being able to parse a configuration
// as well as perform the necessary RPCs to perform Agent Auto Configuration.
//
// NOTE: This struct and methods on it are not currently thread/goroutine safe.
// However it doesn't spawn any of its own go routines yet and is used in a
// synchronous fashion. In the future if either of those two conditions change
// then we will need to add some locking here. I am deferring that for now
// to help ease the review of this already large PR.
type AutoConfig struct {
builderOpts config.BuilderOpts
sync.Mutex
acConfig Config
logger hclog.Logger
directRPC DirectRPC
cache Cache
waiter *lib.RetryWaiter
overrides []config.Source
certMonitor CertMonitor
config *config.RuntimeConfig
autoConfigData string
autoConfigResponse *pbautoconf.AutoConfigResponse
autoConfigSource config.Source
running bool
done chan struct{}
// cancel is used to cancel the entire AutoConfig
// go routine. This is the main field protected
// by the mutex as it being non-nil indicates that
// the go routine has been started and is stoppable.
// note that it doesn't indcate that the go routine
// is currently running.
cancel context.CancelFunc
// cancelWatches is used to cancel the existing
// cache watches regarding the agents certificate. This is
// mainly only necessary when the Agent token changes.
cancelWatches context.CancelFunc
// cacheUpdates is the chan used to have the cache
// send us back events
cacheUpdates chan cache.UpdateEvent
// tokenUpdates is the struct used to receive
// events from the token store when the Agent
// token is updated.
tokenUpdates token.Notifier
}
// New creates a new AutoConfig object for providing automatic
// Consul configuration.
func New(config *Config) (*AutoConfig, error) {
if config == nil {
return nil, fmt.Errorf("must provide a config struct")
// New creates a new AutoConfig object for providing automatic Consul configuration.
func New(config Config) (*AutoConfig, error) {
switch {
case config.Loader == nil:
return nil, fmt.Errorf("must provide a config loader")
case config.DirectRPC == nil:
return nil, fmt.Errorf("must provide a direct RPC delegate")
case config.Cache == nil:
return nil, fmt.Errorf("must provide a cache")
case config.TLSConfigurator == nil:
return nil, fmt.Errorf("must provide a TLS configurator")
case config.Tokens == nil:
return nil, fmt.Errorf("must provide a token store")
}
if config.DirectRPC == nil {
return nil, fmt.Errorf("must provide a direct RPC delegate")
if config.FallbackLeeway == 0 {
config.FallbackLeeway = 10 * time.Second
}
if config.FallbackRetry == 0 {
config.FallbackRetry = time.Minute
}
logger := config.Logger
@ -84,33 +83,22 @@ func New(config *Config) (*AutoConfig, error) {
logger = logger.Named(logging.AutoConfig)
}
waiter := config.Waiter
if waiter == nil {
waiter = lib.NewRetryWaiter(1, 0, 10*time.Minute, lib.NewJitterRandomStagger(25))
if config.Waiter == nil {
config.Waiter = lib.NewRetryWaiter(1, 0, 10*time.Minute, lib.NewJitterRandomStagger(25))
}
ac := &AutoConfig{
builderOpts: config.BuilderOpts,
return &AutoConfig{
acConfig: config,
logger: logger,
directRPC: config.DirectRPC,
waiter: waiter,
overrides: config.Overrides,
certMonitor: config.CertMonitor,
}
return ac, nil
}, nil
}
// ReadConfig will parse the current configuration and inject any
// auto-config sources if present into the correct place in the parsing chain.
func (ac *AutoConfig) ReadConfig() (*config.RuntimeConfig, error) {
src := config.Source{
Name: autoConfigFileName,
Format: "json",
Data: ac.autoConfigData,
}
cfg, warnings, err := LoadConfig(ac.builderOpts, src, ac.overrides...)
ac.Lock()
defer ac.Unlock()
cfg, warnings, err := ac.acConfig.Loader(ac.autoConfigSource)
if err != nil {
return cfg, err
}
@ -123,46 +111,6 @@ func (ac *AutoConfig) ReadConfig() (*config.RuntimeConfig, error) {
return cfg, nil
}
// restorePersistedAutoConfig will attempt to load the persisted auto-config
// settings from the data directory. It returns true either when there was an
// unrecoverable error or when the configuration was successfully loaded from
// disk. Recoverable errors, such as "file not found" are suppressed and this
// method will return false for the first boolean.
func (ac *AutoConfig) restorePersistedAutoConfig() (bool, error) {
if ac.config.DataDir == "" {
// no data directory means we don't have anything to potentially load
return false, nil
}
path := filepath.Join(ac.config.DataDir, autoConfigFileName)
ac.logger.Debug("attempting to restore any persisted configuration", "path", path)
content, err := ioutil.ReadFile(path)
if err == nil {
rdr := strings.NewReader(string(content))
var resp pbautoconf.AutoConfigResponse
if err := pbUnmarshaler.Unmarshal(rdr, &resp); err != nil {
return false, fmt.Errorf("failed to decode persisted auto-config data: %w", err)
}
if err := ac.update(&resp); err != nil {
return false, fmt.Errorf("error restoring persisted auto-config response: %w", err)
}
ac.logger.Info("restored persisted configuration", "path", path)
return true, nil
}
if !os.IsNotExist(err) {
return true, fmt.Errorf("failed to load %s: %w", path, err)
}
// ignore non-existence errors as that is an indicator that we haven't
// performed the auto configuration before
return false, nil
}
// InitialConfiguration will perform a one-time RPC request to the configured servers
// to retrieve various cluster wide configurations. See the proto/pbautoconf/auto_config.proto
// file for a complete reference of what configurations can be applied in this manner.
@ -182,22 +130,26 @@ func (ac *AutoConfig) InitialConfiguration(ctx context.Context) (*config.Runtime
ac.config = config
}
if !ac.config.AutoConfig.Enabled {
return ac.config, nil
}
ready, err := ac.restorePersistedAutoConfig()
switch {
case ac.config.AutoConfig.Enabled:
resp, err := ac.readPersistedAutoConfig()
if err != nil {
return nil, err
}
if !ready {
if resp == nil {
ac.logger.Info("retrieving initial agent auto configuration remotely")
if err := ac.getInitialConfiguration(ctx); err != nil {
resp, err = ac.getInitialConfiguration(ctx)
if err != nil {
return nil, err
}
}
ac.logger.Debug("updating auto-config settings")
if err = ac.recordInitialConfiguration(resp); err != nil {
return nil, err
}
// re-read the configuration now that we have our initial auto-config
config, err := ac.ReadConfig()
if err != nil {
@ -206,6 +158,21 @@ func (ac *AutoConfig) InitialConfiguration(ctx context.Context) (*config.Runtime
ac.config = config
return ac.config, nil
case ac.config.AutoEncryptTLS:
certs, err := ac.autoEncryptInitialCerts(ctx)
if err != nil {
return nil, err
}
if err := ac.setInitialTLSCertificates(certs); err != nil {
return nil, err
}
ac.logger.Info("automatically upgraded to TLS")
return ac.config, nil
default:
return ac.config, nil
}
}
// introToken is responsible for determining the correct intro token to use
@ -235,118 +202,45 @@ func (ac *AutoConfig) introToken() (string, error) {
return token, nil
}
// serverHosts is responsible for taking the list of server addresses and
// resolving any go-discover provider invocations. It will then return a list
// of hosts. These might be hostnames and is expected that DNS resolution may
// be performed after this function runs. Additionally these may contain ports
// so SplitHostPort could also be necessary.
func (ac *AutoConfig) serverHosts() ([]string, error) {
servers := ac.config.AutoConfig.ServerAddresses
// recordInitialConfiguration is responsible for recording the AutoConfigResponse from
// the AutoConfig.InitialConfiguration RPC. It is an all-in-one function to do the following
// * update the Agent token in the token store
func (ac *AutoConfig) recordInitialConfiguration(resp *pbautoconf.AutoConfigResponse) error {
ac.autoConfigResponse = resp
providers := make(map[string]discover.Provider)
for k, v := range discover.Providers {
providers[k] = v
ac.autoConfigSource = config.LiteralSource{
Name: autoConfigFileName,
Config: translateConfig(resp.Config),
}
providers["k8s"] = &discoverk8s.Provider{}
disco, err := discover.New(
discover.WithUserAgent(lib.UserAgent()),
discover.WithProviders(providers),
)
// we need to re-read the configuration to determine what the correct ACL
// token to push into the token store is. Any user provided token will override
// any AutoConfig generated token.
config, err := ac.ReadConfig()
if err != nil {
return nil, fmt.Errorf("Failed to create go-discover resolver: %w", err)
return fmt.Errorf("failed to fully resolve configuration: %w", err)
}
var addrs []string
for _, addr := range servers {
switch {
case strings.Contains(addr, "provider="):
resolved, err := disco.Addrs(addr, ac.logger.StandardLogger(&hclog.StandardLoggerOptions{InferLevels: true}))
// ignoring the return value which would indicate a change in the token
_ = ac.acConfig.Tokens.UpdateAgentToken(config.ACLTokens.ACLAgentToken, token.TokenSourceConfig)
// extra a structs.SignedResponse from the AutoConfigResponse for use in cache prepopulation
signed, err := extractSignedResponse(resp)
if err != nil {
ac.logger.Error("failed to resolve go-discover auto-config servers", "configuration", addr, "err", err)
continue
return fmt.Errorf("failed to extract certificates from the auto-config response: %w", err)
}
addrs = append(addrs, resolved...)
ac.logger.Debug("discovered auto-config servers", "servers", resolved)
default:
addrs = append(addrs, addr)
}
// prepopulate the cache
if err = ac.populateCertificateCache(signed); err != nil {
return fmt.Errorf("failed to populate the cache with certificate responses: %w", err)
}
if len(addrs) == 0 {
return nil, fmt.Errorf("no auto-config server addresses available for use")
}
return addrs, nil
}
// resolveHost will take a single host string and convert it to a list of TCPAddrs
// This will process any port in the input as well as looking up the hostname using
// normal DNS resolution.
func (ac *AutoConfig) resolveHost(hostPort string) []net.TCPAddr {
port := ac.config.ServerPort
host, portStr, err := net.SplitHostPort(hostPort)
if err != nil {
if strings.Contains(err.Error(), "missing port in address") {
host = hostPort
} else {
ac.logger.Warn("error splitting host address into IP and port", "address", hostPort, "error", err)
return nil
}
} else {
port, err = strconv.Atoi(portStr)
if err != nil {
ac.logger.Warn("Parsed port is not an integer", "port", portStr, "error", err)
return nil
}
}
// resolve the host to a list of IPs
ips, err := net.LookupIP(host)
if err != nil {
ac.logger.Warn("IP resolution failed", "host", host, "error", err)
return nil
}
var addrs []net.TCPAddr
for _, ip := range ips {
addrs = append(addrs, net.TCPAddr{IP: ip, Port: port})
}
return addrs
}
// recordResponse takes an AutoConfig RPC response records it with the agent
// This will persist the configuration to disk (unless in dev mode running without
// a data dir) and will reload the configuration.
func (ac *AutoConfig) recordResponse(resp *pbautoconf.AutoConfigResponse) error {
serialized, err := pbMarshaler.MarshalToString(resp)
if err != nil {
return fmt.Errorf("failed to encode auto-config response as JSON: %w", err)
}
if err := ac.update(resp); err != nil {
// update the TLS configurator with the latest certificates
if err := ac.updateTLSFromResponse(resp); err != nil {
return err
}
// now that we know the configuration is generally fine including TLS certs go ahead and persist it to disk.
if ac.config.DataDir == "" {
ac.logger.Debug("not persisting auto-config settings because there is no data directory")
return nil
}
path := filepath.Join(ac.config.DataDir, autoConfigFileName)
err = ioutil.WriteFile(path, []byte(serialized), 0660)
if err != nil {
return fmt.Errorf("failed to write auto-config configurations: %w", err)
}
ac.logger.Debug("auto-config settings were persisted to disk")
return nil
return ac.persistAutoConfig(resp)
}
// getInitialConfigurationOnce will perform full server to TCPAddr resolution and
@ -370,7 +264,7 @@ func (ac *AutoConfig) getInitialConfigurationOnce(ctx context.Context, csr strin
var resp pbautoconf.AutoConfigResponse
servers, err := ac.serverHosts()
servers, err := ac.autoConfigHosts()
if err != nil {
return nil, err
}
@ -383,10 +277,11 @@ func (ac *AutoConfig) getInitialConfigurationOnce(ctx context.Context, csr strin
}
ac.logger.Debug("making AutoConfig.InitialConfiguration RPC", "addr", addr.String())
if err = ac.directRPC.RPC(ac.config.Datacenter, ac.config.NodeName, &addr, "AutoConfig.InitialConfiguration", &request, &resp); err != nil {
if err = ac.acConfig.DirectRPC.RPC(ac.config.Datacenter, ac.config.NodeName, &addr, "AutoConfig.InitialConfiguration", &request, &resp); err != nil {
ac.logger.Error("AutoConfig.InitialConfiguration RPC failed", "addr", addr.String(), "error", err)
continue
}
ac.logger.Debug("AutoConfig.InitialConfiguration RPC was successful")
// update the Certificate with the private key we generated locally
if resp.Certificate != nil {
@ -397,197 +292,113 @@ func (ac *AutoConfig) getInitialConfigurationOnce(ctx context.Context, csr strin
}
}
return nil, ctx.Err()
return nil, fmt.Errorf("No server successfully responded to the auto-config request")
}
// getInitialConfiguration implements a loop to retry calls to getInitialConfigurationOnce.
// It uses the RetryWaiter on the AutoConfig object to control how often to attempt
// the initial configuration process. It is also canceallable by cancelling the provided context.
func (ac *AutoConfig) getInitialConfiguration(ctx context.Context) error {
func (ac *AutoConfig) getInitialConfiguration(ctx context.Context) (*pbautoconf.AutoConfigResponse, error) {
// generate a CSR
csr, key, err := ac.generateCSR()
if err != nil {
return err
return nil, err
}
// this resets the failures so that we will perform immediate request
wait := ac.waiter.Success()
wait := ac.acConfig.Waiter.Success()
for {
select {
case <-wait:
resp, err := ac.getInitialConfigurationOnce(ctx, csr, key)
if resp != nil {
return ac.recordResponse(resp)
if resp, err := ac.getInitialConfigurationOnce(ctx, csr, key); err == nil && resp != nil {
return resp, nil
} else if err != nil {
ac.logger.Error(err.Error())
} else {
ac.logger.Error("No error returned when fetching the initial auto-configuration but no response was either")
ac.logger.Error("No error returned when fetching configuration from the servers but no response was either")
}
wait = ac.waiter.Failed()
wait = ac.acConfig.Waiter.Failed()
case <-ctx.Done():
ac.logger.Info("interrupted during initial auto configuration", "err", ctx.Err())
return ctx.Err()
return nil, ctx.Err()
}
}
}
// generateCSR will generate a CSR for an Agent certificate. This should
// be sent along with the AutoConfig.InitialConfiguration RPC. The generated
// CSR does NOT have a real trust domain as when generating this we do
// not yet have the CA roots. The server will update the trust domain
// for us though.
func (ac *AutoConfig) generateCSR() (csr string, key string, err error) {
// We don't provide the correct host here, because we don't know any
// better at this point. Apart from the domain, we would need the
// ClusterID, which we don't have. This is why we go with
// dummyTrustDomain the first time. Subsequent CSRs will have the
// correct TrustDomain.
id := &connect.SpiffeIDAgent{
// will be replaced
Host: dummyTrustDomain,
Datacenter: ac.config.Datacenter,
Agent: ac.config.NodeName,
}
caConfig, err := ac.config.ConnectCAConfiguration()
if err != nil {
return "", "", fmt.Errorf("Cannot generate CSR: %w", err)
}
conf, err := caConfig.GetCommonConfig()
if err != nil {
return "", "", fmt.Errorf("Failed to load common CA configuration: %w", err)
}
if conf.PrivateKeyType == "" {
conf.PrivateKeyType = connect.DefaultPrivateKeyType
}
if conf.PrivateKeyBits == 0 {
conf.PrivateKeyBits = connect.DefaultPrivateKeyBits
}
// Create a new private key
pk, pkPEM, err := connect.GeneratePrivateKeyWithConfig(conf.PrivateKeyType, conf.PrivateKeyBits)
if err != nil {
return "", "", fmt.Errorf("Failed to generate private key: %w", err)
}
dnsNames := append([]string{"localhost"}, ac.config.AutoConfig.DNSSANs...)
ipAddresses := append([]net.IP{net.ParseIP("127.0.0.1"), net.ParseIP("::")}, ac.config.AutoConfig.IPSANs...)
// Create a CSR.
//
// The Common Name includes the dummy trust domain for now but Server will
// override this when it is signed anyway so it's OK.
cn := connect.AgentCN(ac.config.NodeName, dummyTrustDomain)
csr, err = connect.CreateCSR(id, cn, pk, dnsNames, ipAddresses)
if err != nil {
return "", "", err
}
return csr, pkPEM, nil
}
// update will take an AutoConfigResponse and do all things necessary
// to restore those settings. This currently involves updating the
// config data to be used during a call to ReadConfig, updating the
// tls Configurator and prepopulating the cache.
func (ac *AutoConfig) update(resp *pbautoconf.AutoConfigResponse) error {
if err := ac.updateConfigFromResponse(resp); err != nil {
return err
}
if err := ac.updateTLSFromResponse(resp); err != nil {
return err
}
return nil
}
// updateConfigFromResponse is responsible for generating the JSON compatible with the
// agent/config.Config struct
func (ac *AutoConfig) updateConfigFromResponse(resp *pbautoconf.AutoConfigResponse) error {
// here we want to serialize the translated configuration for use in injecting into the normal
// configuration parsing chain.
conf, err := json.Marshal(translateConfig(resp.Config))
if err != nil {
return fmt.Errorf("failed to encode auto-config configuration as JSON: %w", err)
}
ac.autoConfigData = string(conf)
return nil
}
// updateTLSFromResponse will update the TLS certificate and roots in the shared
// TLS configurator.
func (ac *AutoConfig) updateTLSFromResponse(resp *pbautoconf.AutoConfigResponse) error {
if ac.certMonitor == nil {
return nil
}
roots, err := translateCARootsToStructs(resp.CARoots)
if err != nil {
return err
}
cert, err := translateIssuedCertToStructs(resp.Certificate)
if err != nil {
return err
}
update := &structs.SignedResponse{
IssuedCert: *cert,
ConnectCARoots: *roots,
ManualCARoots: resp.ExtraCACertificates,
}
if resp.Config != nil && resp.Config.TLS != nil {
update.VerifyServerHostname = resp.Config.TLS.VerifyServerHostname
}
if err := ac.certMonitor.Update(update); err != nil {
return fmt.Errorf("failed to update the certificate monitor: %w", err)
}
return nil
}
func (ac *AutoConfig) Start(ctx context.Context) error {
if ac.certMonitor == nil {
ac.Lock()
defer ac.Unlock()
if !ac.config.AutoConfig.Enabled && !ac.config.AutoEncryptTLS {
return nil
}
if !ac.config.AutoConfig.Enabled {
return nil
if ac.running || ac.cancel != nil {
return fmt.Errorf("AutoConfig is already running")
}
_, err := ac.certMonitor.Start(ctx)
return err
// create the top level context to control the go
// routine executing the `run` method
ctx, cancel := context.WithCancel(ctx)
// create the channel to get cache update events through
// really we should only ever get 10 updates
ac.cacheUpdates = make(chan cache.UpdateEvent, 10)
// setup the cache watches
cancelCertWatches, err := ac.setupCertificateCacheWatches(ctx)
if err != nil {
cancel()
return fmt.Errorf("error setting up cache watches: %w", err)
}
// start the token update notifier
ac.tokenUpdates = ac.acConfig.Tokens.Notify(token.TokenKindAgent)
// store the cancel funcs
ac.cancel = cancel
ac.cancelWatches = cancelCertWatches
ac.running = true
ac.done = make(chan struct{})
go ac.run(ctx, ac.done)
ac.logger.Info("auto-config started")
return nil
}
func (ac *AutoConfig) Done() <-chan struct{} {
ac.Lock()
defer ac.Unlock()
if ac.done != nil {
return ac.done
}
// return a closed channel to indicate that we are already done
done := make(chan struct{})
close(done)
return done
}
func (ac *AutoConfig) IsRunning() bool {
ac.Lock()
defer ac.Unlock()
return ac.running
}
func (ac *AutoConfig) Stop() bool {
if ac.certMonitor == nil {
ac.Lock()
defer ac.Unlock()
if !ac.running {
return false
}
if !ac.config.AutoConfig.Enabled {
return false
if ac.cancel != nil {
ac.cancel()
}
return ac.certMonitor.Stop()
}
func (ac *AutoConfig) FallbackTLS(ctx context.Context) (*structs.SignedResponse, error) {
// generate a CSR
csr, key, err := ac.generateCSR()
if err != nil {
return nil, err
}
resp, err := ac.getInitialConfigurationOnce(ctx, csr, key)
if err != nil {
return nil, err
}
return extractSignedResponse(resp)
return true
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,111 @@
package autoconf
import (
"context"
"fmt"
"net"
"strings"
"github.com/hashicorp/consul/agent/structs"
)
func (ac *AutoConfig) autoEncryptInitialCerts(ctx context.Context) (*structs.SignedResponse, error) {
// generate a CSR
csr, key, err := ac.generateCSR()
if err != nil {
return nil, err
}
// this resets the failures so that we will perform immediate request
wait := ac.acConfig.Waiter.Success()
for {
select {
case <-wait:
if resp, err := ac.autoEncryptInitialCertsOnce(ctx, csr, key); err == nil && resp != nil {
return resp, nil
} else if err != nil {
ac.logger.Error(err.Error())
} else {
ac.logger.Error("No error returned when fetching certificates from the servers but no response was either")
}
wait = ac.acConfig.Waiter.Failed()
case <-ctx.Done():
ac.logger.Info("interrupted during retrieval of auto-encrypt certificates", "err", ctx.Err())
return nil, ctx.Err()
}
}
}
func (ac *AutoConfig) autoEncryptInitialCertsOnce(ctx context.Context, csr, key string) (*structs.SignedResponse, error) {
request := structs.CASignRequest{
WriteRequest: structs.WriteRequest{Token: ac.acConfig.Tokens.AgentToken()},
Datacenter: ac.config.Datacenter,
CSR: csr,
}
var resp structs.SignedResponse
servers, err := ac.autoEncryptHosts()
if err != nil {
return nil, err
}
for _, s := range servers {
// try each IP to see if we can successfully make the request
for _, addr := range ac.resolveHost(s) {
if ctx.Err() != nil {
return nil, ctx.Err()
}
ac.logger.Debug("making AutoEncrypt.Sign RPC", "addr", addr.String())
err = ac.acConfig.DirectRPC.RPC(ac.config.Datacenter, ac.config.NodeName, &addr, "AutoEncrypt.Sign", &request, &resp)
if err != nil {
ac.logger.Error("AutoEncrypt.Sign RPC failed", "addr", addr.String(), "error", err)
continue
}
resp.IssuedCert.PrivateKeyPEM = key
return &resp, nil
}
}
return nil, fmt.Errorf("No servers successfully responded to the auto-encrypt request")
}
func (ac *AutoConfig) autoEncryptHosts() ([]string, error) {
// use servers known to gossip if there are any
if ac.acConfig.ServerProvider != nil {
if srv := ac.acConfig.ServerProvider.FindLANServer(); srv != nil {
return []string{srv.Addr.String()}, nil
}
}
hosts, err := ac.discoverServers(ac.config.RetryJoinLAN)
if err != nil {
return nil, err
}
var addrs []string
// The addresses we use for auto-encrypt are the retry join and start join
// addresses. These are for joining serf and therefore we cannot rely on the
// ports for these. This loop strips any port that may have been specified and
// will let subsequent resolveAddr calls add on the default RPC port.
for _, addr := range append(ac.config.StartJoinAddrsLAN, hosts...) {
host, _, err := net.SplitHostPort(addr)
if err != nil {
if strings.Contains(err.Error(), "missing port in address") {
host = addr
} else {
ac.logger.Warn("error splitting host address into IP and port", "address", addr, "error", err)
continue
}
}
addrs = append(addrs, host)
}
if len(addrs) == 0 {
return nil, fmt.Errorf("no auto-encrypt server addresses available for use")
}
return addrs, nil
}

View File

@ -0,0 +1,562 @@
package autoconf
import (
"context"
"crypto/x509"
"crypto/x509/pkix"
"encoding/asn1"
"fmt"
"net"
"net/url"
"testing"
"time"
"github.com/hashicorp/consul/agent/cache"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
)
func TestAutoEncrypt_generateCSR(t *testing.T) {
type testCase struct {
conf *config.RuntimeConfig
// to validate the csr
expectedSubject pkix.Name
expectedSigAlg x509.SignatureAlgorithm
expectedPubAlg x509.PublicKeyAlgorithm
expectedDNSNames []string
expectedIPs []net.IP
expectedURIs []*url.URL
}
cases := map[string]testCase{
"ip-sans": {
conf: &config.RuntimeConfig{
Datacenter: "dc1",
NodeName: "test-node",
AutoEncryptTLS: true,
AutoEncryptIPSAN: []net.IP{net.IPv4(198, 18, 0, 1), net.IPv4(198, 18, 0, 2)},
},
expectedSubject: pkix.Name{
CommonName: connect.AgentCN("test-node", unknownTrustDomain),
Names: []pkix.AttributeTypeAndValue{
{
// 2,5,4,3 is the CommonName type ASN1 identifier
Type: asn1.ObjectIdentifier{2, 5, 4, 3},
Value: "testnode.agnt.unknown.consul",
},
},
},
expectedSigAlg: x509.ECDSAWithSHA256,
expectedPubAlg: x509.ECDSA,
expectedDNSNames: defaultDNSSANs,
expectedIPs: append(defaultIPSANs,
net.IP{198, 18, 0, 1},
net.IP{198, 18, 0, 2},
),
expectedURIs: []*url.URL{
{
Scheme: "spiffe",
Host: unknownTrustDomain,
Path: "/agent/client/dc/dc1/id/test-node",
},
},
},
"dns-sans": {
conf: &config.RuntimeConfig{
Datacenter: "dc1",
NodeName: "test-node",
AutoEncryptTLS: true,
AutoEncryptDNSSAN: []string{"foo.local", "bar.local"},
},
expectedSubject: pkix.Name{
CommonName: connect.AgentCN("test-node", unknownTrustDomain),
Names: []pkix.AttributeTypeAndValue{
{
// 2,5,4,3 is the CommonName type ASN1 identifier
Type: asn1.ObjectIdentifier{2, 5, 4, 3},
Value: "testnode.agnt.unknown.consul",
},
},
},
expectedSigAlg: x509.ECDSAWithSHA256,
expectedPubAlg: x509.ECDSA,
expectedDNSNames: append(defaultDNSSANs, "foo.local", "bar.local"),
expectedIPs: defaultIPSANs,
expectedURIs: []*url.URL{
{
Scheme: "spiffe",
Host: unknownTrustDomain,
Path: "/agent/client/dc/dc1/id/test-node",
},
},
},
}
for name, tcase := range cases {
t.Run(name, func(t *testing.T) {
ac := AutoConfig{config: tcase.conf}
csr, _, err := ac.generateCSR()
require.NoError(t, err)
request, err := connect.ParseCSR(csr)
require.NoError(t, err)
require.NotNil(t, request)
require.Equal(t, tcase.expectedSubject, request.Subject)
require.Equal(t, tcase.expectedSigAlg, request.SignatureAlgorithm)
require.Equal(t, tcase.expectedPubAlg, request.PublicKeyAlgorithm)
require.Equal(t, tcase.expectedDNSNames, request.DNSNames)
require.Equal(t, tcase.expectedIPs, request.IPAddresses)
require.Equal(t, tcase.expectedURIs, request.URIs)
})
}
}
func TestAutoEncrypt_hosts(t *testing.T) {
type testCase struct {
serverProvider ServerProvider
config *config.RuntimeConfig
hosts []string
err string
}
providerNone := newMockServerProvider(t)
providerNone.On("FindLANServer").Return(nil).Times(0)
providerWithServer := newMockServerProvider(t)
providerWithServer.On("FindLANServer").Return(&metadata.Server{Addr: &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 1234}}).Times(0)
cases := map[string]testCase{
"router-override": {
serverProvider: providerWithServer,
config: &config.RuntimeConfig{
RetryJoinLAN: []string{"127.0.0.1:9876"},
StartJoinAddrsLAN: []string{"192.168.1.2:4321"},
},
hosts: []string{"198.18.0.1:1234"},
},
"various-addresses": {
serverProvider: providerNone,
config: &config.RuntimeConfig{
RetryJoinLAN: []string{"198.18.0.1", "foo.com", "[2001:db8::1234]:1234", "abc.local:9876"},
StartJoinAddrsLAN: []string{"192.168.1.1:5432", "start.local", "[::ffff:172.16.5.4]", "main.dev:6789"},
},
hosts: []string{
"192.168.1.1",
"start.local",
"[::ffff:172.16.5.4]",
"main.dev",
"198.18.0.1",
"foo.com",
"2001:db8::1234",
"abc.local",
},
},
"split-host-port-error": {
serverProvider: providerNone,
config: &config.RuntimeConfig{
StartJoinAddrsLAN: []string{"this-is-not:a:ip:and_port"},
},
err: "no auto-encrypt server addresses available for use",
},
}
for name, tcase := range cases {
t.Run(name, func(t *testing.T) {
ac := AutoConfig{
config: tcase.config,
logger: testutil.Logger(t),
acConfig: Config{
ServerProvider: tcase.serverProvider,
},
}
hosts, err := ac.autoEncryptHosts()
if tcase.err != "" {
testutil.RequireErrorContains(t, err, tcase.err)
} else {
require.NoError(t, err)
require.Equal(t, tcase.hosts, hosts)
}
})
}
}
func TestAutoEncrypt_InitialCerts(t *testing.T) {
token := "1a148388-3dd7-4db4-9eea-520424b4a86a"
datacenter := "foo"
nodeName := "bar"
mcfg := newMockedConfig(t)
_, indexedRoots, cert := testCerts(t, nodeName, datacenter)
// The following are called once for each round through the auto-encrypt initial certs outer loop
// (not the per-host direct rpc attempts but the one involving the RetryWaiter)
mcfg.tokens.On("AgentToken").Return(token).Times(2)
mcfg.serverProvider.On("FindLANServer").Return(nil).Times(2)
request := structs.CASignRequest{
WriteRequest: structs.WriteRequest{Token: token},
Datacenter: datacenter,
// this gets removed by the mock code as its non-deterministic what it will be
CSR: "",
}
// first failure
mcfg.directRPC.On("RPC",
datacenter,
nodeName,
&net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300},
"AutoEncrypt.Sign",
&request,
&structs.SignedResponse{},
).Once().Return(fmt.Errorf("injected error"))
// second failure
mcfg.directRPC.On("RPC",
datacenter,
nodeName,
&net.TCPAddr{IP: net.IPv4(198, 18, 0, 2), Port: 8300},
"AutoEncrypt.Sign",
&request,
&structs.SignedResponse{},
).Once().Return(fmt.Errorf("injected error"))
// third times is successfuly (second attempt to first server)
mcfg.directRPC.On("RPC",
datacenter,
nodeName,
&net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300},
"AutoEncrypt.Sign",
&request,
&structs.SignedResponse{},
).Once().Return(nil).Run(func(args mock.Arguments) {
resp, ok := args.Get(5).(*structs.SignedResponse)
require.True(t, ok)
resp.ConnectCARoots = *indexedRoots
resp.IssuedCert = *cert
resp.VerifyServerHostname = true
})
mcfg.Config.Waiter = lib.NewRetryWaiter(2, 0, 1*time.Millisecond, nil)
ac := AutoConfig{
config: &config.RuntimeConfig{
Datacenter: datacenter,
NodeName: nodeName,
RetryJoinLAN: []string{"198.18.0.1:1234", "198.18.0.2:3456"},
ServerPort: 8300,
},
acConfig: mcfg.Config,
logger: testutil.Logger(t),
}
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
resp, err := ac.autoEncryptInitialCerts(ctx)
require.NoError(t, err)
require.NotNil(t, resp)
require.True(t, resp.VerifyServerHostname)
require.NotEmpty(t, resp.IssuedCert.PrivateKeyPEM)
resp.IssuedCert.PrivateKeyPEM = ""
cert.PrivateKeyPEM = ""
require.Equal(t, cert, &resp.IssuedCert)
require.Equal(t, indexedRoots, &resp.ConnectCARoots)
require.Empty(t, resp.ManualCARoots)
}
func TestAutoEncrypt_InitialConfiguration(t *testing.T) {
token := "010494ae-ee45-4433-903c-a58c91297714"
nodeName := "auto-encrypt"
datacenter := "dc1"
mcfg := newMockedConfig(t)
loader := setupRuntimeConfig(t)
loader.addConfigHCL(`
auto_encrypt {
tls = true
}
`)
loader.opts.Config.NodeName = &nodeName
mcfg.Config.Loader = loader.Load
indexedRoots, cert, extraCerts := mcfg.setupInitialTLS(t, nodeName, datacenter, token)
// prepopulation is going to grab the token to populate the correct cache key
mcfg.tokens.On("AgentToken").Return(token).Times(0)
// no server provider
mcfg.serverProvider.On("FindLANServer").Return(&metadata.Server{Addr: &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300}}).Times(1)
populateResponse := func(args mock.Arguments) {
resp, ok := args.Get(5).(*structs.SignedResponse)
require.True(t, ok)
*resp = structs.SignedResponse{
VerifyServerHostname: true,
ConnectCARoots: *indexedRoots,
IssuedCert: *cert,
ManualCARoots: extraCerts,
}
}
expectedRequest := structs.CASignRequest{
WriteRequest: structs.WriteRequest{Token: token},
Datacenter: datacenter,
// TODO (autoconf) Maybe in the future we should populate a CSR
// and do some manual parsing/verification of the contents. The
// bits not having to do with the signing key such as the requested
// SANs and CN. For now though the mockDirectRPC type will empty
// the CSR so we have to pass in an empty string to the expectation.
CSR: "",
}
mcfg.directRPC.On(
"RPC",
datacenter,
nodeName,
&net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300},
"AutoEncrypt.Sign",
&expectedRequest,
&structs.SignedResponse{}).Return(nil).Run(populateResponse)
ac, err := New(mcfg.Config)
require.NoError(t, err)
require.NotNil(t, ac)
cfg, err := ac.InitialConfiguration(context.Background())
require.NoError(t, err)
require.NotNil(t, cfg)
}
func TestAutoEncrypt_TokenUpdate(t *testing.T) {
testAC := startedAutoConfig(t, true)
newToken := "1a4cc445-86ed-46b4-a355-bbf5a11dddb0"
rootsCtx, rootsCancel := context.WithCancel(context.Background())
testAC.mcfg.cache.On("Notify",
mock.Anything,
cachetype.ConnectCARootName,
&structs.DCSpecificRequest{Datacenter: testAC.ac.config.Datacenter},
rootsWatchID,
mock.Anything,
).Return(nil).Once().Run(func(args mock.Arguments) {
rootsCancel()
})
leafCtx, leafCancel := context.WithCancel(context.Background())
testAC.mcfg.cache.On("Notify",
mock.Anything,
cachetype.ConnectCALeafName,
&cachetype.ConnectCALeafRequest{
Datacenter: "dc1",
Agent: "autoconf",
Token: newToken,
DNSSAN: defaultDNSSANs,
IPSAN: defaultIPSANs,
},
leafWatchID,
mock.Anything,
).Return(nil).Once().Run(func(args mock.Arguments) {
leafCancel()
})
// this will be retrieved once when resetting the leaf cert watch
testAC.mcfg.tokens.On("AgentToken").Return(newToken).Once()
// send the notification about the token update
testAC.tokenUpdates <- struct{}{}
// wait for the leaf cert watches
require.True(t, waitForChans(100*time.Millisecond, leafCtx.Done(), rootsCtx.Done()), "New cache watches were not started within 100ms")
}
func TestAutoEncrypt_RootsUpdate(t *testing.T) {
testAC := startedAutoConfig(t, true)
secondCA := connect.TestCA(t, testAC.initialRoots.Roots[0])
secondRoots := structs.IndexedCARoots{
ActiveRootID: secondCA.ID,
TrustDomain: connect.TestClusterID,
Roots: []*structs.CARoot{
secondCA,
testAC.initialRoots.Roots[0],
},
QueryMeta: structs.QueryMeta{
Index: 99,
},
}
updatedCtx, cancel := context.WithCancel(context.Background())
testAC.mcfg.tlsCfg.On("UpdateAutoTLSCA",
[]string{secondCA.RootCert, testAC.initialRoots.Roots[0].RootCert},
).Return(nil).Once().Run(func(args mock.Arguments) {
cancel()
})
// when a cache event comes in we end up recalculating the fallback timer which requires this call
testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(time.Now().Add(10 * time.Minute)).Once()
req := structs.DCSpecificRequest{Datacenter: "dc1"}
require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{
CorrelationID: rootsWatchID,
Result: &secondRoots,
Meta: cache.ResultMeta{
Index: secondRoots.Index,
},
}))
require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time")
}
func TestAutoEncrypt_CertUpdate(t *testing.T) {
testAC := startedAutoConfig(t, true)
secondCert := newLeaf(t, "autoconf", "dc1", testAC.initialRoots.Roots[0], 99, 10*time.Minute)
updatedCtx, cancel := context.WithCancel(context.Background())
testAC.mcfg.tlsCfg.On("UpdateAutoTLSCert",
secondCert.CertPEM,
"redacted",
).Return(nil).Once().Run(func(args mock.Arguments) {
cancel()
})
// when a cache event comes in we end up recalculating the fallback timer which requires this call
testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(secondCert.ValidBefore).Once()
req := cachetype.ConnectCALeafRequest{
Datacenter: "dc1",
Agent: "autoconf",
Token: testAC.originalToken,
DNSSAN: defaultDNSSANs,
IPSAN: defaultIPSANs,
}
require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{
CorrelationID: leafWatchID,
Result: secondCert,
Meta: cache.ResultMeta{
Index: secondCert.ModifyIndex,
},
}))
require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time")
}
func TestAutoEncrypt_Fallback(t *testing.T) {
testAC := startedAutoConfig(t, true)
// at this point everything is operating normally and we are just
// waiting for events. We are going to send a new cert that is basically
// already expired and then allow the fallback routine to kick in.
secondCert := newLeaf(t, "autoconf", "dc1", testAC.initialRoots.Roots[0], 100, time.Nanosecond)
secondCA := connect.TestCA(t, testAC.initialRoots.Roots[0])
secondRoots := structs.IndexedCARoots{
ActiveRootID: secondCA.ID,
TrustDomain: connect.TestClusterID,
Roots: []*structs.CARoot{
secondCA,
testAC.initialRoots.Roots[0],
},
QueryMeta: structs.QueryMeta{
Index: 101,
},
}
thirdCert := newLeaf(t, "autoconf", "dc1", secondCA, 102, 10*time.Minute)
// setup the expectation for when the certs get updated initially
updatedCtx, updateCancel := context.WithCancel(context.Background())
testAC.mcfg.tlsCfg.On("UpdateAutoTLSCert",
secondCert.CertPEM,
"redacted",
).Return(nil).Once().Run(func(args mock.Arguments) {
updateCancel()
})
// when a cache event comes in we end up recalculating the fallback timer which requires this call
testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(secondCert.ValidBefore).Once()
testAC.mcfg.tlsCfg.On("AutoEncryptCertExpired").Return(true).Once()
fallbackCtx, fallbackCancel := context.WithCancel(context.Background())
// also testing here that we can change server IPs for ongoing operations
testAC.mcfg.serverProvider.On("FindLANServer").Once().Return(&metadata.Server{
Addr: &net.TCPAddr{IP: net.IPv4(198, 18, 23, 2), Port: 8300},
})
// after sending the notification for the cert update another InitialConfiguration RPC
// will be made to pull down the latest configuration. So we need to set up the response
// for the second RPC
populateResponse := func(args mock.Arguments) {
resp, ok := args.Get(5).(*structs.SignedResponse)
require.True(t, ok)
*resp = structs.SignedResponse{
VerifyServerHostname: true,
ConnectCARoots: secondRoots,
IssuedCert: *thirdCert,
ManualCARoots: testAC.extraCerts,
}
fallbackCancel()
}
expectedRequest := structs.CASignRequest{
WriteRequest: structs.WriteRequest{Token: testAC.originalToken},
Datacenter: "dc1",
// TODO (autoconf) Maybe in the future we should populate a CSR
// and do some manual parsing/verification of the contents. The
// bits not having to do with the signing key such as the requested
// SANs and CN. For now though the mockDirectRPC type will empty
// the CSR so we have to pass in an empty string to the expectation.
CSR: "",
}
// the fallback routine to perform auto-encrypt again will need to grab this
testAC.mcfg.tokens.On("AgentToken").Return(testAC.originalToken).Once()
testAC.mcfg.directRPC.On(
"RPC",
"dc1",
"autoconf",
&net.TCPAddr{IP: net.IPv4(198, 18, 23, 2), Port: 8300},
"AutoEncrypt.Sign",
&expectedRequest,
&structs.SignedResponse{}).Return(nil).Run(populateResponse).Once()
testAC.mcfg.expectInitialTLS(t, "autoconf", "dc1", testAC.originalToken, secondCA, &secondRoots, thirdCert, testAC.extraCerts)
// after the second RPC we now will use the new certs validity period in the next run loop iteration
testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(time.Now().Add(10 * time.Minute)).Once()
// now that all the mocks are set up we can trigger the whole thing by sending the second expired cert
// as a cache update event.
req := cachetype.ConnectCALeafRequest{
Datacenter: "dc1",
Agent: "autoconf",
Token: testAC.originalToken,
DNSSAN: defaultDNSSANs,
IPSAN: defaultIPSANs,
}
require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{
CorrelationID: leafWatchID,
Result: secondCert,
Meta: cache.ResultMeta{
Index: secondCert.ModifyIndex,
},
}))
// wait for the TLS certificates to get updated
require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time")
// now wait for the fallback routine to be invoked
require.True(t, waitForChans(100*time.Millisecond, fallbackCtx.Done()), "fallback routines did not get invoked within the alotted time")
}

View File

@ -1,30 +0,0 @@
package autoconf
import (
"github.com/hashicorp/consul/agent/config"
)
// LoadConfig will build the configuration including the extraHead source injected
// after all other defaults but before any user supplied configuration and the overrides
// source injected as the final source in the configuration parsing chain.
func LoadConfig(builderOpts config.BuilderOpts, extraHead config.Source, overrides ...config.Source) (*config.RuntimeConfig, []string, error) {
b, err := config.NewBuilder(builderOpts)
if err != nil {
return nil, nil, err
}
if extraHead.Data != "" {
b.Head = append(b.Head, extraHead)
}
if len(overrides) != 0 {
b.Tail = append(b.Tail, overrides...)
}
cfg, err := b.BuildAndValidate()
if err != nil {
return nil, nil, err
}
return &cfg, b.Warnings, nil
}

View File

@ -3,9 +3,12 @@ package autoconf
import (
"context"
"net"
"time"
"github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/go-hclog"
)
@ -18,12 +21,35 @@ type DirectRPC interface {
RPC(dc string, node string, addr net.Addr, method string, args interface{}, reply interface{}) error
}
// CertMonitor is the interface that needs to be satisfied for AutoConfig to be able to
// setup monitoring of the Connect TLS certificate after we first get it.
type CertMonitor interface {
Update(*structs.SignedResponse) error
Start(context.Context) (<-chan struct{}, error)
Stop() bool
// Cache is an interface to represent the methods of the
// agent/cache.Cache struct that we care about
type Cache interface {
Notify(ctx context.Context, t string, r cache.Request, correlationID string, ch chan<- cache.UpdateEvent) error
Prepopulate(t string, result cache.FetchResult, dc string, token string, key string) error
}
// ServerProvider is an interface that can be used to find one server in the local DC known to
// the agent via Gossip
type ServerProvider interface {
FindLANServer() *metadata.Server
}
// TLSConfigurator is an interface of the methods on the tlsutil.Configurator that we will require at
// runtime.
type TLSConfigurator interface {
UpdateAutoTLS(manualCAPEMs, connectCAPEMs []string, pub, priv string, verifyServerHostname bool) error
UpdateAutoTLSCA([]string) error
UpdateAutoTLSCert(pub, priv string) error
AutoEncryptCertNotAfter() time.Time
AutoEncryptCertExpired() bool
}
// TokenStore is an interface of the methods we will need to use from the token.Store.
type TokenStore interface {
AgentToken() string
UpdateAgentToken(secret string, source token.TokenSource) bool
Notify(kind token.TokenKind) token.Notifier
StopNotify(notifier token.Notifier)
}
// Config contains all the tunables for AutoConfig
@ -37,16 +63,9 @@ type Config struct {
// configuration. Setting this field is required.
DirectRPC DirectRPC
// BuilderOpts are any configuration building options that should be
// used when loading the Consul configuration. This is mostly a pass
// through from what the CLI will generate. While this option is
// not strictly required, not setting it will prevent AutoConfig
// from doing anything useful. Enabling AutoConfig requires a
// CLI flag or a config file (also specified by the CLI) flag.
// So without providing the opts its equivalent to using the
// configuration of not specifying anything to the consul agent
// cli.
BuilderOpts config.BuilderOpts
// ServerProvider is the interfaced to be used by AutoConfig to find any
// known servers during fallback operations.
ServerProvider ServerProvider
// Waiter is a RetryWaiter to be used during retrieval of the
// initial configuration. When a round of requests fails we will
@ -60,56 +79,28 @@ type Config struct {
// having the test take minutes/hours to complete.
Waiter *lib.RetryWaiter
// Overrides are a list of configuration sources to append to the tail of
// the config builder. This field is optional and mainly useful for testing
// to override values that would be otherwise not user-settable.
Overrides []config.Source
// Loader merges source with the existing FileSources and returns the complete
// RuntimeConfig.
Loader func(source config.Source) (cfg *config.RuntimeConfig, warnings []string, err error)
// CertMonitor is the Connect TLS Certificate Monitor to be used for ongoing
// certificate renewals and connect CA roots updates. This field is not
// strictly required but if not provided the TLS certificates retrieved
// through by the AutoConfig.InitialConfiguration RPC will not be used
// or renewed.
CertMonitor CertMonitor
}
// TLSConfigurator is the shared TLS Configurator. AutoConfig will update the
// auto encrypt/auto config certs as they are renewed.
TLSConfigurator TLSConfigurator
// WithLogger will cause the created AutoConfig type to use the provided logger
func (c *Config) WithLogger(logger hclog.Logger) *Config {
c.Logger = logger
return c
}
// Cache is an object implementing our Cache interface. The Cache
// used at runtime must be able to handle Roots and Leaf Cert watches
Cache Cache
// WithConnectionPool will cause the created AutoConfig type to use the provided connection pool
func (c *Config) WithDirectRPC(directRPC DirectRPC) *Config {
c.DirectRPC = directRPC
return c
}
// FallbackLeeway is the amount of time after certificate expiration before
// invoking the fallback routine. If not set this will default to 10s.
FallbackLeeway time.Duration
// WithBuilderOpts will cause the created AutoConfig type to use the provided CLI builderOpts
func (c *Config) WithBuilderOpts(builderOpts config.BuilderOpts) *Config {
c.BuilderOpts = builderOpts
return c
}
// FallbackRetry is the duration between Fallback invocations when the configured
// fallback routine returns an error. If not set this will default to 1m.
FallbackRetry time.Duration
// WithRetryWaiter will cause the created AutoConfig type to use the provided retry waiter
func (c *Config) WithRetryWaiter(waiter *lib.RetryWaiter) *Config {
c.Waiter = waiter
return c
}
// WithOverrides is used to provide a config source to append to the tail sources
// during config building. It is really only useful for testing to tune non-user
// configurable tunables to make various tests converge more quickly than they
// could otherwise.
func (c *Config) WithOverrides(overrides ...config.Source) *Config {
c.Overrides = overrides
return c
}
// WithCertMonitor is used to provide a certificate monitor to the auto-config.
// This monitor is responsible for renewing the agents TLS certificate and keeping
// the connect CA roots up to date.
func (c *Config) WithCertMonitor(certMonitor CertMonitor) *Config {
c.CertMonitor = certMonitor
return c
// Tokens is the shared token store. It is used to retrieve the current
// agent token as well as getting notifications when that token is updated.
// This field is required.
Tokens TokenStore
}

View File

@ -3,6 +3,7 @@ package autoconf
import (
"fmt"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/proto"
"github.com/hashicorp/consul/proto/pbautoconf"
@ -19,151 +20,84 @@ import (
//
// Why is this function not in the proto/pbconfig package? The answer, that
// package cannot import the agent/config package without running into import cycles.
//
// If this function is meant to output an agent/config.Config then why does it output
// a map[string]interface{}? The answer is that our config and command line option
// parsing is messed up and it would require major changes to fix (we probably should
// do them but not for the auto-config feature). To understand this we need to work
// backwards. What we want to be able to do is persist the config settings from an
// auto-config response configuration to disk. We then want that configuration
// to be able to be parsed with the normal configuration parser/builder. It sort of was
// working with returning a filled out agent/config.Config but the problem was that
// the struct has a lot of non-pointer struct members. Thus, JSON serializtion caused
// these to always be emitted even if they contained no non-empty fields. The
// configuration would then seem to parse okay, but in OSS we would get warnings for
// setting a bunch of enterprise fields like "audit" at the top level. In an attempt
// to quiet those warnings, I had converted all the existing non-pointer struct fields
// to pointers. Then there were issues with the builder code expecting concrete values.
// I could add nil checks **EVERYWHERE** in builder.go or take a different approach.
// I then made a function utilizing github.com/mitchellh/reflectwalk to un-nil all the
// struct pointers after parsing to prevent any nil pointer dereferences. At first
// glance this seemed like it was going to work but then I saw that nearly all of the
// tests in runtime_test.go were failing. The first issues was that we were not merging
// pointers to struct fields properly. It was simply taking the new pointer if non-nil
// and defaulting to the original. So I updated that code, to properly merge pointers
// to structs. That fixed a bunch of tests but then there was another issue with
// the runtime tests where it was emitting warnings for using consul enterprise only
// configuration. After spending some time tracking this down it turns out that it
// was coming from our CLI option parsing. Our CLI option parsing works by filling
// in a agent/config.Config struct. Along the way when converting to pointers to
// structs I had to add a call to that function to un-nil various pointers to prevent
// the CLI from segfaulting. However this un-nil operation was causing the various
// enterprise only keys to be materialized. Thus we were back to where we were before
// the conversion to pointers to structs and mostly stuck.
//
// Therefore, this function will create a map[string]interface{} that should be
// compatible with the agent/config.Config struct but where we can more tightly
// control which fields are output. Its not a nice solution. It has a non-trivial
// maintenance burden. In the long run we should unify the protobuf Config and
// the normal agent/config.Config so that we can just serialize the protobuf version
// without any translation. For now, this hack is necessary :(
func translateConfig(c *pbconfig.Config) map[string]interface{} {
out := map[string]interface{}{
"datacenter": c.Datacenter,
"primary_datacenter": c.PrimaryDatacenter,
"node_name": c.NodeName,
}
func translateConfig(c *pbconfig.Config) config.Config {
result := config.Config{
Datacenter: stringPtrOrNil(c.Datacenter),
PrimaryDatacenter: stringPtrOrNil(c.PrimaryDatacenter),
NodeName: stringPtrOrNil(c.NodeName),
// only output the SegmentName in the configuration if its non-empty
// this will avoid a warning later when parsing the persisted configuration
if c.SegmentName != "" {
out["segment"] = c.SegmentName
SegmentName: stringPtrOrNil(c.SegmentName),
}
// Translate Auto Encrypt settings
if a := c.AutoEncrypt; a != nil {
autoEncryptConfig := map[string]interface{}{
"tls": a.TLS,
"allow_tls": a.AllowTLS,
result.AutoEncrypt = config.AutoEncrypt{
TLS: &a.TLS,
DNSSAN: a.DNSSAN,
IPSAN: a.IPSAN,
AllowTLS: &a.AllowTLS,
}
}
if len(a.DNSSAN) > 0 {
autoEncryptConfig["dns_san"] = a.DNSSAN
}
if len(a.IPSAN) > 0 {
autoEncryptConfig["ip_san"] = a.IPSAN
}
out["auto_encrypt"] = autoEncryptConfig
}
// Translate all the ACL settings
if a := c.ACL; a != nil {
aclConfig := map[string]interface{}{
"enabled": a.Enabled,
"policy_ttl": a.PolicyTTL,
"role_ttl": a.RoleTTL,
"token_ttl": a.TokenTTL,
"down_policy": a.DownPolicy,
"default_policy": a.DefaultPolicy,
"enable_key_list_policy": a.EnableKeyListPolicy,
"disabled_ttl": a.DisabledTTL,
"enable_token_persistence": a.EnableTokenPersistence,
result.ACL = config.ACL{
Enabled: &a.Enabled,
PolicyTTL: stringPtrOrNil(a.PolicyTTL),
RoleTTL: stringPtrOrNil(a.RoleTTL),
TokenTTL: stringPtrOrNil(a.TokenTTL),
DownPolicy: stringPtrOrNil(a.DownPolicy),
DefaultPolicy: stringPtrOrNil(a.DefaultPolicy),
EnableKeyListPolicy: &a.EnableKeyListPolicy,
DisabledTTL: stringPtrOrNil(a.DisabledTTL),
EnableTokenPersistence: &a.EnableTokenPersistence,
}
if t := c.ACL.Tokens; t != nil {
var mspTokens []map[string]string
// create the slice of msp tokens if any
tokens := make([]config.ServiceProviderToken, 0, len(t.ManagedServiceProvider))
for _, mspToken := range t.ManagedServiceProvider {
mspTokens = append(mspTokens, map[string]string{
"accessor_id": mspToken.AccessorID,
"secret_id": mspToken.SecretID,
tokens = append(tokens, config.ServiceProviderToken{
AccessorID: &mspToken.AccessorID,
SecretID: &mspToken.SecretID,
})
}
tokenConfig := make(map[string]interface{})
if t.Master != "" {
tokenConfig["master"] = t.Master
result.ACL.Tokens = config.Tokens{
Master: stringPtrOrNil(t.Master),
Replication: stringPtrOrNil(t.Replication),
AgentMaster: stringPtrOrNil(t.AgentMaster),
Default: stringPtrOrNil(t.Default),
Agent: stringPtrOrNil(t.Agent),
ManagedServiceProvider: tokens,
}
if t.Replication != "" {
tokenConfig["replication"] = t.Replication
}
if t.AgentMaster != "" {
tokenConfig["agent_master"] = t.AgentMaster
}
if t.Default != "" {
tokenConfig["default"] = t.Default
}
if t.Agent != "" {
tokenConfig["agent"] = t.Agent
}
if len(mspTokens) > 0 {
tokenConfig["managed_service_provider"] = mspTokens
}
aclConfig["tokens"] = tokenConfig
}
out["acl"] = aclConfig
}
// Translate the Gossip settings
if g := c.Gossip; g != nil {
out["retry_join"] = g.RetryJoinLAN
result.RetryJoinLAN = g.RetryJoinLAN
// Translate the Gossip Encryption settings
if e := c.Gossip.Encryption; e != nil {
out["encrypt"] = e.Key
out["encrypt_verify_incoming"] = e.VerifyIncoming
out["encrypt_verify_outgoing"] = e.VerifyOutgoing
result.EncryptKey = stringPtrOrNil(e.Key)
result.EncryptVerifyIncoming = &e.VerifyIncoming
result.EncryptVerifyOutgoing = &e.VerifyOutgoing
}
}
// Translate the Generic TLS settings
if t := c.TLS; t != nil {
out["verify_outgoing"] = t.VerifyOutgoing
out["verify_server_hostname"] = t.VerifyServerHostname
if t.MinVersion != "" {
out["tls_min_version"] = t.MinVersion
}
if t.CipherSuites != "" {
out["tls_cipher_suites"] = t.CipherSuites
}
out["tls_prefer_server_cipher_suites"] = t.PreferServerCipherSuites
result.VerifyOutgoing = &t.VerifyOutgoing
result.VerifyServerHostname = &t.VerifyServerHostname
result.TLSMinVersion = stringPtrOrNil(t.MinVersion)
result.TLSCipherSuites = stringPtrOrNil(t.CipherSuites)
result.TLSPreferServerCipherSuites = &t.PreferServerCipherSuites
}
return out
return result
}
func stringPtrOrNil(v string) *string {
if v == "" {
return nil
}
return &v
}
func extractSignedResponse(resp *pbautoconf.AutoConfigResponse) (*structs.SignedResponse, error) {
@ -226,3 +160,34 @@ func mapstructureTranslateToStructs(in interface{}, out interface{}) error {
return decoder.Decode(in)
}
func translateCARootsToProtobuf(in *structs.IndexedCARoots) (*pbconnect.CARoots, error) {
var out pbconnect.CARoots
if err := mapstructureTranslateToProtobuf(in, &out); err != nil {
return nil, fmt.Errorf("Failed to re-encode CA Roots: %w", err)
}
return &out, nil
}
func translateIssuedCertToProtobuf(in *structs.IssuedCert) (*pbconnect.IssuedCert, error) {
var out pbconnect.IssuedCert
if err := mapstructureTranslateToProtobuf(in, &out); err != nil {
return nil, fmt.Errorf("Failed to re-encode CA Roots: %w", err)
}
return &out, nil
}
func mapstructureTranslateToProtobuf(in interface{}, out interface{}) error {
decoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
DecodeHook: proto.HookTimeToPBTimestamp,
Result: out,
})
if err != nil {
return err
}
return decoder.Decode(in)
}

View File

@ -1,11 +1,13 @@
package autoconf
import (
"encoding/json"
"fmt"
"testing"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/structs"
pbconfig "github.com/hashicorp/consul/proto/pbconfig"
"github.com/hashicorp/consul/proto/pbconnect"
"github.com/stretchr/testify/require"
)
@ -17,7 +19,39 @@ func boolPointer(b bool) *bool {
return &b
}
func TestConfig_translateConfig(t *testing.T) {
func translateCARootToProtobuf(in *structs.CARoot) (*pbconnect.CARoot, error) {
var out pbconnect.CARoot
if err := mapstructureTranslateToProtobuf(in, &out); err != nil {
return nil, fmt.Errorf("Failed to re-encode CA Roots: %w", err)
}
return &out, nil
}
func mustTranslateCARootToProtobuf(t *testing.T, in *structs.CARoot) *pbconnect.CARoot {
out, err := translateCARootToProtobuf(in)
require.NoError(t, err)
return out
}
func mustTranslateCARootsToStructs(t *testing.T, in *pbconnect.CARoots) *structs.IndexedCARoots {
out, err := translateCARootsToStructs(in)
require.NoError(t, err)
return out
}
func mustTranslateCARootsToProtobuf(t *testing.T, in *structs.IndexedCARoots) *pbconnect.CARoots {
out, err := translateCARootsToProtobuf(in)
require.NoError(t, err)
return out
}
func mustTranslateIssuedCertToProtobuf(t *testing.T, in *structs.IssuedCert) *pbconnect.IssuedCert {
out, err := translateIssuedCertToProtobuf(in)
require.NoError(t, err)
return out
}
func TestTranslateConfig(t *testing.T) {
original := pbconfig.Config{
Datacenter: "abc",
PrimaryDatacenter: "def",
@ -71,7 +105,7 @@ func TestConfig_translateConfig(t *testing.T) {
},
}
expected := &config.Config{
expected := config.Config{
Datacenter: stringPointer("abc"),
PrimaryDatacenter: stringPointer("def"),
NodeName: stringPointer("ghi"),
@ -118,10 +152,11 @@ func TestConfig_translateConfig(t *testing.T) {
}
translated := translateConfig(&original)
data, err := json.Marshal(translated)
require.NoError(t, err)
actual, _, err := config.Parse(string(data), "json")
require.NoError(t, err)
require.Equal(t, expected, &actual)
require.Equal(t, expected, translated)
}
func TestCArootsTranslation(t *testing.T) {
_, indexedRoots, _ := testCerts(t, "autoconf", "dc1")
protoRoots := mustTranslateCARootsToProtobuf(t, indexedRoots)
require.Equal(t, indexedRoots, mustTranslateCARootsToStructs(t, protoRoots))
}

View File

@ -0,0 +1,337 @@
package autoconf
import (
"context"
"net"
"sync"
"testing"
"time"
"github.com/hashicorp/consul/agent/cache"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/proto/pbautoconf"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/stretchr/testify/mock"
)
type mockDirectRPC struct {
mock.Mock
}
func newMockDirectRPC(t *testing.T) *mockDirectRPC {
m := mockDirectRPC{}
m.Test(t)
return &m
}
func (m *mockDirectRPC) RPC(dc string, node string, addr net.Addr, method string, args interface{}, reply interface{}) error {
var retValues mock.Arguments
if method == "AutoConfig.InitialConfiguration" {
req := args.(*pbautoconf.AutoConfigRequest)
csr := req.CSR
req.CSR = ""
retValues = m.Called(dc, node, addr, method, args, reply)
req.CSR = csr
} else if method == "AutoEncrypt.Sign" {
req := args.(*structs.CASignRequest)
csr := req.CSR
req.CSR = ""
retValues = m.Called(dc, node, addr, method, args, reply)
req.CSR = csr
} else {
retValues = m.Called(dc, node, addr, method, args, reply)
}
return retValues.Error(0)
}
type mockTLSConfigurator struct {
mock.Mock
}
func newMockTLSConfigurator(t *testing.T) *mockTLSConfigurator {
m := mockTLSConfigurator{}
m.Test(t)
return &m
}
func (m *mockTLSConfigurator) UpdateAutoTLS(manualCAPEMs, connectCAPEMs []string, pub, priv string, verifyServerHostname bool) error {
if priv != "" {
priv = "redacted"
}
ret := m.Called(manualCAPEMs, connectCAPEMs, pub, priv, verifyServerHostname)
return ret.Error(0)
}
func (m *mockTLSConfigurator) UpdateAutoTLSCA(pems []string) error {
ret := m.Called(pems)
return ret.Error(0)
}
func (m *mockTLSConfigurator) UpdateAutoTLSCert(pub, priv string) error {
if priv != "" {
priv = "redacted"
}
ret := m.Called(pub, priv)
return ret.Error(0)
}
func (m *mockTLSConfigurator) AutoEncryptCertNotAfter() time.Time {
ret := m.Called()
ts, _ := ret.Get(0).(time.Time)
return ts
}
func (m *mockTLSConfigurator) AutoEncryptCertExpired() bool {
ret := m.Called()
return ret.Bool(0)
}
type mockServerProvider struct {
mock.Mock
}
func newMockServerProvider(t *testing.T) *mockServerProvider {
m := mockServerProvider{}
m.Test(t)
return &m
}
func (m *mockServerProvider) FindLANServer() *metadata.Server {
ret := m.Called()
srv, _ := ret.Get(0).(*metadata.Server)
return srv
}
type mockWatcher struct {
ch chan<- cache.UpdateEvent
done <-chan struct{}
}
type mockCache struct {
mock.Mock
lock sync.Mutex
watchers map[string][]mockWatcher
}
func newMockCache(t *testing.T) *mockCache {
m := mockCache{
watchers: make(map[string][]mockWatcher),
}
m.Test(t)
return &m
}
func (m *mockCache) Notify(ctx context.Context, t string, r cache.Request, correlationID string, ch chan<- cache.UpdateEvent) error {
ret := m.Called(ctx, t, r, correlationID, ch)
err := ret.Error(0)
if err == nil {
m.lock.Lock()
key := r.CacheInfo().Key
m.watchers[key] = append(m.watchers[key], mockWatcher{ch: ch, done: ctx.Done()})
m.lock.Unlock()
}
return err
}
func (m *mockCache) Prepopulate(t string, result cache.FetchResult, dc string, token string, key string) error {
var restore string
cert, ok := result.Value.(*structs.IssuedCert)
if ok {
// we cannot know what the private key is prior to it being injected into the cache.
// therefore redact it here and all mock expectations should take that into account
restore = cert.PrivateKeyPEM
cert.PrivateKeyPEM = "redacted"
}
ret := m.Called(t, result, dc, token, key)
if ok && restore != "" {
cert.PrivateKeyPEM = restore
}
return ret.Error(0)
}
func (m *mockCache) sendNotification(ctx context.Context, key string, u cache.UpdateEvent) bool {
m.lock.Lock()
defer m.lock.Unlock()
watchers, ok := m.watchers[key]
if !ok || len(m.watchers) < 1 {
return false
}
var newWatchers []mockWatcher
for _, watcher := range watchers {
select {
case watcher.ch <- u:
newWatchers = append(newWatchers, watcher)
case <-watcher.done:
// do nothing, this watcher will be removed from the list
case <-ctx.Done():
// return doesn't matter here really, the test is being cancelled
return true
}
}
// this removes any already cancelled watches from being sent to
m.watchers[key] = newWatchers
return true
}
type mockTokenStore struct {
mock.Mock
}
func newMockTokenStore(t *testing.T) *mockTokenStore {
m := mockTokenStore{}
m.Test(t)
return &m
}
func (m *mockTokenStore) AgentToken() string {
ret := m.Called()
return ret.String(0)
}
func (m *mockTokenStore) UpdateAgentToken(secret string, source token.TokenSource) bool {
return m.Called(secret, source).Bool(0)
}
func (m *mockTokenStore) Notify(kind token.TokenKind) token.Notifier {
ret := m.Called(kind)
n, _ := ret.Get(0).(token.Notifier)
return n
}
func (m *mockTokenStore) StopNotify(notifier token.Notifier) {
m.Called(notifier)
}
type mockedConfig struct {
Config
directRPC *mockDirectRPC
serverProvider *mockServerProvider
cache *mockCache
tokens *mockTokenStore
tlsCfg *mockTLSConfigurator
}
func newMockedConfig(t *testing.T) *mockedConfig {
directRPC := newMockDirectRPC(t)
serverProvider := newMockServerProvider(t)
mcache := newMockCache(t)
tokens := newMockTokenStore(t)
tlsCfg := newMockTLSConfigurator(t)
// I am not sure it is well defined behavior but in testing it
// out it does appear like Cleanup functions can fail tests
// Adding in the mock expectations assertions here saves us
// a bunch of code in the other test functions.
t.Cleanup(func() {
if !t.Failed() {
directRPC.AssertExpectations(t)
serverProvider.AssertExpectations(t)
mcache.AssertExpectations(t)
tokens.AssertExpectations(t)
tlsCfg.AssertExpectations(t)
}
})
return &mockedConfig{
Config: Config{
DirectRPC: directRPC,
ServerProvider: serverProvider,
Cache: mcache,
Tokens: tokens,
TLSConfigurator: tlsCfg,
Logger: testutil.Logger(t),
},
directRPC: directRPC,
serverProvider: serverProvider,
cache: mcache,
tokens: tokens,
tlsCfg: tlsCfg,
}
}
func (m *mockedConfig) expectInitialTLS(t *testing.T, agentName, datacenter, token string, ca *structs.CARoot, indexedRoots *structs.IndexedCARoots, cert *structs.IssuedCert, extraCerts []string) {
var pems []string
for _, root := range indexedRoots.Roots {
pems = append(pems, root.RootCert)
}
// we should update the TLS configurator with the proper certs
m.tlsCfg.On("UpdateAutoTLS",
extraCerts,
pems,
cert.CertPEM,
// auto-config handles the CSR and Key so our tests don't have
// a way to know that the key is correct or not. We do replace
// a non empty PEM with "redacted" so we can ensure that some
// certificate is being sent
"redacted",
true,
).Return(nil).Once()
rootRes := cache.FetchResult{Value: indexedRoots, Index: indexedRoots.QueryMeta.Index}
rootsReq := structs.DCSpecificRequest{Datacenter: datacenter}
// we should prepopulate the cache with the CA roots
m.cache.On("Prepopulate",
cachetype.ConnectCARootName,
rootRes,
datacenter,
"",
rootsReq.CacheInfo().Key,
).Return(nil).Once()
leafReq := cachetype.ConnectCALeafRequest{
Token: token,
Agent: agentName,
Datacenter: datacenter,
}
// copy the cert and redact the private key for the mock expectation
// the actual private key will not correspond to the cert but thats
// because AutoConfig is generated a key/csr internally and sending that
// on up with the request.
copy := *cert
copy.PrivateKeyPEM = "redacted"
leafRes := cache.FetchResult{
Value: &copy,
Index: copy.RaftIndex.ModifyIndex,
State: cachetype.ConnectCALeafSuccess(ca.SigningKeyID),
}
// we should prepopulate the cache with the agents cert
m.cache.On("Prepopulate",
cachetype.ConnectCALeafName,
leafRes,
datacenter,
token,
leafReq.Key(),
).Return(nil).Once()
// when prepopulating the cert in the cache we grab the token so
// we should expec that here
m.tokens.On("AgentToken").Return(token).Once()
}
func (m *mockedConfig) setupInitialTLS(t *testing.T, agentName, datacenter, token string) (*structs.IndexedCARoots, *structs.IssuedCert, []string) {
ca, indexedRoots, cert := testCerts(t, agentName, datacenter)
ca2 := connect.TestCA(t, nil)
extraCerts := []string{ca2.RootCert}
m.expectInitialTLS(t, agentName, datacenter, token, ca, indexedRoots, cert, extraCerts)
return indexedRoots, cert, extraCerts
}

View File

@ -0,0 +1,86 @@
package autoconf
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"github.com/golang/protobuf/jsonpb"
"github.com/hashicorp/consul/proto/pbautoconf"
)
const (
// autoConfigFileName is the name of the file that the agent auto-config settings are
// stored in within the data directory
autoConfigFileName = "auto-config.json"
)
var (
pbMarshaler = &jsonpb.Marshaler{
OrigName: false,
EnumsAsInts: false,
Indent: " ",
EmitDefaults: true,
}
pbUnmarshaler = &jsonpb.Unmarshaler{
AllowUnknownFields: false,
}
)
func (ac *AutoConfig) readPersistedAutoConfig() (*pbautoconf.AutoConfigResponse, error) {
if ac.config.DataDir == "" {
// no data directory means we don't have anything to potentially load
return nil, nil
}
path := filepath.Join(ac.config.DataDir, autoConfigFileName)
ac.logger.Debug("attempting to restore any persisted configuration", "path", path)
content, err := ioutil.ReadFile(path)
if err == nil {
rdr := strings.NewReader(string(content))
var resp pbautoconf.AutoConfigResponse
if err := pbUnmarshaler.Unmarshal(rdr, &resp); err != nil {
return nil, fmt.Errorf("failed to decode persisted auto-config data: %w", err)
}
ac.logger.Info("read persisted configuration", "path", path)
return &resp, nil
}
if !os.IsNotExist(err) {
return nil, fmt.Errorf("failed to load %s: %w", path, err)
}
// ignore non-existence errors as that is an indicator that we haven't
// performed the auto configuration before
return nil, nil
}
func (ac *AutoConfig) persistAutoConfig(resp *pbautoconf.AutoConfigResponse) error {
// now that we know the configuration is generally fine including TLS certs go ahead and persist it to disk.
if ac.config.DataDir == "" {
ac.logger.Debug("not persisting auto-config settings because there is no data directory")
return nil
}
serialized, err := pbMarshaler.MarshalToString(resp)
if err != nil {
return fmt.Errorf("failed to encode auto-config response as JSON: %w", err)
}
path := filepath.Join(ac.config.DataDir, autoConfigFileName)
err = ioutil.WriteFile(path, []byte(serialized), 0660)
if err != nil {
return fmt.Errorf("failed to write auto-config configurations: %w", err)
}
ac.logger.Debug("auto-config settings were persisted to disk")
return nil
}

192
agent/auto-config/run.go Normal file
View File

@ -0,0 +1,192 @@
package autoconf
import (
"context"
"fmt"
"time"
"github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/structs"
)
// handleCacheEvent is used to handle event notifications from the cache for the roots
// or leaf cert watches.
func (ac *AutoConfig) handleCacheEvent(u cache.UpdateEvent) error {
switch u.CorrelationID {
case rootsWatchID:
ac.logger.Debug("roots watch fired - updating CA certificates")
if u.Err != nil {
return fmt.Errorf("root watch returned an error: %w", u.Err)
}
roots, ok := u.Result.(*structs.IndexedCARoots)
if !ok {
return fmt.Errorf("invalid type for roots watch response: %T", u.Result)
}
return ac.updateCARoots(roots)
case leafWatchID:
ac.logger.Debug("leaf certificate watch fired - updating TLS certificate")
if u.Err != nil {
return fmt.Errorf("leaf watch returned an error: %w", u.Err)
}
leaf, ok := u.Result.(*structs.IssuedCert)
if !ok {
return fmt.Errorf("invalid type for agent leaf cert watch response: %T", u.Result)
}
return ac.updateLeafCert(leaf)
}
return nil
}
// handleTokenUpdate is used when a notification about the agent token being updated
// is received and various watches need cancelling/restarting to use the new token.
func (ac *AutoConfig) handleTokenUpdate(ctx context.Context) error {
ac.logger.Debug("Agent token updated - resetting watches")
// TODO (autoencrypt) Prepopulate the cache with the new token with
// the existing cache entry with the old token. The certificate doesn't
// need to change just because the token has. However there isn't a
// good way to make that happen and this behavior is benign enough
// that I am going to push off implementing it.
// the agent token has been updated so we must update our leaf cert watch.
// this cancels the current watches before setting up new ones
ac.cancelWatches()
// recreate the chan for cache updates. This is a precautionary measure to ensure
// that we don't accidentally get notified for the new watches being setup before
// a blocking query in the cache returns and sends data to the old chan. In theory
// the code in agent/cache/watch.go should prevent this where we specifically check
// for context cancellation prior to sending the event. However we could cancel
// it after that check and finish setting up the new watches before getting the old
// events. Both the go routine scheduler and the OS thread scheduler would have to
// be acting up for this to happen. Regardless the way to ensure we don't get events
// for the old watches is to simply replace the chan we are expecting them from.
close(ac.cacheUpdates)
ac.cacheUpdates = make(chan cache.UpdateEvent, 10)
// restart watches - this will be done with the correct token
cancelWatches, err := ac.setupCertificateCacheWatches(ctx)
if err != nil {
return fmt.Errorf("failed to restart watches after agent token update: %w", err)
}
ac.cancelWatches = cancelWatches
return nil
}
// handleFallback is used when the current TLS certificate has expired and the normal
// updating mechanisms have failed to renew it quickly enough. This function will
// use the configured fallback mechanism to retrieve a new cert and start monitoring
// that one.
func (ac *AutoConfig) handleFallback(ctx context.Context) error {
ac.logger.Warn("agent's client certificate has expired")
// Background because the context is mainly useful when the agent is first starting up.
switch {
case ac.config.AutoConfig.Enabled:
resp, err := ac.getInitialConfiguration(ctx)
if err != nil {
return fmt.Errorf("error while retrieving new agent certificates via auto-config: %w", err)
}
return ac.recordInitialConfiguration(resp)
case ac.config.AutoEncryptTLS:
reply, err := ac.autoEncryptInitialCerts(ctx)
if err != nil {
return fmt.Errorf("error while retrieving new agent certificate via auto-encrypt: %w", err)
}
return ac.setInitialTLSCertificates(reply)
default:
return fmt.Errorf("logic error: either auto-encrypt or auto-config must be enabled")
}
}
// run is the private method to be spawn by the Start method for
// executing the main monitoring loop.
func (ac *AutoConfig) run(ctx context.Context, exit chan struct{}) {
// The fallbackTimer is used to notify AFTER the agents
// leaf certificate has expired and where we need
// to fall back to the less secure RPC endpoint just like
// if the agent was starting up new.
//
// Check 10sec (fallback leeway duration) after cert
// expires. The agent cache should be handling the expiration
// and renew it before then.
//
// If there is no cert, AutoEncryptCertNotAfter returns
// a value in the past which immediately triggers the
// renew, but this case shouldn't happen because at
// this point, auto_encrypt was just being setup
// successfully.
calcFallbackInterval := func() time.Duration {
certExpiry := ac.acConfig.TLSConfigurator.AutoEncryptCertNotAfter()
return certExpiry.Add(ac.acConfig.FallbackLeeway).Sub(time.Now())
}
fallbackTimer := time.NewTimer(calcFallbackInterval())
// cleanup for once we are stopped
defer func() {
// cancel the go routines performing the cache watches
ac.cancelWatches()
// ensure we don't leak the timers go routine
fallbackTimer.Stop()
// stop receiving notifications for token updates
ac.acConfig.Tokens.StopNotify(ac.tokenUpdates)
ac.logger.Debug("auto-config has been stopped")
ac.Lock()
ac.cancel = nil
ac.running = false
// this should be the final cleanup task as its what notifies
// the rest of the world that this go routine has exited.
close(exit)
ac.Unlock()
}()
for {
select {
case <-ctx.Done():
ac.logger.Debug("stopping auto-config")
return
case <-ac.tokenUpdates.Ch:
ac.logger.Debug("handling a token update event")
if err := ac.handleTokenUpdate(ctx); err != nil {
ac.logger.Error("error in handling token update event", "error", err)
}
case u := <-ac.cacheUpdates:
ac.logger.Debug("handling a cache update event", "correlation_id", u.CorrelationID)
if err := ac.handleCacheEvent(u); err != nil {
ac.logger.Error("error in handling cache update event", "error", err)
}
// reset the fallback timer as the certificate may have been updated
fallbackTimer.Stop()
fallbackTimer = time.NewTimer(calcFallbackInterval())
case <-fallbackTimer.C:
// This is a safety net in case the cert doesn't get renewed
// in time. The agent would be stuck in that case because the watches
// never use the AutoEncrypt.Sign endpoint.
// check auto encrypt client cert expiration
if ac.acConfig.TLSConfigurator.AutoEncryptCertExpired() {
if err := ac.handleFallback(ctx); err != nil {
ac.logger.Error("error when handling a certificate expiry event", "error", err)
fallbackTimer = time.NewTimer(ac.acConfig.FallbackRetry)
} else {
fallbackTimer = time.NewTimer(calcFallbackInterval())
}
} else {
// this shouldn't be possible. We calculate the timer duration to be the certificate
// expiration time + some leeway (10s default). So whenever we get here the certificate
// should be expired. Regardless its probably worth resetting the timer.
fallbackTimer = time.NewTimer(calcFallbackInterval())
}
}
}
}

View File

@ -0,0 +1,111 @@
package autoconf
import (
"fmt"
"net"
"strconv"
"strings"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/go-discover"
discoverk8s "github.com/hashicorp/go-discover/provider/k8s"
"github.com/hashicorp/go-hclog"
)
func (ac *AutoConfig) discoverServers(servers []string) ([]string, error) {
providers := make(map[string]discover.Provider)
for k, v := range discover.Providers {
providers[k] = v
}
providers["k8s"] = &discoverk8s.Provider{}
disco, err := discover.New(
discover.WithUserAgent(lib.UserAgent()),
discover.WithProviders(providers),
)
if err != nil {
return nil, fmt.Errorf("Failed to create go-discover resolver: %w", err)
}
var addrs []string
for _, addr := range servers {
switch {
case strings.Contains(addr, "provider="):
resolved, err := disco.Addrs(addr, ac.logger.StandardLogger(&hclog.StandardLoggerOptions{InferLevels: true}))
if err != nil {
ac.logger.Error("failed to resolve go-discover auto-config servers", "configuration", addr, "err", err)
continue
}
addrs = append(addrs, resolved...)
ac.logger.Debug("discovered auto-config servers", "servers", resolved)
default:
addrs = append(addrs, addr)
}
}
return addrs, nil
}
// autoConfigHosts is responsible for taking the list of server addresses
// and resolving any go-discover provider invocations. It will then return
// a list of hosts. These might be hostnames and is expected that DNS resolution
// may be performed after this function runs. Additionally these may contain
// ports so SplitHostPort could also be necessary.
func (ac *AutoConfig) autoConfigHosts() ([]string, error) {
// use servers known to gossip if there are any
if ac.acConfig.ServerProvider != nil {
if srv := ac.acConfig.ServerProvider.FindLANServer(); srv != nil {
return []string{srv.Addr.String()}, nil
}
}
addrs, err := ac.discoverServers(ac.config.AutoConfig.ServerAddresses)
if err != nil {
return nil, err
}
if len(addrs) == 0 {
return nil, fmt.Errorf("no auto-config server addresses available for use")
}
return addrs, nil
}
// resolveHost will take a single host string and convert it to a list of TCPAddrs
// This will process any port in the input as well as looking up the hostname using
// normal DNS resolution.
func (ac *AutoConfig) resolveHost(hostPort string) []net.TCPAddr {
port := ac.config.ServerPort
host, portStr, err := net.SplitHostPort(hostPort)
if err != nil {
if strings.Contains(err.Error(), "missing port in address") {
host = hostPort
} else {
ac.logger.Warn("error splitting host address into IP and port", "address", hostPort, "error", err)
return nil
}
} else {
port, err = strconv.Atoi(portStr)
if err != nil {
ac.logger.Warn("Parsed port is not an integer", "port", portStr, "error", err)
return nil
}
}
// resolve the host to a list of IPs
ips, err := net.LookupIP(host)
if err != nil {
ac.logger.Warn("IP resolution failed", "host", host, "error", err)
return nil
}
var addrs []net.TCPAddr
for _, ip := range ips {
addrs = append(addrs, net.TCPAddr{IP: ip, Port: port})
}
return addrs
}

280
agent/auto-config/tls.go Normal file
View File

@ -0,0 +1,280 @@
package autoconf
import (
"context"
"fmt"
"net"
"github.com/hashicorp/consul/agent/cache"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/proto/pbautoconf"
)
const (
// ID of the roots watch
rootsWatchID = "roots"
// ID of the leaf watch
leafWatchID = "leaf"
unknownTrustDomain = "unknown"
)
var (
defaultDNSSANs = []string{"localhost"}
defaultIPSANs = []net.IP{{127, 0, 0, 1}, net.ParseIP("::1")}
)
func extractPEMs(roots *structs.IndexedCARoots) []string {
var pems []string
for _, root := range roots.Roots {
pems = append(pems, root.RootCert)
}
return pems
}
// updateTLSFromResponse will update the TLS certificate and roots in the shared
// TLS configurator.
func (ac *AutoConfig) updateTLSFromResponse(resp *pbautoconf.AutoConfigResponse) error {
var pems []string
for _, root := range resp.GetCARoots().GetRoots() {
pems = append(pems, root.RootCert)
}
err := ac.acConfig.TLSConfigurator.UpdateAutoTLS(
resp.ExtraCACertificates,
pems,
resp.Certificate.GetCertPEM(),
resp.Certificate.GetPrivateKeyPEM(),
resp.Config.GetTLS().GetVerifyServerHostname(),
)
if err != nil {
return fmt.Errorf("Failed to update the TLS configurator with new certificates: %w", err)
}
return nil
}
func (ac *AutoConfig) setInitialTLSCertificates(certs *structs.SignedResponse) error {
if certs == nil {
return nil
}
if err := ac.populateCertificateCache(certs); err != nil {
return fmt.Errorf("error populating cache with certificates: %w", err)
}
connectCAPems := extractPEMs(&certs.ConnectCARoots)
err := ac.acConfig.TLSConfigurator.UpdateAutoTLS(
certs.ManualCARoots,
connectCAPems,
certs.IssuedCert.CertPEM,
certs.IssuedCert.PrivateKeyPEM,
certs.VerifyServerHostname,
)
if err != nil {
return fmt.Errorf("error updating TLS configurator with certificates: %w", err)
}
return nil
}
func (ac *AutoConfig) populateCertificateCache(certs *structs.SignedResponse) error {
cert, err := connect.ParseCert(certs.IssuedCert.CertPEM)
if err != nil {
return fmt.Errorf("Failed to parse certificate: %w", err)
}
// prepolutate roots cache
rootRes := cache.FetchResult{Value: &certs.ConnectCARoots, Index: certs.ConnectCARoots.QueryMeta.Index}
rootsReq := ac.caRootsRequest()
// getting the roots doesn't require a token so in order to potentially share the cache with another
if err := ac.acConfig.Cache.Prepopulate(cachetype.ConnectCARootName, rootRes, ac.config.Datacenter, "", rootsReq.CacheInfo().Key); err != nil {
return err
}
leafReq := ac.leafCertRequest()
// prepolutate leaf cache
certRes := cache.FetchResult{
Value: &certs.IssuedCert,
Index: certs.IssuedCert.RaftIndex.ModifyIndex,
State: cachetype.ConnectCALeafSuccess(connect.EncodeSigningKeyID(cert.AuthorityKeyId)),
}
if err := ac.acConfig.Cache.Prepopulate(cachetype.ConnectCALeafName, certRes, leafReq.Datacenter, leafReq.Token, leafReq.Key()); err != nil {
return err
}
return nil
}
func (ac *AutoConfig) setupCertificateCacheWatches(ctx context.Context) (context.CancelFunc, error) {
notificationCtx, cancel := context.WithCancel(ctx)
rootsReq := ac.caRootsRequest()
err := ac.acConfig.Cache.Notify(notificationCtx, cachetype.ConnectCARootName, &rootsReq, rootsWatchID, ac.cacheUpdates)
if err != nil {
cancel()
return nil, err
}
leafReq := ac.leafCertRequest()
err = ac.acConfig.Cache.Notify(notificationCtx, cachetype.ConnectCALeafName, &leafReq, leafWatchID, ac.cacheUpdates)
if err != nil {
cancel()
return nil, err
}
return cancel, nil
}
func (ac *AutoConfig) updateCARoots(roots *structs.IndexedCARoots) error {
switch {
case ac.config.AutoConfig.Enabled:
ac.Lock()
defer ac.Unlock()
var err error
ac.autoConfigResponse.CARoots, err = translateCARootsToProtobuf(roots)
if err != nil {
return err
}
if err := ac.updateTLSFromResponse(ac.autoConfigResponse); err != nil {
return err
}
return ac.persistAutoConfig(ac.autoConfigResponse)
case ac.config.AutoEncryptTLS:
pems := extractPEMs(roots)
if err := ac.acConfig.TLSConfigurator.UpdateAutoTLSCA(pems); err != nil {
return fmt.Errorf("failed to update Connect CA certificates: %w", err)
}
return nil
default:
return nil
}
}
func (ac *AutoConfig) updateLeafCert(cert *structs.IssuedCert) error {
switch {
case ac.config.AutoConfig.Enabled:
ac.Lock()
defer ac.Unlock()
var err error
ac.autoConfigResponse.Certificate, err = translateIssuedCertToProtobuf(cert)
if err != nil {
return err
}
if err := ac.updateTLSFromResponse(ac.autoConfigResponse); err != nil {
return err
}
return ac.persistAutoConfig(ac.autoConfigResponse)
case ac.config.AutoEncryptTLS:
if err := ac.acConfig.TLSConfigurator.UpdateAutoTLSCert(cert.CertPEM, cert.PrivateKeyPEM); err != nil {
return fmt.Errorf("failed to update the agent leaf cert: %w", err)
}
return nil
default:
return nil
}
}
func (ac *AutoConfig) caRootsRequest() structs.DCSpecificRequest {
return structs.DCSpecificRequest{Datacenter: ac.config.Datacenter}
}
func (ac *AutoConfig) leafCertRequest() cachetype.ConnectCALeafRequest {
return cachetype.ConnectCALeafRequest{
Datacenter: ac.config.Datacenter,
Agent: ac.config.NodeName,
DNSSAN: ac.getDNSSANs(),
IPSAN: ac.getIPSANs(),
Token: ac.acConfig.Tokens.AgentToken(),
}
}
// generateCSR will generate a CSR for an Agent certificate. This should
// be sent along with the AutoConfig.InitialConfiguration RPC or the
// AutoEncrypt.Sign RPC. The generated CSR does NOT have a real trust domain
// as when generating this we do not yet have the CA roots. The server will
// update the trust domain for us though.
func (ac *AutoConfig) generateCSR() (csr string, key string, err error) {
// We don't provide the correct host here, because we don't know any
// better at this point. Apart from the domain, we would need the
// ClusterID, which we don't have. This is why we go with
// unknownTrustDomain the first time. Subsequent CSRs will have the
// correct TrustDomain.
id := &connect.SpiffeIDAgent{
// will be replaced
Host: unknownTrustDomain,
Datacenter: ac.config.Datacenter,
Agent: ac.config.NodeName,
}
caConfig, err := ac.config.ConnectCAConfiguration()
if err != nil {
return "", "", fmt.Errorf("Cannot generate CSR: %w", err)
}
conf, err := caConfig.GetCommonConfig()
if err != nil {
return "", "", fmt.Errorf("Failed to load common CA configuration: %w", err)
}
if conf.PrivateKeyType == "" {
conf.PrivateKeyType = connect.DefaultPrivateKeyType
}
if conf.PrivateKeyBits == 0 {
conf.PrivateKeyBits = connect.DefaultPrivateKeyBits
}
// Create a new private key
pk, pkPEM, err := connect.GeneratePrivateKeyWithConfig(conf.PrivateKeyType, conf.PrivateKeyBits)
if err != nil {
return "", "", fmt.Errorf("Failed to generate private key: %w", err)
}
dnsNames := ac.getDNSSANs()
ipAddresses := ac.getIPSANs()
// Create a CSR.
//
// The Common Name includes the dummy trust domain for now but Server will
// override this when it is signed anyway so it's OK.
cn := connect.AgentCN(ac.config.NodeName, unknownTrustDomain)
csr, err = connect.CreateCSR(id, cn, pk, dnsNames, ipAddresses)
if err != nil {
return "", "", err
}
return csr, pkPEM, nil
}
func (ac *AutoConfig) getDNSSANs() []string {
sans := defaultDNSSANs
switch {
case ac.config.AutoConfig.Enabled:
sans = append(sans, ac.config.AutoConfig.DNSSANs...)
case ac.config.AutoEncryptTLS:
sans = append(sans, ac.config.AutoEncryptDNSSAN...)
}
return sans
}
func (ac *AutoConfig) getIPSANs() []net.IP {
sans := defaultIPSANs
switch {
case ac.config.AutoConfig.Enabled:
sans = append(sans, ac.config.AutoConfig.IPSANs...)
case ac.config.AutoEncryptTLS:
sans = append(sans, ac.config.AutoEncryptIPSAN...)
}
return sans
}

View File

@ -0,0 +1,56 @@
package autoconf
import (
"testing"
"time"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs"
"github.com/stretchr/testify/require"
)
func newLeaf(t *testing.T, agentName, datacenter string, ca *structs.CARoot, idx uint64, expiration time.Duration) *structs.IssuedCert {
t.Helper()
pub, priv, err := connect.TestAgentLeaf(t, agentName, datacenter, ca, expiration)
require.NoError(t, err)
cert, err := connect.ParseCert(pub)
require.NoError(t, err)
spiffeID, err := connect.ParseCertURI(cert.URIs[0])
require.NoError(t, err)
agentID, ok := spiffeID.(*connect.SpiffeIDAgent)
require.True(t, ok, "certificate doesn't have an agent leaf cert URI")
return &structs.IssuedCert{
SerialNumber: cert.SerialNumber.String(),
CertPEM: pub,
PrivateKeyPEM: priv,
ValidAfter: cert.NotBefore,
ValidBefore: cert.NotAfter,
Agent: agentID.Agent,
AgentURI: agentID.URI().String(),
EnterpriseMeta: *structs.DefaultEnterpriseMeta(),
RaftIndex: structs.RaftIndex{
CreateIndex: idx,
ModifyIndex: idx,
},
}
}
func testCerts(t *testing.T, agentName, datacenter string) (*structs.CARoot, *structs.IndexedCARoots, *structs.IssuedCert) {
ca := connect.TestCA(t, nil)
ca.IntermediateCerts = make([]string, 0)
cert := newLeaf(t, agentName, datacenter, ca, 1, 10*time.Minute)
indexedRoots := structs.IndexedCARoots{
ActiveRootID: ca.ID,
TrustDomain: connect.TestClusterID,
Roots: []*structs.CARoot{
ca,
},
QueryMeta: structs.QueryMeta{Index: 1},
}
return ca, &indexedRoots, cert
}

File diff suppressed because one or more lines are too long

View File

@ -60,7 +60,7 @@ func (c *ServiceHTTPChecks) Fetch(opts cache.FetchOptions, req cache.Request) (c
sid := structs.NewServiceID(reqReal.ServiceID, &reqReal.EnterpriseMeta)
svcState := c.Agent.LocalState().ServiceState(sid)
if svcState == nil {
return "", result, fmt.Errorf("Internal cache failure: service '%s' not in agent state", reqReal.ServiceID)
return "", nil, fmt.Errorf("Internal cache failure: service '%s' not in agent state", reqReal.ServiceID)
}
// WatchCh will receive updates on service (de)registrations and check (de)registrations
@ -70,7 +70,7 @@ func (c *ServiceHTTPChecks) Fetch(opts cache.FetchOptions, req cache.Request) (c
hash, err := hashChecks(reply)
if err != nil {
return "", result, fmt.Errorf("Internal cache failure: %v", err)
return "", nil, fmt.Errorf("Internal cache failure: %v", err)
}
return hash, reply, nil

38
agent/cache/cache.go vendored
View File

@ -144,16 +144,26 @@ type Options struct {
EntryFetchRate rate.Limit
}
// New creates a new cache with the given RPC client and reasonable defaults.
// Further settings can be tweaked on the returned value.
func New(options Options) *Cache {
// Equal return true if both options are equivalent
func (o Options) Equal(other Options) bool {
return o.EntryFetchMaxBurst == other.EntryFetchMaxBurst && o.EntryFetchRate == other.EntryFetchRate
}
// applyDefaultValuesOnOptions set default values on options and returned updated value
func applyDefaultValuesOnOptions(options Options) Options {
if options.EntryFetchRate == 0.0 {
options.EntryFetchRate = DefaultEntryFetchRate
}
if options.EntryFetchMaxBurst == 0 {
options.EntryFetchMaxBurst = DefaultEntryFetchMaxBurst
}
return options
}
// New creates a new cache with the given RPC client and reasonable defaults.
// Further settings can be tweaked on the returned value.
func New(options Options) *Cache {
options = applyDefaultValuesOnOptions(options)
// Initialize the heap. The buffer of 1 is really important because
// its possible for the expiry loop to trigger the heap to update
// itself and it'd block forever otherwise.
@ -234,6 +244,28 @@ func (c *Cache) RegisterType(n string, typ Type) {
c.types[n] = typeEntry{Name: n, Type: typ, Opts: &opts}
}
// ReloadOptions updates the cache with the new options
// return true if Cache is updated, false if already up to date
func (c *Cache) ReloadOptions(options Options) bool {
options = applyDefaultValuesOnOptions(options)
modified := !options.Equal(c.options)
if modified {
c.entriesLock.RLock()
defer c.entriesLock.RUnlock()
for _, entry := range c.entries {
if c.options.EntryFetchRate != options.EntryFetchRate {
entry.FetchRateLimiter.SetLimit(options.EntryFetchRate)
}
if c.options.EntryFetchMaxBurst != options.EntryFetchMaxBurst {
entry.FetchRateLimiter.SetBurst(options.EntryFetchMaxBurst)
}
}
c.options.EntryFetchRate = options.EntryFetchRate
c.options.EntryFetchMaxBurst = options.EntryFetchMaxBurst
}
return modified
}
// Get loads the data for the given type and request. If data satisfying the
// minimum index is present in the cache, it is returned immediately. Otherwise,
// this will block until the data is available or the request timeout is

View File

@ -14,6 +14,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"golang.org/x/time/rate"
)
// Test a basic Get with no indexes (and therefore no blocking queries).
@ -1220,6 +1221,64 @@ func TestCacheGet_nonBlockingType(t *testing.T) {
typ.AssertExpectations(t)
}
// Test a get with an index set will wait until an index that is higher
// is set in the cache.
func TestCacheReload(t *testing.T) {
t.Parallel()
typ1 := TestType(t)
defer typ1.AssertExpectations(t)
c := New(Options{EntryFetchRate: rate.Limit(1), EntryFetchMaxBurst: 1})
c.RegisterType("t1", typ1)
typ1.Mock.On("Fetch", mock.Anything, mock.Anything).Return(FetchResult{Value: 42, Index: 42}, nil).Maybe()
require.False(t, c.ReloadOptions(Options{EntryFetchRate: rate.Limit(1), EntryFetchMaxBurst: 1}), "Value should not be reloaded")
_, meta, err := c.Get(context.Background(), "t1", TestRequest(t, RequestInfo{Key: "hello1", MinIndex: uint64(1)}))
require.NoError(t, err)
require.Equal(t, meta.Index, uint64(42))
testEntry := func(t *testing.T, doTest func(t *testing.T, entry cacheEntry)) {
c.entriesLock.Lock()
tEntry, ok := c.types["t1"]
require.True(t, ok)
keyName := makeEntryKey("t1", "", "", "hello1")
ok, entryValid, entry := c.getEntryLocked(tEntry, keyName, RequestInfo{})
require.True(t, ok)
require.True(t, entryValid)
doTest(t, entry)
c.entriesLock.Unlock()
}
testEntry(t, func(t *testing.T, entry cacheEntry) {
require.Equal(t, entry.FetchRateLimiter.Limit(), rate.Limit(1))
require.Equal(t, entry.FetchRateLimiter.Burst(), 1)
})
// Modify only rateLimit
require.True(t, c.ReloadOptions(Options{EntryFetchRate: rate.Limit(100), EntryFetchMaxBurst: 1}))
testEntry(t, func(t *testing.T, entry cacheEntry) {
require.Equal(t, entry.FetchRateLimiter.Limit(), rate.Limit(100))
require.Equal(t, entry.FetchRateLimiter.Burst(), 1)
})
// Modify only Burst
require.True(t, c.ReloadOptions(Options{EntryFetchRate: rate.Limit(100), EntryFetchMaxBurst: 5}))
testEntry(t, func(t *testing.T, entry cacheEntry) {
require.Equal(t, entry.FetchRateLimiter.Limit(), rate.Limit(100))
require.Equal(t, entry.FetchRateLimiter.Burst(), 5)
})
// Modify only Burst and Limit at the same time
require.True(t, c.ReloadOptions(Options{EntryFetchRate: rate.Limit(1000), EntryFetchMaxBurst: 42}))
testEntry(t, func(t *testing.T, entry cacheEntry) {
require.Equal(t, entry.FetchRateLimiter.Limit(), rate.Limit(1000))
require.Equal(t, entry.FetchRateLimiter.Burst(), 42)
})
}
// TestCacheThrottle checks the assumptions for the cache throttling. It sets
// up a cache with Options{EntryFetchRate: 10.0, EntryFetchMaxBurst: 1}, which
// allows for 10req/s, or one request every 100ms.

View File

@ -60,7 +60,7 @@ func TestCacheNotifyChResult(t testing.T, ch <-chan UpdateEvent, expected ...Upd
}
got := make([]UpdateEvent, 0, expectLen)
timeoutCh := time.After(50 * time.Millisecond)
timeoutCh := time.After(75 * time.Millisecond)
OUT:
for {
@ -74,7 +74,7 @@ OUT:
}
case <-timeoutCh:
t.Fatalf("got %d results on chan in 50ms, want %d", len(got), expectLen)
t.Fatalf("timeout while waiting for result: got %d results on chan, want %d", len(got), expectLen)
}
}

View File

@ -258,7 +258,7 @@ func TestCacheNotifyPolling(t *testing.T) {
}
require.Equal(events[0].Result, 42)
require.Equal(events[0].Meta.Hit, false)
require.Equal(events[0].Meta.Hit && events[1].Meta.Hit, false)
require.Equal(events[0].Meta.Index, uint64(1))
require.True(events[0].Meta.Age < 50*time.Millisecond)
require.NoError(events[0].Err)

View File

@ -1,472 +0,0 @@
package certmon
import (
"context"
"fmt"
"io/ioutil"
"sync"
"time"
"github.com/hashicorp/consul/agent/cache"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/tlsutil"
"github.com/hashicorp/go-hclog"
)
const (
// ID of the roots watch
rootsWatchID = "roots"
// ID of the leaf watch
leafWatchID = "leaf"
)
// Cache is an interface to represent the methods of the
// agent/cache.Cache struct that we care about
type Cache interface {
Notify(ctx context.Context, t string, r cache.Request, correlationID string, ch chan<- cache.UpdateEvent) error
Prepopulate(t string, result cache.FetchResult, dc string, token string, key string) error
}
// CertMonitor will setup the proper watches to ensure that
// the Agent's Connect TLS certificate remains up to date
type CertMonitor struct {
logger hclog.Logger
cache Cache
tlsConfigurator *tlsutil.Configurator
tokens *token.Store
leafReq cachetype.ConnectCALeafRequest
rootsReq structs.DCSpecificRequest
fallback FallbackFunc
fallbackLeeway time.Duration
fallbackRetry time.Duration
l sync.Mutex
running bool
// cancel is used to cancel the entire CertMonitor
// go routine. This is the main field protected
// by the mutex as it being non-nil indicates that
// the go routine has been started and is stoppable.
// note that it doesn't indcate that the go routine
// is currently running.
cancel context.CancelFunc
// cancelWatches is used to cancel the existing
// cache watches. This is mainly only necessary
// when the Agent token changes
cancelWatches context.CancelFunc
// cacheUpdates is the chan used to have the cache
// send us back events
cacheUpdates chan cache.UpdateEvent
// tokenUpdates is the struct used to receive
// events from the token store when the Agent
// token is updated.
tokenUpdates token.Notifier
}
// New creates a new CertMonitor for automatically rotating
// an Agent's Connect Certificate
func New(config *Config) (*CertMonitor, error) {
logger := config.Logger
if logger == nil {
logger = hclog.New(&hclog.LoggerOptions{
Level: 0,
Output: ioutil.Discard,
})
}
if config.FallbackLeeway == 0 {
config.FallbackLeeway = 10 * time.Second
}
if config.FallbackRetry == 0 {
config.FallbackRetry = time.Minute
}
if config.Cache == nil {
return nil, fmt.Errorf("CertMonitor creation requires a Cache")
}
if config.TLSConfigurator == nil {
return nil, fmt.Errorf("CertMonitor creation requires a TLS Configurator")
}
if config.Fallback == nil {
return nil, fmt.Errorf("CertMonitor creation requires specifying a FallbackFunc")
}
if config.Datacenter == "" {
return nil, fmt.Errorf("CertMonitor creation requires specifying the datacenter")
}
if config.NodeName == "" {
return nil, fmt.Errorf("CertMonitor creation requires specifying the agent's node name")
}
if config.Tokens == nil {
return nil, fmt.Errorf("CertMonitor creation requires specifying a token store")
}
return &CertMonitor{
logger: logger,
cache: config.Cache,
tokens: config.Tokens,
tlsConfigurator: config.TLSConfigurator,
fallback: config.Fallback,
fallbackLeeway: config.FallbackLeeway,
fallbackRetry: config.FallbackRetry,
rootsReq: structs.DCSpecificRequest{Datacenter: config.Datacenter},
leafReq: cachetype.ConnectCALeafRequest{
Datacenter: config.Datacenter,
Agent: config.NodeName,
DNSSAN: config.DNSSANs,
IPSAN: config.IPSANs,
},
}, nil
}
// Update is responsible for priming the cache with the certificates
// as well as injecting them into the TLS configurator
func (m *CertMonitor) Update(certs *structs.SignedResponse) error {
if certs == nil {
return nil
}
if err := m.populateCache(certs); err != nil {
return fmt.Errorf("error populating cache with certificates: %w", err)
}
connectCAPems := []string{}
for _, ca := range certs.ConnectCARoots.Roots {
connectCAPems = append(connectCAPems, ca.RootCert)
}
// Note that its expected that the private key be within the IssuedCert in the
// SignedResponse. This isn't how a server would send back the response and requires
// that the recipient of the response who also has access to the private key will
// have filled it in. The Cache definitely does this but auto-encrypt/auto-config
// will need to ensure the original response is setup this way too.
err := m.tlsConfigurator.UpdateAutoTLS(
certs.ManualCARoots,
connectCAPems,
certs.IssuedCert.CertPEM,
certs.IssuedCert.PrivateKeyPEM,
certs.VerifyServerHostname)
if err != nil {
return fmt.Errorf("error updating TLS configurator with certificates: %w", err)
}
return nil
}
// populateCache is responsible for inserting the certificates into the cache
func (m *CertMonitor) populateCache(resp *structs.SignedResponse) error {
cert, err := connect.ParseCert(resp.IssuedCert.CertPEM)
if err != nil {
return fmt.Errorf("Failed to parse certificate: %w", err)
}
// prepolutate roots cache
rootRes := cache.FetchResult{Value: &resp.ConnectCARoots, Index: resp.ConnectCARoots.QueryMeta.Index}
// getting the roots doesn't require a token so in order to potentially share the cache with another
if err := m.cache.Prepopulate(cachetype.ConnectCARootName, rootRes, m.rootsReq.Datacenter, "", m.rootsReq.CacheInfo().Key); err != nil {
return err
}
// copy the template and update the token
leafReq := m.leafReq
leafReq.Token = m.tokens.AgentToken()
// prepolutate leaf cache
certRes := cache.FetchResult{
Value: &resp.IssuedCert,
Index: resp.ConnectCARoots.QueryMeta.Index,
State: cachetype.ConnectCALeafSuccess(connect.EncodeSigningKeyID(cert.AuthorityKeyId)),
}
if err := m.cache.Prepopulate(cachetype.ConnectCALeafName, certRes, leafReq.Datacenter, leafReq.Token, leafReq.Key()); err != nil {
return err
}
return nil
}
// Start spawns the go routine to monitor the certificate and ensure it is
// rotated/renewed as necessary. The chan will indicate once the started
// go routine has exited
func (m *CertMonitor) Start(ctx context.Context) (<-chan struct{}, error) {
m.l.Lock()
defer m.l.Unlock()
if m.running || m.cancel != nil {
return nil, fmt.Errorf("the CertMonitor is already running")
}
// create the top level context to control the go
// routine executing the `run` method
ctx, cancel := context.WithCancel(ctx)
// create the channel to get cache update events through
// really we should only ever get 10 updates
m.cacheUpdates = make(chan cache.UpdateEvent, 10)
// setup the cache watches
cancelWatches, err := m.setupCacheWatches(ctx)
if err != nil {
cancel()
return nil, fmt.Errorf("error setting up cache watches: %w", err)
}
// start the token update notifier
m.tokenUpdates = m.tokens.Notify(token.TokenKindAgent)
// store the cancel funcs
m.cancel = cancel
m.cancelWatches = cancelWatches
m.running = true
exit := make(chan struct{})
go m.run(ctx, exit)
m.logger.Info("certificate monitor started")
return exit, nil
}
// Stop manually stops the go routine spawned by Start and
// returns whether the go routine was still running before
// cancelling.
//
// Note that cancelling the context passed into Start will
// also cause the go routine to stop
func (m *CertMonitor) Stop() bool {
m.l.Lock()
defer m.l.Unlock()
if !m.running {
return false
}
if m.cancel != nil {
m.cancel()
}
return true
}
// IsRunning returns whether the go routine to perform certificate monitoring
// is already running.
func (m *CertMonitor) IsRunning() bool {
m.l.Lock()
defer m.l.Unlock()
return m.running
}
// setupCacheWatches will start both the roots and leaf cert watch with a new child
// context and an up to date ACL token. The watches are started with a new child context
// whose CancelFunc is also returned.
func (m *CertMonitor) setupCacheWatches(ctx context.Context) (context.CancelFunc, error) {
notificationCtx, cancel := context.WithCancel(ctx)
// copy the request
rootsReq := m.rootsReq
err := m.cache.Notify(notificationCtx, cachetype.ConnectCARootName, &rootsReq, rootsWatchID, m.cacheUpdates)
if err != nil {
cancel()
return nil, err
}
// copy the request
leafReq := m.leafReq
leafReq.Token = m.tokens.AgentToken()
err = m.cache.Notify(notificationCtx, cachetype.ConnectCALeafName, &leafReq, leafWatchID, m.cacheUpdates)
if err != nil {
cancel()
return nil, err
}
return cancel, nil
}
// handleCacheEvent is used to handle event notifications from the cache for the roots
// or leaf cert watches.
func (m *CertMonitor) handleCacheEvent(u cache.UpdateEvent) error {
switch u.CorrelationID {
case rootsWatchID:
m.logger.Debug("roots watch fired - updating CA certificates")
if u.Err != nil {
return fmt.Errorf("root watch returned an error: %w", u.Err)
}
roots, ok := u.Result.(*structs.IndexedCARoots)
if !ok {
return fmt.Errorf("invalid type for roots watch response: %T", u.Result)
}
var pems []string
for _, root := range roots.Roots {
pems = append(pems, root.RootCert)
}
if err := m.tlsConfigurator.UpdateAutoTLSCA(pems); err != nil {
return fmt.Errorf("failed to update Connect CA certificates: %w", err)
}
case leafWatchID:
m.logger.Debug("leaf certificate watch fired - updating TLS certificate")
if u.Err != nil {
return fmt.Errorf("leaf watch returned an error: %w", u.Err)
}
leaf, ok := u.Result.(*structs.IssuedCert)
if !ok {
return fmt.Errorf("invalid type for agent leaf cert watch response: %T", u.Result)
}
if err := m.tlsConfigurator.UpdateAutoTLSCert(leaf.CertPEM, leaf.PrivateKeyPEM); err != nil {
return fmt.Errorf("failed to update the agent leaf cert: %w", err)
}
}
return nil
}
// handleTokenUpdate is used when a notification about the agent token being updated
// is received and various watches need cancelling/restarting to use the new token.
func (m *CertMonitor) handleTokenUpdate(ctx context.Context) error {
m.logger.Debug("Agent token updated - resetting watches")
// TODO (autoencrypt) Prepopulate the cache with the new token with
// the existing cache entry with the old token. The certificate doesn't
// need to change just because the token has. However there isn't a
// good way to make that happen and this behavior is benign enough
// that I am going to push off implementing it.
// the agent token has been updated so we must update our leaf cert watch.
// this cancels the current watches before setting up new ones
m.cancelWatches()
// recreate the chan for cache updates. This is a precautionary measure to ensure
// that we don't accidentally get notified for the new watches being setup before
// a blocking query in the cache returns and sends data to the old chan. In theory
// the code in agent/cache/watch.go should prevent this where we specifically check
// for context cancellation prior to sending the event. However we could cancel
// it after that check and finish setting up the new watches before getting the old
// events. Both the go routine scheduler and the OS thread scheduler would have to
// be acting up for this to happen. Regardless the way to ensure we don't get events
// for the old watches is to simply replace the chan we are expecting them from.
close(m.cacheUpdates)
m.cacheUpdates = make(chan cache.UpdateEvent, 10)
// restart watches - this will be done with the correct token
cancelWatches, err := m.setupCacheWatches(ctx)
if err != nil {
return fmt.Errorf("failed to restart watches after agent token update: %w", err)
}
m.cancelWatches = cancelWatches
return nil
}
// handleFallback is used when the current TLS certificate has expired and the normal
// updating mechanisms have failed to renew it quickly enough. This function will
// use the configured fallback mechanism to retrieve a new cert and start monitoring
// that one.
func (m *CertMonitor) handleFallback(ctx context.Context) error {
m.logger.Warn("agent's client certificate has expired")
// Background because the context is mainly useful when the agent is first starting up.
reply, err := m.fallback(ctx)
if err != nil {
return fmt.Errorf("error when getting new agent certificate: %w", err)
}
return m.Update(reply)
}
// run is the private method to be spawn by the Start method for
// executing the main monitoring loop.
func (m *CertMonitor) run(ctx context.Context, exit chan struct{}) {
// The fallbackTimer is used to notify AFTER the agents
// leaf certificate has expired and where we need
// to fall back to the less secure RPC endpoint just like
// if the agent was starting up new.
//
// Check 10sec (fallback leeway duration) after cert
// expires. The agent cache should be handling the expiration
// and renew it before then.
//
// If there is no cert, AutoEncryptCertNotAfter returns
// a value in the past which immediately triggers the
// renew, but this case shouldn't happen because at
// this point, auto_encrypt was just being setup
// successfully.
calcFallbackInterval := func() time.Duration {
certExpiry := m.tlsConfigurator.AutoEncryptCertNotAfter()
return certExpiry.Add(m.fallbackLeeway).Sub(time.Now())
}
fallbackTimer := time.NewTimer(calcFallbackInterval())
// cleanup for once we are stopped
defer func() {
// cancel the go routines performing the cache watches
m.cancelWatches()
// ensure we don't leak the timers go routine
fallbackTimer.Stop()
// stop receiving notifications for token updates
m.tokens.StopNotify(m.tokenUpdates)
m.logger.Debug("certificate monitor has been stopped")
m.l.Lock()
m.cancel = nil
m.running = false
m.l.Unlock()
// this should be the final cleanup task as its what notifies
// the rest of the world that this go routine has exited.
close(exit)
}()
for {
select {
case <-ctx.Done():
m.logger.Debug("stopping the certificate monitor")
return
case <-m.tokenUpdates.Ch:
m.logger.Debug("handling a token update event")
if err := m.handleTokenUpdate(ctx); err != nil {
m.logger.Error("error in handling token update event", "error", err)
}
case u := <-m.cacheUpdates:
m.logger.Debug("handling a cache update event", "correlation_id", u.CorrelationID)
if err := m.handleCacheEvent(u); err != nil {
m.logger.Error("error in handling cache update event", "error", err)
}
// reset the fallback timer as the certificate may have been updated
fallbackTimer.Stop()
fallbackTimer = time.NewTimer(calcFallbackInterval())
case <-fallbackTimer.C:
// This is a safety net in case the auto_encrypt cert doesn't get renewed
// in time. The agent would be stuck in that case because the watches
// never use the AutoEncrypt.Sign endpoint.
// check auto encrypt client cert expiration
if m.tlsConfigurator.AutoEncryptCertExpired() {
if err := m.handleFallback(ctx); err != nil {
m.logger.Error("error when handling a certificate expiry event", "error", err)
fallbackTimer = time.NewTimer(m.fallbackRetry)
} else {
fallbackTimer = time.NewTimer(calcFallbackInterval())
}
} else {
// this shouldn't be possible. We calculate the timer duration to be the certificate
// expiration time + some leeway (10s default). So whenever we get here the certificate
// should be expired. Regardless its probably worth resetting the timer.
fallbackTimer = time.NewTimer(calcFallbackInterval())
}
}
}
}

View File

@ -1,693 +0,0 @@
package certmon
import (
"context"
"crypto/tls"
"fmt"
"net"
"sync"
"testing"
"time"
"github.com/hashicorp/consul/agent/cache"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/hashicorp/consul/tlsutil"
"github.com/hashicorp/go-uuid"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
)
type mockFallback struct {
mock.Mock
}
func (m *mockFallback) fallback(ctx context.Context) (*structs.SignedResponse, error) {
ret := m.Called()
resp, _ := ret.Get(0).(*structs.SignedResponse)
return resp, ret.Error(1)
}
type mockWatcher struct {
ch chan<- cache.UpdateEvent
done <-chan struct{}
}
type mockCache struct {
mock.Mock
lock sync.Mutex
watchers map[string][]mockWatcher
}
func (m *mockCache) Notify(ctx context.Context, t string, r cache.Request, correlationID string, ch chan<- cache.UpdateEvent) error {
m.lock.Lock()
key := r.CacheInfo().Key
m.watchers[key] = append(m.watchers[key], mockWatcher{ch: ch, done: ctx.Done()})
m.lock.Unlock()
ret := m.Called(t, r, correlationID)
return ret.Error(0)
}
func (m *mockCache) Prepopulate(t string, result cache.FetchResult, dc string, token string, key string) error {
ret := m.Called(t, result, dc, token, key)
return ret.Error(0)
}
func (m *mockCache) sendNotification(ctx context.Context, key string, u cache.UpdateEvent) bool {
m.lock.Lock()
defer m.lock.Unlock()
watchers, ok := m.watchers[key]
if !ok || len(m.watchers) < 1 {
return false
}
var newWatchers []mockWatcher
for _, watcher := range watchers {
select {
case watcher.ch <- u:
newWatchers = append(newWatchers, watcher)
case <-watcher.done:
// do nothing, this watcher will be removed from the list
case <-ctx.Done():
// return doesn't matter here really, the test is being cancelled
return true
}
}
// this removes any already cancelled watches from being sent to
m.watchers[key] = newWatchers
return true
}
func newMockCache(t *testing.T) *mockCache {
mcache := mockCache{watchers: make(map[string][]mockWatcher)}
mcache.Test(t)
return &mcache
}
func waitForChan(timer *time.Timer, ch <-chan struct{}) bool {
select {
case <-timer.C:
return false
case <-ch:
return true
}
}
func waitForChans(timeout time.Duration, chans ...<-chan struct{}) bool {
timer := time.NewTimer(timeout)
defer timer.Stop()
for _, ch := range chans {
if !waitForChan(timer, ch) {
return false
}
}
return true
}
func testTLSConfigurator(t *testing.T) *tlsutil.Configurator {
t.Helper()
logger := testutil.Logger(t)
cfg, err := tlsutil.NewConfigurator(tlsutil.Config{AutoTLS: true}, logger)
require.NoError(t, err)
return cfg
}
func newLeaf(t *testing.T, ca *structs.CARoot, idx uint64, expiration time.Duration) *structs.IssuedCert {
t.Helper()
pub, priv, err := connect.TestAgentLeaf(t, "node", "foo", ca, expiration)
require.NoError(t, err)
cert, err := connect.ParseCert(pub)
require.NoError(t, err)
spiffeID, err := connect.ParseCertURI(cert.URIs[0])
require.NoError(t, err)
agentID, ok := spiffeID.(*connect.SpiffeIDAgent)
require.True(t, ok, "certificate doesn't have an agent leaf cert URI")
return &structs.IssuedCert{
SerialNumber: cert.SerialNumber.String(),
CertPEM: pub,
PrivateKeyPEM: priv,
ValidAfter: cert.NotBefore,
ValidBefore: cert.NotAfter,
Agent: agentID.Agent,
AgentURI: agentID.URI().String(),
EnterpriseMeta: *structs.DefaultEnterpriseMeta(),
RaftIndex: structs.RaftIndex{
CreateIndex: idx,
ModifyIndex: idx,
},
}
}
type testCertMonitor struct {
monitor *CertMonitor
mcache *mockCache
tls *tlsutil.Configurator
tokens *token.Store
fallback *mockFallback
extraCACerts []string
initialCert *structs.IssuedCert
initialRoots *structs.IndexedCARoots
// these are some variables that the CertMonitor was created with
datacenter string
nodeName string
dns []string
ips []net.IP
verifyServerHostname bool
}
func newTestCertMonitor(t *testing.T) testCertMonitor {
t.Helper()
tlsConfigurator := testTLSConfigurator(t)
tokens := new(token.Store)
id, err := uuid.GenerateUUID()
require.NoError(t, err)
tokens.UpdateAgentToken(id, token.TokenSourceConfig)
ca := connect.TestCA(t, nil)
manualCA := connect.TestCA(t, nil)
// this cert is setup to not expire quickly. this will prevent
// the test from accidentally running the fallback routine
// before we want to force that to happen.
issued := newLeaf(t, ca, 1, 10*time.Minute)
indexedRoots := structs.IndexedCARoots{
ActiveRootID: ca.ID,
TrustDomain: connect.TestClusterID,
Roots: []*structs.CARoot{
ca,
},
QueryMeta: structs.QueryMeta{
Index: 1,
},
}
initialCerts := &structs.SignedResponse{
ConnectCARoots: indexedRoots,
IssuedCert: *issued,
ManualCARoots: []string{manualCA.RootCert},
VerifyServerHostname: true,
}
dnsSANs := []string{"test.dev"}
ipSANs := []net.IP{net.IPv4(198, 18, 0, 1)}
// this chan should be unbuffered so we can detect when the fallback func has been called.
fallback := &mockFallback{}
mcache := newMockCache(t)
rootRes := cache.FetchResult{Value: &indexedRoots, Index: 1}
rootsReq := structs.DCSpecificRequest{Datacenter: "foo"}
mcache.On("Prepopulate", cachetype.ConnectCARootName, rootRes, "foo", "", rootsReq.CacheInfo().Key).Return(nil).Once()
leafReq := cachetype.ConnectCALeafRequest{
Token: tokens.AgentToken(),
Agent: "node",
Datacenter: "foo",
DNSSAN: dnsSANs,
IPSAN: ipSANs,
}
leafRes := cache.FetchResult{
Value: issued,
Index: 1,
State: cachetype.ConnectCALeafSuccess(ca.SigningKeyID),
}
mcache.On("Prepopulate", cachetype.ConnectCALeafName, leafRes, "foo", tokens.AgentToken(), leafReq.Key()).Return(nil).Once()
// we can assert more later but this should always be done.
defer mcache.AssertExpectations(t)
cfg := new(Config).
WithCache(mcache).
WithLogger(testutil.Logger(t)).
WithTLSConfigurator(tlsConfigurator).
WithTokens(tokens).
WithFallback(fallback.fallback).
WithDNSSANs(dnsSANs).
WithIPSANs(ipSANs).
WithDatacenter("foo").
WithNodeName("node").
WithFallbackLeeway(time.Nanosecond).
WithFallbackRetry(time.Millisecond)
monitor, err := New(cfg)
require.NoError(t, err)
require.NotNil(t, monitor)
require.NoError(t, monitor.Update(initialCerts))
return testCertMonitor{
monitor: monitor,
tls: tlsConfigurator,
tokens: tokens,
mcache: mcache,
fallback: fallback,
extraCACerts: []string{manualCA.RootCert},
initialCert: issued,
initialRoots: &indexedRoots,
datacenter: "foo",
nodeName: "node",
dns: dnsSANs,
ips: ipSANs,
verifyServerHostname: true,
}
}
func tlsCertificateFromIssued(t *testing.T, issued *structs.IssuedCert) *tls.Certificate {
t.Helper()
cert, err := tls.X509KeyPair([]byte(issued.CertPEM), []byte(issued.PrivateKeyPEM))
require.NoError(t, err)
return &cert
}
// convenience method to get a TLS Certificate from the intial issued certificate and priv key
func (cm *testCertMonitor) initialTLSCertificate(t *testing.T) *tls.Certificate {
t.Helper()
return tlsCertificateFromIssued(t, cm.initialCert)
}
// just a convenience method to get a list of all the CA pems that we set up regardless
// of manual vs connect.
func (cm *testCertMonitor) initialCACerts() []string {
pems := cm.extraCACerts
for _, root := range cm.initialRoots.Roots {
pems = append(pems, root.RootCert)
}
return pems
}
func (cm *testCertMonitor) assertExpectations(t *testing.T) {
cm.mcache.AssertExpectations(t)
cm.fallback.AssertExpectations(t)
}
func TestCertMonitor_InitialCerts(t *testing.T) {
// this also ensures that the cache was prepopulated properly
cm := newTestCertMonitor(t)
// verify that the certificate was injected into the TLS configurator correctly
require.Equal(t, cm.initialTLSCertificate(t), cm.tls.Cert())
// verify that the CA certs (both Connect and manual ones) were injected correctly
require.ElementsMatch(t, cm.initialCACerts(), cm.tls.CAPems())
// verify that the auto-tls verify server hostname setting was injected correctly
require.Equal(t, cm.verifyServerHostname, cm.tls.VerifyServerHostname())
}
func TestCertMonitor_GoRoutineManagement(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
cm := newTestCertMonitor(t)
// ensure that the monitor is not running
require.False(t, cm.monitor.IsRunning())
// ensure that nothing bad happens and that it reports as stopped
require.False(t, cm.monitor.Stop())
// we will never send notifications so these just ignore everything
cm.mcache.On("Notify", cachetype.ConnectCARootName, &structs.DCSpecificRequest{Datacenter: cm.datacenter}, rootsWatchID).Return(nil).Times(2)
cm.mcache.On("Notify", cachetype.ConnectCALeafName,
&cachetype.ConnectCALeafRequest{
Token: cm.tokens.AgentToken(),
Datacenter: cm.datacenter,
Agent: cm.nodeName,
DNSSAN: cm.dns,
IPSAN: cm.ips,
},
leafWatchID,
).Return(nil).Times(2)
done, err := cm.monitor.Start(ctx)
require.NoError(t, err)
require.True(t, cm.monitor.IsRunning())
_, err = cm.monitor.Start(ctx)
testutil.RequireErrorContains(t, err, "the CertMonitor is already running")
require.True(t, cm.monitor.Stop())
require.True(t, waitForChans(100*time.Millisecond, done), "monitor didn't shut down")
require.False(t, cm.monitor.IsRunning())
done, err = cm.monitor.Start(ctx)
require.NoError(t, err)
// ensure that context cancellation causes us to stop as well
cancel()
require.True(t, waitForChans(100*time.Millisecond, done))
cm.assertExpectations(t)
}
func startedCertMonitor(t *testing.T) (context.Context, testCertMonitor) {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
cm := newTestCertMonitor(t)
rootsCtx, rootsCancel := context.WithCancel(ctx)
defer rootsCancel()
leafCtx, leafCancel := context.WithCancel(ctx)
defer leafCancel()
// initial roots watch
cm.mcache.On("Notify", cachetype.ConnectCARootName,
&structs.DCSpecificRequest{
Datacenter: cm.datacenter,
},
rootsWatchID).
Return(nil).
Once().
Run(func(_ mock.Arguments) {
rootsCancel()
})
// the initial watch after starting the monitor
cm.mcache.On("Notify", cachetype.ConnectCALeafName,
&cachetype.ConnectCALeafRequest{
Token: cm.tokens.AgentToken(),
Datacenter: cm.datacenter,
Agent: cm.nodeName,
DNSSAN: cm.dns,
IPSAN: cm.ips,
},
leafWatchID).
Return(nil).
Once().
Run(func(_ mock.Arguments) {
leafCancel()
})
done, err := cm.monitor.Start(ctx)
require.NoError(t, err)
// this prevents logs after the test finishes
t.Cleanup(func() {
cm.monitor.Stop()
<-done
})
require.True(t,
waitForChans(100*time.Millisecond, rootsCtx.Done(), leafCtx.Done()),
"not all watches were started within the alotted time")
return ctx, cm
}
// This test ensures that the cache watches are restarted with the updated
// token after receiving a token update
func TestCertMonitor_TokenUpdate(t *testing.T) {
ctx, cm := startedCertMonitor(t)
rootsCtx, rootsCancel := context.WithCancel(ctx)
defer rootsCancel()
leafCtx, leafCancel := context.WithCancel(ctx)
defer leafCancel()
newToken := "8e4fe8db-162d-42d8-81ca-710fb2280ad0"
// we expect a new roots watch because when the leaf cert watch is restarted so is the root cert watch
cm.mcache.On("Notify", cachetype.ConnectCARootName,
&structs.DCSpecificRequest{
Datacenter: cm.datacenter,
},
rootsWatchID).
Return(nil).
Once().
Run(func(_ mock.Arguments) {
rootsCancel()
})
secondWatch := &cachetype.ConnectCALeafRequest{
Token: newToken,
Datacenter: cm.datacenter,
Agent: cm.nodeName,
DNSSAN: cm.dns,
IPSAN: cm.ips,
}
// the new watch after updating the token
cm.mcache.On("Notify", cachetype.ConnectCALeafName, secondWatch, leafWatchID).
Return(nil).
Once().
Run(func(args mock.Arguments) {
leafCancel()
})
cm.tokens.UpdateAgentToken(newToken, token.TokenSourceAPI)
require.True(t,
waitForChans(100*time.Millisecond, rootsCtx.Done(), leafCtx.Done()),
"not all watches were restarted within the alotted time")
cm.assertExpectations(t)
}
func TestCertMonitor_RootsUpdate(t *testing.T) {
ctx, cm := startedCertMonitor(t)
secondCA := connect.TestCA(t, cm.initialRoots.Roots[0])
secondRoots := structs.IndexedCARoots{
ActiveRootID: secondCA.ID,
TrustDomain: connect.TestClusterID,
Roots: []*structs.CARoot{
secondCA,
cm.initialRoots.Roots[0],
},
QueryMeta: structs.QueryMeta{
Index: 99,
},
}
// assert value of the CA certs prior to updating
require.ElementsMatch(t, cm.initialCACerts(), cm.tls.CAPems())
req := structs.DCSpecificRequest{Datacenter: cm.datacenter}
require.True(t, cm.mcache.sendNotification(ctx, req.CacheInfo().Key, cache.UpdateEvent{
CorrelationID: rootsWatchID,
Result: &secondRoots,
Meta: cache.ResultMeta{
Index: secondRoots.Index,
},
}))
expectedCAs := append(cm.extraCACerts, secondCA.RootCert, cm.initialRoots.Roots[0].RootCert)
// this will wait up to 200ms (8 x 25 ms waits between the 9 requests)
retry.RunWith(&retry.Counter{Count: 9, Wait: 25 * time.Millisecond}, t, func(r *retry.R) {
require.ElementsMatch(r, expectedCAs, cm.tls.CAPems())
})
cm.assertExpectations(t)
}
func TestCertMonitor_CertUpdate(t *testing.T) {
ctx, cm := startedCertMonitor(t)
secondCert := newLeaf(t, cm.initialRoots.Roots[0], 100, 10*time.Minute)
// assert value of cert prior to updating the leaf
require.Equal(t, cm.initialTLSCertificate(t), cm.tls.Cert())
key := cm.monitor.leafReq.CacheInfo().Key
// send the new certificate - this notifies only the watchers utilizing
// the new ACL token
require.True(t, cm.mcache.sendNotification(ctx, key, cache.UpdateEvent{
CorrelationID: leafWatchID,
Result: secondCert,
Meta: cache.ResultMeta{
Index: secondCert.ModifyIndex,
},
}))
tlsCert := tlsCertificateFromIssued(t, secondCert)
// this will wait up to 200ms (8 x 25 ms waits between the 9 requests)
retry.RunWith(&retry.Counter{Count: 9, Wait: 25 * time.Millisecond}, t, func(r *retry.R) {
require.Equal(r, tlsCert, cm.tls.Cert())
})
cm.assertExpectations(t)
}
func TestCertMonitor_Fallback(t *testing.T) {
ctx, cm := startedCertMonitor(t)
// at this point everything is operating normally and the monitor is just
// waiting for events. We are going to send a new cert that is basically
// already expired and then allow the fallback routine to kick in.
secondCert := newLeaf(t, cm.initialRoots.Roots[0], 100, time.Nanosecond)
secondCA := connect.TestCA(t, cm.initialRoots.Roots[0])
secondRoots := structs.IndexedCARoots{
ActiveRootID: secondCA.ID,
TrustDomain: connect.TestClusterID,
Roots: []*structs.CARoot{
secondCA,
cm.initialRoots.Roots[0],
},
QueryMeta: structs.QueryMeta{
Index: 101,
},
}
thirdCert := newLeaf(t, secondCA, 102, 10*time.Minute)
// inject a fallback routine error to check that we rerun it quickly
cm.fallback.On("fallback").Return(nil, fmt.Errorf("induced error")).Once()
// expect the fallback routine to be executed and setup the return
cm.fallback.On("fallback").Return(&structs.SignedResponse{
ConnectCARoots: secondRoots,
IssuedCert: *thirdCert,
ManualCARoots: cm.extraCACerts,
VerifyServerHostname: true,
}, nil).Once()
// Add another roots cache prepopulation expectation which should happen
// in response to executing the fallback mechanism
rootRes := cache.FetchResult{Value: &secondRoots, Index: 101}
rootsReq := structs.DCSpecificRequest{Datacenter: cm.datacenter}
cm.mcache.On("Prepopulate", cachetype.ConnectCARootName, rootRes, cm.datacenter, "", rootsReq.CacheInfo().Key).Return(nil).Once()
// add another leaf cert cache prepopulation expectation which should happen
// in response to executing the fallback mechanism
leafReq := cachetype.ConnectCALeafRequest{
Token: cm.tokens.AgentToken(),
Agent: cm.nodeName,
Datacenter: cm.datacenter,
DNSSAN: cm.dns,
IPSAN: cm.ips,
}
leafRes := cache.FetchResult{
Value: thirdCert,
Index: 101,
State: cachetype.ConnectCALeafSuccess(secondCA.SigningKeyID),
}
cm.mcache.On("Prepopulate", cachetype.ConnectCALeafName, leafRes, leafReq.Datacenter, leafReq.Token, leafReq.Key()).Return(nil).Once()
// nothing in the monitor should be looking at this as its only done
// in response to sending token updates, no need to synchronize
key := cm.monitor.leafReq.CacheInfo().Key
// send the new certificate - this notifies only the watchers utilizing
// the new ACL token
require.True(t, cm.mcache.sendNotification(ctx, key, cache.UpdateEvent{
CorrelationID: leafWatchID,
Result: secondCert,
Meta: cache.ResultMeta{
Index: secondCert.ModifyIndex,
},
}))
// if all went well we would have updated the first certificate which was pretty much expired
// causing the fallback handler to be invoked almost immediately. The fallback routine will
// return the response containing the third cert and second CA roots so now we should wait
// a little while and ensure they were applied to the TLS Configurator
tlsCert := tlsCertificateFromIssued(t, thirdCert)
expectedCAs := append(cm.extraCACerts, secondCA.RootCert, cm.initialRoots.Roots[0].RootCert)
// this will wait up to 200ms (8 x 25 ms waits between the 9 requests)
retry.RunWith(&retry.Counter{Count: 9, Wait: 25 * time.Millisecond}, t, func(r *retry.R) {
require.Equal(r, tlsCert, cm.tls.Cert())
require.ElementsMatch(r, expectedCAs, cm.tls.CAPems())
})
cm.assertExpectations(t)
}
func TestCertMonitor_New_Errors(t *testing.T) {
type testCase struct {
cfg Config
err string
}
fallback := func(_ context.Context) (*structs.SignedResponse, error) {
return nil, fmt.Errorf("Unimplemented")
}
tokens := new(token.Store)
cases := map[string]testCase{
"no-cache": {
cfg: Config{
TLSConfigurator: testTLSConfigurator(t),
Fallback: fallback,
Tokens: tokens,
Datacenter: "foo",
NodeName: "bar",
},
err: "CertMonitor creation requires a Cache",
},
"no-tls-configurator": {
cfg: Config{
Cache: cache.New(cache.Options{}),
Fallback: fallback,
Tokens: tokens,
Datacenter: "foo",
NodeName: "bar",
},
err: "CertMonitor creation requires a TLS Configurator",
},
"no-fallback": {
cfg: Config{
Cache: cache.New(cache.Options{}),
TLSConfigurator: testTLSConfigurator(t),
Tokens: tokens,
Datacenter: "foo",
NodeName: "bar",
},
err: "CertMonitor creation requires specifying a FallbackFunc",
},
"no-tokens": {
cfg: Config{
Cache: cache.New(cache.Options{}),
TLSConfigurator: testTLSConfigurator(t),
Fallback: fallback,
Datacenter: "foo",
NodeName: "bar",
},
err: "CertMonitor creation requires specifying a token store",
},
"no-datacenter": {
cfg: Config{
Cache: cache.New(cache.Options{}),
TLSConfigurator: testTLSConfigurator(t),
Fallback: fallback,
Tokens: tokens,
NodeName: "bar",
},
err: "CertMonitor creation requires specifying the datacenter",
},
"no-node-name": {
cfg: Config{
Cache: cache.New(cache.Options{}),
TLSConfigurator: testTLSConfigurator(t),
Fallback: fallback,
Tokens: tokens,
Datacenter: "foo",
},
err: "CertMonitor creation requires specifying the agent's node name",
},
}
for name, tcase := range cases {
t.Run(name, func(t *testing.T) {
monitor, err := New(&tcase.cfg)
testutil.RequireErrorContains(t, err, tcase.err)
require.Nil(t, monitor)
})
}
}

View File

@ -1,137 +0,0 @@
package certmon
import (
"context"
"net"
"time"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/tlsutil"
"github.com/hashicorp/go-hclog"
)
// FallbackFunc is used when the normal cache watch based Certificate
// updating fails to update the Certificate in time and a different
// method of updating the certificate is required.
type FallbackFunc func(context.Context) (*structs.SignedResponse, error)
type Config struct {
// Logger is the logger to be used while running. If not set
// then no logging will be performed.
Logger hclog.Logger
// TLSConfigurator is where the certificates and roots are set when
// they are updated. This field is required.
TLSConfigurator *tlsutil.Configurator
// Cache is an object implementing our Cache interface. The Cache
// used at runtime must be able to handle Roots and Leaf Cert watches
Cache Cache
// Tokens is the shared token store. It is used to retrieve the current
// agent token as well as getting notifications when that token is updated.
// This field is required.
Tokens *token.Store
// Fallback is a function to run when the normal cache updating of the
// agent's certificates has failed to work for one reason or another.
// This field is required.
Fallback FallbackFunc
// FallbackLeeway is the amount of time after certificate expiration before
// invoking the fallback routine. If not set this will default to 10s.
FallbackLeeway time.Duration
// FallbackRetry is the duration between Fallback invocations when the configured
// fallback routine returns an error. If not set this will default to 1m.
FallbackRetry time.Duration
// DNSSANs is a list of DNS SANs that certificate requests should include. This
// field is optional and no extra DNS SANs will be requested if unset. 'localhost'
// is unconditionally requested by the cache implementation.
DNSSANs []string
// IPSANs is a list of IP SANs to include in the certificate signing request. This
// field is optional and no extra IP SANs will be requested if unset. Both '127.0.0.1'
// and '::1' IP SANs are unconditionally requested by the cache implementation.
IPSANs []net.IP
// Datacenter is the datacenter to request certificates within. This filed is required
Datacenter string
// NodeName is the agent's node name to use when requesting certificates. This field
// is required.
NodeName string
}
// WithCache will cause the created CertMonitor type to use the provided Cache
func (cfg *Config) WithCache(cache Cache) *Config {
cfg.Cache = cache
return cfg
}
// WithLogger will cause the created CertMonitor type to use the provided logger
func (cfg *Config) WithLogger(logger hclog.Logger) *Config {
cfg.Logger = logger
return cfg
}
// WithTLSConfigurator will cause the created CertMonitor type to use the provided configurator
func (cfg *Config) WithTLSConfigurator(tlsConfigurator *tlsutil.Configurator) *Config {
cfg.TLSConfigurator = tlsConfigurator
return cfg
}
// WithTokens will cause the created CertMonitor type to use the provided token store
func (cfg *Config) WithTokens(tokens *token.Store) *Config {
cfg.Tokens = tokens
return cfg
}
// WithFallback configures a fallback function to use if the normal update mechanisms
// fail to renew the certificate in time.
func (cfg *Config) WithFallback(fallback FallbackFunc) *Config {
cfg.Fallback = fallback
return cfg
}
// WithDNSSANs configures the CertMonitor to request these DNS SANs when requesting a new
// certificate
func (cfg *Config) WithDNSSANs(sans []string) *Config {
cfg.DNSSANs = sans
return cfg
}
// WithIPSANs configures the CertMonitor to request these IP SANs when requesting a new
// certificate
func (cfg *Config) WithIPSANs(sans []net.IP) *Config {
cfg.IPSANs = sans
return cfg
}
// WithDatacenter configures the CertMonitor to request Certificates in this DC
func (cfg *Config) WithDatacenter(dc string) *Config {
cfg.Datacenter = dc
return cfg
}
// WithNodeName configures the CertMonitor to request Certificates with this agent name
func (cfg *Config) WithNodeName(name string) *Config {
cfg.NodeName = name
return cfg
}
// WithFallbackLeeway configures how long after a certificate expires before attempting to
// generarte a new certificate using the fallback mechanism. The default is 10s.
func (cfg *Config) WithFallbackLeeway(leeway time.Duration) *Config {
cfg.FallbackLeeway = leeway
return cfg
}
// WithFallbackRetry controls how quickly we will make subsequent invocations of
// the fallback func in the case of it erroring out.
func (cfg *Config) WithFallbackRetry(after time.Duration) *Config {
cfg.FallbackRetry = after
return cfg
}

View File

@ -16,7 +16,7 @@ func TestBuildAndValidate_HTTPMaxConnsPerClientExceedsRLimit(t *testing.T) {
}`
b, err := NewBuilder(BuilderOpts{})
assert.NoError(t, err)
testsrc := Source{
testsrc := FileSource{
Name: "test",
Format: "hcl",
Data: `
@ -33,7 +33,7 @@ func TestBuildAndValidate_HTTPMaxConnsPerClientExceedsRLimit(t *testing.T) {
}
b.Head = append(b.Head, testsrc)
b.Tail = append(b.Tail, DefaultConsulSource(), DevConsulSource())
b.Tail = append(b.Head, Source{Name: "hcl", Format: "hcl", Data: hcl})
b.Tail = append(b.Head, FileSource{Name: "hcl", Format: "hcl", Data: hcl})
_, validationError := b.BuildAndValidate()
if validationError == nil {

View File

@ -3,6 +3,7 @@ package config
import (
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"net"
@ -19,10 +20,13 @@ import (
"github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/consul/authmethod/ssoauth"
"github.com/hashicorp/consul/agent/dns"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/ipaddr"
"github.com/hashicorp/consul/lib"
libtempl "github.com/hashicorp/consul/lib/template"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/tlsutil"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/go-bexpr"
@ -33,6 +37,31 @@ import (
"golang.org/x/time/rate"
)
// Load will build the configuration including the extraHead source injected
// after all other defaults but before any user supplied configuration and the overrides
// source injected as the final source in the configuration parsing chain.
func Load(opts BuilderOpts, extraHead Source, overrides ...Source) (*RuntimeConfig, []string, error) {
b, err := NewBuilder(opts)
if err != nil {
return nil, nil, err
}
if extraHead != nil {
b.Head = append(b.Head, extraHead)
}
if len(overrides) != 0 {
b.Tail = append(b.Tail, overrides...)
}
cfg, err := b.BuildAndValidate()
if err != nil {
return nil, nil, err
}
return &cfg, b.Warnings, nil
}
// Builder constructs a valid runtime configuration from multiple
// configuration sources.
//
@ -59,8 +88,8 @@ import (
// since not all pre-conditions have to be satisfied when performing
// syntactical tests.
type Builder struct {
// options contains the input values used to construct a RuntimeConfig
options BuilderOpts
// devMode stores the value of the -dev flag, and enables development mode.
devMode *bool
// Head, Sources, and Tail are used to manage the order of the
// config sources, as described in the comments above.
@ -72,14 +101,14 @@ type Builder struct {
// parsing the configuration.
Warnings []string
// Hostname returns the hostname of the machine. If nil, os.Hostname
// is called.
Hostname func() (string, error)
// hostname is a shim for testing, allowing tests to specify a replacement
// for os.Hostname.
hostname func() (string, error)
// GetPrivateIPv4 and GetPublicIPv6 return suitable default addresses
// for cases when the user doesn't supply them.
GetPrivateIPv4 func() ([]*net.IPAddr, error)
GetPublicIPv6 func() ([]*net.IPAddr, error)
// getPrivateIPv4 and getPublicIPv6 are shims for testing, allowing tests to
// specify a replacement for ipaddr.GetPrivateIPv4 and ipaddr.GetPublicIPv6.
getPrivateIPv4 func() ([]*net.IPAddr, error)
getPublicIPv6 func() ([]*net.IPAddr, error)
// err contains the first error that occurred during
// building the runtime configuration.
@ -88,16 +117,21 @@ type Builder struct {
// NewBuilder returns a new configuration Builder from the BuilderOpts.
func NewBuilder(opts BuilderOpts) (*Builder, error) {
configFormat := opts.ConfigFormat
if configFormat != "" && configFormat != "json" && configFormat != "hcl" {
return nil, fmt.Errorf("config: -config-format must be either 'hcl' or 'json'")
}
newSource := func(name string, v interface{}) Source {
b, err := json.MarshalIndent(v, "", " ")
if err != nil {
panic(err)
}
return Source{Name: name, Format: "json", Data: string(b)}
return FileSource{Name: name, Format: "json", Data: string(b)}
}
b := &Builder{
options: opts,
devMode: opts.DevMode,
Head: []Source{DefaultSource(), DefaultEnterpriseSource()},
}
@ -121,7 +155,7 @@ func NewBuilder(opts BuilderOpts) (*Builder, error) {
}
b.Tail = append(b.Tail, newSource("flags.values", values))
for i, s := range opts.HCL {
b.Tail = append(b.Tail, Source{
b.Tail = append(b.Tail, FileSource{
Name: fmt.Sprintf("flags-%d.hcl", i),
Format: "hcl",
Data: s,
@ -207,12 +241,12 @@ func (b *Builder) sourcesFromPath(path string, format string) ([]Source, error)
func newSourceFromFile(path string, format string) (Source, error) {
data, err := ioutil.ReadFile(path)
if err != nil {
return Source{}, fmt.Errorf("config: failed to read %s: %s", path, err)
return nil, fmt.Errorf("config: failed to read %s: %s", path, err)
}
if format == "" {
format = formatFromFileExtension(path)
}
return Source{Name: path, Data: string(data), Format: format}, nil
return FileSource{Name: path, Data: string(data), Format: format}, nil
}
// shouldParse file determines whether the file to be read is of a supported extension
@ -256,14 +290,7 @@ func (b *Builder) BuildAndValidate() (RuntimeConfig, error) {
// warnings can still contain deprecation or format warnings that should
// be presented to the user.
func (b *Builder) Build() (rt RuntimeConfig, err error) {
// TODO: move to NewBuilder to remove Builder.options field
configFormat := b.options.ConfigFormat
if configFormat != "" && configFormat != "json" && configFormat != "hcl" {
return RuntimeConfig{}, fmt.Errorf("config: -config-format must be either 'hcl' or 'json'")
}
// build the list of config sources
var srcs []Source
srcs := make([]Source, 0, len(b.Head)+len(b.Sources)+len(b.Tail))
srcs = append(srcs, b.Head...)
srcs = append(srcs, b.Sources...)
srcs = append(srcs, b.Tail...)
@ -271,12 +298,13 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
// parse the config sources into a configuration
var c Config
for _, s := range srcs {
if s.Name == "" || s.Data == "" {
c2, md, err := s.Parse()
switch {
case err == ErrNoData:
continue
}
c2, md, err := Parse(s.Data, s.Format)
if err != nil {
return RuntimeConfig{}, fmt.Errorf("Error parsing %s: %s", s.Name, err)
case err != nil:
return RuntimeConfig{}, fmt.Errorf("failed to parse %v: %w", s.Source(), err)
}
var unusedErr error
@ -289,7 +317,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
}
}
if unusedErr != nil {
return RuntimeConfig{}, fmt.Errorf("Error parsing %s: %s", s.Name, unusedErr)
return RuntimeConfig{}, fmt.Errorf("failed to parse %v: %s", s.Source(), unusedErr)
}
// for now this is a soft failure that will cause warnings but not actual problems
@ -435,14 +463,14 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
switch {
case ipaddr.IsAnyV4(advertiseAddr):
addrtyp = "private IPv4"
detect = b.GetPrivateIPv4
detect = b.getPrivateIPv4
if detect == nil {
detect = ipaddr.GetPrivateIPv4
}
case ipaddr.IsAnyV6(advertiseAddr):
addrtyp = "public IPv6"
detect = b.GetPublicIPv6
detect = b.getPublicIPv6
if detect == nil {
detect = ipaddr.GetPublicIPv6
}
@ -626,10 +654,40 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
consulRaftHeartbeatTimeout := b.durationVal("consul.raft.heartbeat_timeout", c.Consul.Raft.HeartbeatTimeout) * time.Duration(performanceRaftMultiplier)
consulRaftLeaderLeaseTimeout := b.durationVal("consul.raft.leader_lease_timeout", c.Consul.Raft.LeaderLeaseTimeout) * time.Duration(performanceRaftMultiplier)
// Connect proxy defaults.
// Connect
connectEnabled := b.boolVal(c.Connect.Enabled)
connectCAProvider := b.stringVal(c.Connect.CAProvider)
connectCAConfig := c.Connect.CAConfig
// autoEncrypt and autoConfig implicitly turns on connect which is why
// they need to be above other settings that rely on connect.
autoEncryptTLS := b.boolVal(c.AutoEncrypt.TLS)
autoEncryptDNSSAN := []string{}
for _, d := range c.AutoEncrypt.DNSSAN {
autoEncryptDNSSAN = append(autoEncryptDNSSAN, d)
}
autoEncryptIPSAN := []net.IP{}
for _, i := range c.AutoEncrypt.IPSAN {
ip := net.ParseIP(i)
if ip == nil {
b.warn(fmt.Sprintf("Cannot parse ip %q from AutoEncrypt.IPSAN", i))
continue
}
autoEncryptIPSAN = append(autoEncryptIPSAN, ip)
}
autoEncryptAllowTLS := b.boolVal(c.AutoEncrypt.AllowTLS)
if autoEncryptAllowTLS {
connectEnabled = true
}
autoConfig := b.autoConfigVal(c.AutoConfig)
if autoConfig.Enabled {
connectEnabled = true
}
// Connect proxy defaults
connectMeshGatewayWANFederationEnabled := b.boolVal(c.Connect.MeshGatewayWANFederationEnabled)
if connectMeshGatewayWANFederationEnabled && !connectEnabled {
return RuntimeConfig{}, fmt.Errorf("'connect.enable_mesh_gateway_wan_federation=true' requires 'connect.enabled=true'")
@ -668,27 +726,6 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
})
}
autoEncryptTLS := b.boolVal(c.AutoEncrypt.TLS)
autoEncryptDNSSAN := []string{}
for _, d := range c.AutoEncrypt.DNSSAN {
autoEncryptDNSSAN = append(autoEncryptDNSSAN, d)
}
autoEncryptIPSAN := []net.IP{}
for _, i := range c.AutoEncrypt.IPSAN {
ip := net.ParseIP(i)
if ip == nil {
b.warn(fmt.Sprintf("Cannot parse ip %q from AutoEncrypt.IPSAN", i))
continue
}
autoEncryptIPSAN = append(autoEncryptIPSAN, ip)
}
autoEncryptAllowTLS := b.boolVal(c.AutoEncrypt.AllowTLS)
if autoEncryptAllowTLS {
connectEnabled = true
}
aclsEnabled := false
primaryDatacenter := strings.ToLower(b.stringVal(c.PrimaryDatacenter))
if c.ACLDatacenter != nil {
@ -741,6 +778,9 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
if err != nil {
return RuntimeConfig{}, fmt.Errorf("config_entries.bootstrap[%d]: %s", i, err)
}
if err := entry.Normalize(); err != nil {
return RuntimeConfig{}, fmt.Errorf("config_entries.bootstrap[%d]: %s", i, err)
}
if err := entry.Validate(); err != nil {
return RuntimeConfig{}, fmt.Errorf("config_entries.bootstrap[%d]: %s", i, err)
}
@ -760,6 +800,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
// ----------------------------------------------------------------
// build runtime config
//
dataDir := b.stringVal(c.DataDir)
rt = RuntimeConfig{
// non-user configurable values
ACLDisabledTTL: b.durationVal("acl.disabled_ttl", c.ACL.DisabledTTL),
@ -799,20 +840,24 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
// ACL
ACLsEnabled: aclsEnabled,
ACLAgentMasterToken: b.stringValWithDefault(c.ACL.Tokens.AgentMaster, b.stringVal(c.ACLAgentMasterToken)),
ACLAgentToken: b.stringValWithDefault(c.ACL.Tokens.Agent, b.stringVal(c.ACLAgentToken)),
ACLDatacenter: primaryDatacenter,
ACLDefaultPolicy: b.stringValWithDefault(c.ACL.DefaultPolicy, b.stringVal(c.ACLDefaultPolicy)),
ACLDownPolicy: b.stringValWithDefault(c.ACL.DownPolicy, b.stringVal(c.ACLDownPolicy)),
ACLEnableKeyListPolicy: b.boolValWithDefault(c.ACL.EnableKeyListPolicy, b.boolVal(c.ACLEnableKeyListPolicy)),
ACLMasterToken: b.stringValWithDefault(c.ACL.Tokens.Master, b.stringVal(c.ACLMasterToken)),
ACLReplicationToken: b.stringValWithDefault(c.ACL.Tokens.Replication, b.stringVal(c.ACLReplicationToken)),
ACLTokenTTL: b.durationValWithDefault("acl.token_ttl", c.ACL.TokenTTL, b.durationVal("acl_ttl", c.ACLTTL)),
ACLPolicyTTL: b.durationVal("acl.policy_ttl", c.ACL.PolicyTTL),
ACLRoleTTL: b.durationVal("acl.role_ttl", c.ACL.RoleTTL),
ACLToken: b.stringValWithDefault(c.ACL.Tokens.Default, b.stringVal(c.ACLToken)),
ACLTokenReplication: b.boolValWithDefault(c.ACL.TokenReplication, b.boolValWithDefault(c.EnableACLReplication, enableTokenReplication)),
ACLEnableTokenPersistence: b.boolValWithDefault(c.ACL.EnableTokenPersistence, false),
ACLTokens: token.Config{
DataDir: dataDir,
EnablePersistence: b.boolValWithDefault(c.ACL.EnableTokenPersistence, false),
ACLDefaultToken: b.stringValWithDefault(c.ACL.Tokens.Default, b.stringVal(c.ACLToken)),
ACLAgentToken: b.stringValWithDefault(c.ACL.Tokens.Agent, b.stringVal(c.ACLAgentToken)),
ACLAgentMasterToken: b.stringValWithDefault(c.ACL.Tokens.AgentMaster, b.stringVal(c.ACLAgentMasterToken)),
ACLReplicationToken: b.stringValWithDefault(c.ACL.Tokens.Replication, b.stringVal(c.ACLReplicationToken)),
},
// Autopilot
AutopilotCleanupDeadServers: b.boolVal(c.Autopilot.CleanupDeadServers),
@ -908,7 +953,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
AutoEncryptDNSSAN: autoEncryptDNSSAN,
AutoEncryptIPSAN: autoEncryptIPSAN,
AutoEncryptAllowTLS: autoEncryptAllowTLS,
AutoConfig: b.autoConfigVal(c.AutoConfig),
AutoConfig: autoConfig,
ConnectEnabled: connectEnabled,
ConnectCAProvider: connectCAProvider,
ConnectCAConfig: connectCAConfig,
@ -918,10 +963,10 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
ConnectTestCALeafRootChangeSpread: b.durationVal("connect.test_ca_leaf_root_change_spread", c.Connect.TestCALeafRootChangeSpread),
ExposeMinPort: exposeMinPort,
ExposeMaxPort: exposeMaxPort,
DataDir: b.stringVal(c.DataDir),
DataDir: dataDir,
Datacenter: datacenter,
DefaultQueryTime: b.durationVal("default_query_time", c.DefaultQueryTime),
DevMode: b.boolVal(b.options.DevMode),
DevMode: b.boolVal(b.devMode),
DisableAnonymousSignature: b.boolVal(c.DisableAnonymousSignature),
DisableCoordinates: b.boolVal(c.DisableCoordinates),
DisableHostNodeID: b.boolVal(c.DisableHostNodeID),
@ -936,7 +981,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
EnableDebug: b.boolVal(c.EnableDebug),
EnableRemoteScriptChecks: enableRemoteScriptChecks,
EnableLocalScriptChecks: enableLocalScriptChecks,
EnableSyslog: b.boolVal(c.EnableSyslog),
EnableUI: b.boolVal(c.UI),
EncryptKey: b.stringVal(c.EncryptKey),
EncryptVerifyIncoming: b.boolVal(c.EncryptVerifyIncoming),
@ -949,12 +994,16 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
KVMaxValueSize: b.uint64Val(c.Limits.KVMaxValueSize),
LeaveDrainTime: b.durationVal("performance.leave_drain_time", c.Performance.LeaveDrainTime),
LeaveOnTerm: leaveOnTerm,
Logging: logging.Config{
LogLevel: b.stringVal(c.LogLevel),
LogJSON: b.boolVal(c.LogJSON),
LogFile: b.stringVal(c.LogFile),
LogRotateBytes: b.intVal(c.LogRotateBytes),
LogFilePath: b.stringVal(c.LogFile),
EnableSyslog: b.boolVal(c.EnableSyslog),
SyslogFacility: b.stringVal(c.SyslogFacility),
LogRotateDuration: b.durationVal("log_rotate_duration", c.LogRotateDuration),
LogRotateBytes: b.intVal(c.LogRotateBytes),
LogRotateMaxFiles: b.intVal(c.LogRotateMaxFiles),
},
MaxQueryTime: b.durationVal("max_query_time", c.MaxQueryTime),
NodeID: types.NodeID(b.stringVal(c.NodeID)),
NodeMeta: c.NodeMeta,
@ -1003,7 +1052,6 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
SkipLeaveOnInt: skipLeaveOnInt,
StartJoinAddrsLAN: b.expandAllOptionalAddrs("start_join", c.StartJoinAddrsLAN),
StartJoinAddrsWAN: b.expandAllOptionalAddrs("start_join_wan", c.StartJoinAddrsWAN),
SyslogFacility: b.stringVal(c.SyslogFacility),
TLSCipherSuites: b.tlsCipherSuites("tls_cipher_suites", c.TLSCipherSuites),
TLSMinVersion: b.stringVal(c.TLSMinVersion),
TLSPreferServerCipherSuites: b.boolVal(c.TLSPreferServerCipherSuites),
@ -1030,10 +1078,8 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
return RuntimeConfig{}, fmt.Errorf("cache.entry_fetch_rate must be strictly positive, was: %v", rt.Cache.EntryFetchRate)
}
if entCfg, err := b.BuildEnterpriseRuntimeConfig(&c); err != nil {
return RuntimeConfig{}, err
} else {
rt.EnterpriseRuntimeConfig = entCfg
if err := b.BuildEnterpriseRuntimeConfig(&rt, &c); err != nil {
return rt, err
}
if rt.BootstrapExpect == 1 {
@ -1084,9 +1130,20 @@ func (b *Builder) Validate(rt RuntimeConfig) error {
return fmt.Errorf("data_dir %q is not a directory", rt.DataDir)
}
}
if rt.NodeName == "" {
switch {
case rt.NodeName == "":
return fmt.Errorf("node_name cannot be empty")
case dns.InvalidNameRe.MatchString(rt.NodeName):
b.warn("Node name %q will not be discoverable "+
"via DNS due to invalid characters. Valid characters include "+
"all alpha-numerics and dashes.", rt.NodeName)
case len(rt.NodeName) > dns.MaxLabelLength:
b.warn("Node name %q will not be discoverable "+
"via DNS due to it being too long. Valid lengths are between "+
"1 and 63 bytes.", rt.NodeName)
}
if ipaddr.IsAny(rt.AdvertiseAddrLAN.IP) {
return fmt.Errorf("Advertise address cannot be 0.0.0.0, :: or [::]")
}
@ -1305,7 +1362,13 @@ func (b *Builder) Validate(rt RuntimeConfig) error {
return err
}
return nil
if err := validateRemoteScriptsChecks(rt); err != nil {
// TODO: make this an error in a future version
b.warn(err.Error())
}
err := b.validateEnterpriseConfig(rt)
return err
}
// addrUnique checks if the given address is already in use for another
@ -1709,7 +1772,7 @@ func (b *Builder) tlsCipherSuites(name string, v *string) []uint16 {
func (b *Builder) nodeName(v *string) string {
nodeName := b.stringVal(v)
if nodeName == "" {
fn := b.Hostname
fn := b.hostname
if fn == nil {
fn = os.Hostname
}
@ -2045,7 +2108,6 @@ func (b *Builder) validateAutoConfig(rt RuntimeConfig) error {
return fmt.Errorf("auto_config.enabled is set without providing a list of addresses")
}
// TODO (autoconf) should we validate the DNS and IP SANs? The IP SANs have already been parsed into IPs
return nil
}
@ -2055,6 +2117,15 @@ func (b *Builder) validateAutoConfigAuthorizer(rt RuntimeConfig) error {
if !authz.Enabled {
return nil
}
// When in a secondary datacenter with ACLs enabled, we require token replication to be enabled
// as that is what allows us to create the local tokens to distribute to the clients. Otherwise
// we would have to have a token with the ability to create ACL tokens in the primary and make
// RPCs in response to auto config requests.
if rt.ACLsEnabled && rt.PrimaryDatacenter != rt.Datacenter && !rt.ACLTokenReplication {
return fmt.Errorf("Enabling auto-config authorization (auto_config.authorization.enabled) in non primary datacenters with ACLs enabled (acl.enabled) requires also enabling ACL token replication (acl.enable_token_replication)")
}
// Auto Config Authorization is only supported on servers
if !rt.ServerMode {
return fmt.Errorf("auto_config.authorization.enabled cannot be set to true for client agents")
@ -2138,3 +2209,15 @@ func UIPathBuilder(UIContentString string) string {
}
return "/ui/"
}
const remoteScriptCheckSecurityWarning = "using enable-script-checks without ACLs and without allow_write_http_from is DANGEROUS, use enable-local-script-checks instead, see https://www.hashicorp.com/blog/protecting-consul-from-rce-risk-in-specific-configurations/"
// validateRemoteScriptsChecks returns an error if EnableRemoteScriptChecks is
// enabled without other security features, which mitigate the risk of executing
// remote scripts.
func validateRemoteScriptsChecks(conf RuntimeConfig) error {
if conf.EnableRemoteScriptChecks && !conf.ACLsEnabled && len(conf.AllowWriteHTTPFrom) == 0 {
return errors.New(remoteScriptCheckSecurityWarning)
}
return nil
}

View File

@ -51,8 +51,12 @@ func (e enterpriseConfigKeyError) Error() string {
return fmt.Sprintf("%q is a Consul Enterprise configuration and will have no effect", e.key)
}
func (_ *Builder) BuildEnterpriseRuntimeConfig(_ *Config) (EnterpriseRuntimeConfig, error) {
return EnterpriseRuntimeConfig{}, nil
func (*Builder) BuildEnterpriseRuntimeConfig(_ *RuntimeConfig, _ *Config) error {
return nil
}
func (*Builder) validateEnterpriseConfig(_ RuntimeConfig) error {
return nil
}
// validateEnterpriseConfig is a function to validate the enterprise specific

View File

@ -3,13 +3,44 @@ package config
import (
"fmt"
"io/ioutil"
"net"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
)
func TestLoad(t *testing.T) {
// Basically just testing that injection of the extra
// source works.
devMode := true
builderOpts := BuilderOpts{
// putting this in dev mode so that the config validates
// without having to specify a data directory
DevMode: &devMode,
}
cfg, warnings, err := Load(builderOpts, FileSource{
Name: "test",
Format: "hcl",
Data: `node_name = "hobbiton"`,
},
FileSource{
Name: "overrides",
Format: "json",
Data: `{"check_reap_interval": "1ms"}`,
})
require.NoError(t, err)
require.Empty(t, warnings)
require.NotNil(t, cfg)
require.Equal(t, "hobbiton", cfg.NodeName)
require.Equal(t, 1*time.Millisecond, cfg.CheckReapInterval)
}
func TestShouldParseFile(t *testing.T) {
var testcases = []struct {
filename string
@ -38,10 +69,10 @@ func TestNewBuilder_PopulatesSourcesFromConfigFiles(t *testing.T) {
require.NoError(t, err)
expected := []Source{
{Name: paths[0], Format: "hcl", Data: "content a"},
{Name: paths[1], Format: "json", Data: "content b"},
{Name: filepath.Join(paths[3], "a.hcl"), Format: "hcl", Data: "content a"},
{Name: filepath.Join(paths[3], "b.json"), Format: "json", Data: "content b"},
FileSource{Name: paths[0], Format: "hcl", Data: "content a"},
FileSource{Name: paths[1], Format: "json", Data: "content b"},
FileSource{Name: filepath.Join(paths[3], "a.hcl"), Format: "hcl", Data: "content a"},
FileSource{Name: filepath.Join(paths[3], "b.json"), Format: "json", Data: "content b"},
}
require.Equal(t, expected, b.Sources)
require.Len(t, b.Warnings, 2)
@ -54,12 +85,12 @@ func TestNewBuilder_PopulatesSourcesFromConfigFiles_WithConfigFormat(t *testing.
require.NoError(t, err)
expected := []Source{
{Name: paths[0], Format: "hcl", Data: "content a"},
{Name: paths[1], Format: "hcl", Data: "content b"},
{Name: paths[2], Format: "hcl", Data: "content c"},
{Name: filepath.Join(paths[3], "a.hcl"), Format: "hcl", Data: "content a"},
{Name: filepath.Join(paths[3], "b.json"), Format: "hcl", Data: "content b"},
{Name: filepath.Join(paths[3], "c.yaml"), Format: "hcl", Data: "content c"},
FileSource{Name: paths[0], Format: "hcl", Data: "content a"},
FileSource{Name: paths[1], Format: "hcl", Data: "content b"},
FileSource{Name: paths[2], Format: "hcl", Data: "content c"},
FileSource{Name: filepath.Join(paths[3], "a.hcl"), Format: "hcl", Data: "content a"},
FileSource{Name: filepath.Join(paths[3], "b.json"), Format: "hcl", Data: "content b"},
FileSource{Name: filepath.Join(paths[3], "c.yaml"), Format: "hcl", Data: "content c"},
}
require.Equal(t, expected, b.Sources)
}
@ -92,3 +123,62 @@ func setupConfigFiles(t *testing.T) []string {
subpath,
}
}
func TestBuilder_BuildAndValidate_NodeName(t *testing.T) {
type testCase struct {
name string
nodeName string
expectedWarn string
}
fn := func(t *testing.T, tc testCase) {
b, err := NewBuilder(BuilderOpts{
Config: Config{
NodeName: pString(tc.nodeName),
DataDir: pString("dir"),
},
})
patchBuilderShims(b)
require.NoError(t, err)
_, err = b.BuildAndValidate()
require.NoError(t, err)
require.Len(t, b.Warnings, 1)
require.Contains(t, b.Warnings[0], tc.expectedWarn)
}
var testCases = []testCase{
{
name: "invalid character - unicode",
nodeName: "🐼",
expectedWarn: `Node name "🐼" will not be discoverable via DNS due to invalid characters`,
},
{
name: "invalid character - slash",
nodeName: "thing/other/ok",
expectedWarn: `Node name "thing/other/ok" will not be discoverable via DNS due to invalid characters`,
},
{
name: "too long",
nodeName: strings.Repeat("a", 66),
expectedWarn: "due to it being too long.",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
fn(t, tc)
})
}
}
func patchBuilderShims(b *Builder) {
b.hostname = func() (string, error) {
return "thehostname", nil
}
b.getPrivateIPv4 = func() ([]*net.IPAddr, error) {
return []*net.IPAddr{ipAddr("10.0.0.1")}, nil
}
b.getPublicIPv6 = func() ([]*net.IPAddr, error) {
return []*net.IPAddr{ipAddr("dead:beef::1")}, nil
}
}

View File

@ -14,27 +14,51 @@ const (
SerfWANKeyring = "serf/remote.keyring"
)
type Source struct {
// Source parses configuration from some source.
type Source interface {
// Source returns an identifier for the Source that can be used in error message
Source() string
// Parse a configuration and return the result.
Parse() (Config, mapstructure.Metadata, error)
}
// ErrNoData indicates to Builder.Build that the source contained no data, and
// it can be skipped.
var ErrNoData = fmt.Errorf("config source contained no data")
// FileSource implements Source and parses a config from a file.
type FileSource struct {
Name string
Format string
Data string
}
// Parse parses a config fragment in either JSON or HCL format.
func Parse(data string, format string) (c Config, md mapstructure.Metadata, err error) {
var raw map[string]interface{}
switch format {
case "json":
err = json.Unmarshal([]byte(data), &raw)
case "hcl":
err = hcl.Decode(&raw, data)
default:
err = fmt.Errorf("invalid format: %s", format)
}
if err != nil {
return Config{}, mapstructure.Metadata{}, err
func (f FileSource) Source() string {
return f.Name
}
// Parse a config file in either JSON or HCL format.
func (f FileSource) Parse() (Config, mapstructure.Metadata, error) {
if f.Name == "" || f.Data == "" {
return Config{}, mapstructure.Metadata{}, ErrNoData
}
var raw map[string]interface{}
var err error
var md mapstructure.Metadata
switch f.Format {
case "json":
err = json.Unmarshal([]byte(f.Data), &raw)
case "hcl":
err = hcl.Decode(&raw, f.Data)
default:
err = fmt.Errorf("invalid format: %s", f.Format)
}
if err != nil {
return Config{}, md, err
}
var c Config
d, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
DecodeHook: mapstructure.ComposeDecodeHookFunc(
// decode.HookWeakDecodeFromSlice is only necessary when reading from
@ -49,15 +73,29 @@ func Parse(data string, format string) (c Config, md mapstructure.Metadata, err
Result: &c,
})
if err != nil {
return Config{}, mapstructure.Metadata{}, err
return Config{}, md, err
}
if err := d.Decode(raw); err != nil {
return Config{}, mapstructure.Metadata{}, err
return Config{}, md, err
}
return c, md, nil
}
// LiteralSource implements Source and returns an existing Config struct.
type LiteralSource struct {
Name string
Config Config
}
func (l LiteralSource) Source() string {
return l.Name
}
func (l LiteralSource) Parse() (Config, mapstructure.Metadata, error) {
return l.Config, mapstructure.Metadata{}, nil
}
// Cache is the tunning configuration for cache, values are optional
type Cache struct {
// EntryFetchMaxBurst max burst size of RateLimit for a single cache entry

View File

@ -12,9 +12,7 @@ import (
// DefaultSource is the default agent configuration.
// This needs to be merged first in the head.
// todo(fs): The values are sourced from multiple sources.
// todo(fs): IMO, this should be the definitive default for all configurable values
// todo(fs): and whatever is in here should clobber every default value. Hence, no sourcing.
// TODO: return a LiteralSource (no decoding) instead of a FileSource
func DefaultSource() Source {
cfg := consul.DefaultConfig()
serfLAN := cfg.SerfLANConfig.MemberlistConfig
@ -25,7 +23,7 @@ func DefaultSource() Source {
// acl stanza for now we need to be able to detect the new entries not being set (not
// just set to the defaults here) so that we can use the old entries. So the true
// default still needs to reside in the original config values
return Source{
return FileSource{
Name: "default",
Format: "hcl",
Data: `
@ -131,8 +129,9 @@ func DefaultSource() Source {
// DevSource is the additional default configuration for dev mode.
// This should be merged in the head after the default configuration.
// TODO: return a LiteralSource (no decoding) instead of a FileSource
func DevSource() Source {
return Source{
return FileSource{
Name: "dev",
Format: "hcl",
Data: `
@ -171,8 +170,9 @@ func DevSource() Source {
// NonUserSource contains the values the user cannot configure.
// This needs to be merged in the tail.
// TODO: return a LiteralSource (no decoding) instead of a FileSource
func NonUserSource() Source {
return Source{
return FileSource{
Name: "non-user",
Format: "hcl",
Data: `
@ -203,8 +203,9 @@ func NonUserSource() Source {
// VersionSource creates a config source for the version parameters.
// This should be merged in the tail since these values are not
// user configurable.
// TODO: return a LiteralSource (no decoding) instead of a FileSource
func VersionSource(rev, ver, verPre string) Source {
return Source{
return FileSource{
Name: "version",
Format: "hcl",
Data: fmt.Sprintf(`revision = %q version = %q version_prerelease = %q`, rev, ver, verPre),
@ -219,10 +220,11 @@ func DefaultVersionSource() Source {
// DefaultConsulSource returns the default configuration for the consul agent.
// This should be merged in the tail since these values are not user configurable.
// TODO: return a LiteralSource (no decoding) instead of a FileSource
func DefaultConsulSource() Source {
cfg := consul.DefaultConfig()
raft := cfg.RaftConfig
return Source{
return FileSource{
Name: "consul",
Format: "hcl",
Data: `
@ -247,8 +249,9 @@ func DefaultConsulSource() Source {
// DevConsulSource returns the consul agent configuration for the dev mode.
// This should be merged in the tail after the DefaultConsulSource.
// TODO: return a LiteralSource (no decoding) instead of a FileSource
func DevConsulSource() Source {
return Source{
return FileSource{
Name: "consul-dev",
Format: "hcl",
Data: `

View File

@ -5,8 +5,9 @@ package config
// DefaultEnterpriseSource returns the consul agent configuration for enterprise mode.
// These can be overridden by the user and therefore this source should be merged in the
// head and processed before user configuration.
// TODO: return a LiteralSource (no decoding) instead of a FileSource
func DefaultEnterpriseSource() Source {
return Source{
return FileSource{
Name: "enterprise-defaults",
Format: "hcl",
Data: ``,
@ -15,8 +16,9 @@ func DefaultEnterpriseSource() Source {
// OverrideEnterpriseSource returns the consul agent configuration for the enterprise mode.
// This should be merged in the tail after the DefaultConsulSource.
// TODO: return a LiteralSource (no decoding) instead of a FileSource
func OverrideEnterpriseSource() Source {
return Source{
return FileSource{
Name: "enterprise-overrides",
Format: "hcl",
Data: ``,

View File

@ -9,8 +9,10 @@ import (
"github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/tlsutil"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/go-uuid"
@ -62,19 +64,7 @@ type RuntimeConfig struct {
// hcl: acl.enabled = boolean
ACLsEnabled bool
// ACLAgentMasterToken is a special token that has full read and write
// privileges for this agent, and can be used to call agent endpoints
// when no servers are available.
//
// hcl: acl.tokens.agent_master = string
ACLAgentMasterToken string
// ACLAgentToken is the default token used to make requests for the agent
// itself, such as for registering itself with the catalog. If not
// configured, the 'acl_token' will be used.
//
// hcl: acl.tokens.agent = string
ACLAgentToken string
ACLTokens token.Config
// ACLDatacenter is the central datacenter that holds authoritative
// ACL records. This must be the same for the entire cluster.
@ -122,16 +112,6 @@ type RuntimeConfig struct {
// hcl: acl.tokens.master = string
ACLMasterToken string
// ACLReplicationToken is used to replicate data locally from the
// PrimaryDatacenter. Replication is only available on servers in
// datacenters other than the PrimaryDatacenter
//
// DEPRECATED (ACL-Legacy-Compat): Setting this to a non-empty value
// also enables legacy ACL replication if ACLs are enabled and in legacy mode.
//
// hcl: acl.tokens.replication = string
ACLReplicationToken string
// ACLtokenReplication is used to indicate that both tokens and policies
// should be replicated instead of just policies
//
@ -156,16 +136,6 @@ type RuntimeConfig struct {
// hcl: acl.role_ttl = "duration"
ACLRoleTTL time.Duration
// ACLToken is the default token used to make requests if a per-request
// token is not provided. If not configured the 'anonymous' token is used.
//
// hcl: acl.tokens.default = string
ACLToken string
// ACLEnableTokenPersistence determines whether or not tokens set via the agent HTTP API
// should be persisted to disk and reloaded when an agent restarts.
ACLEnableTokenPersistence bool
// AutopilotCleanupDeadServers enables the automatic cleanup of dead servers when new ones
// are added to the peer list. Defaults to true.
//
@ -724,13 +694,6 @@ type RuntimeConfig struct {
// flag: -enable-script-checks
EnableRemoteScriptChecks bool
// EnableSyslog is used to also tee all the logs over to syslog. Only supported
// on linux and OSX. Other platforms will generate an error.
//
// hcl: enable_syslog = (true|false)
// flag: -syslog
EnableSyslog bool
// EnableUI enables the statically-compiled assets for the Consul web UI and
// serves them at the default /ui/ endpoint automatically.
//
@ -858,40 +821,8 @@ type RuntimeConfig struct {
// hcl: leave_on_terminate = (true|false)
LeaveOnTerm bool
// LogLevel is the level of the logs to write. Defaults to "INFO".
//
// hcl: log_level = string
LogLevel string
// LogJSON controls whether to output logs as structured JSON. Defaults to false.
//
// hcl: log_json = (true|false)
// flag: -log-json
LogJSON bool
// LogFile is the path to the file where the logs get written to. Defaults to empty string.
//
// hcl: log_file = string
// flags: -log-file string
LogFile string
// LogRotateDuration is the time configured to rotate logs based on time
//
// hcl: log_rotate_duration = string
// flags: -log-rotate-duration string
LogRotateDuration time.Duration
// LogRotateBytes is the time configured to rotate logs based on bytes written
//
// hcl: log_rotate_bytes = int
// flags: -log-rotate-bytes int
LogRotateBytes int
// LogRotateMaxFiles is the maximum number of log file archives to keep
//
// hcl: log_rotate_max_files = int
// flags: -log-rotate-max-files int
LogRotateMaxFiles int
// Logging configuration used to initialize agent logging.
Logging logging.Config
// MaxQueryTime is the maximum amount of time a blocking query can wait
// before Consul will force a response. Consul applies jitter to the wait
@ -1422,12 +1353,6 @@ type RuntimeConfig struct {
// flag: -join-wan string -join-wan string
StartJoinAddrsWAN []string
// SyslogFacility is used to control where the syslog messages go
// By default, goes to LOCAL0
//
// hcl: syslog_facility = string
SyslogFacility string
// TLSCipherSuites is used to specify the list of supported ciphersuites.
//
// The values should be a list of the following values:

View File

@ -6,11 +6,9 @@ var entMetaJSON = `{}`
var entRuntimeConfigSanitize = `{}`
var entFullDNSJSONConfig = ``
var entTokenConfigSanitize = `"EnterpriseConfig": {},`
var entFullDNSHCLConfig = ``
var entFullRuntimeConfig = EnterpriseRuntimeConfig{}
func entFullRuntimeConfig(rt *RuntimeConfig) {}
var enterpriseNonVotingServerWarnings []string = []string{enterpriseConfigKeyError{key: "non_voting_server"}.Error()}

View File

@ -21,7 +21,9 @@ import (
"github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/checks"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/consul/types"
"github.com/stretchr/testify/require"
@ -48,9 +50,10 @@ type configTest struct {
// should check one option at a time if possible and should use generic
// values, e.g. 'a' or 1 instead of 'servicex' or 3306.
func TestConfigFlagsAndEdgecases(t *testing.T) {
func TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) {
dataDir := testutil.TempDir(t, "consul")
defer os.RemoveAll(dataDir)
defaultEntMeta := structs.DefaultEnterpriseMeta()
tests := []configTest{
// ------------------------------------------------------------
@ -289,7 +292,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
rt.EnableDebug = true
rt.EnableUI = true
rt.LeaveOnTerm = false
rt.LogLevel = "DEBUG"
rt.Logging.LogLevel = "DEBUG"
rt.RPCAdvertiseAddr = tcpAddr("127.0.0.1:8300")
rt.RPCBindAddr = tcpAddr("127.0.0.1:8300")
rt.SerfAdvertiseAddrLAN = tcpAddr("127.0.0.1:8301")
@ -424,6 +427,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
rt.EnableRemoteScriptChecks = true
rt.DataDir = dataDir
},
warns: []string{remoteScriptCheckSecurityWarning},
},
{
desc: "-encrypt",
@ -490,13 +494,6 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
writeFile(filepath.Join(dataDir, "conf"), []byte(`datacenter = "a"`))
},
},
{
desc: "-config-format invalid",
args: []string{
`-config-format=foobar`,
},
err: "-config-format must be either 'hcl' or 'json'",
},
{
desc: "-http-port",
args: []string{
@ -540,7 +537,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
`-data-dir=` + dataDir,
},
patch: func(rt *RuntimeConfig) {
rt.LogLevel = "a"
rt.Logging.LogLevel = "a"
rt.DataDir = dataDir
},
},
@ -551,7 +548,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
`-data-dir=` + dataDir,
},
patch: func(rt *RuntimeConfig) {
rt.LogJSON = true
rt.Logging.LogJSON = true
rt.DataDir = dataDir
},
},
@ -564,7 +561,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
json: []string{`{ "log_rotate_max_files": 2 }`},
hcl: []string{`log_rotate_max_files = 2`},
patch: func(rt *RuntimeConfig) {
rt.LogRotateMaxFiles = 2
rt.Logging.LogRotateMaxFiles = 2
rt.DataDir = dataDir
},
},
@ -842,7 +839,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
`-data-dir=` + dataDir,
},
patch: func(rt *RuntimeConfig) {
rt.EnableSyslog = true
rt.Logging.EnableSyslog = true
rt.DataDir = dataDir
},
},
@ -1584,7 +1581,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
args: []string{`-data-dir=` + dataDir},
json: []string{`this is not JSON`},
hcl: []string{`*** 0123 this is not HCL`},
err: "Error parsing",
err: "failed to parse",
},
{
desc: "datacenter is lower-cased",
@ -1617,7 +1614,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
json: []string{`{ "acl_replication_token": "a" }`},
hcl: []string{`acl_replication_token = "a"`},
patch: func(rt *RuntimeConfig) {
rt.ACLReplicationToken = "a"
rt.ACLTokens.ACLReplicationToken = "a"
rt.ACLTokenReplication = true
rt.DataDir = dataDir
},
@ -3292,17 +3289,15 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
err: "config_entries.bootstrap[0]: invalid config entry kind: foo",
},
{
desc: "ConfigEntry bootstrap invalid",
desc: "ConfigEntry bootstrap invalid service-defaults",
args: []string{`-data-dir=` + dataDir},
json: []string{`{
"config_entries": {
"bootstrap": [
{
"kind": "proxy-defaults",
"name": "invalid-name",
"config": {
"foo": "bar"
}
"kind": "service-defaults",
"name": "web",
"made_up_key": "blah"
}
]
}
@ -3310,14 +3305,12 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
hcl: []string{`
config_entries {
bootstrap {
kind = "proxy-defaults"
name = "invalid-name"
config {
foo = "bar"
}
kind = "service-defaults"
name = "web"
made_up_key = "blah"
}
}`},
err: "config_entries.bootstrap[0]: invalid name (\"invalid-name\"), only \"global\" is supported",
err: "config_entries.bootstrap[0]: 1 error occurred:\n\t* invalid config key \"made_up_key\"\n\n",
},
{
desc: "ConfigEntry bootstrap proxy-defaults (snake-case)",
@ -3363,6 +3356,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
&structs.ProxyConfigEntry{
Kind: structs.ProxyDefaults,
Name: structs.ProxyConfigGlobal,
EnterpriseMeta: *defaultEntMeta,
Config: map[string]interface{}{
"bar": "abc",
"moreconfig": map[string]interface{}{
@ -3420,6 +3414,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
&structs.ProxyConfigEntry{
Kind: structs.ProxyDefaults,
Name: structs.ProxyConfigGlobal,
EnterpriseMeta: *defaultEntMeta,
Config: map[string]interface{}{
"bar": "abc",
"moreconfig": map[string]interface{}{
@ -3442,6 +3437,10 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
{
"kind": "service-defaults",
"name": "web",
"meta" : {
"foo": "bar",
"gir": "zim"
},
"protocol": "http",
"external_sni": "abc-123",
"mesh_gateway": {
@ -3456,6 +3455,10 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
bootstrap {
kind = "service-defaults"
name = "web"
meta {
"foo" = "bar"
"gir" = "zim"
}
protocol = "http"
external_sni = "abc-123"
mesh_gateway {
@ -3469,6 +3472,11 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
&structs.ServiceConfigEntry{
Kind: structs.ServiceDefaults,
Name: "web",
Meta: map[string]string{
"foo": "bar",
"gir": "zim",
},
EnterpriseMeta: *defaultEntMeta,
Protocol: "http",
ExternalSNI: "abc-123",
MeshGateway: structs.MeshGatewayConfig{
@ -3487,6 +3495,10 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
{
"Kind": "service-defaults",
"Name": "web",
"Meta" : {
"foo": "bar",
"gir": "zim"
},
"Protocol": "http",
"ExternalSNI": "abc-123",
"MeshGateway": {
@ -3501,6 +3513,10 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
bootstrap {
Kind = "service-defaults"
Name = "web"
Meta {
"foo" = "bar"
"gir" = "zim"
}
Protocol = "http"
ExternalSNI = "abc-123"
MeshGateway {
@ -3514,6 +3530,11 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
&structs.ServiceConfigEntry{
Kind: structs.ServiceDefaults,
Name: "web",
Meta: map[string]string{
"foo": "bar",
"gir": "zim",
},
EnterpriseMeta: *defaultEntMeta,
Protocol: "http",
ExternalSNI: "abc-123",
MeshGateway: structs.MeshGatewayConfig{
@ -3532,6 +3553,10 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
{
"kind": "service-router",
"name": "main",
"meta" : {
"foo": "bar",
"gir": "zim"
},
"routes": [
{
"match": {
@ -3616,6 +3641,10 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
bootstrap {
kind = "service-router"
name = "main"
meta {
"foo" = "bar"
"gir" = "zim"
}
routes = [
{
match {
@ -3699,6 +3728,11 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
&structs.ServiceRouterConfigEntry{
Kind: structs.ServiceRouter,
Name: "main",
Meta: map[string]string{
"foo": "bar",
"gir": "zim",
},
EnterpriseMeta: *defaultEntMeta,
Routes: []structs.ServiceRoute{
{
Match: &structs.ServiceRouteMatch{
@ -3778,6 +3812,8 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
}
},
},
// TODO(rb): add in missing tests for ingress-gateway (snake + camel)
// TODO(rb): add in missing tests for terminating-gateway (snake + camel)
///////////////////////////////////
// Defaults sanity checks
@ -3986,6 +4022,7 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
"Both an intro token and intro token file are set. The intro token will be used instead of the file",
},
patch: func(rt *RuntimeConfig) {
rt.ConnectEnabled = true
rt.AutoConfig.Enabled = true
rt.AutoConfig.IntroToken = "blah"
rt.AutoConfig.IntroTokenFile = "blah"
@ -4080,6 +4117,48 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
err: `auto_config.authorization.static has invalid configuration: exactly one of 'JWTValidationPubKeys', 'JWKSURL', or 'OIDCDiscoveryURL' must be set for type "jwt"`,
},
{
desc: "auto config authorizer require token replication in secondary",
args: []string{
`-data-dir=` + dataDir,
`-server`,
},
hcl: []string{`
primary_datacenter = "otherdc"
acl {
enabled = true
}
auto_config {
authorization {
enabled = true
static {
jwks_url = "https://fake.uri.local"
oidc_discovery_url = "https://fake.uri.local"
}
}
}
cert_file = "foo"
`},
json: []string{`
{
"primary_datacenter": "otherdc",
"acl": {
"enabled": true
},
"auto_config": {
"authorization": {
"enabled": true,
"static": {
"jwks_url": "https://fake.uri.local",
"oidc_discovery_url": "https://fake.uri.local"
}
}
},
"cert_file": "foo"
}`},
err: `Enabling auto-config authorization (auto_config.authorization.enabled) in non primary datacenters with ACLs enabled (acl.enabled) requires also enabling ACL token replication (acl.enable_token_replication)`,
},
{
desc: "auto config authorizer invalid claim assertion",
args: []string{
@ -4245,38 +4324,27 @@ func testConfig(t *testing.T, tests []configTest, dataDir string) {
t.Fatal("NewBuilder", err)
}
// mock the hostname function unless a mock is provided
b.Hostname = tt.hostname
if b.Hostname == nil {
b.Hostname = func() (string, error) { return "nodex", nil }
patchBuilderShims(b)
if tt.hostname != nil {
b.hostname = tt.hostname
}
// mock the ip address detection
privatev4 := tt.privatev4
if privatev4 == nil {
privatev4 = func() ([]*net.IPAddr, error) {
return []*net.IPAddr{ipAddr("10.0.0.1")}, nil
if tt.privatev4 != nil {
b.getPrivateIPv4 = tt.privatev4
}
if tt.publicv6 != nil {
b.getPublicIPv6 = tt.publicv6
}
publicv6 := tt.publicv6
if publicv6 == nil {
publicv6 = func() ([]*net.IPAddr, error) {
return []*net.IPAddr{ipAddr("dead:beef::1")}, nil
}
}
b.GetPrivateIPv4 = privatev4
b.GetPublicIPv6 = publicv6
// read the source fragements
for i, data := range srcs {
b.Sources = append(b.Sources, Source{
b.Sources = append(b.Sources, FileSource{
Name: fmt.Sprintf("src-%d.%s", i, format),
Format: format,
Data: data,
})
}
for i, data := range tails {
b.Tail = append(b.Tail, Source{
b.Tail = append(b.Tail, FileSource{
Name: fmt.Sprintf("tail-%d.%s", i, format),
Format: format,
Data: data,
@ -4297,12 +4365,10 @@ func testConfig(t *testing.T, tests []configTest, dataDir string) {
if err != nil && tt.err != "" && !strings.Contains(err.Error(), tt.err) {
t.Fatalf("error %q does not contain %q", err.Error(), tt.err)
}
require.Equal(t, tt.warns, b.Warnings, "warnings")
// stop if we expected an error
if tt.err != "" {
return
}
require.Equal(t, tt.warns, b.Warnings, "warnings")
// build a default configuration, then patch the fields we expect to change
// and compare it with the generated configuration. Since the expected
@ -4311,9 +4377,9 @@ func testConfig(t *testing.T, tests []configTest, dataDir string) {
if err != nil {
t.Fatal(err)
}
x.Hostname = b.Hostname
x.GetPrivateIPv4 = func() ([]*net.IPAddr, error) { return []*net.IPAddr{ipAddr("10.0.0.1")}, nil }
x.GetPublicIPv6 = func() ([]*net.IPAddr, error) { return []*net.IPAddr{ipAddr("dead:beef::1")}, nil }
x.hostname = b.hostname
x.getPrivateIPv4 = func() ([]*net.IPAddr, error) { return []*net.IPAddr{ipAddr("10.0.0.1")}, nil }
x.getPublicIPv6 = func() ([]*net.IPAddr, error) { return []*net.IPAddr{ipAddr("dead:beef::1")}, nil }
expected, err := x.Build()
if err != nil {
t.Fatalf("build default failed: %s", err)
@ -4321,12 +4387,25 @@ func testConfig(t *testing.T, tests []configTest, dataDir string) {
if tt.patch != nil {
tt.patch(&expected)
}
// both DataDir fields should always be the same, so test for the
// invariant, and than updated the expected, so that every test
// case does not need to set this field.
require.Equal(t, actual.DataDir, actual.ACLTokens.DataDir)
expected.ACLTokens.DataDir = actual.ACLTokens.DataDir
require.Equal(t, expected, actual)
})
}
}
}
func TestNewBuilder_InvalidConfigFormat(t *testing.T) {
_, err := NewBuilder(BuilderOpts{ConfigFormat: "yaml"})
require.Error(t, err)
require.Contains(t, err.Error(), "-config-format must be either 'hcl' or 'json'")
}
// TestFullConfig tests the conversion from a fully populated JSON or
// HCL config file to a RuntimeConfig structure. All fields must be set
// to a unique non-zero value.
@ -4344,13 +4423,14 @@ func testConfig(t *testing.T, tests []configTest, dataDir string) {
//
func TestFullConfig(t *testing.T) {
dataDir := testutil.TempDir(t, "consul")
defer os.RemoveAll(dataDir)
cidr := func(s string) *net.IPNet {
_, n, _ := net.ParseCIDR(s)
return n
}
defaultEntMeta := structs.DefaultEnterpriseMeta()
flagSrc := []string{`-dev`}
src := map[string]string{
"json": `{
@ -5684,7 +5764,7 @@ func TestFullConfig(t *testing.T) {
tail := map[string][]Source{
"json": {
{
FileSource{
Name: "tail.non-user.json",
Format: "json",
Data: `
@ -5703,7 +5783,7 @@ func TestFullConfig(t *testing.T) {
"sync_coordinate_rate_target": 137.81
}`,
},
{
FileSource{
Name: "tail.consul.json",
Format: "json",
Data: `
@ -5727,7 +5807,7 @@ func TestFullConfig(t *testing.T) {
},
},
"hcl": {
{
FileSource{
Name: "tail.non-user.hcl",
Format: "hcl",
Data: `
@ -5745,7 +5825,7 @@ func TestFullConfig(t *testing.T) {
sync_coordinate_rate_target = 137.81
`,
},
{
FileSource{
Name: "tail.consul.hcl",
Format: "hcl",
Data: `
@ -5807,20 +5887,24 @@ func TestFullConfig(t *testing.T) {
// user configurable values
ACLAgentMasterToken: "64fd0e08",
ACLTokens: token.Config{
EnablePersistence: true,
DataDir: dataDir,
ACLDefaultToken: "418fdff1",
ACLAgentToken: "bed2377c",
ACLAgentMasterToken: "64fd0e08",
ACLReplicationToken: "5795983a",
},
ACLsEnabled: true,
ACLDatacenter: "ejtmd43d",
ACLDefaultPolicy: "72c2e7a0",
ACLDownPolicy: "03eb2aee",
ACLEnableKeyListPolicy: true,
ACLEnableTokenPersistence: true,
ACLMasterToken: "8a19ac27",
ACLReplicationToken: "5795983a",
ACLTokenTTL: 3321 * time.Second,
ACLPolicyTTL: 1123 * time.Second,
ACLRoleTTL: 9876 * time.Second,
ACLToken: "418fdff1",
ACLTokenReplication: true,
AdvertiseAddrLAN: ipAddr("17.99.29.16"),
AdvertiseAddrWAN: ipAddr("78.63.37.19"),
@ -5926,6 +6010,7 @@ func TestFullConfig(t *testing.T) {
&structs.ProxyConfigEntry{
Kind: structs.ProxyDefaults,
Name: structs.ProxyConfigGlobal,
EnterpriseMeta: *defaultEntMeta,
Config: map[string]interface{}{
"foo": "bar",
// has to be a float due to being a map[string]interface
@ -6025,7 +6110,6 @@ func TestFullConfig(t *testing.T) {
EnableDebug: true,
EnableRemoteScriptChecks: true,
EnableLocalScriptChecks: true,
EnableSyslog: true,
EnableUI: true,
EncryptKey: "A4wELWqH",
EncryptVerifyIncoming: true,
@ -6046,8 +6130,12 @@ func TestFullConfig(t *testing.T) {
KVMaxValueSize: 1234567800000000,
LeaveDrainTime: 8265 * time.Second,
LeaveOnTerm: true,
Logging: logging.Config{
LogLevel: "k1zo9Spt",
LogJSON: true,
EnableSyslog: true,
SyslogFacility: "hHv79Uia",
},
MaxQueryTime: 18237 * time.Second,
NodeID: types.NodeID("AsUIlw99"),
NodeMeta: map[string]string{"5mgGQMBk": "mJLtVMSG", "A7ynFMJB": "0Nx6RGab"},
@ -6383,7 +6471,6 @@ func TestFullConfig(t *testing.T) {
SkipLeaveOnInt: true,
StartJoinAddrsLAN: []string{"LR3hGDoG", "MwVpZ4Up"},
StartJoinAddrsWAN: []string{"EbFSc3nA", "kwXTh623"},
SyslogFacility: "hHv79Uia",
Telemetry: lib.TelemetryConfig{
CirconusAPIApp: "p4QOTe9j",
CirconusAPIToken: "E3j35V23",
@ -6446,9 +6533,10 @@ func TestFullConfig(t *testing.T) {
"args": []interface{}{"dltjDJ2a", "flEa7C2d"},
},
},
EnterpriseRuntimeConfig: entFullRuntimeConfig,
}
entFullRuntimeConfig(&want)
warns := []string{
`The 'acl_datacenter' field is deprecated. Use the 'primary_datacenter' field instead.`,
`bootstrap_expect > 0: expecting 53 servers`,
@ -6482,7 +6570,7 @@ func TestFullConfig(t *testing.T) {
if err != nil {
t.Fatalf("NewBuilder: %s", err)
}
b.Sources = append(b.Sources, Source{Name: "full." + format, Data: data, Format: format})
b.Sources = append(b.Sources, FileSource{Name: "full." + format, Data: data, Format: format})
b.Tail = append(b.Tail, tail[format]...)
b.Tail = append(b.Tail, VersionSource("JNtPSav3", "R909Hblt", "ZT1JOQLn"))
@ -6765,21 +6853,25 @@ func TestSanitize(t *testing.T) {
}
rtJSON := `{
"ACLTokens": {
` + entTokenConfigSanitize + `
"ACLAgentMasterToken": "hidden",
"ACLAgentToken": "hidden",
"ACLDefaultToken": "hidden",
"ACLReplicationToken": "hidden",
"DataDir": "",
"EnablePersistence": false
},
"ACLDatacenter": "",
"ACLDefaultPolicy": "",
"ACLDisabledTTL": "0s",
"ACLDownPolicy": "",
"ACLEnableKeyListPolicy": false,
"ACLEnableTokenPersistence": false,
"ACLMasterToken": "hidden",
"ACLPolicyTTL": "0s",
"ACLReplicationToken": "hidden",
"ACLRoleTTL": "0s",
"ACLTokenReplication": false,
"ACLTokenTTL": "0s",
"ACLToken": "hidden",
"ACLsEnabled": false,
"AEInterval": "0s",
"AdvertiseAddrLAN": "",
@ -6913,7 +7005,6 @@ func TestSanitize(t *testing.T) {
"EnableCentralServiceConfig": false,
"EnableLocalScriptChecks": false,
"EnableRemoteScriptChecks": false,
"EnableSyslog": false,
"EnableUI": false,
"EncryptKey": "hidden",
"EncryptVerifyIncoming": false,
@ -6939,12 +7030,17 @@ func TestSanitize(t *testing.T) {
"KVMaxValueSize": 1234567800000000,
"LeaveDrainTime": "0s",
"LeaveOnTerm": false,
"Logging": {
"EnableSyslog": false,
"LogLevel": "",
"LogJSON": false,
"LogFile": "",
"LogFilePath": "",
"LogRotateBytes": 0,
"LogRotateDuration": "0s",
"LogRotateMaxFiles": 0,
"Name": "",
"SyslogFacility": ""
},
"MaxQueryTime": "0s",
"NodeID": "",
"NodeMeta": {},
@ -7051,7 +7147,6 @@ func TestSanitize(t *testing.T) {
"StartJoinAddrsWAN": [],
"SyncCoordinateIntervalMin": "0s",
"SyncCoordinateRateTarget": 0,
"SyslogFacility": "",
"TLSCipherSuites": [],
"TLSMinVersion": "",
"TLSPreferServerCipherSuites": false,
@ -7072,6 +7167,7 @@ func TestSanitize(t *testing.T) {
"CirconusCheckTags": "",
"CirconusSubmissionInterval": "",
"CirconusSubmissionURL": "",
"Disable": false,
"DisableHostname": false,
"DogstatsdAddr": "",
"DogstatsdTags": [],

View File

@ -3,7 +3,6 @@
package config
import (
"os"
"testing"
"github.com/hashicorp/consul/sdk/testutil"
@ -11,7 +10,6 @@ import (
func TestSegments(t *testing.T) {
dataDir := testutil.TempDir(t, "consul")
defer os.RemoveAll(dataDir)
tests := []configTest{
{

View File

@ -2143,7 +2143,7 @@ func vetNodeTxnOp(op *structs.TxnNodeOp, rule acl.Authorizer) error {
var authzContext acl.AuthorizerContext
op.FillAuthzContext(&authzContext)
if rule != nil && rule.NodeWrite(op.Node.Node, &authzContext) != acl.Allow {
if rule.NodeWrite(op.Node.Node, &authzContext) != acl.Allow {
return acl.ErrPermissionDenied
}

View File

@ -1225,9 +1225,7 @@ func (a *ACL) PolicyDelete(args *structs.ACLPolicyDeleteRequest, reply *string)
return respErr
}
if policy != nil {
*reply = policy.Name
}
return nil
}
@ -1692,9 +1690,7 @@ func (a *ACL) RoleDelete(args *structs.ACLRoleDeleteRequest, reply *string) erro
return respErr
}
if role != nil {
*reply = role.Name
}
return nil
}
@ -2525,9 +2521,7 @@ func (a *ACL) Logout(args *structs.ACLLogoutRequest, reply *bool) error {
}
// Purge the identity from the cache to prevent using the previous definition of the identity
if token != nil {
a.srv.acls.cache.RemoveIdentity(tokenSecretCacheID(token.SecretID))
}
if respErr, ok := resp.(error); ok {
return respErr

View File

@ -229,7 +229,8 @@ func (r *aclPolicyReplicator) UpdateLocalBatch(ctx context.Context, srv *Server,
if err != nil {
return err
}
if respErr, ok := resp.(error); ok && err != nil {
if respErr, ok := resp.(error); ok {
return respErr
}
@ -336,9 +337,11 @@ func (r *aclRoleReplicator) DeleteLocalBatch(srv *Server, batch []string) error
if err != nil {
return err
}
if respErr, ok := resp.(error); ok && err != nil {
if respErr, ok := resp.(error); ok {
return respErr
}
return nil
}
@ -364,7 +367,8 @@ func (r *aclRoleReplicator) UpdateLocalBatch(ctx context.Context, srv *Server, s
if err != nil {
return err
}
if respErr, ok := resp.(error); ok && err != nil {
if respErr, ok := resp.(error); ok {
return respErr
}

View File

@ -147,10 +147,10 @@ func (s *Server) LocalTokensEnabled() bool {
}
if !s.config.ACLTokenReplication || s.tokens.ReplicationToken() == "" {
// token replication is off so local tokens are disabled
return false
}
// token replication is off so local tokens are disabled
return true
}

View File

@ -1639,8 +1639,8 @@ func TestACLResolver_Client(t *testing.T) {
// effectively disable caching - so the only way we end up with 1 token read is if they were
// being resolved concurrently
config.Config.ACLTokenTTL = 0 * time.Second
config.Config.ACLPolicyTTL = 30 * time.Millisecond
config.Config.ACLRoleTTL = 30 * time.Millisecond
config.Config.ACLPolicyTTL = 30 * time.Second
config.Config.ACLRoleTTL = 30 * time.Second
config.Config.ACLDownPolicy = "extend-cache"
})

View File

@ -7,7 +7,6 @@ import (
"io/ioutil"
"math/rand"
"net"
"os"
"path"
"testing"
"time"
@ -394,8 +393,6 @@ func TestAutoConfig_updateTLSSettingsInConfig(t *testing.T) {
require.NoError(t, err)
dir := testutil.TempDir(t, "auto-config-tls-settings")
t.Cleanup(func() { os.RemoveAll(dir) })
cafile := path.Join(dir, "cacert.pem")
err = ioutil.WriteFile(cafile, []byte(cacert), 0600)
require.NoError(t, err)
@ -602,8 +599,6 @@ func TestAutoConfig_updateTLSCertificatesInConfig(t *testing.T) {
// this is necessary but creation of the tlsutil.Configurator
// will error if it cannot load the CA certificate from disk
dir := testutil.TempDir(t, "auto-config-tls-certificate")
t.Cleanup(func() { os.RemoveAll(dir) })
cafile := path.Join(dir, "cacert.pem")
err = ioutil.WriteFile(cafile, []byte(cacert), 0600)
require.NoError(t, err)

View File

@ -1,239 +0,0 @@
package consul
import (
"context"
"fmt"
"net"
"strings"
"time"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/go-hclog"
"github.com/miekg/dns"
)
const (
dummyTrustDomain = "dummy.trustdomain"
retryJitterWindow = 30 * time.Second
)
func (c *Client) autoEncryptCSR(extraDNSSANs []string, extraIPSANs []net.IP) (string, string, error) {
// We don't provide the correct host here, because we don't know any
// better at this point. Apart from the domain, we would need the
// ClusterID, which we don't have. This is why we go with
// dummyTrustDomain the first time. Subsequent CSRs will have the
// correct TrustDomain.
id := &connect.SpiffeIDAgent{
Host: dummyTrustDomain,
Datacenter: c.config.Datacenter,
Agent: c.config.NodeName,
}
conf, err := c.config.CAConfig.GetCommonConfig()
if err != nil {
return "", "", err
}
if conf.PrivateKeyType == "" {
conf.PrivateKeyType = connect.DefaultPrivateKeyType
}
if conf.PrivateKeyBits == 0 {
conf.PrivateKeyBits = connect.DefaultPrivateKeyBits
}
// Create a new private key
pk, pkPEM, err := connect.GeneratePrivateKeyWithConfig(conf.PrivateKeyType, conf.PrivateKeyBits)
if err != nil {
return "", "", err
}
dnsNames := append([]string{"localhost"}, extraDNSSANs...)
ipAddresses := append([]net.IP{net.ParseIP("127.0.0.1"), net.ParseIP("::1")}, extraIPSANs...)
// Create a CSR.
//
// The Common Name includes the dummy trust domain for now but Server will
// override this when it is signed anyway so it's OK.
cn := connect.AgentCN(c.config.NodeName, dummyTrustDomain)
csr, err := connect.CreateCSR(id, cn, pk, dnsNames, ipAddresses)
if err != nil {
return "", "", err
}
return pkPEM, csr, nil
}
func (c *Client) RequestAutoEncryptCerts(ctx context.Context, servers []string, port int, token string, extraDNSSANs []string, extraIPSANs []net.IP) (*structs.SignedResponse, error) {
errFn := func(err error) (*structs.SignedResponse, error) {
return nil, err
}
// Check if we know about a server already through gossip. Depending on
// how the agent joined, there might already be one. Also in case this
// gets called because the cert expired.
server := c.routers.FindServer()
if server != nil {
servers = []string{server.Addr.String()}
}
if len(servers) == 0 {
return errFn(fmt.Errorf("No servers to request AutoEncrypt.Sign"))
}
pkPEM, csr, err := c.autoEncryptCSR(extraDNSSANs, extraIPSANs)
if err != nil {
return errFn(err)
}
// Prepare request and response so that it can be passed to
// RPCInsecure.
args := structs.CASignRequest{
WriteRequest: structs.WriteRequest{Token: token},
Datacenter: c.config.Datacenter,
CSR: csr,
}
var reply structs.SignedResponse
// Retry implementation modeled after https://github.com/hashicorp/consul/pull/5228.
// TLDR; there is a 30s window from which a random time is picked.
// Repeat until the call is successful.
attempts := 0
for {
select {
case <-ctx.Done():
return errFn(fmt.Errorf("aborting AutoEncrypt because interrupted: %w", ctx.Err()))
default:
}
// Translate host to net.TCPAddr to make life easier for
// RPCInsecure.
for _, s := range servers {
ips, err := resolveAddr(s, c.logger)
if err != nil {
c.logger.Warn("AutoEncrypt resolveAddr failed", "error", err)
continue
}
for _, ip := range ips {
addr := net.TCPAddr{IP: ip, Port: port}
if err = c.connPool.RPC(c.config.Datacenter, c.config.NodeName, &addr, "AutoEncrypt.Sign", &args, &reply); err == nil {
reply.IssuedCert.PrivateKeyPEM = pkPEM
return &reply, nil
} else {
c.logger.Warn("AutoEncrypt failed", "error", err)
}
}
}
attempts++
delay := lib.RandomStagger(retryJitterWindow)
interval := (time.Duration(attempts) * delay) + delay
c.logger.Warn("retrying AutoEncrypt", "retry_interval", interval)
select {
case <-time.After(interval):
continue
case <-ctx.Done():
return errFn(fmt.Errorf("aborting AutoEncrypt because interrupted: %w", ctx.Err()))
case <-c.shutdownCh:
return errFn(fmt.Errorf("aborting AutoEncrypt because shutting down"))
}
}
}
func missingPortError(host string, err error) bool {
return err != nil && err.Error() == fmt.Sprintf("address %s: missing port in address", host)
}
// resolveAddr is used to resolve the host into IPs and error.
func resolveAddr(rawHost string, logger hclog.Logger) ([]net.IP, error) {
host, _, err := net.SplitHostPort(rawHost)
if err != nil {
// In case we encounter this error, we proceed with the
// rawHost. This is fine since -start-join and -retry-join
// take only hosts anyways and this is an expected case.
if missingPortError(rawHost, err) {
host = rawHost
} else {
return nil, err
}
}
if ip := net.ParseIP(host); ip != nil {
return []net.IP{ip}, nil
}
// First try TCP so we have the best chance for the largest list of
// hosts to join. If this fails it's not fatal since this isn't a standard
// way to query DNS, and we have a fallback below.
if ips, err := tcpLookupIP(host, logger); err != nil {
logger.Debug("TCP-first lookup failed for host, falling back to UDP", "host", host, "error", err)
} else if len(ips) > 0 {
return ips, nil
}
// If TCP didn't yield anything then use the normal Go resolver which
// will try UDP, then might possibly try TCP again if the UDP response
// indicates it was truncated.
ips, err := net.LookupIP(host)
if err != nil {
return nil, err
}
return ips, nil
}
// tcpLookupIP is a helper to initiate a TCP-based DNS lookup for the given host.
// The built-in Go resolver will do a UDP lookup first, and will only use TCP if
// the response has the truncate bit set, which isn't common on DNS servers like
// Consul's. By doing the TCP lookup directly, we get the best chance for the
// largest list of hosts to join. Since joins are relatively rare events, it's ok
// to do this rather expensive operation.
func tcpLookupIP(host string, logger hclog.Logger) ([]net.IP, error) {
// Don't attempt any TCP lookups against non-fully qualified domain
// names, since those will likely come from the resolv.conf file.
if !strings.Contains(host, ".") {
return nil, nil
}
// Make sure the domain name is terminated with a dot (we know there's
// at least one character at this point).
dn := host
if dn[len(dn)-1] != '.' {
dn = dn + "."
}
// See if we can find a server to try.
cc, err := dns.ClientConfigFromFile("/etc/resolv.conf")
if err != nil {
return nil, err
}
if len(cc.Servers) > 0 {
// Do the lookup.
c := new(dns.Client)
c.Net = "tcp"
msg := new(dns.Msg)
msg.SetQuestion(dn, dns.TypeANY)
in, _, err := c.Exchange(msg, cc.Servers[0])
if err != nil {
return nil, err
}
// Handle any IPs we get back that we can attempt to join.
var ips []net.IP
for _, r := range in.Answer {
switch rr := r.(type) {
case (*dns.A):
ips = append(ips, rr.A)
case (*dns.AAAA):
ips = append(ips, rr.AAAA)
case (*dns.CNAME):
logger.Debug("Ignoring CNAME RR in TCP-first answer for host", "host", host)
}
}
return ips, nil
}
return nil, nil
}

View File

@ -1,205 +0,0 @@
package consul
import (
"context"
"crypto/x509"
"crypto/x509/pkix"
"encoding/asn1"
"net"
"net/url"
"os"
"testing"
"time"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/go-hclog"
"github.com/stretchr/testify/require"
)
func TestAutoEncrypt_resolveAddr(t *testing.T) {
type args struct {
rawHost string
logger hclog.Logger
}
logger := testutil.Logger(t)
tests := []struct {
name string
args args
ips []net.IP
wantErr bool
}{
{
name: "host without port",
args: args{
"127.0.0.1",
logger,
},
ips: []net.IP{net.IPv4(127, 0, 0, 1)},
wantErr: false,
},
{
name: "host with port",
args: args{
"127.0.0.1:1234",
logger,
},
ips: []net.IP{net.IPv4(127, 0, 0, 1)},
wantErr: false,
},
{
name: "host with broken port",
args: args{
"127.0.0.1:xyz",
logger,
},
ips: []net.IP{net.IPv4(127, 0, 0, 1)},
wantErr: false,
},
{
name: "not an address",
args: args{
"abc",
logger,
},
ips: nil,
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ips, err := resolveAddr(tt.args.rawHost, tt.args.logger)
if (err != nil) != tt.wantErr {
t.Errorf("resolveAddr error: %v, wantErr: %v", err, tt.wantErr)
return
}
require.Equal(t, tt.ips, ips)
})
}
}
func TestAutoEncrypt_missingPortError(t *testing.T) {
host := "127.0.0.1"
_, _, err := net.SplitHostPort(host)
require.True(t, missingPortError(host, err))
host = "127.0.0.1:1234"
_, _, err = net.SplitHostPort(host)
require.False(t, missingPortError(host, err))
}
func TestAutoEncrypt_RequestAutoEncryptCerts(t *testing.T) {
dir1, c1 := testClient(t)
defer os.RemoveAll(dir1)
defer c1.Shutdown()
servers := []string{"localhost"}
port := 8301
token := ""
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(75*time.Millisecond))
defer cancel()
doneCh := make(chan struct{})
var err error
go func() {
_, err = c1.RequestAutoEncryptCerts(ctx, servers, port, token, nil, nil)
close(doneCh)
}()
select {
case <-doneCh:
// since there are no servers at this port, we shouldn't be
// done and this should be an error of some sorts that happened
// in the setup phase before entering the for loop in
// RequestAutoEncryptCerts.
require.NoError(t, err)
case <-ctx.Done():
// this is the happy case since auto encrypt is in its loop to
// try to request certs.
}
}
func TestAutoEncrypt_autoEncryptCSR(t *testing.T) {
type testCase struct {
conf *Config
extraDNSSANs []string
extraIPSANs []net.IP
err string
// to validate the csr
expectedSubject pkix.Name
expectedSigAlg x509.SignatureAlgorithm
expectedPubAlg x509.PublicKeyAlgorithm
expectedDNSNames []string
expectedIPs []net.IP
expectedURIs []*url.URL
}
cases := map[string]testCase{
"sans": {
conf: &Config{
Datacenter: "dc1",
NodeName: "test-node",
CAConfig: &structs.CAConfiguration{},
},
extraDNSSANs: []string{"foo.local", "bar.local"},
extraIPSANs: []net.IP{net.IPv4(198, 18, 0, 1), net.IPv4(198, 18, 0, 2)},
expectedSubject: pkix.Name{
CommonName: connect.AgentCN("test-node", dummyTrustDomain),
Names: []pkix.AttributeTypeAndValue{
{
// 2,5,4,3 is the CommonName type ASN1 identifier
Type: asn1.ObjectIdentifier{2, 5, 4, 3},
Value: "testnode.agnt.dummy.tr.consul",
},
},
},
expectedSigAlg: x509.ECDSAWithSHA256,
expectedPubAlg: x509.ECDSA,
expectedDNSNames: []string{
"localhost",
"foo.local",
"bar.local",
},
expectedIPs: []net.IP{
{127, 0, 0, 1},
net.ParseIP("::1"),
{198, 18, 0, 1},
{198, 18, 0, 2},
},
expectedURIs: []*url.URL{
{
Scheme: "spiffe",
Host: dummyTrustDomain,
Path: "/agent/client/dc/dc1/id/test-node",
},
},
},
}
for name, tcase := range cases {
t.Run(name, func(t *testing.T) {
client := Client{config: tcase.conf}
_, csr, err := client.autoEncryptCSR(tcase.extraDNSSANs, tcase.extraIPSANs)
if tcase.err == "" {
require.NoError(t, err)
request, err := connect.ParseCSR(csr)
require.NoError(t, err)
require.NotNil(t, request)
require.Equal(t, tcase.expectedSubject, request.Subject)
require.Equal(t, tcase.expectedSigAlg, request.SignatureAlgorithm)
require.Equal(t, tcase.expectedPubAlg, request.PublicKeyAlgorithm)
require.Equal(t, tcase.expectedDNSNames, request.DNSNames)
require.Equal(t, tcase.expectedIPs, request.IPAddresses)
require.Equal(t, tcase.expectedURIs, request.URIs)
} else {
require.Error(t, err)
require.Empty(t, csr)
}
})
}
}

View File

@ -401,7 +401,6 @@ func TestAutopilot_PromoteNonVoter(t *testing.T) {
func TestAutopilot_MinQuorum(t *testing.T) {
dc := "dc1"
closeMap := make(map[string]chan struct{})
conf := func(c *Config) {
c.Datacenter = dc
c.Bootstrap = false
@ -409,13 +408,6 @@ func TestAutopilot_MinQuorum(t *testing.T) {
c.AutopilotConfig.MinQuorum = 3
c.RaftConfig.ProtocolVersion = raft.ProtocolVersion(2)
c.AutopilotInterval = 100 * time.Millisecond
//Let us know when a server is actually gone
ch := make(chan struct{})
c.NotifyShutdown = func() {
t.Logf("%v is shutdown", c.NodeName)
close(ch)
}
closeMap[c.NodeName] = ch
}
dir1, s1 := testServerWithConfig(t, conf)
defer os.RemoveAll(dir1)
@ -463,8 +455,7 @@ func TestAutopilot_MinQuorum(t *testing.T) {
if dead == nil {
t.Fatalf("no members set")
}
dead.Shutdown()
<-closeMap[dead.config.NodeName]
require.NoError(t, dead.Shutdown())
retry.Run(t, func(r *retry.R) {
leader := findStatus(true)
if leader == nil {
@ -480,10 +471,7 @@ func TestAutopilot_MinQuorum(t *testing.T) {
delete(servers, dead.config.NodeName)
//Autopilot should not take this one into left
dead = findStatus(false)
if err := dead.Shutdown(); err != nil {
t.Fatalf("could not shut down %s, error %v", dead.config.NodeName, err)
}
<-closeMap[dead.config.NodeName]
require.NoError(t, dead.Shutdown())
retry.Run(t, func(r *retry.R) {
leader := findStatus(true)
@ -496,5 +484,4 @@ func TestAutopilot_MinQuorum(t *testing.T) {
}
}
})
}

View File

@ -3,7 +3,6 @@ package consul
import (
"fmt"
"io"
"os"
"strconv"
"sync"
"sync/atomic"
@ -16,6 +15,7 @@ import (
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/tlsutil"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/serf/serf"
"golang.org/x/time/rate"
@ -60,9 +60,9 @@ type Client struct {
// Connection pool to consul servers
connPool *pool.ConnPool
// routers is responsible for the selection and maintenance of
// router is responsible for the selection and maintenance of
// Consul servers this agent uses for RPC requests
routers *router.Manager
router *router.Router
// rpcLimiter is used to rate limit the total number of RPCs initiated
// from an agent.
@ -89,58 +89,31 @@ type Client struct {
tlsConfigurator *tlsutil.Configurator
}
// NewClient is used to construct a new Consul client from the configuration,
// potentially returning an error.
// NewClient only used to help setting up a client for testing. Normal code
// exercises NewClientLogger.
func NewClient(config *Config) (*Client, error) {
c, err := tlsutil.NewConfigurator(config.ToTLSUtilConfig(), nil)
if err != nil {
return nil, err
}
return NewClientLogger(config, nil, c)
}
func NewClientWithOptions(config *Config, options ...ConsulOption) (*Client, error) {
// NewClient creates and returns a Client
func NewClient(config *Config, options ...ConsulOption) (*Client, error) {
flat := flattenConsulOptions(options)
logger := flat.logger
tlsConfigurator := flat.tlsConfigurator
connPool := flat.connPool
// Check the protocol version
if err := config.CheckProtocolVersion(); err != nil {
return nil, err
}
// Check for a data directory!
if config.DataDir == "" {
return nil, fmt.Errorf("Config must provide a DataDir")
}
// Sanity check the ACLs
if err := config.CheckACL(); err != nil {
return nil, err
}
// Ensure we have a log output
if config.LogOutput == nil {
config.LogOutput = os.Stderr
}
// Create a logger
if logger == nil {
logger = hclog.NewInterceptLogger(&hclog.LoggerOptions{
Level: hclog.Debug,
Output: config.LogOutput,
})
if flat.logger == nil {
return nil, fmt.Errorf("logger is required")
}
if connPool == nil {
connPool = &pool.ConnPool{
Server: false,
SrcAddr: config.RPCSrcAddr,
LogOutput: config.LogOutput,
Logger: flat.logger.StandardLogger(&hclog.StandardLoggerOptions{InferLevels: true}),
MaxTime: clientRPCConnMaxIdle,
MaxStreams: clientMaxStreams,
TLSConfigurator: tlsConfigurator,
@ -148,12 +121,14 @@ func NewClientWithOptions(config *Config, options ...ConsulOption) (*Client, err
}
}
logger := flat.logger.NamedIntercept(logging.ConsulClient)
// Create client
c := &Client{
config: config,
connPool: connPool,
eventCh: make(chan serf.Event, serfEventBacklog),
logger: logger.NamedIntercept(logging.ConsulClient),
logger: logger,
shutdownCh: make(chan struct{}),
tlsConfigurator: tlsConfigurator,
}
@ -188,15 +163,22 @@ func NewClientWithOptions(config *Config, options ...ConsulOption) (*Client, err
return nil, fmt.Errorf("Failed to start lan serf: %v", err)
}
// Start maintenance task for servers
c.routers = router.New(c.logger, c.shutdownCh, c.serf, c.connPool, "")
go c.routers.Start()
rpcRouter := flat.router
if rpcRouter == nil {
rpcRouter = router.NewRouter(logger, config.Datacenter, fmt.Sprintf("%s.%s", config.NodeName, config.Datacenter))
}
if err := rpcRouter.AddArea(types.AreaLAN, c.serf, c.connPool); err != nil {
c.Shutdown()
return nil, fmt.Errorf("Failed to add LAN area to the RPC router: %w", err)
}
c.router = rpcRouter
// Start LAN event handlers after the router is complete since the event
// handlers depend on the router and the router depends on Serf.
go c.lanEventHandler()
// This needs to happen after initializing c.routers to prevent a race
// This needs to happen after initializing c.router to prevent a race
// condition where the router manager is used when the pointer is nil
if c.acls.ACLsEnabled() {
go c.monitorACLMode()
@ -210,10 +192,6 @@ func NewClientWithOptions(config *Config, options ...ConsulOption) (*Client, err
return c, nil
}
func NewClientLogger(config *Config, logger hclog.InterceptLogger, tlsConfigurator *tlsutil.Configurator) (*Client, error) {
return NewClientWithOptions(config, WithLogger(logger), WithTLSConfigurator(tlsConfigurator))
}
// Shutdown is used to shutdown the client
func (c *Client) Shutdown() error {
c.logger.Info("shutting down client")
@ -308,7 +286,7 @@ func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
firstCheck := time.Now()
TRY:
server := c.routers.FindServer()
manager, server := c.router.FindLANRoute()
if server == nil {
return structs.ErrNoServers
}
@ -333,7 +311,7 @@ TRY:
"error", rpcErr,
)
metrics.IncrCounterWithLabels([]string{"client", "rpc", "failed"}, 1, []metrics.Label{{Name: "server", Value: server.Name}})
c.routers.NotifyFailedServer(server)
manager.NotifyFailedServer(server)
if retry := canRetry(args, rpcErr); !retry {
return rpcErr
}
@ -355,7 +333,7 @@ TRY:
// operation.
func (c *Client) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer,
replyFn structs.SnapshotReplyFn) error {
server := c.routers.FindServer()
manager, server := c.router.FindLANRoute()
if server == nil {
return structs.ErrNoServers
}
@ -371,6 +349,7 @@ func (c *Client) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io
var reply structs.SnapshotResponse
snap, err := SnapshotRPC(c.connPool, c.config.Datacenter, server.ShortName, server.Addr, args, in, &reply)
if err != nil {
manager.NotifyFailedServer(server)
return err
}
defer func() {
@ -399,7 +378,7 @@ func (c *Client) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io
// Stats is used to return statistics for debugging and insight
// for various sub-systems
func (c *Client) Stats() map[string]map[string]string {
numServers := c.routers.NumServers()
numServers := c.router.GetLANManager().NumServers()
toString := func(v uint64) string {
return strconv.FormatUint(v, 10)

View File

@ -9,6 +9,7 @@ import (
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/serf/serf"
)
@ -115,7 +116,7 @@ func (c *Client) nodeJoin(me serf.MemberEvent) {
continue
}
c.logger.Info("adding server", "server", parts)
c.routers.AddServer(parts)
c.router.AddServer(types.AreaLAN, parts)
// Trigger the callback
if c.config.ServerUp != nil {
@ -139,7 +140,7 @@ func (c *Client) nodeUpdate(me serf.MemberEvent) {
continue
}
c.logger.Info("updating server", "server", parts.String())
c.routers.AddServer(parts)
c.router.AddServer(types.AreaLAN, parts)
}
}
@ -151,7 +152,7 @@ func (c *Client) nodeFail(me serf.MemberEvent) {
continue
}
c.logger.Info("removing server", "server", parts.String())
c.routers.RemoveServer(parts)
c.router.RemoveServer(types.AreaLAN, parts)
}
}

View File

@ -26,15 +26,9 @@ func testClientConfig(t *testing.T) (string, *Config) {
config := DefaultConfig()
ports := freeport.MustTake(2)
returnPortsFn := func() {
// The method of plumbing this into the client shutdown hook doesn't
// cover all exit points, so we insulate this against multiple
// invocations and then it's safe to call it a bunch of times.
t.Cleanup(func() {
freeport.Return(ports)
config.NotifyShutdown = nil // self-erasing
}
config.NotifyShutdown = returnPortsFn
})
config.Datacenter = "dc1"
config.DataDir = dir
@ -48,8 +42,6 @@ func testClientConfig(t *testing.T) (string, *Config) {
config.SerfLANConfig.MemberlistConfig.ProbeTimeout = 200 * time.Millisecond
config.SerfLANConfig.MemberlistConfig.ProbeInterval = time.Second
config.SerfLANConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond
config.LogOutput = testutil.NewLogBuffer(t)
return dir, config
}
@ -72,15 +64,10 @@ func testClientWithConfigWithErr(t *testing.T, cb func(c *Config)) (string, *Cli
if cb != nil {
cb(config)
}
w := config.LogOutput
if w == nil {
w = os.Stderr
}
logger := hclog.NewInterceptLogger(&hclog.LoggerOptions{
Name: config.NodeName,
Level: hclog.Debug,
Output: w,
Output: testutil.NewLogBuffer(t),
})
tlsConf, err := tlsutil.NewConfigurator(config.ToTLSUtilConfig(), logger)
@ -88,10 +75,7 @@ func testClientWithConfigWithErr(t *testing.T, cb func(c *Config)) (string, *Cli
t.Fatalf("err: %v", err)
}
client, err := NewClientLogger(config, logger, tlsConf)
if err != nil {
config.NotifyShutdown()
}
client, err := NewClient(config, WithLogger(logger), WithTLSConfigurator(tlsConf))
return dir, client, err
}
@ -128,7 +112,7 @@ func TestClient_JoinLAN(t *testing.T) {
joinLAN(t, c1, s1)
testrpc.WaitForTestAgent(t, c1.RPC, "dc1")
retry.Run(t, func(r *retry.R) {
if got, want := c1.routers.NumServers(), 1; got != want {
if got, want := c1.router.GetLANManager().NumServers(), 1; got != want {
r.Fatalf("got %d servers want %d", got, want)
}
if got, want := len(s1.LANMembers()), 2; got != want {
@ -166,7 +150,7 @@ func TestClient_LANReap(t *testing.T) {
// Check the router has both
retry.Run(t, func(r *retry.R) {
server := c1.routers.FindServer()
server := c1.router.FindLANServer()
require.NotNil(t, server)
require.Equal(t, s1.config.NodeName, server.Name)
})
@ -176,7 +160,7 @@ func TestClient_LANReap(t *testing.T) {
retry.Run(t, func(r *retry.R) {
require.Len(r, c1.LANMembers(), 1)
server := c1.routers.FindServer()
server := c1.router.FindLANServer()
require.Nil(t, server)
})
}
@ -406,7 +390,7 @@ func TestClient_RPC_ConsulServerPing(t *testing.T) {
}
// Sleep to allow Serf to sync, shuffle, and let the shuffle complete
c.routers.ResetRebalanceTimer()
c.router.GetLANManager().ResetRebalanceTimer()
time.Sleep(time.Second)
if len(c.LANMembers()) != numServers+numClients {
@ -422,7 +406,7 @@ func TestClient_RPC_ConsulServerPing(t *testing.T) {
var pingCount int
for range servers {
time.Sleep(200 * time.Millisecond)
s := c.routers.FindServer()
m, s := c.router.FindLANRoute()
ok, err := c.connPool.Ping(s.Datacenter, s.ShortName, s.Addr)
if !ok {
t.Errorf("Unable to ping server %v: %s", s.String(), err)
@ -431,7 +415,7 @@ func TestClient_RPC_ConsulServerPing(t *testing.T) {
// Artificially fail the server in order to rotate the server
// list
c.routers.NotifyFailedServer(s)
m.NotifyFailedServer(s)
}
if pingCount != numServers {
@ -441,27 +425,20 @@ func TestClient_RPC_ConsulServerPing(t *testing.T) {
func TestClient_RPC_TLS(t *testing.T) {
t.Parallel()
dir1, conf1 := testServerConfig(t)
_, conf1 := testServerConfig(t)
conf1.VerifyIncoming = true
conf1.VerifyOutgoing = true
configureTLS(conf1)
s1, err := NewServer(conf1)
s1, err := newServer(t, conf1)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, conf2 := testClientConfig(t)
defer conf2.NotifyShutdown()
_, conf2 := testClientConfig(t)
conf2.VerifyOutgoing = true
configureTLS(conf2)
c1, err := NewClient(conf2)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir2)
defer c1.Shutdown()
c1 := newClient(t, conf2)
// Try an RPC
var out struct{}
@ -486,27 +463,38 @@ func TestClient_RPC_TLS(t *testing.T) {
})
}
func newClient(t *testing.T, config *Config) *Client {
t.Helper()
c, err := tlsutil.NewConfigurator(config.ToTLSUtilConfig(), nil)
require.NoError(t, err, "failed to create tls configuration")
logger := hclog.NewInterceptLogger(&hclog.LoggerOptions{
Level: hclog.Debug,
Output: testutil.NewLogBuffer(t),
})
client, err := NewClient(config, WithLogger(logger), WithTLSConfigurator(c))
require.NoError(t, err, "failed to create client")
t.Cleanup(func() {
client.Shutdown()
})
return client
}
func TestClient_RPC_RateLimit(t *testing.T) {
t.Parallel()
dir1, conf1 := testServerConfig(t)
s1, err := NewServer(conf1)
_, conf1 := testServerConfig(t)
s1, err := newServer(t, conf1)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir1)
defer s1.Shutdown()
testrpc.WaitForLeader(t, s1.RPC, "dc1")
dir2, conf2 := testClientConfig(t)
defer conf2.NotifyShutdown()
_, conf2 := testClientConfig(t)
conf2.RPCRate = 2
conf2.RPCMaxBurst = 2
c1, err := NewClient(conf2)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir2)
defer c1.Shutdown()
c1 := newClient(t, conf2)
joinLAN(t, c1, s1)
retry.Run(t, func(r *retry.R) {
@ -536,7 +524,7 @@ func TestClient_SnapshotRPC(t *testing.T) {
// Wait until we've got a healthy server.
retry.Run(t, func(r *retry.R) {
if got, want := c1.routers.NumServers(), 1; got != want {
if got, want := c1.router.GetLANManager().NumServers(), 1; got != want {
r.Fatalf("got %d servers want %d", got, want)
}
})
@ -560,25 +548,18 @@ func TestClient_SnapshotRPC(t *testing.T) {
func TestClient_SnapshotRPC_RateLimit(t *testing.T) {
t.Parallel()
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
_, s1 := testServer(t)
defer s1.Shutdown()
testrpc.WaitForLeader(t, s1.RPC, "dc1")
dir2, conf1 := testClientConfig(t)
defer conf1.NotifyShutdown()
_, conf1 := testClientConfig(t)
conf1.RPCRate = 2
conf1.RPCMaxBurst = 2
c1, err := NewClient(conf1)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir2)
defer c1.Shutdown()
c1 := newClient(t, conf1)
joinLAN(t, c1, s1)
retry.Run(t, func(r *retry.R) {
if got, want := c1.routers.NumServers(), 1; got != want {
if got, want := c1.router.GetLANManager().NumServers(), 1; got != want {
r.Fatalf("got %d servers want %d", got, want)
}
})
@ -597,27 +578,20 @@ func TestClient_SnapshotRPC_RateLimit(t *testing.T) {
func TestClient_SnapshotRPC_TLS(t *testing.T) {
t.Parallel()
dir1, conf1 := testServerConfig(t)
_, conf1 := testServerConfig(t)
conf1.VerifyIncoming = true
conf1.VerifyOutgoing = true
configureTLS(conf1)
s1, err := NewServer(conf1)
s1, err := newServer(t, conf1)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, conf2 := testClientConfig(t)
defer conf2.NotifyShutdown()
_, conf2 := testClientConfig(t)
conf2.VerifyOutgoing = true
configureTLS(conf2)
c1, err := NewClient(conf2)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir2)
defer c1.Shutdown()
c1 := newClient(t, conf2)
// Wait for the leader
testrpc.WaitForLeader(t, s1.RPC, "dc1")
@ -633,7 +607,7 @@ func TestClient_SnapshotRPC_TLS(t *testing.T) {
}
// Wait until we've got a healthy server.
if got, want := c1.routers.NumServers(), 1; got != want {
if got, want := c1.router.GetLANManager().NumServers(), 1; got != want {
r.Fatalf("got %d servers want %d", got, want)
}
})

View File

@ -2,7 +2,6 @@ package consul
import (
"fmt"
"io"
"net"
"os"
"time"
@ -120,9 +119,6 @@ type Config struct {
// configured at this point.
NotifyListen func()
// NotifyShutdown is called after Server is completely Shutdown.
NotifyShutdown func()
// RPCAddr is the RPC address used by Consul. This should be reachable
// by the WAN and LAN
RPCAddr *net.TCPAddr
@ -161,13 +157,6 @@ type Config struct {
// leader election.
ReconcileInterval time.Duration
// LogLevel is the level of the logs to write. Defaults to "INFO".
LogLevel string
// LogOutput is the location to write logs to. If this is not set,
// logs will go to stderr.
LogOutput io.Writer
// ProtocolVersion is the protocol version to speak. This must be between
// ProtocolVersionMin and ProtocolVersionMax.
ProtocolVersion uint8
@ -454,6 +443,10 @@ type Config struct {
// dead servers.
AutopilotInterval time.Duration
// MetricsReportingInterval is the frequency with which the server will
// report usage metrics to the configured go-metrics Sinks.
MetricsReportingInterval time.Duration
// ConnectEnabled is whether to enable Connect features such as the CA.
ConnectEnabled bool
@ -477,6 +470,9 @@ type Config struct {
// AutoEncrypt.Sign requests.
AutoEncryptAllowTLS bool
// TODO: godoc, set this value from Agent
EnableGRPCServer bool
// Embedded Consul Enterprise specific configuration
*EnterpriseConfig
}
@ -600,10 +596,15 @@ func DefaultConfig() *Config {
},
},
// Stay under the 10 second aggregation interval of
// go-metrics. This ensures we always report the
// usage metrics in each cycle.
MetricsReportingInterval: 9 * time.Second,
ServerHealthInterval: 2 * time.Second,
AutopilotInterval: 10 * time.Second,
DefaultQueryTime: 300 * time.Second,
MaxQueryTime: 600 * time.Second,
EnterpriseConfig: DefaultEnterpriseConfig(),
}

View File

@ -79,7 +79,8 @@ func (s *Server) reconcileLocalConfig(ctx context.Context, configs []structs.Con
if err != nil {
return false, fmt.Errorf("Failed to apply config %s: %v", op, err)
}
if respErr, ok := resp.(error); ok && err != nil {
if respErr, ok := resp.(error); ok {
return false, fmt.Errorf("Failed to apply config %s: %v", op, respErr)
}

View File

@ -10,6 +10,7 @@ import (
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
)
@ -161,23 +162,32 @@ func (c *Coordinate) Update(args *structs.CoordinateUpdateRequest, reply *struct
// ListDatacenters returns the list of datacenters and their respective nodes
// and the raw coordinates of those nodes (if no coordinates are available for
// any of the nodes, the node list may be empty).
// any of the nodes, the node list may be empty). This endpoint will not return
// information about the LAN network area.
func (c *Coordinate) ListDatacenters(args *struct{}, reply *[]structs.DatacenterMap) error {
maps, err := c.srv.router.GetDatacenterMaps()
if err != nil {
return err
}
var out []structs.DatacenterMap
// Strip the datacenter suffixes from all the node names.
for i := range maps {
suffix := fmt.Sprintf(".%s", maps[i].Datacenter)
for j := range maps[i].Coordinates {
node := maps[i].Coordinates[j].Node
maps[i].Coordinates[j].Node = strings.TrimSuffix(node, suffix)
}
for _, dcMap := range maps {
if dcMap.AreaID == types.AreaLAN {
continue
}
*reply = maps
suffix := fmt.Sprintf(".%s", dcMap.Datacenter)
for j := range dcMap.Coordinates {
node := dcMap.Coordinates[j].Node
dcMap.Coordinates[j].Node = strings.TrimSuffix(node, suffix)
}
out = append(out, dcMap)
}
*reply = out
return nil
}

View File

@ -707,6 +707,7 @@ func (c *compiler) getSplitterNode(sid structs.ServiceID) (*structs.DiscoveryGra
// sanely if there is some sort of graph loop below.
c.recordNode(splitNode)
var hasLB bool
for _, split := range splitter.Splits {
compiledSplit := &structs.DiscoverySplit{
Weight: split.Weight,
@ -739,6 +740,17 @@ func (c *compiler) getSplitterNode(sid structs.ServiceID) (*structs.DiscoveryGra
return nil, err
}
compiledSplit.NextNode = node.MapKey()
// There exists the possibility that a splitter may split between two distinct service names
// with distinct hash-based load balancer configs specified in their service resolvers.
// We cannot apply multiple hash policies to a splitter node's route action.
// Therefore, we attach the first hash-based load balancer config we encounter.
if !hasLB {
if lb := node.LoadBalancer; lb != nil && lb.IsHashBased() {
splitNode.LoadBalancer = node.LoadBalancer
hasLB = true
}
}
}
c.usesAdvancedRoutingFeatures = true
@ -851,6 +863,7 @@ RESOLVE_AGAIN:
Target: target.ID,
ConnectTimeout: connectTimeout,
},
LoadBalancer: resolver.LoadBalancer,
}
target.Subset = resolver.Subsets[target.ServiceSubset]
@ -1009,10 +1022,5 @@ func defaultIfEmpty(val, defaultVal string) string {
}
func enableAdvancedRoutingForProtocol(protocol string) bool {
switch protocol {
case "http", "http2", "grpc":
return true
default:
return false
}
return structs.IsProtocolHTTPLike(protocol)
}

View File

@ -51,6 +51,8 @@ func TestCompile(t *testing.T) {
"default resolver with external sni": testcase_DefaultResolver_ExternalSNI(),
"resolver with no entries and inferring defaults": testcase_DefaultResolver(),
"default resolver with proxy defaults": testcase_DefaultResolver_WithProxyDefaults(),
"loadbalancer splitter and resolver": testcase_LBSplitterAndResolver(),
"loadbalancer resolver": testcase_LBResolver(),
"service redirect to service with default resolver is not a default chain": testcase_RedirectToDefaultResolverIsNotDefaultChain(),
"all the bells and whistles": testcase_AllBellsAndWhistles(),
@ -1760,6 +1762,17 @@ func testcase_AllBellsAndWhistles() compileTestCase {
"prod": {Filter: "ServiceMeta.env == prod"},
"qa": {Filter: "ServiceMeta.env == qa"},
},
LoadBalancer: &structs.LoadBalancer{
Policy: "ring_hash",
RingHashConfig: &structs.RingHashConfig{
MaximumRingSize: 100,
},
HashPolicies: []structs.HashPolicy{
{
SourceIP: true,
},
},
},
},
&structs.ServiceResolverConfigEntry{
Kind: "service-resolver",
@ -1821,6 +1834,17 @@ func testcase_AllBellsAndWhistles() compileTestCase {
NextNode: "resolver:v3.main.default.dc1",
},
},
LoadBalancer: &structs.LoadBalancer{
Policy: "ring_hash",
RingHashConfig: &structs.RingHashConfig{
MaximumRingSize: 100,
},
HashPolicies: []structs.HashPolicy{
{
SourceIP: true,
},
},
},
},
"resolver:prod.redirected.default.dc1": {
Type: structs.DiscoveryGraphNodeTypeResolver,
@ -1829,6 +1853,17 @@ func testcase_AllBellsAndWhistles() compileTestCase {
ConnectTimeout: 5 * time.Second,
Target: "prod.redirected.default.dc1",
},
LoadBalancer: &structs.LoadBalancer{
Policy: "ring_hash",
RingHashConfig: &structs.RingHashConfig{
MaximumRingSize: 100,
},
HashPolicies: []structs.HashPolicy{
{
SourceIP: true,
},
},
},
},
"resolver:v1.main.default.dc1": {
Type: structs.DiscoveryGraphNodeTypeResolver,
@ -2219,6 +2254,231 @@ func testcase_CircularSplit() compileTestCase {
}
}
func testcase_LBSplitterAndResolver() compileTestCase {
entries := newEntries()
setServiceProtocol(entries, "foo", "http")
setServiceProtocol(entries, "bar", "http")
setServiceProtocol(entries, "baz", "http")
entries.AddSplitters(
&structs.ServiceSplitterConfigEntry{
Kind: "service-splitter",
Name: "main",
Splits: []structs.ServiceSplit{
{Weight: 60, Service: "foo"},
{Weight: 20, Service: "bar"},
{Weight: 20, Service: "baz"},
},
},
)
entries.AddResolvers(
&structs.ServiceResolverConfigEntry{
Kind: "service-resolver",
Name: "foo",
LoadBalancer: &structs.LoadBalancer{
Policy: "least_request",
LeastRequestConfig: &structs.LeastRequestConfig{
ChoiceCount: 3,
},
},
},
&structs.ServiceResolverConfigEntry{
Kind: "service-resolver",
Name: "bar",
LoadBalancer: &structs.LoadBalancer{
Policy: "ring_hash",
RingHashConfig: &structs.RingHashConfig{
MaximumRingSize: 101,
},
HashPolicies: []structs.HashPolicy{
{
SourceIP: true,
},
},
},
},
&structs.ServiceResolverConfigEntry{
Kind: "service-resolver",
Name: "baz",
LoadBalancer: &structs.LoadBalancer{
Policy: "maglev",
HashPolicies: []structs.HashPolicy{
{
Field: "cookie",
FieldValue: "chocolate-chip",
CookieConfig: &structs.CookieConfig{
TTL: 2 * time.Minute,
Path: "/bowl",
},
Terminal: true,
},
},
},
},
)
expect := &structs.CompiledDiscoveryChain{
Protocol: "http",
StartNode: "splitter:main.default",
Nodes: map[string]*structs.DiscoveryGraphNode{
"splitter:main.default": {
Type: structs.DiscoveryGraphNodeTypeSplitter,
Name: "main.default",
Splits: []*structs.DiscoverySplit{
{
Weight: 60,
NextNode: "resolver:foo.default.dc1",
},
{
Weight: 20,
NextNode: "resolver:bar.default.dc1",
},
{
Weight: 20,
NextNode: "resolver:baz.default.dc1",
},
},
// The LB config from bar is attached because splitters only care about hash-based policies,
// and it's the config from bar not baz because we pick the first one we encounter in the Splits.
LoadBalancer: &structs.LoadBalancer{
Policy: "ring_hash",
RingHashConfig: &structs.RingHashConfig{
MaximumRingSize: 101,
},
HashPolicies: []structs.HashPolicy{
{
SourceIP: true,
},
},
},
},
// Each service's LB config is passed down from the service-resolver to the resolver node
"resolver:foo.default.dc1": {
Type: structs.DiscoveryGraphNodeTypeResolver,
Name: "foo.default.dc1",
Resolver: &structs.DiscoveryResolver{
Default: false,
ConnectTimeout: 5 * time.Second,
Target: "foo.default.dc1",
},
LoadBalancer: &structs.LoadBalancer{
Policy: "least_request",
LeastRequestConfig: &structs.LeastRequestConfig{
ChoiceCount: 3,
},
},
},
"resolver:bar.default.dc1": {
Type: structs.DiscoveryGraphNodeTypeResolver,
Name: "bar.default.dc1",
Resolver: &structs.DiscoveryResolver{
Default: false,
ConnectTimeout: 5 * time.Second,
Target: "bar.default.dc1",
},
LoadBalancer: &structs.LoadBalancer{
Policy: "ring_hash",
RingHashConfig: &structs.RingHashConfig{
MaximumRingSize: 101,
},
HashPolicies: []structs.HashPolicy{
{
SourceIP: true,
},
},
},
},
"resolver:baz.default.dc1": {
Type: structs.DiscoveryGraphNodeTypeResolver,
Name: "baz.default.dc1",
Resolver: &structs.DiscoveryResolver{
Default: false,
ConnectTimeout: 5 * time.Second,
Target: "baz.default.dc1",
},
LoadBalancer: &structs.LoadBalancer{
Policy: "maglev",
HashPolicies: []structs.HashPolicy{
{
Field: "cookie",
FieldValue: "chocolate-chip",
CookieConfig: &structs.CookieConfig{
TTL: 2 * time.Minute,
Path: "/bowl",
},
Terminal: true,
},
},
},
},
},
Targets: map[string]*structs.DiscoveryTarget{
"foo.default.dc1": newTarget("foo", "", "default", "dc1", nil),
"bar.default.dc1": newTarget("bar", "", "default", "dc1", nil),
"baz.default.dc1": newTarget("baz", "", "default", "dc1", nil),
},
}
return compileTestCase{entries: entries, expect: expect}
}
// ensure chain with LB cfg in resolver isn't a default chain (!IsDefault)
func testcase_LBResolver() compileTestCase {
entries := newEntries()
setServiceProtocol(entries, "main", "http")
entries.AddResolvers(
&structs.ServiceResolverConfigEntry{
Kind: "service-resolver",
Name: "main",
LoadBalancer: &structs.LoadBalancer{
Policy: "ring_hash",
RingHashConfig: &structs.RingHashConfig{
MaximumRingSize: 101,
},
HashPolicies: []structs.HashPolicy{
{
SourceIP: true,
},
},
},
},
)
expect := &structs.CompiledDiscoveryChain{
Protocol: "http",
StartNode: "resolver:main.default.dc1",
Nodes: map[string]*structs.DiscoveryGraphNode{
"resolver:main.default.dc1": {
Type: structs.DiscoveryGraphNodeTypeResolver,
Name: "main.default.dc1",
Resolver: &structs.DiscoveryResolver{
Default: false,
ConnectTimeout: 5 * time.Second,
Target: "main.default.dc1",
},
LoadBalancer: &structs.LoadBalancer{
Policy: "ring_hash",
RingHashConfig: &structs.RingHashConfig{
MaximumRingSize: 101,
},
HashPolicies: []structs.HashPolicy{
{
SourceIP: true,
},
},
},
},
},
Targets: map[string]*structs.DiscoveryTarget{
"main.default.dc1": newTarget("main", "", "default", "dc1", nil),
},
}
return compileTestCase{entries: entries, expect: expect}
}
func newSimpleRoute(name string, muts ...func(*structs.ServiceRoute)) structs.ServiceRoute {
r := structs.ServiceRoute{
Match: &structs.ServiceRouteMatch{

View File

@ -158,7 +158,8 @@ func (r *FederationStateReplicator) PerformDeletions(ctx context.Context, deleti
if err != nil {
return false, err
}
if respErr, ok := resp.(error); ok && err != nil {
if respErr, ok := resp.(error); ok {
return false, respErr
}
@ -202,7 +203,8 @@ func (r *FederationStateReplicator) PerformUpdates(ctx context.Context, updatesR
if err != nil {
return false, err
}
if respErr, ok := resp.(error); ok && err != nil {
if respErr, ok := resp.(error); ok {
return false, respErr
}

View File

@ -99,6 +99,7 @@ func (s *snapshot) persistNodes(sink raft.SnapshotSink,
Address: n.Address,
TaggedAddresses: n.TaggedAddresses,
NodeMeta: n.Meta,
RaftIndex: n.RaftIndex,
}
// Register the node itself

Some files were not shown because too many files have changed in this diff Show More