consul/agent/operator_endpoint.go
hashicorp-copywrite[bot] 5fb9df1640
[COMPLIANCE] License changes (#18443)
* Adding explicit MPL license for sub-package

This directory and its subdirectories (packages) contain files licensed with the MPLv2 `LICENSE` file in this directory and are intentionally licensed separately from the BSL `LICENSE` file at the root of this repository.

* Adding explicit MPL license for sub-package

This directory and its subdirectories (packages) contain files licensed with the MPLv2 `LICENSE` file in this directory and are intentionally licensed separately from the BSL `LICENSE` file at the root of this repository.

* Updating the license from MPL to Business Source License

Going forward, this project will be licensed under the Business Source License v1.1. Please see our blog post for more details at <Blog URL>, FAQ at www.hashicorp.com/licensing-faq, and details of the license at www.hashicorp.com/bsl.

* add missing license headers

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

* Update copyright file headers to BUSL-1.1

---------

Co-authored-by: hashicorp-copywrite[bot] <110428419+hashicorp-copywrite[bot]@users.noreply.github.com>
2023-08-11 09:12:13 -04:00

457 lines
14 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package agent
import (
"fmt"
"net/http"
"strconv"
"time"
"github.com/armon/go-metrics"
external "github.com/hashicorp/consul/agent/grpc-external"
"github.com/hashicorp/consul/proto/private/pboperator"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/raft"
autopilot "github.com/hashicorp/raft-autopilot"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api"
)
// OperatorRaftConfiguration is used to inspect the current Raft configuration.
// This supports the stale query mode in case the cluster doesn't have a leader.
func (s *HTTPHandlers) OperatorRaftConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
var args structs.DCSpecificRequest
if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
return nil, nil
}
var reply structs.RaftConfigurationResponse
if err := s.agent.RPC(req.Context(), "Operator.RaftGetConfiguration", &args, &reply); err != nil {
return nil, err
}
return reply, nil
}
// OperatorRaftTransferLeader is used to transfer raft cluster leadership to another node
func (s *HTTPHandlers) OperatorRaftTransferLeader(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
var entMeta acl.EnterpriseMeta
if err := s.parseEntMetaPartition(req, &entMeta); err != nil {
return nil, err
}
params := req.URL.Query()
_, hasID := params["id"]
ID := ""
if hasID {
ID = params.Get("id")
}
args := pboperator.TransferLeaderRequest{
ID: ID,
}
var token string
s.parseToken(req, &token)
ctx, err := external.ContextWithQueryOptions(req.Context(), structs.QueryOptions{Token: token})
if err != nil {
return nil, err
}
result, err := s.agent.rpcClientOperator.TransferLeader(ctx, &args)
if err != nil {
return nil, err
}
if result.Success != true {
return nil, HTTPError{StatusCode: http.StatusNotFound, Reason: fmt.Sprintf("Failed to transfer Leader: %s", err.Error())}
}
reply := new(api.TransferLeaderResponse)
pboperator.TransferLeaderResponseToAPI(result, reply)
return reply, nil
}
// OperatorRaftPeer supports actions on Raft peers. Currently we only support
// removing peers by address.
func (s *HTTPHandlers) OperatorRaftPeer(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
var args structs.RaftRemovePeerRequest
s.parseDC(req, &args.Datacenter)
s.parseToken(req, &args.Token)
params := req.URL.Query()
_, hasID := params["id"]
if hasID {
args.ID = raft.ServerID(params.Get("id"))
}
_, hasAddress := params["address"]
if hasAddress {
args.Address = raft.ServerAddress(params.Get("address"))
}
if !hasID && !hasAddress {
return nil, HTTPError{
StatusCode: http.StatusBadRequest,
Reason: "Must specify either ?id with the server's ID or ?address with IP:port of peer to remove",
}
}
if hasID && hasAddress {
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: "Must specify only one of ?id or ?address"}
}
var reply struct{}
method := "Operator.RaftRemovePeerByID"
if hasAddress {
method = "Operator.RaftRemovePeerByAddress"
}
if err := s.agent.RPC(req.Context(), method, &args, &reply); err != nil {
return nil, err
}
return nil, nil
}
type keyringArgs struct {
Key string
Token string
RelayFactor uint8
LocalOnly bool // ?local-only; only used for GET requests
}
// OperatorKeyringEndpoint handles keyring operations (install, list, use, remove)
func (s *HTTPHandlers) OperatorKeyringEndpoint(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
var args keyringArgs
if req.Method == "POST" || req.Method == "PUT" || req.Method == "DELETE" {
if err := decodeBody(req.Body, &args); err != nil {
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Request decode failed: %v", err)}
}
}
s.parseToken(req, &args.Token)
// Parse relay factor
if relayFactor := req.URL.Query().Get("relay-factor"); relayFactor != "" {
n, err := strconv.Atoi(relayFactor)
if err != nil {
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Error parsing relay factor: %v", err)}
}
args.RelayFactor, err = ParseRelayFactor(n)
if err != nil {
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Invalid relay-factor: %v", err)}
}
}
// Parse local-only. local-only can only be used in GET requests.
if localOnly := req.URL.Query().Get("local-only"); localOnly != "" {
var err error
args.LocalOnly, err = strconv.ParseBool(localOnly)
if err != nil {
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Error parsing local-only: %v", err)}
}
err = ValidateLocalOnly(args.LocalOnly, req.Method == "GET")
if err != nil {
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Invalid use of local-only: %v", err)}
}
}
// Switch on the method
switch req.Method {
case "GET":
return s.KeyringList(resp, req, &args)
case "POST":
return s.KeyringInstall(resp, req, &args)
case "PUT":
return s.KeyringUse(resp, req, &args)
case "DELETE":
return s.KeyringRemove(resp, req, &args)
default:
return nil, MethodNotAllowedError{req.Method, []string{"GET", "POST", "PUT", "DELETE"}}
}
}
// KeyringInstall is used to install a new gossip encryption key into the cluster
func (s *HTTPHandlers) KeyringInstall(resp http.ResponseWriter, req *http.Request, args *keyringArgs) (interface{}, error) {
responses, err := s.agent.InstallKey(args.Key, args.Token, args.RelayFactor)
if err != nil {
return nil, err
}
return nil, keyringErrorsOrNil(responses.Responses)
}
// KeyringList is used to list the keys installed in the cluster
func (s *HTTPHandlers) KeyringList(resp http.ResponseWriter, req *http.Request, args *keyringArgs) (interface{}, error) {
responses, err := s.agent.ListKeys(args.Token, args.LocalOnly, args.RelayFactor)
if err != nil {
return nil, err
}
return responses.Responses, keyringErrorsOrNil(responses.Responses)
}
// KeyringRemove is used to list the keys installed in the cluster
func (s *HTTPHandlers) KeyringRemove(resp http.ResponseWriter, req *http.Request, args *keyringArgs) (interface{}, error) {
responses, err := s.agent.RemoveKey(args.Key, args.Token, args.RelayFactor)
if err != nil {
return nil, err
}
return nil, keyringErrorsOrNil(responses.Responses)
}
// KeyringUse is used to change the primary gossip encryption key
func (s *HTTPHandlers) KeyringUse(resp http.ResponseWriter, req *http.Request, args *keyringArgs) (interface{}, error) {
responses, err := s.agent.UseKey(args.Key, args.Token, args.RelayFactor)
if err != nil {
return nil, err
}
return nil, keyringErrorsOrNil(responses.Responses)
}
func keyringErrorsOrNil(responses []*structs.KeyringResponse) error {
var errs error
for _, response := range responses {
if response.Error != "" {
pool := response.Datacenter + " (LAN)"
if response.WAN {
pool = "WAN"
}
if response.Segment != "" {
pool += " [segment: " + response.Segment + "]"
} else if !acl.IsDefaultPartition(response.Partition) {
pool += " [partition: " + response.Partition + "]"
}
errs = multierror.Append(errs, fmt.Errorf("%s error: %s", pool, response.Error))
for key, message := range response.Messages {
errs = multierror.Append(errs, fmt.Errorf("%s: %s", key, message))
}
}
}
return errs
}
// OperatorAutopilotConfiguration is used to inspect the current Autopilot configuration.
// This supports the stale query mode in case the cluster doesn't have a leader.
func (s *HTTPHandlers) OperatorAutopilotConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
// Switch on the method
switch req.Method {
case "GET":
var args structs.DCSpecificRequest
if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
return nil, nil
}
var reply structs.AutopilotConfig
if err := s.agent.RPC(req.Context(), "Operator.AutopilotGetConfiguration", &args, &reply); err != nil {
return nil, err
}
out := api.AutopilotConfiguration{
CleanupDeadServers: reply.CleanupDeadServers,
LastContactThreshold: api.NewReadableDuration(reply.LastContactThreshold),
MaxTrailingLogs: reply.MaxTrailingLogs,
MinQuorum: reply.MinQuorum,
ServerStabilizationTime: api.NewReadableDuration(reply.ServerStabilizationTime),
RedundancyZoneTag: reply.RedundancyZoneTag,
DisableUpgradeMigration: reply.DisableUpgradeMigration,
UpgradeVersionTag: reply.UpgradeVersionTag,
CreateIndex: reply.CreateIndex,
ModifyIndex: reply.ModifyIndex,
}
return out, nil
case "PUT":
var args structs.AutopilotSetConfigRequest
s.parseDC(req, &args.Datacenter)
s.parseToken(req, &args.Token)
conf := api.NewAutopilotConfiguration()
if err := decodeBody(req.Body, &conf); err != nil {
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Error parsing autopilot config: %v", err)}
}
args.Config = structs.AutopilotConfig{
CleanupDeadServers: conf.CleanupDeadServers,
LastContactThreshold: conf.LastContactThreshold.Duration(),
MaxTrailingLogs: conf.MaxTrailingLogs,
MinQuorum: conf.MinQuorum,
ServerStabilizationTime: conf.ServerStabilizationTime.Duration(),
RedundancyZoneTag: conf.RedundancyZoneTag,
DisableUpgradeMigration: conf.DisableUpgradeMigration,
UpgradeVersionTag: conf.UpgradeVersionTag,
}
// Check for cas value
params := req.URL.Query()
if _, ok := params["cas"]; ok {
casVal, err := strconv.ParseUint(params.Get("cas"), 10, 64)
if err != nil {
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Error parsing cas value: %v", err)}
}
args.Config.ModifyIndex = casVal
args.CAS = true
}
var reply bool
if err := s.agent.RPC(req.Context(), "Operator.AutopilotSetConfiguration", &args, &reply); err != nil {
return nil, err
}
// Only use the out value if this was a CAS
if !args.CAS {
return true, nil
}
return reply, nil
default:
return nil, MethodNotAllowedError{req.Method, []string{"GET", "PUT"}}
}
}
// OperatorServerHealth is used to get the health of the servers in the local DC
func (s *HTTPHandlers) OperatorServerHealth(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
var args structs.DCSpecificRequest
if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
return nil, nil
}
var reply structs.AutopilotHealthReply
if err := s.agent.RPC(req.Context(), "Operator.ServerHealth", &args, &reply); err != nil {
return nil, err
}
// Reply with status 429 if something is unhealthy
if !reply.Healthy {
resp.WriteHeader(http.StatusTooManyRequests)
}
out := &api.OperatorHealthReply{
Healthy: reply.Healthy,
FailureTolerance: reply.FailureTolerance,
}
for _, server := range reply.Servers {
out.Servers = append(out.Servers, api.ServerHealth{
ID: server.ID,
Name: server.Name,
Address: server.Address,
Version: server.Version,
Leader: server.Leader,
SerfStatus: server.SerfStatus.String(),
LastContact: api.NewReadableDuration(server.LastContact),
LastTerm: server.LastTerm,
LastIndex: server.LastIndex,
Healthy: server.Healthy,
Voter: server.Voter,
StableSince: server.StableSince.Round(time.Second).UTC(),
})
}
return out, nil
}
func (s *HTTPHandlers) OperatorAutopilotState(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
var args structs.DCSpecificRequest
if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
return nil, nil
}
var reply autopilot.State
if err := s.agent.RPC(req.Context(), "Operator.AutopilotState", &args, &reply); err != nil {
return nil, err
}
out := autopilotToAPIState(&reply)
return out, nil
}
func (s *HTTPHandlers) OperatorUsage(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
metrics.IncrCounterWithLabels([]string{"client", "api", "operator_usage"}, 1,
s.nodeMetricsLabels())
var args structs.OperatorUsageRequest
if err := s.parseEntMetaNoWildcard(req, &args.EnterpriseMeta); err != nil {
return nil, err
}
if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
return nil, nil
}
if _, ok := req.URL.Query()["global"]; ok {
args.Global = true
}
// Make the RPC request
var out structs.Usage
defer setMeta(resp, &out.QueryMeta)
RETRY_ONCE:
err := s.agent.RPC(req.Context(), "Operator.Usage", &args, &out)
if err != nil {
metrics.IncrCounterWithLabels([]string{"client", "rpc", "error", "operator_usage"}, 1,
s.nodeMetricsLabels())
return nil, err
}
if args.QueryOptions.AllowStale && args.MaxStaleDuration > 0 && args.MaxStaleDuration < out.LastContact {
args.AllowStale = false
args.MaxStaleDuration = 0
goto RETRY_ONCE
}
out.ConsistencyLevel = args.QueryOptions.ConsistencyLevel()
metrics.IncrCounterWithLabels([]string{"client", "api", "success", "operator_usage"}, 1,
s.nodeMetricsLabels())
return out, nil
}
func stringIDs(ids []raft.ServerID) []string {
out := make([]string, len(ids))
for i, id := range ids {
out[i] = string(id)
}
return out
}
func autopilotToAPIState(state *autopilot.State) *api.AutopilotState {
out := &api.AutopilotState{
Healthy: state.Healthy,
FailureTolerance: state.FailureTolerance,
Leader: string(state.Leader),
Voters: stringIDs(state.Voters),
Servers: make(map[string]api.AutopilotServer),
}
for id, srv := range state.Servers {
out.Servers[string(id)] = autopilotToAPIServer(srv)
}
autopilotToAPIStateEnterprise(state, out)
return out
}
func autopilotToAPIServer(srv *autopilot.ServerState) api.AutopilotServer {
apiSrv := api.AutopilotServer{
ID: string(srv.Server.ID),
Name: srv.Server.Name,
Address: string(srv.Server.Address),
NodeStatus: string(srv.Server.NodeStatus),
Version: srv.Server.Version,
LastContact: api.NewReadableDuration(srv.Stats.LastContact),
LastTerm: srv.Stats.LastTerm,
LastIndex: srv.Stats.LastIndex,
Healthy: srv.Health.Healthy,
StableSince: srv.Health.StableSince,
Status: api.AutopilotServerStatus(srv.State),
Meta: srv.Server.Meta,
NodeType: api.AutopilotServerType(srv.Server.NodeType),
}
autopilotToAPIServerEnterprise(srv, &apiSrv)
return apiSrv
}