2023-03-28 22:48:58 +00:00
|
|
|
// Copyright (c) HashiCorp, Inc.
|
|
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
|
Add storage backend interface and in-memory implementation (#16538)
Introduces `storage.Backend`, which will serve as the interface between the
Resource Service and the underlying storage system (Raft today, but in the
future, who knows!).
The primary design goal of this interface is to keep its surface area small,
and push as much functionality as possible into the layers above, so that new
implementations can be added with little effort, and easily proven to be
correct. To that end, we also provide a suite of "conformance" tests that can
be run against a backend implementation to check it behaves correctly.
In this commit, we introduce an initial in-memory storage backend, which is
suitable for tests and when running Consul in development mode. This backend is
a thin wrapper around the `Store` type, which implements a resource database
using go-memdb and our internal pub/sub system. `Store` will also be used to
handle reads in our Raft backend, and in the future, used as a local cache for
external storage systems.
2023-03-27 09:30:53 +00:00
|
|
|
package storage
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
|
|
|
|
"github.com/hashicorp/consul/proto-public/pbresource"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Wildcard can be given as Tenancy fields in List and Watch calls, to enumerate
|
|
|
|
// resources across multiple partitions, peers, namespaces, etc.
|
|
|
|
const Wildcard = "*"
|
|
|
|
|
|
|
|
var (
|
|
|
|
// ErrNotFound indicates that the resource could not be found.
|
|
|
|
ErrNotFound = errors.New("resource not found")
|
|
|
|
|
|
|
|
// ErrCASFailure indicates that the attempted write failed because the given
|
|
|
|
// version does not match what is currently stored.
|
|
|
|
ErrCASFailure = errors.New("CAS operation failed because the given version doesn't match what is stored")
|
|
|
|
|
|
|
|
// ErrWrongUid indicates that the attempted write failed because the resource's
|
|
|
|
// Uid doesn't match what is currently stored (e.g. the caller is trying to
|
|
|
|
// operate on a deleted resource with the same name).
|
|
|
|
ErrWrongUid = errors.New("write failed because the given uid doesn't match what is stored")
|
|
|
|
|
|
|
|
// ErrInconsistent indicates that the attempted write or consistent read could
|
|
|
|
// not be achieved because of a consistency or availability issue (e.g. loss of
|
|
|
|
// quorum, or when interacting with a Raft follower).
|
|
|
|
ErrInconsistent = errors.New("cannot satisfy consistency requirements")
|
|
|
|
)
|
|
|
|
|
|
|
|
// ReadConsistency is used to specify the required consistency guarantees for
|
|
|
|
// a read operation.
|
|
|
|
type ReadConsistency int
|
|
|
|
|
|
|
|
const (
|
|
|
|
// EventualConsistency provides a weak set of guarantees, but is much cheaper
|
|
|
|
// than using StrongConsistency and therefore should be treated as the default.
|
|
|
|
//
|
|
|
|
// It guarantees [monotonic reads]. That is, a read will always return results
|
|
|
|
// that are as up-to-date as an earlier read, provided both happen on the same
|
|
|
|
// Consul server. But does not make any such guarantee about writes.
|
|
|
|
//
|
|
|
|
// In other words, reads won't necessarily reflect earlier writes, even when
|
|
|
|
// made against the same server.
|
|
|
|
//
|
|
|
|
// Operations that don't allow the caller to specify the consistency mode will
|
|
|
|
// hold the same guarantees as EventualConsistency, but check the method docs
|
|
|
|
// for caveats.
|
|
|
|
//
|
|
|
|
// [monotonic reads]: https://jepsen.io/consistency/models/monotonic-reads
|
|
|
|
EventualConsistency ReadConsistency = iota
|
|
|
|
|
|
|
|
// StrongConsistency provides a very strong set of guarantees but is much more
|
|
|
|
// expensive, so should be used sparingly.
|
|
|
|
//
|
|
|
|
// It guarantees full [linearizability], such that a read will always return
|
|
|
|
// the most up-to-date version of a resource, without caveat.
|
|
|
|
//
|
|
|
|
// [linearizability]: https://jepsen.io/consistency/models/linearizable
|
|
|
|
StrongConsistency
|
|
|
|
)
|
|
|
|
|
|
|
|
// String implements the fmt.Stringer interface.
|
|
|
|
func (c ReadConsistency) String() string {
|
|
|
|
switch c {
|
|
|
|
case EventualConsistency:
|
|
|
|
return "Eventual Consistency"
|
|
|
|
case StrongConsistency:
|
|
|
|
return "Strong Consistency"
|
|
|
|
}
|
|
|
|
panic(fmt.Sprintf("unknown ReadConsistency (%d)", c))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Backend provides the low-level storage substrate for resources. It can be
|
|
|
|
// implemented using internal (i.e. Raft+MemDB) or external (e.g. DynamoDB)
|
|
|
|
// storage systems.
|
|
|
|
//
|
|
|
|
// Refer to the method comments for details of the behaviors and invariants
|
|
|
|
// provided, which are also verified by the conformance test suite in the
|
|
|
|
// internal/storage/conformance package.
|
|
|
|
//
|
|
|
|
// Cross-cutting concerns:
|
|
|
|
//
|
|
|
|
// # UIDs
|
|
|
|
//
|
|
|
|
// Users identify resources with a name of their choice (e.g. service "billing")
|
|
|
|
// but internally, we add our own identifier in the Uid field to disambiguate
|
|
|
|
// references when resources are deleted and re-created with the same name.
|
|
|
|
//
|
|
|
|
// # GroupVersion
|
|
|
|
//
|
|
|
|
// In order to support automatic translation between schema versions, we only
|
|
|
|
// store a single version of a resource, and treat types with the same Group
|
|
|
|
// and Kind, but different GroupVersions, as equivalent.
|
|
|
|
//
|
|
|
|
// # Read-Modify-Write Patterns
|
|
|
|
//
|
|
|
|
// All writes at the storage backend level are CAS (Compare-And-Swap) operations
|
|
|
|
// where the caller must provide the resource in its entirety, with the current
|
|
|
|
// version string.
|
|
|
|
//
|
|
|
|
// Non-CAS writes should be implemented at a higher level (i.e. in the Resource
|
|
|
|
// Service) by reading the resource, applying the user's requested modifications,
|
|
|
|
// and writing it back. This allows us to ensure we're correctly carrying over
|
|
|
|
// the resource's Status and Uid, without requiring support for partial update
|
|
|
|
// or "patch" operations from external storage systems.
|
|
|
|
//
|
|
|
|
// In cases where there are concurrent interleaving writes made to a resource,
|
|
|
|
// it's likely that a CAS operation will fail, so callers may need to put their
|
|
|
|
// Read-Modify-Write cycle in a retry loop.
|
|
|
|
type Backend interface {
|
|
|
|
// Read a resource using its ID.
|
|
|
|
//
|
|
|
|
// # UIDs
|
|
|
|
//
|
|
|
|
// If id.Uid is empty, Read will ignore it and return whatever resource is
|
|
|
|
// stored with the given name. This is the desired behavior for user-initiated
|
|
|
|
// reads.
|
|
|
|
//
|
|
|
|
// If id.Uid is non-empty, Read will only return a resource if its Uid matches,
|
|
|
|
// otherwise it'll return ErrNotFound. This is the desired behaviour for reads
|
|
|
|
// initiated by controllers, which tend to operate on a specific lifetime of a
|
|
|
|
// resource.
|
|
|
|
//
|
|
|
|
// See Backend docs for more details.
|
|
|
|
//
|
|
|
|
// # GroupVersion
|
|
|
|
//
|
|
|
|
// If id.Type.GroupVersion doesn't match what is stored, Read will return a
|
|
|
|
// GroupVersionMismatchError which contains a pointer to the stored resource.
|
|
|
|
//
|
|
|
|
// See Backend docs for more details.
|
|
|
|
//
|
|
|
|
// # Consistency
|
|
|
|
//
|
|
|
|
// Read supports both EventualConsistency and StrongConsistency.
|
|
|
|
Read(ctx context.Context, consistency ReadConsistency, id *pbresource.ID) (*pbresource.Resource, error)
|
|
|
|
|
|
|
|
// WriteCAS performs an atomic CAS (Compare-And-Swap) write of a resource based
|
|
|
|
// on its version. The given version will be compared to what is stored, and if
|
|
|
|
// it does not match, ErrCASFailure will be returned. To create new resources,
|
|
|
|
// set version to an empty string.
|
|
|
|
//
|
|
|
|
// If a write cannot be performed because of a consistency or availability
|
|
|
|
// issue (e.g. when interacting with a Raft follower, or when quorum is lost)
|
|
|
|
// ErrInconsistent will be returned.
|
|
|
|
//
|
|
|
|
// # UIDs
|
|
|
|
//
|
|
|
|
// UIDs are immutable, so if the given resource's Uid field doesn't match what
|
|
|
|
// is stored, ErrWrongUid will be returned.
|
|
|
|
//
|
|
|
|
// See Backend docs for more details.
|
|
|
|
//
|
|
|
|
// # GroupVersion
|
|
|
|
//
|
|
|
|
// Write does not validate the GroupVersion and allows you to overwrite a
|
|
|
|
// resource stored in an older form with a newer, and vice versa.
|
|
|
|
//
|
|
|
|
// See Backend docs for more details.
|
|
|
|
WriteCAS(ctx context.Context, res *pbresource.Resource) (*pbresource.Resource, error)
|
|
|
|
|
|
|
|
// DeleteCAS performs an atomic CAS (Compare-And-Swap) deletion of a resource
|
|
|
|
// based on its version. The given version will be compared to what is stored,
|
|
|
|
// and if it does not match, ErrCASFailure will be returned.
|
|
|
|
//
|
|
|
|
// If the resource does not exist (i.e. has already been deleted) no error will
|
|
|
|
// be returned.
|
|
|
|
//
|
|
|
|
// If a deletion cannot be performed because of a consistency or availability
|
|
|
|
// issue (e.g. when interacting with a Raft follower, or when quorum is lost)
|
|
|
|
// ErrInconsistent will be returned.
|
|
|
|
//
|
|
|
|
// # UIDs
|
|
|
|
//
|
|
|
|
// If the given id's Uid does not match what is stored, the deletion will be a
|
|
|
|
// no-op (i.e. it is considered to be a different resource).
|
|
|
|
//
|
|
|
|
// See Backend docs for more details.
|
|
|
|
//
|
|
|
|
// # GroupVersion
|
|
|
|
//
|
|
|
|
// Delete does not check or refer to the GroupVersion. Resources of the same
|
|
|
|
// Group and Kind are considered equivalent, so requests to delete a resource
|
|
|
|
// using a new GroupVersion will delete a resource even if it's stored with an
|
|
|
|
// old GroupVersion.
|
|
|
|
//
|
|
|
|
// See Backend docs for more details.
|
|
|
|
DeleteCAS(ctx context.Context, id *pbresource.ID, version string) error
|
|
|
|
|
|
|
|
// List resources of the given type, tenancy, and optionally matching the given
|
|
|
|
// name prefix.
|
|
|
|
//
|
|
|
|
// # Tenancy Wildcard
|
|
|
|
//
|
|
|
|
// In order to list resources across multiple tenancy units (e.g. namespaces)
|
|
|
|
// pass the Wildcard sentinel value in tenancy fields.
|
|
|
|
//
|
|
|
|
// # GroupVersion
|
|
|
|
//
|
|
|
|
// The resType argument contains only the Group and Kind, to reflect the fact
|
|
|
|
// that resources may be stored in a mix of old and new forms. As such, it's
|
|
|
|
// the caller's responsibility to check the resource's GroupVersion and
|
|
|
|
// translate or filter accordingly.
|
|
|
|
//
|
|
|
|
// # Consistency
|
|
|
|
//
|
|
|
|
// Generally, List only supports EventualConsistency. However, for backward
|
|
|
|
// compatability with our v1 APIs, the Raft backend supports StrongConsistency
|
|
|
|
// for list operations.
|
|
|
|
//
|
|
|
|
// When the v1 APIs finally goes away, so will this consistency parameter, so
|
|
|
|
// it should not be depended on outside of the backward compatability layer.
|
|
|
|
List(ctx context.Context, consistency ReadConsistency, resType UnversionedType, tenancy *pbresource.Tenancy, namePrefix string) ([]*pbresource.Resource, error)
|
|
|
|
|
|
|
|
// WatchList watches resources of the given type, tenancy, and optionally
|
|
|
|
// matching the given name prefix. Upsert events for the current state of the
|
|
|
|
// world (i.e. existing resources that match the given filters) will be emitted
|
|
|
|
// immediately, and will be followed by delta events whenever resources are
|
|
|
|
// written or deleted.
|
|
|
|
//
|
|
|
|
// # Consistency
|
|
|
|
//
|
|
|
|
// WatchList makes no guarantees about event timeliness (e.g. an event for a
|
|
|
|
// write may not be received immediately), but it does guarantee that events
|
|
|
|
// will be emitted in the correct order.
|
|
|
|
//
|
|
|
|
// There's also a guarantee of [monotonic reads] between Read and WatchList,
|
|
|
|
// such that Read will never return data that is older than the most recent
|
|
|
|
// event you received. Note: this guarantee holds at the (in-process) storage
|
|
|
|
// backend level, only. Controllers and other users of the Resource Service API
|
|
|
|
// must remain connected to the same Consul server process to avoid receiving
|
|
|
|
// events about writes that they then cannot read. In other words, it is *not*
|
|
|
|
// linearizable.
|
|
|
|
//
|
|
|
|
// There's a similar guarantee between WatchList and OwnerReferences, see the
|
|
|
|
// OwnerReferences docs for more information.
|
|
|
|
//
|
|
|
|
// See List docs for details about Tenancy Wildcard and GroupVersion.
|
|
|
|
//
|
|
|
|
// [monotonic reads]: https://jepsen.io/consistency/models/monotonic-reads
|
|
|
|
WatchList(ctx context.Context, resType UnversionedType, tenancy *pbresource.Tenancy, namePrefix string) (Watch, error)
|
|
|
|
|
|
|
|
// OwnerReferences returns the IDs of resources owned by the resource with the
|
|
|
|
// given ID. It is typically used to implement cascading deletion.
|
|
|
|
//
|
|
|
|
// # Consistency
|
|
|
|
//
|
|
|
|
// OwnerReferences may return stale results, but guarnantees [monotonic reads]
|
|
|
|
// with events received from WatchList. In practice, this means that if you
|
|
|
|
// learn that a resource has been deleted through a watch event, the results
|
|
|
|
// you receive from OwnerReferences will contain all references that existed
|
|
|
|
// at the time the owner was deleted. It doesn't make any guarantees about
|
|
|
|
// references that are created *after* the owner was deleted, though, so you
|
|
|
|
// must either prevent that from happening (e.g. by performing a consistent
|
|
|
|
// read of the owner in the write-path, which has its own ordering/correctness
|
|
|
|
// challenges), or by calling OwnerReferences after the expected window of
|
|
|
|
// inconsistency (e.g. deferring cascading deletion, or doing a second pass
|
|
|
|
// an hour later).
|
|
|
|
//
|
|
|
|
// [montonic reads]: https://jepsen.io/consistency/models/monotonic-reads
|
|
|
|
OwnerReferences(ctx context.Context, id *pbresource.ID) ([]*pbresource.ID, error)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Watch represents a watch on a given set of resources. Call Next to get the
|
|
|
|
// next event (i.e. upsert or deletion).
|
|
|
|
type Watch interface {
|
|
|
|
// Next returns the next event (i.e. upsert or deletion)
|
|
|
|
Next(ctx context.Context) (*pbresource.WatchEvent, error)
|
|
|
|
}
|
|
|
|
|
|
|
|
// UnversionedType represents a pbresource.Type as it is stored without the
|
|
|
|
// GroupVersion.
|
|
|
|
type UnversionedType struct {
|
|
|
|
Group string
|
|
|
|
Kind string
|
|
|
|
}
|
|
|
|
|
|
|
|
// UnversionedTypeFrom creates an UnversionedType from the given *pbresource.Type.
|
|
|
|
func UnversionedTypeFrom(t *pbresource.Type) UnversionedType {
|
|
|
|
return UnversionedType{
|
|
|
|
Group: t.Group,
|
|
|
|
Kind: t.Kind,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// GroupVersionMismatchError is returned when a resource is stored as a type
|
|
|
|
// with a different GroupVersion than was requested.
|
|
|
|
type GroupVersionMismatchError struct {
|
|
|
|
// RequestedType is the type that was requested.
|
|
|
|
RequestedType *pbresource.Type
|
|
|
|
|
|
|
|
// Stored is the resource as it is stored.
|
|
|
|
Stored *pbresource.Resource
|
|
|
|
}
|
|
|
|
|
|
|
|
// Error implements the error interface.
|
|
|
|
func (e GroupVersionMismatchError) Error() string {
|
|
|
|
return fmt.Sprintf(
|
|
|
|
"resource was requested with GroupVersion=%q, but stored with GroupVersion=%q",
|
|
|
|
e.RequestedType.GroupVersion,
|
|
|
|
e.Stored.Id.Type.GroupVersion,
|
|
|
|
)
|
|
|
|
}
|