consul/internal/storage/storage.go

308 lines
12 KiB
Go

package storage
import (
"context"
"errors"
"fmt"
"github.com/hashicorp/consul/proto-public/pbresource"
)
// Wildcard can be given as Tenancy fields in List and Watch calls, to enumerate
// resources across multiple partitions, peers, namespaces, etc.
const Wildcard = "*"
var (
// ErrNotFound indicates that the resource could not be found.
ErrNotFound = errors.New("resource not found")
// ErrCASFailure indicates that the attempted write failed because the given
// version does not match what is currently stored.
ErrCASFailure = errors.New("CAS operation failed because the given version doesn't match what is stored")
// ErrWrongUid indicates that the attempted write failed because the resource's
// Uid doesn't match what is currently stored (e.g. the caller is trying to
// operate on a deleted resource with the same name).
ErrWrongUid = errors.New("write failed because the given uid doesn't match what is stored")
// ErrInconsistent indicates that the attempted write or consistent read could
// not be achieved because of a consistency or availability issue (e.g. loss of
// quorum, or when interacting with a Raft follower).
ErrInconsistent = errors.New("cannot satisfy consistency requirements")
)
// ReadConsistency is used to specify the required consistency guarantees for
// a read operation.
type ReadConsistency int
const (
// EventualConsistency provides a weak set of guarantees, but is much cheaper
// than using StrongConsistency and therefore should be treated as the default.
//
// It guarantees [monotonic reads]. That is, a read will always return results
// that are as up-to-date as an earlier read, provided both happen on the same
// Consul server. But does not make any such guarantee about writes.
//
// In other words, reads won't necessarily reflect earlier writes, even when
// made against the same server.
//
// Operations that don't allow the caller to specify the consistency mode will
// hold the same guarantees as EventualConsistency, but check the method docs
// for caveats.
//
// [monotonic reads]: https://jepsen.io/consistency/models/monotonic-reads
EventualConsistency ReadConsistency = iota
// StrongConsistency provides a very strong set of guarantees but is much more
// expensive, so should be used sparingly.
//
// It guarantees full [linearizability], such that a read will always return
// the most up-to-date version of a resource, without caveat.
//
// [linearizability]: https://jepsen.io/consistency/models/linearizable
StrongConsistency
)
// String implements the fmt.Stringer interface.
func (c ReadConsistency) String() string {
switch c {
case EventualConsistency:
return "Eventual Consistency"
case StrongConsistency:
return "Strong Consistency"
}
panic(fmt.Sprintf("unknown ReadConsistency (%d)", c))
}
// Backend provides the low-level storage substrate for resources. It can be
// implemented using internal (i.e. Raft+MemDB) or external (e.g. DynamoDB)
// storage systems.
//
// Refer to the method comments for details of the behaviors and invariants
// provided, which are also verified by the conformance test suite in the
// internal/storage/conformance package.
//
// Cross-cutting concerns:
//
// # UIDs
//
// Users identify resources with a name of their choice (e.g. service "billing")
// but internally, we add our own identifier in the Uid field to disambiguate
// references when resources are deleted and re-created with the same name.
//
// # GroupVersion
//
// In order to support automatic translation between schema versions, we only
// store a single version of a resource, and treat types with the same Group
// and Kind, but different GroupVersions, as equivalent.
//
// # Read-Modify-Write Patterns
//
// All writes at the storage backend level are CAS (Compare-And-Swap) operations
// where the caller must provide the resource in its entirety, with the current
// version string.
//
// Non-CAS writes should be implemented at a higher level (i.e. in the Resource
// Service) by reading the resource, applying the user's requested modifications,
// and writing it back. This allows us to ensure we're correctly carrying over
// the resource's Status and Uid, without requiring support for partial update
// or "patch" operations from external storage systems.
//
// In cases where there are concurrent interleaving writes made to a resource,
// it's likely that a CAS operation will fail, so callers may need to put their
// Read-Modify-Write cycle in a retry loop.
type Backend interface {
// Read a resource using its ID.
//
// # UIDs
//
// If id.Uid is empty, Read will ignore it and return whatever resource is
// stored with the given name. This is the desired behavior for user-initiated
// reads.
//
// If id.Uid is non-empty, Read will only return a resource if its Uid matches,
// otherwise it'll return ErrNotFound. This is the desired behaviour for reads
// initiated by controllers, which tend to operate on a specific lifetime of a
// resource.
//
// See Backend docs for more details.
//
// # GroupVersion
//
// If id.Type.GroupVersion doesn't match what is stored, Read will return a
// GroupVersionMismatchError which contains a pointer to the stored resource.
//
// See Backend docs for more details.
//
// # Consistency
//
// Read supports both EventualConsistency and StrongConsistency.
Read(ctx context.Context, consistency ReadConsistency, id *pbresource.ID) (*pbresource.Resource, error)
// WriteCAS performs an atomic CAS (Compare-And-Swap) write of a resource based
// on its version. The given version will be compared to what is stored, and if
// it does not match, ErrCASFailure will be returned. To create new resources,
// set version to an empty string.
//
// If a write cannot be performed because of a consistency or availability
// issue (e.g. when interacting with a Raft follower, or when quorum is lost)
// ErrInconsistent will be returned.
//
// # UIDs
//
// UIDs are immutable, so if the given resource's Uid field doesn't match what
// is stored, ErrWrongUid will be returned.
//
// See Backend docs for more details.
//
// # GroupVersion
//
// Write does not validate the GroupVersion and allows you to overwrite a
// resource stored in an older form with a newer, and vice versa.
//
// See Backend docs for more details.
WriteCAS(ctx context.Context, res *pbresource.Resource) (*pbresource.Resource, error)
// DeleteCAS performs an atomic CAS (Compare-And-Swap) deletion of a resource
// based on its version. The given version will be compared to what is stored,
// and if it does not match, ErrCASFailure will be returned.
//
// If the resource does not exist (i.e. has already been deleted) no error will
// be returned.
//
// If a deletion cannot be performed because of a consistency or availability
// issue (e.g. when interacting with a Raft follower, or when quorum is lost)
// ErrInconsistent will be returned.
//
// # UIDs
//
// If the given id's Uid does not match what is stored, the deletion will be a
// no-op (i.e. it is considered to be a different resource).
//
// See Backend docs for more details.
//
// # GroupVersion
//
// Delete does not check or refer to the GroupVersion. Resources of the same
// Group and Kind are considered equivalent, so requests to delete a resource
// using a new GroupVersion will delete a resource even if it's stored with an
// old GroupVersion.
//
// See Backend docs for more details.
DeleteCAS(ctx context.Context, id *pbresource.ID, version string) error
// List resources of the given type, tenancy, and optionally matching the given
// name prefix.
//
// # Tenancy Wildcard
//
// In order to list resources across multiple tenancy units (e.g. namespaces)
// pass the Wildcard sentinel value in tenancy fields.
//
// # GroupVersion
//
// The resType argument contains only the Group and Kind, to reflect the fact
// that resources may be stored in a mix of old and new forms. As such, it's
// the caller's responsibility to check the resource's GroupVersion and
// translate or filter accordingly.
//
// # Consistency
//
// Generally, List only supports EventualConsistency. However, for backward
// compatability with our v1 APIs, the Raft backend supports StrongConsistency
// for list operations.
//
// When the v1 APIs finally goes away, so will this consistency parameter, so
// it should not be depended on outside of the backward compatability layer.
List(ctx context.Context, consistency ReadConsistency, resType UnversionedType, tenancy *pbresource.Tenancy, namePrefix string) ([]*pbresource.Resource, error)
// WatchList watches resources of the given type, tenancy, and optionally
// matching the given name prefix. Upsert events for the current state of the
// world (i.e. existing resources that match the given filters) will be emitted
// immediately, and will be followed by delta events whenever resources are
// written or deleted.
//
// # Consistency
//
// WatchList makes no guarantees about event timeliness (e.g. an event for a
// write may not be received immediately), but it does guarantee that events
// will be emitted in the correct order.
//
// There's also a guarantee of [monotonic reads] between Read and WatchList,
// such that Read will never return data that is older than the most recent
// event you received. Note: this guarantee holds at the (in-process) storage
// backend level, only. Controllers and other users of the Resource Service API
// must remain connected to the same Consul server process to avoid receiving
// events about writes that they then cannot read. In other words, it is *not*
// linearizable.
//
// There's a similar guarantee between WatchList and OwnerReferences, see the
// OwnerReferences docs for more information.
//
// See List docs for details about Tenancy Wildcard and GroupVersion.
//
// [monotonic reads]: https://jepsen.io/consistency/models/monotonic-reads
WatchList(ctx context.Context, resType UnversionedType, tenancy *pbresource.Tenancy, namePrefix string) (Watch, error)
// OwnerReferences returns the IDs of resources owned by the resource with the
// given ID. It is typically used to implement cascading deletion.
//
// # Consistency
//
// OwnerReferences may return stale results, but guarnantees [monotonic reads]
// with events received from WatchList. In practice, this means that if you
// learn that a resource has been deleted through a watch event, the results
// you receive from OwnerReferences will contain all references that existed
// at the time the owner was deleted. It doesn't make any guarantees about
// references that are created *after* the owner was deleted, though, so you
// must either prevent that from happening (e.g. by performing a consistent
// read of the owner in the write-path, which has its own ordering/correctness
// challenges), or by calling OwnerReferences after the expected window of
// inconsistency (e.g. deferring cascading deletion, or doing a second pass
// an hour later).
//
// [montonic reads]: https://jepsen.io/consistency/models/monotonic-reads
OwnerReferences(ctx context.Context, id *pbresource.ID) ([]*pbresource.ID, error)
}
// Watch represents a watch on a given set of resources. Call Next to get the
// next event (i.e. upsert or deletion).
type Watch interface {
// Next returns the next event (i.e. upsert or deletion)
Next(ctx context.Context) (*pbresource.WatchEvent, error)
}
// UnversionedType represents a pbresource.Type as it is stored without the
// GroupVersion.
type UnversionedType struct {
Group string
Kind string
}
// UnversionedTypeFrom creates an UnversionedType from the given *pbresource.Type.
func UnversionedTypeFrom(t *pbresource.Type) UnversionedType {
return UnversionedType{
Group: t.Group,
Kind: t.Kind,
}
}
// GroupVersionMismatchError is returned when a resource is stored as a type
// with a different GroupVersion than was requested.
type GroupVersionMismatchError struct {
// RequestedType is the type that was requested.
RequestedType *pbresource.Type
// Stored is the resource as it is stored.
Stored *pbresource.Resource
}
// Error implements the error interface.
func (e GroupVersionMismatchError) Error() string {
return fmt.Sprintf(
"resource was requested with GroupVersion=%q, but stored with GroupVersion=%q",
e.RequestedType.GroupVersion,
e.Stored.Id.Type.GroupVersion,
)
}