2023-05-30 10:49:52 -04:00

818 lines
21 KiB
Go

// Package cid implements the Content-IDentifiers specification
// (https://github.com/ipld/cid) in Go. CIDs are
// self-describing content-addressed identifiers useful for
// distributed information systems. CIDs are used in the IPFS
// (https://ipfs.io) project ecosystem.
//
// CIDs have two major versions. A CIDv0 corresponds to a multihash of type
// DagProtobuf, is deprecated and exists for compatibility reasons. Usually,
// CIDv1 should be used.
//
// A CIDv1 has four parts:
//
// <cidv1> ::= <multibase-prefix><cid-version><multicodec-packed-content-type><multihash-content-address>
//
// As shown above, the CID implementation relies heavily on Multiformats,
// particularly Multibase
// (https://github.com/multiformats/go-multibase), Multicodec
// (https://github.com/multiformats/multicodec) and Multihash
// implementations (https://github.com/multiformats/go-multihash).
package cid
import (
"bytes"
"encoding"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io"
"strings"
mbase "github.com/multiformats/go-multibase"
mh "github.com/multiformats/go-multihash"
varint "github.com/multiformats/go-varint"
)
// UnsupportedVersionString just holds an error message
const UnsupportedVersionString = "<unsupported cid version>"
// ErrInvalidCid is an error that indicates that a CID is invalid.
type ErrInvalidCid struct {
Err error
}
func (e ErrInvalidCid) Error() string {
return fmt.Sprintf("invalid cid: %s", e.Err)
}
func (e ErrInvalidCid) Unwrap() error {
return e.Err
}
func (e ErrInvalidCid) Is(err error) bool {
switch err.(type) {
case ErrInvalidCid, *ErrInvalidCid:
return true
default:
return false
}
}
var (
// ErrCidTooShort means that the cid passed to decode was not long
// enough to be a valid Cid
ErrCidTooShort = ErrInvalidCid{errors.New("cid too short")}
// ErrInvalidEncoding means that selected encoding is not supported
// by this Cid version
ErrInvalidEncoding = errors.New("invalid base encoding")
)
// Consts below are DEPRECATED and left only for legacy reasons:
// <https://github.com/ipfs/go-cid/pull/137>
// Modern code should use consts from go-multicodec instead:
// <https://github.com/multiformats/go-multicodec>
const (
// common ones
Raw = 0x55
DagProtobuf = 0x70 // https://ipld.io/docs/codecs/known/dag-pb/
DagCBOR = 0x71 // https://ipld.io/docs/codecs/known/dag-cbor/
DagJSON = 0x0129 // https://ipld.io/docs/codecs/known/dag-json/
Libp2pKey = 0x72 // https://github.com/libp2p/specs/blob/master/peer-ids/peer-ids.md#peer-ids
// other
GitRaw = 0x78
DagJOSE = 0x85 // https://ipld.io/specs/codecs/dag-jose/spec/
EthBlock = 0x90
EthBlockList = 0x91
EthTxTrie = 0x92
EthTx = 0x93
EthTxReceiptTrie = 0x94
EthTxReceipt = 0x95
EthStateTrie = 0x96
EthAccountSnapshot = 0x97
EthStorageTrie = 0x98
BitcoinBlock = 0xb0
BitcoinTx = 0xb1
ZcashBlock = 0xc0
ZcashTx = 0xc1
DecredBlock = 0xe0
DecredTx = 0xe1
DashBlock = 0xf0
DashTx = 0xf1
FilCommitmentUnsealed = 0xf101
FilCommitmentSealed = 0xf102
)
// tryNewCidV0 tries to convert a multihash into a CIDv0 CID and returns an
// error on failure.
func tryNewCidV0(mhash mh.Multihash) (Cid, error) {
// Need to make sure hash is valid for CidV0 otherwise we will
// incorrectly detect it as CidV1 in the Version() method
dec, err := mh.Decode(mhash)
if err != nil {
return Undef, ErrInvalidCid{err}
}
if dec.Code != mh.SHA2_256 || dec.Length != 32 {
return Undef, ErrInvalidCid{fmt.Errorf("invalid hash for cidv0 %d-%d", dec.Code, dec.Length)}
}
return Cid{string(mhash)}, nil
}
// NewCidV0 returns a Cid-wrapped multihash.
// They exist to allow IPFS to work with Cids while keeping
// compatibility with the plain-multihash format used used in IPFS.
// NewCidV1 should be used preferentially.
//
// Panics if the multihash isn't sha2-256.
func NewCidV0(mhash mh.Multihash) Cid {
c, err := tryNewCidV0(mhash)
if err != nil {
panic(err)
}
return c
}
// NewCidV1 returns a new Cid using the given multicodec-packed
// content type.
//
// Panics if the multihash is invalid.
func NewCidV1(codecType uint64, mhash mh.Multihash) Cid {
hashlen := len(mhash)
// Two 8 bytes (max) numbers plus hash.
// We use strings.Builder to only allocate once.
var b strings.Builder
b.Grow(1 + varint.UvarintSize(codecType) + hashlen)
b.WriteByte(1)
var buf [binary.MaxVarintLen64]byte
n := varint.PutUvarint(buf[:], codecType)
b.Write(buf[:n])
cn, _ := b.Write(mhash)
if cn != hashlen {
panic("copy hash length is inconsistent")
}
return Cid{b.String()}
}
var (
_ encoding.BinaryMarshaler = Cid{}
_ encoding.BinaryUnmarshaler = (*Cid)(nil)
_ encoding.TextMarshaler = Cid{}
_ encoding.TextUnmarshaler = (*Cid)(nil)
)
// Cid represents a self-describing content addressed
// identifier. It is formed by a Version, a Codec (which indicates
// a multicodec-packed content type) and a Multihash.
type Cid struct{ str string }
// Undef can be used to represent a nil or undefined Cid, using Cid{}
// directly is also acceptable.
var Undef = Cid{}
// Defined returns true if a Cid is defined
// Calling any other methods on an undefined Cid will result in
// undefined behavior.
func (c Cid) Defined() bool {
return c.str != ""
}
// Parse is a short-hand function to perform Decode, Cast etc... on
// a generic interface{} type.
func Parse(v interface{}) (Cid, error) {
switch v2 := v.(type) {
case string:
if strings.Contains(v2, "/ipfs/") {
return Decode(strings.Split(v2, "/ipfs/")[1])
}
return Decode(v2)
case []byte:
return Cast(v2)
case mh.Multihash:
return tryNewCidV0(v2)
case Cid:
return v2, nil
default:
return Undef, ErrInvalidCid{fmt.Errorf("can't parse %+v as Cid", v2)}
}
}
// MustParse calls Parse but will panic on error.
func MustParse(v interface{}) Cid {
c, err := Parse(v)
if err != nil {
panic(err)
}
return c
}
// Decode parses a Cid-encoded string and returns a Cid object.
// For CidV1, a Cid-encoded string is primarily a multibase string:
//
// <multibase-type-code><base-encoded-string>
//
// The base-encoded string represents a:
//
// <version><codec-type><multihash>
//
// Decode will also detect and parse CidV0 strings. Strings
// starting with "Qm" are considered CidV0 and treated directly
// as B58-encoded multihashes.
func Decode(v string) (Cid, error) {
if len(v) < 2 {
return Undef, ErrCidTooShort
}
if len(v) == 46 && v[:2] == "Qm" {
hash, err := mh.FromB58String(v)
if err != nil {
return Undef, ErrInvalidCid{err}
}
return tryNewCidV0(hash)
}
_, data, err := mbase.Decode(v)
if err != nil {
return Undef, ErrInvalidCid{err}
}
return Cast(data)
}
// Extract the encoding from a Cid. If Decode on the same string did
// not return an error neither will this function.
func ExtractEncoding(v string) (mbase.Encoding, error) {
if len(v) < 2 {
return -1, ErrCidTooShort
}
if len(v) == 46 && v[:2] == "Qm" {
return mbase.Base58BTC, nil
}
encoding := mbase.Encoding(v[0])
// check encoding is valid
_, err := mbase.NewEncoder(encoding)
if err != nil {
return -1, ErrInvalidCid{err}
}
return encoding, nil
}
// Cast takes a Cid data slice, parses it and returns a Cid.
// For CidV1, the data buffer is in the form:
//
// <version><codec-type><multihash>
//
// CidV0 are also supported. In particular, data buffers starting
// with length 34 bytes, which starts with bytes [18,32...] are considered
// binary multihashes.
//
// Please use decode when parsing a regular Cid string, as Cast does not
// expect multibase-encoded data. Cast accepts the output of Cid.Bytes().
func Cast(data []byte) (Cid, error) {
nr, c, err := CidFromBytes(data)
if err != nil {
return Undef, ErrInvalidCid{err}
}
if nr != len(data) {
return Undef, ErrInvalidCid{fmt.Errorf("trailing bytes in data buffer passed to cid Cast")}
}
return c, nil
}
// UnmarshalBinary is equivalent to Cast(). It implements the
// encoding.BinaryUnmarshaler interface.
func (c *Cid) UnmarshalBinary(data []byte) error {
casted, err := Cast(data)
if err != nil {
return err
}
c.str = casted.str
return nil
}
// UnmarshalText is equivalent to Decode(). It implements the
// encoding.TextUnmarshaler interface.
func (c *Cid) UnmarshalText(text []byte) error {
decodedCid, err := Decode(string(text))
if err != nil {
return err
}
c.str = decodedCid.str
return nil
}
// Version returns the Cid version.
func (c Cid) Version() uint64 {
if len(c.str) == 34 && c.str[0] == 18 && c.str[1] == 32 {
return 0
}
return 1
}
// Type returns the multicodec-packed content type of a Cid.
func (c Cid) Type() uint64 {
if c.Version() == 0 {
return DagProtobuf
}
_, n, _ := uvarint(c.str)
codec, _, _ := uvarint(c.str[n:])
return codec
}
// String returns the default string representation of a
// Cid. Currently, Base32 is used for CIDV1 as the encoding for the
// multibase string, Base58 is used for CIDV0.
func (c Cid) String() string {
switch c.Version() {
case 0:
return c.Hash().B58String()
case 1:
mbstr, err := mbase.Encode(mbase.Base32, c.Bytes())
if err != nil {
panic("should not error with hardcoded mbase: " + err.Error())
}
return mbstr
default:
panic("not possible to reach this point")
}
}
// String returns the string representation of a Cid
// encoded is selected base
func (c Cid) StringOfBase(base mbase.Encoding) (string, error) {
switch c.Version() {
case 0:
if base != mbase.Base58BTC {
return "", ErrInvalidEncoding
}
return c.Hash().B58String(), nil
case 1:
return mbase.Encode(base, c.Bytes())
default:
panic("not possible to reach this point")
}
}
// Encode return the string representation of a Cid in a given base
// when applicable. Version 0 Cid's are always in Base58 as they do
// not take a multibase prefix.
func (c Cid) Encode(base mbase.Encoder) string {
switch c.Version() {
case 0:
return c.Hash().B58String()
case 1:
return base.Encode(c.Bytes())
default:
panic("not possible to reach this point")
}
}
// Hash returns the multihash contained by a Cid.
func (c Cid) Hash() mh.Multihash {
bytes := c.Bytes()
if c.Version() == 0 {
return mh.Multihash(bytes)
}
// skip version length
_, n1, _ := varint.FromUvarint(bytes)
// skip codec length
_, n2, _ := varint.FromUvarint(bytes[n1:])
return mh.Multihash(bytes[n1+n2:])
}
// Bytes returns the byte representation of a Cid.
// The output of bytes can be parsed back into a Cid
// with Cast().
//
// If c.Defined() == false, it return a nil slice and may not
// be parsable with Cast().
func (c Cid) Bytes() []byte {
if !c.Defined() {
return nil
}
return []byte(c.str)
}
// ByteLen returns the length of the CID in bytes.
// It's equivalent to `len(c.Bytes())`, but works without an allocation,
// and should therefore be preferred.
//
// (See also the WriteTo method for other important operations that work without allocation.)
func (c Cid) ByteLen() int {
return len(c.str)
}
// WriteBytes writes the CID bytes to the given writer.
// This method works without incurring any allocation.
//
// (See also the ByteLen method for other important operations that work without allocation.)
func (c Cid) WriteBytes(w io.Writer) (int, error) {
n, err := io.WriteString(w, c.str)
if err != nil {
return n, err
}
if n != len(c.str) {
return n, fmt.Errorf("failed to write entire cid string")
}
return n, nil
}
// MarshalBinary is equivalent to Bytes(). It implements the
// encoding.BinaryMarshaler interface.
func (c Cid) MarshalBinary() ([]byte, error) {
return c.Bytes(), nil
}
// MarshalText is equivalent to String(). It implements the
// encoding.TextMarshaler interface.
func (c Cid) MarshalText() ([]byte, error) {
return []byte(c.String()), nil
}
// Equals checks that two Cids are the same.
// In order for two Cids to be considered equal, the
// Version, the Codec and the Multihash must match.
func (c Cid) Equals(o Cid) bool {
return c == o
}
// UnmarshalJSON parses the JSON representation of a Cid.
func (c *Cid) UnmarshalJSON(b []byte) error {
if len(b) < 2 {
return ErrInvalidCid{fmt.Errorf("invalid cid json blob")}
}
obj := struct {
CidTarget string `json:"/"`
}{}
objptr := &obj
err := json.Unmarshal(b, &objptr)
if err != nil {
return ErrInvalidCid{err}
}
if objptr == nil {
*c = Cid{}
return nil
}
if obj.CidTarget == "" {
return ErrInvalidCid{fmt.Errorf("cid was incorrectly formatted")}
}
out, err := Decode(obj.CidTarget)
if err != nil {
return ErrInvalidCid{err}
}
*c = out
return nil
}
// MarshalJSON procudes a JSON representation of a Cid, which looks as follows:
//
// { "/": "<cid-string>" }
//
// Note that this formatting comes from the IPLD specification
// (https://github.com/ipld/specs/tree/master/ipld)
func (c Cid) MarshalJSON() ([]byte, error) {
if !c.Defined() {
return []byte("null"), nil
}
return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil
}
// KeyString returns the binary representation of the Cid as a string
func (c Cid) KeyString() string {
return c.str
}
// Loggable returns a Loggable (as defined by
// https://godoc.org/github.com/ipfs/go-log).
func (c Cid) Loggable() map[string]interface{} {
return map[string]interface{}{
"cid": c,
}
}
// Prefix builds and returns a Prefix out of a Cid.
func (c Cid) Prefix() Prefix {
if c.Version() == 0 {
return Prefix{
MhType: mh.SHA2_256,
MhLength: 32,
Version: 0,
Codec: DagProtobuf,
}
}
offset := 0
version, n, _ := uvarint(c.str[offset:])
offset += n
codec, n, _ := uvarint(c.str[offset:])
offset += n
mhtype, n, _ := uvarint(c.str[offset:])
offset += n
mhlen, _, _ := uvarint(c.str[offset:])
return Prefix{
MhType: mhtype,
MhLength: int(mhlen),
Version: version,
Codec: codec,
}
}
// Prefix represents all the metadata of a Cid,
// that is, the Version, the Codec, the Multihash type
// and the Multihash length. It does not contains
// any actual content information.
// NOTE: The use -1 in MhLength to mean default length is deprecated,
//
// use the V0Builder or V1Builder structures instead
type Prefix struct {
Version uint64
Codec uint64
MhType uint64
MhLength int
}
// Sum uses the information in a prefix to perform a multihash.Sum()
// and return a newly constructed Cid with the resulting multihash.
func (p Prefix) Sum(data []byte) (Cid, error) {
length := p.MhLength
if p.MhType == mh.IDENTITY {
length = -1
}
if p.Version == 0 && (p.MhType != mh.SHA2_256 ||
(p.MhLength != 32 && p.MhLength != -1)) {
return Undef, ErrInvalidCid{fmt.Errorf("invalid v0 prefix")}
}
hash, err := mh.Sum(data, p.MhType, length)
if err != nil {
return Undef, ErrInvalidCid{err}
}
switch p.Version {
case 0:
return NewCidV0(hash), nil
case 1:
return NewCidV1(p.Codec, hash), nil
default:
return Undef, ErrInvalidCid{fmt.Errorf("invalid cid version")}
}
}
// Bytes returns a byte representation of a Prefix. It looks like:
//
// <version><codec><mh-type><mh-length>
func (p Prefix) Bytes() []byte {
size := varint.UvarintSize(p.Version)
size += varint.UvarintSize(p.Codec)
size += varint.UvarintSize(p.MhType)
size += varint.UvarintSize(uint64(p.MhLength))
buf := make([]byte, size)
n := varint.PutUvarint(buf, p.Version)
n += varint.PutUvarint(buf[n:], p.Codec)
n += varint.PutUvarint(buf[n:], p.MhType)
n += varint.PutUvarint(buf[n:], uint64(p.MhLength))
if n != size {
panic("size mismatch")
}
return buf
}
// PrefixFromBytes parses a Prefix-byte representation onto a
// Prefix.
func PrefixFromBytes(buf []byte) (Prefix, error) {
r := bytes.NewReader(buf)
vers, err := varint.ReadUvarint(r)
if err != nil {
return Prefix{}, ErrInvalidCid{err}
}
codec, err := varint.ReadUvarint(r)
if err != nil {
return Prefix{}, ErrInvalidCid{err}
}
mhtype, err := varint.ReadUvarint(r)
if err != nil {
return Prefix{}, ErrInvalidCid{err}
}
mhlen, err := varint.ReadUvarint(r)
if err != nil {
return Prefix{}, ErrInvalidCid{err}
}
return Prefix{
Version: vers,
Codec: codec,
MhType: mhtype,
MhLength: int(mhlen),
}, nil
}
func CidFromBytes(data []byte) (int, Cid, error) {
if len(data) > 2 && data[0] == mh.SHA2_256 && data[1] == 32 {
if len(data) < 34 {
return 0, Undef, ErrInvalidCid{fmt.Errorf("not enough bytes for cid v0")}
}
h, err := mh.Cast(data[:34])
if err != nil {
return 0, Undef, ErrInvalidCid{err}
}
return 34, Cid{string(h)}, nil
}
vers, n, err := varint.FromUvarint(data)
if err != nil {
return 0, Undef, ErrInvalidCid{err}
}
if vers != 1 {
return 0, Undef, ErrInvalidCid{fmt.Errorf("expected 1 as the cid version number, got: %d", vers)}
}
_, cn, err := varint.FromUvarint(data[n:])
if err != nil {
return 0, Undef, ErrInvalidCid{err}
}
mhnr, _, err := mh.MHFromBytes(data[n+cn:])
if err != nil {
return 0, Undef, ErrInvalidCid{err}
}
l := n + cn + mhnr
return l, Cid{string(data[0:l])}, nil
}
func toBufByteReader(r io.Reader, dst []byte) *bufByteReader {
// If the reader already implements ByteReader, use it directly.
// Otherwise, use a fallback that does 1-byte Reads.
if br, ok := r.(io.ByteReader); ok {
return &bufByteReader{direct: br, dst: dst}
}
return &bufByteReader{fallback: r, dst: dst}
}
type bufByteReader struct {
direct io.ByteReader
fallback io.Reader
dst []byte
}
func (r *bufByteReader) ReadByte() (byte, error) {
// The underlying reader has ReadByte; use it.
if br := r.direct; br != nil {
b, err := br.ReadByte()
if err != nil {
return 0, err
}
r.dst = append(r.dst, b)
return b, nil
}
// Fall back to a one-byte Read.
// TODO: consider reading straight into dst,
// once we have benchmarks and if they prove that to be faster.
var p [1]byte
if _, err := io.ReadFull(r.fallback, p[:]); err != nil {
return 0, err
}
r.dst = append(r.dst, p[0])
return p[0], nil
}
// CidFromReader reads a precise number of bytes for a CID from a given reader.
// It returns the number of bytes read, the CID, and any error encountered.
// The number of bytes read is accurate even if a non-nil error is returned.
//
// It's recommended to supply a reader that buffers and implements io.ByteReader,
// as CidFromReader has to do many single-byte reads to decode varints.
// If the argument only implements io.Reader, single-byte Read calls are used instead.
//
// If the Reader is found to yield zero bytes, an io.EOF error is returned directly, in all
// other error cases, an ErrInvalidCid, wrapping the original error, is returned.
func CidFromReader(r io.Reader) (int, Cid, error) {
// 64 bytes is enough for any CIDv0,
// and it's enough for most CIDv1s in practice.
// If the digest is too long, we'll allocate more.
br := toBufByteReader(r, make([]byte, 0, 64))
// We read the first varint, to tell if this is a CIDv0 or a CIDv1.
// The varint package wants a io.ByteReader, so we must wrap our io.Reader.
vers, err := varint.ReadUvarint(br)
if err != nil {
if err == io.EOF {
// First-byte read in ReadUvarint errors with io.EOF, so reader has no data.
// Subsequent reads with an EOF will return io.ErrUnexpectedEOF and be wrapped here.
return 0, Undef, err
}
return len(br.dst), Undef, ErrInvalidCid{err}
}
// If we have a CIDv0, read the rest of the bytes and cast the buffer.
if vers == mh.SHA2_256 {
if n, err := io.ReadFull(r, br.dst[1:34]); err != nil {
return len(br.dst) + n, Undef, ErrInvalidCid{err}
}
br.dst = br.dst[:34]
h, err := mh.Cast(br.dst)
if err != nil {
return len(br.dst), Undef, ErrInvalidCid{err}
}
return len(br.dst), Cid{string(h)}, nil
}
if vers != 1 {
return len(br.dst), Undef, ErrInvalidCid{fmt.Errorf("expected 1 as the cid version number, got: %d", vers)}
}
// CID block encoding multicodec.
_, err = varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, ErrInvalidCid{err}
}
// We could replace most of the code below with go-multihash's ReadMultihash.
// Note that it would save code, but prevent reusing buffers.
// Plus, we already have a ByteReader now.
mhStart := len(br.dst)
// Multihash hash function code.
_, err = varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, ErrInvalidCid{err}
}
// Multihash digest length.
mhl, err := varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, ErrInvalidCid{err}
}
// Refuse to make large allocations to prevent OOMs due to bugs.
const maxDigestAlloc = 32 << 20 // 32MiB
if mhl > maxDigestAlloc {
return len(br.dst), Undef, ErrInvalidCid{fmt.Errorf("refusing to allocate %d bytes for a digest", mhl)}
}
// Fine to convert mhl to int, given maxDigestAlloc.
prefixLength := len(br.dst)
cidLength := prefixLength + int(mhl)
if cidLength > cap(br.dst) {
// If the multihash digest doesn't fit in our initial 64 bytes,
// efficiently extend the slice via append+make.
br.dst = append(br.dst, make([]byte, cidLength-len(br.dst))...)
} else {
// The multihash digest fits inside our buffer,
// so just extend its capacity.
br.dst = br.dst[:cidLength]
}
if n, err := io.ReadFull(r, br.dst[prefixLength:cidLength]); err != nil {
// We can't use len(br.dst) here,
// as we've only read n bytes past prefixLength.
return prefixLength + n, Undef, ErrInvalidCid{err}
}
// This simply ensures the multihash is valid.
// TODO: consider removing this bit, as it's probably redundant;
// for now, it helps ensure consistency with CidFromBytes.
_, _, err = mh.MHFromBytes(br.dst[mhStart:])
if err != nil {
return len(br.dst), Undef, ErrInvalidCid{err}
}
return len(br.dst), Cid{string(br.dst)}, nil
}