// Package multihash is the Go implementation of // https://github.com/multiformats/multihash, or self-describing // hashes. package multihash import ( "encoding/hex" "errors" "fmt" "math" b58 "github.com/mr-tron/base58/base58" "github.com/multiformats/go-varint" ) // errors var ( ErrUnknownCode = errors.New("unknown multihash code") ErrTooShort = errors.New("multihash too short. must be >= 2 bytes") ErrTooLong = errors.New("multihash too long. must be < 129 bytes") ErrLenNotSupported = errors.New("multihash does not yet support digests longer than 127 bytes") ErrInvalidMultihash = errors.New("input isn't valid multihash") ErrVarintBufferShort = errors.New("uvarint: buffer too small") ErrVarintTooLong = errors.New("uvarint: varint too big (max 64bit)") ) // ErrInconsistentLen is returned when a decoded multihash has an inconsistent length type ErrInconsistentLen struct { dm DecodedMultihash lengthFound int } func (e ErrInconsistentLen) Error() string { return fmt.Sprintf("multihash length inconsistent: expected %d; got %d", e.dm.Length, e.lengthFound) } // constants const ( IDENTITY = 0x00 // Deprecated: use IDENTITY ID = IDENTITY SHA1 = 0x11 SHA2_256 = 0x12 SHA2_512 = 0x13 SHA3_224 = 0x17 SHA3_256 = 0x16 SHA3_384 = 0x15 SHA3_512 = 0x14 SHA3 = SHA3_512 KECCAK_224 = 0x1A KECCAK_256 = 0x1B KECCAK_384 = 0x1C KECCAK_512 = 0x1D BLAKE3 = 0x1E SHAKE_128 = 0x18 SHAKE_256 = 0x19 BLAKE2B_MIN = 0xb201 BLAKE2B_MAX = 0xb240 BLAKE2S_MIN = 0xb241 BLAKE2S_MAX = 0xb260 MD5 = 0xd5 DBL_SHA2_256 = 0x56 MURMUR3X64_64 = 0x22 // Deprecated: use MURMUR3X64_64 MURMUR3 = MURMUR3X64_64 SHA2_256_TRUNC254_PADDED = 0x1012 X11 = 0x1100 POSEIDON_BLS12_381_A1_FC1 = 0xb401 ) func init() { // Add blake2b (64 codes) for c := uint64(BLAKE2B_MIN); c <= BLAKE2B_MAX; c++ { n := c - BLAKE2B_MIN + 1 name := fmt.Sprintf("blake2b-%d", n*8) Names[name] = c Codes[c] = name } // Add blake2s (32 codes) for c := uint64(BLAKE2S_MIN); c <= BLAKE2S_MAX; c++ { n := c - BLAKE2S_MIN + 1 name := fmt.Sprintf("blake2s-%d", n*8) Names[name] = c Codes[c] = name } } // Names maps the name of a hash to the code var Names = map[string]uint64{ "identity": IDENTITY, "sha1": SHA1, "sha2-256": SHA2_256, "sha2-512": SHA2_512, "sha3": SHA3_512, "sha3-224": SHA3_224, "sha3-256": SHA3_256, "sha3-384": SHA3_384, "sha3-512": SHA3_512, "dbl-sha2-256": DBL_SHA2_256, "murmur3-x64-64": MURMUR3X64_64, "keccak-224": KECCAK_224, "keccak-256": KECCAK_256, "keccak-384": KECCAK_384, "keccak-512": KECCAK_512, "blake3": BLAKE3, "shake-128": SHAKE_128, "shake-256": SHAKE_256, "sha2-256-trunc254-padded": SHA2_256_TRUNC254_PADDED, "x11": X11, "md5": MD5, "poseidon-bls12_381-a2-fc1": POSEIDON_BLS12_381_A1_FC1, } // Codes maps a hash code to it's name var Codes = map[uint64]string{ IDENTITY: "identity", SHA1: "sha1", SHA2_256: "sha2-256", SHA2_512: "sha2-512", SHA3_224: "sha3-224", SHA3_256: "sha3-256", SHA3_384: "sha3-384", SHA3_512: "sha3-512", DBL_SHA2_256: "dbl-sha2-256", MURMUR3X64_64: "murmur3-x64-64", KECCAK_224: "keccak-224", KECCAK_256: "keccak-256", KECCAK_384: "keccak-384", KECCAK_512: "keccak-512", BLAKE3: "blake3", SHAKE_128: "shake-128", SHAKE_256: "shake-256", SHA2_256_TRUNC254_PADDED: "sha2-256-trunc254-padded", X11: "x11", POSEIDON_BLS12_381_A1_FC1: "poseidon-bls12_381-a2-fc1", MD5: "md5", } // reads a varint from buf and returns bytes read. func uvarint(buf []byte) (uint64, []byte, error) { n, c, err := varint.FromUvarint(buf) if err != nil { return n, buf, err } if c == 0 { return n, buf, ErrVarintBufferShort } else if c < 0 { return n, buf[-c:], ErrVarintTooLong } else { return n, buf[c:], nil } } // DecodedMultihash represents a parsed multihash and allows // easy access to the different parts of a multihash. type DecodedMultihash struct { Code uint64 Name string Length int // Length is just int as it is type of len() opearator Digest []byte // Digest holds the raw multihash bytes } // Multihash is byte slice with the following form: // . // See the spec for more information. type Multihash []byte // HexString returns the hex-encoded representation of a multihash. func (m Multihash) HexString() string { return hex.EncodeToString([]byte(m)) } // String is an alias to HexString(). func (m Multihash) String() string { return m.HexString() } // FromHexString parses a hex-encoded multihash. func FromHexString(s string) (Multihash, error) { b, err := hex.DecodeString(s) if err != nil { return Multihash{}, err } return Cast(b) } // B58String returns the B58-encoded representation of a multihash. func (m Multihash) B58String() string { return b58.Encode([]byte(m)) } // FromB58String parses a B58-encoded multihash. func FromB58String(s string) (m Multihash, err error) { b, err := b58.Decode(s) if err != nil { return Multihash{}, ErrInvalidMultihash } return Cast(b) } // Cast casts a buffer onto a multihash, and returns an error // if it does not work. func Cast(buf []byte) (Multihash, error) { _, err := Decode(buf) if err != nil { return Multihash{}, err } return Multihash(buf), nil } // Decode parses multihash bytes into a DecodedMultihash. func Decode(buf []byte) (*DecodedMultihash, error) { // outline decode allowing the &dm expression to be inlined into the caller. // This moves the heap allocation into the caller and if the caller doesn't // leak dm the compiler will use a stack allocation instead. // If you do not outline this &dm always heap allocate since the pointer is // returned which cause a heap allocation because Decode's stack frame is // about to disapear. dm, err := decode(buf) if err != nil { return nil, err } return &dm, nil } func decode(buf []byte) (dm DecodedMultihash, err error) { rlen, code, hdig, err := readMultihashFromBuf(buf) if err != nil { return DecodedMultihash{}, err } dm = DecodedMultihash{ Code: code, Name: Codes[code], Length: len(hdig), Digest: hdig, } if len(buf) != rlen { return dm, ErrInconsistentLen{dm, rlen} } return dm, nil } // Encode a hash digest along with the specified function code. // Note: the length is derived from the length of the digest itself. // // The error return is legacy; it is always nil. func Encode(buf []byte, code uint64) ([]byte, error) { // FUTURE: this function always causes heap allocs... but when used, this value is almost always going to be appended to another buffer (either as part of CID creation, or etc) -- should this whole function be rethought and alternatives offered? newBuf := make([]byte, varint.UvarintSize(code)+varint.UvarintSize(uint64(len(buf)))+len(buf)) n := varint.PutUvarint(newBuf, code) n += varint.PutUvarint(newBuf[n:], uint64(len(buf))) copy(newBuf[n:], buf) return newBuf, nil } // EncodeName is like Encode() but providing a string name // instead of a numeric code. See Names for allowed values. func EncodeName(buf []byte, name string) ([]byte, error) { return Encode(buf, Names[name]) } // readMultihashFromBuf reads a multihash from the given buffer, returning the // individual pieces of the multihash. // Note: the returned digest is a slice over the passed in data and should be // copied if the buffer will be reused func readMultihashFromBuf(buf []byte) (int, uint64, []byte, error) { initBufLength := len(buf) if initBufLength < 2 { return 0, 0, nil, ErrTooShort } var err error var code, length uint64 code, buf, err = uvarint(buf) if err != nil { return 0, 0, nil, err } length, buf, err = uvarint(buf) if err != nil { return 0, 0, nil, err } if length > math.MaxInt32 { return 0, 0, nil, errors.New("digest too long, supporting only <= 2^31-1") } if int(length) > len(buf) { return 0, 0, nil, errors.New("length greater than remaining number of bytes in buffer") } // rlen is the advertised size of the CID rlen := (initBufLength - len(buf)) + int(length) return rlen, code, buf[:length], nil } // MHFromBytes reads a multihash from the given byte buffer, returning the // number of bytes read as well as the multihash func MHFromBytes(buf []byte) (int, Multihash, error) { nr, _, _, err := readMultihashFromBuf(buf) if err != nil { return 0, nil, err } return nr, Multihash(buf[:nr]), nil }