2018-07-04 13:51:47 +03:00
// Package multihash is the Go implementation of
// https://github.com/multiformats/multihash, or self-describing
// hashes.
package multihash
import (
"encoding/hex"
"errors"
"fmt"
"math"
b58 "github.com/mr-tron/base58/base58"
2021-06-16 16:19:45 -04:00
"github.com/multiformats/go-varint"
2018-07-04 13:51:47 +03:00
)
// Sentinel errors exposed by this package.
var (
	// ErrUnknownCode reports a hash function code that is not recognized.
	ErrUnknownCode = errors.New("unknown multihash code")

	// ErrTooShort reports a buffer with fewer than 2 bytes.
	ErrTooShort = errors.New("multihash too short. must be >= 2 bytes")
	// ErrTooLong reports a multihash exceeding the size limit named in its message.
	ErrTooLong = errors.New("multihash too long. must be < 129 bytes")
	// ErrLenNotSupported reports a digest longer than the limit named in its message.
	ErrLenNotSupported = errors.New("multihash does not yet support digests longer than 127 bytes")

	// ErrInvalidMultihash reports input that could not be parsed as a multihash.
	ErrInvalidMultihash = errors.New("input isn't valid multihash")

	// ErrVarintBufferShort reports that a varint could not be read because
	// the buffer ended too early.
	ErrVarintBufferShort = errors.New("uvarint: buffer too small")
	// ErrVarintTooLong reports a varint that does not fit in 64 bits.
	ErrVarintTooLong = errors.New("uvarint: varint too big (max 64bit)")
)
// ErrInconsistentLen is returned when a decoded multihash has an inconsistent length
type ErrInconsistentLen struct {
2023-06-30 09:41:32 -04:00
dm DecodedMultihash
2022-11-04 09:57:20 -04:00
lengthFound int
2018-07-04 13:51:47 +03:00
}
func ( e ErrInconsistentLen ) Error ( ) string {
2022-11-04 09:57:20 -04:00
return fmt . Sprintf ( "multihash length inconsistent: expected %d; got %d" , e . dm . Length , e . lengthFound )
2018-07-04 13:51:47 +03:00
}
// Hash function codes, grouped by family.
const (
	IDENTITY = 0x00
	// Deprecated: use IDENTITY
	ID = IDENTITY

	MD5  = 0xd5
	SHA1 = 0x11

	// SHA-2 and variants built on it.
	SHA2_256                 = 0x12
	SHA2_512                 = 0x13
	DBL_SHA2_256             = 0x56
	SHA2_256_TRUNC254_PADDED = 0x1012

	// SHA-3; SHA3 is an alias of SHA3_512.
	SHA3_512 = 0x14
	SHA3_384 = 0x15
	SHA3_256 = 0x16
	SHA3_224 = 0x17
	SHA3     = SHA3_512

	SHAKE_128 = 0x18
	SHAKE_256 = 0x19

	KECCAK_224 = 0x1A
	KECCAK_256 = 0x1B
	KECCAK_384 = 0x1C
	KECCAK_512 = 0x1D

	BLAKE3 = 0x1E

	// BLAKE2 code ranges; every code from MIN to MAX (inclusive) is
	// registered in Names and Codes by init.
	BLAKE2B_MIN = 0xb201
	BLAKE2B_MAX = 0xb240
	BLAKE2S_MIN = 0xb241
	BLAKE2S_MAX = 0xb260

	MURMUR3X64_64 = 0x22
	// Deprecated: use MURMUR3X64_64
	MURMUR3 = MURMUR3X64_64

	X11                       = 0x1100
	POSEIDON_BLS12_381_A1_FC1 = 0xb401
)
func init ( ) {
// Add blake2b (64 codes)
for c := uint64 ( BLAKE2B_MIN ) ; c <= BLAKE2B_MAX ; c ++ {
n := c - BLAKE2B_MIN + 1
name := fmt . Sprintf ( "blake2b-%d" , n * 8 )
Names [ name ] = c
Codes [ c ] = name
}
// Add blake2s (32 codes)
for c := uint64 ( BLAKE2S_MIN ) ; c <= BLAKE2S_MAX ; c ++ {
n := c - BLAKE2S_MIN + 1
name := fmt . Sprintf ( "blake2s-%d" , n * 8 )
Names [ name ] = c
Codes [ c ] = name
}
}
// Names maps the name of a hash to the code
var Names = map [ string ] uint64 {
2021-06-16 16:19:45 -04:00
"identity" : IDENTITY ,
"sha1" : SHA1 ,
"sha2-256" : SHA2_256 ,
"sha2-512" : SHA2_512 ,
"sha3" : SHA3_512 ,
"sha3-224" : SHA3_224 ,
"sha3-256" : SHA3_256 ,
"sha3-384" : SHA3_384 ,
"sha3-512" : SHA3_512 ,
"dbl-sha2-256" : DBL_SHA2_256 ,
2022-08-19 12:34:07 -04:00
"murmur3-x64-64" : MURMUR3X64_64 ,
2021-06-16 16:19:45 -04:00
"keccak-224" : KECCAK_224 ,
"keccak-256" : KECCAK_256 ,
"keccak-384" : KECCAK_384 ,
"keccak-512" : KECCAK_512 ,
2022-08-19 12:34:07 -04:00
"blake3" : BLAKE3 ,
2021-06-16 16:19:45 -04:00
"shake-128" : SHAKE_128 ,
"shake-256" : SHAKE_256 ,
"sha2-256-trunc254-padded" : SHA2_256_TRUNC254_PADDED ,
"x11" : X11 ,
"md5" : MD5 ,
"poseidon-bls12_381-a2-fc1" : POSEIDON_BLS12_381_A1_FC1 ,
2018-07-04 13:51:47 +03:00
}
// Codes maps a hash code to it's name
var Codes = map [ uint64 ] string {
2021-06-16 16:19:45 -04:00
IDENTITY : "identity" ,
SHA1 : "sha1" ,
SHA2_256 : "sha2-256" ,
SHA2_512 : "sha2-512" ,
SHA3_224 : "sha3-224" ,
SHA3_256 : "sha3-256" ,
SHA3_384 : "sha3-384" ,
SHA3_512 : "sha3-512" ,
DBL_SHA2_256 : "dbl-sha2-256" ,
2022-08-19 12:34:07 -04:00
MURMUR3X64_64 : "murmur3-x64-64" ,
2021-06-16 16:19:45 -04:00
KECCAK_224 : "keccak-224" ,
KECCAK_256 : "keccak-256" ,
KECCAK_384 : "keccak-384" ,
KECCAK_512 : "keccak-512" ,
2022-08-19 12:34:07 -04:00
BLAKE3 : "blake3" ,
2021-06-16 16:19:45 -04:00
SHAKE_128 : "shake-128" ,
SHAKE_256 : "shake-256" ,
SHA2_256_TRUNC254_PADDED : "sha2-256-trunc254-padded" ,
X11 : "x11" ,
POSEIDON_BLS12_381_A1_FC1 : "poseidon-bls12_381-a2-fc1" ,
MD5 : "md5" ,
2018-07-04 13:51:47 +03:00
}
2022-11-04 09:57:20 -04:00
// reads a varint from buf and returns bytes read.
2018-07-04 13:51:47 +03:00
func uvarint ( buf [ ] byte ) ( uint64 , [ ] byte , error ) {
2021-06-16 16:19:45 -04:00
n , c , err := varint . FromUvarint ( buf )
if err != nil {
return n , buf , err
}
2018-07-04 13:51:47 +03:00
if c == 0 {
return n , buf , ErrVarintBufferShort
} else if c < 0 {
return n , buf [ - c : ] , ErrVarintTooLong
} else {
return n , buf [ c : ] , nil
}
}
// DecodedMultihash represents a parsed multihash and allows
// easy access to the different parts of a multihash.
type DecodedMultihash struct {
Code uint64 // Code is the hash function code read from the multihash header
Name string // Name is the Codes entry for Code; it is empty when Code is not in Codes
Length int // Length is the digest length in bytes; it is an int because that is the type len() returns
Digest [ ] byte // Digest holds the digest bytes only (header excluded); it is a slice over the decoded buffer, not a copy
}
// Multihash is a byte slice with the following form:
// <hash function code><digest size><hash function output>.
// The code and the digest size are each written as an unsigned varint
// (see Encode). See the spec for more information.
type Multihash [ ] byte
// HexString returns the hex-encoded representation of a multihash.
2022-08-19 12:34:07 -04:00
func ( m Multihash ) HexString ( ) string {
return hex . EncodeToString ( [ ] byte ( m ) )
2018-07-04 13:51:47 +03:00
}
// String is an alias to HexString().
2022-08-19 12:34:07 -04:00
func ( m Multihash ) String ( ) string {
2018-07-04 13:51:47 +03:00
return m . HexString ( )
}
// FromHexString decodes the hex string s and validates the resulting bytes
// as a multihash via Cast. A hex decoding failure is returned as-is together
// with an empty Multihash.
func FromHexString(s string) (Multihash, error) {
	raw, decodeErr := hex.DecodeString(s)
	if decodeErr == nil {
		return Cast(raw)
	}
	return Multihash{}, decodeErr
}
// B58String encodes the multihash bytes as a base58 string.
func (m Multihash) B58String() string {
	raw := []byte(m)
	return b58.Encode(raw)
}
// FromB58String decodes the base58 string s and validates the resulting
// bytes as a multihash via Cast. Any base58 decoding failure is reported as
// ErrInvalidMultihash; the underlying decoder error is not exposed.
func FromB58String(s string) (m Multihash, err error) {
	raw, decodeErr := b58.Decode(s)
	if decodeErr != nil {
		return Multihash{}, ErrInvalidMultihash
	}
	return Cast(raw)
}
// Cast casts a buffer onto a multihash, and returns an error
// if it does not work.
func Cast ( buf [ ] byte ) ( Multihash , error ) {
2021-06-28 08:53:50 +02:00
_ , err := Decode ( buf )
2018-07-04 13:51:47 +03:00
if err != nil {
return Multihash { } , err
}
return Multihash ( buf ) , nil
}
// Decode parses multihash bytes into a DecodedMultihash.
func Decode ( buf [ ] byte ) ( * DecodedMultihash , error ) {
2023-06-30 09:41:32 -04:00
// outline decode allowing the &dm expression to be inlined into the caller.
// This moves the heap allocation into the caller and if the caller doesn't
// leak dm the compiler will use a stack allocation instead.
// If you do not outline this &dm always heap allocate since the pointer is
// returned which cause a heap allocation because Decode's stack frame is
// about to disapear.
dm , err := decode ( buf )
2018-07-04 13:51:47 +03:00
if err != nil {
return nil , err
}
2023-06-30 09:41:32 -04:00
return & dm , nil
}
func decode ( buf [ ] byte ) ( dm DecodedMultihash , err error ) {
rlen , code , hdig , err := readMultihashFromBuf ( buf )
if err != nil {
return DecodedMultihash { } , err
}
2018-07-04 13:51:47 +03:00
2023-06-30 09:41:32 -04:00
dm = DecodedMultihash {
2018-07-04 13:51:47 +03:00
Code : code ,
Name : Codes [ code ] ,
2021-06-16 16:19:45 -04:00
Length : len ( hdig ) ,
Digest : hdig ,
2018-07-04 13:51:47 +03:00
}
2021-06-16 16:19:45 -04:00
if len ( buf ) != rlen {
2023-06-30 09:41:32 -04:00
return dm , ErrInconsistentLen { dm , rlen }
2018-07-04 13:51:47 +03:00
}
return dm , nil
}
// Encode a hash digest along with the specified function code.
// Note: the length is derived from the length of the digest itself.
2021-06-28 08:53:50 +02:00
//
// The error return is legacy; it is always nil.
2018-07-04 13:51:47 +03:00
func Encode ( buf [ ] byte , code uint64 ) ( [ ] byte , error ) {
2021-06-28 08:53:50 +02:00
// FUTURE: this function always causes heap allocs... but when used, this value is almost always going to be appended to another buffer (either as part of CID creation, or etc) -- should this whole function be rethought and alternatives offered?
2021-06-16 16:19:45 -04:00
newBuf := make ( [ ] byte , varint . UvarintSize ( code ) + varint . UvarintSize ( uint64 ( len ( buf ) ) ) + len ( buf ) )
n := varint . PutUvarint ( newBuf , code )
n += varint . PutUvarint ( newBuf [ n : ] , uint64 ( len ( buf ) ) )
2018-07-04 13:51:47 +03:00
2021-06-16 16:19:45 -04:00
copy ( newBuf [ n : ] , buf )
return newBuf , nil
2018-07-04 13:51:47 +03:00
}
// EncodeName is like Encode() but providing a string name
// instead of a numeric code. See Names for allowed values.
func EncodeName ( buf [ ] byte , name string ) ( [ ] byte , error ) {
return Encode ( buf , Names [ name ] )
}
2021-06-16 16:19:45 -04:00
// readMultihashFromBuf reads a multihash from the given buffer, returning the
// individual pieces of the multihash.
// Note: the returned digest is a slice over the passed in data and should be
// copied if the buffer will be reused
func readMultihashFromBuf ( buf [ ] byte ) ( int , uint64 , [ ] byte , error ) {
2022-11-04 09:57:20 -04:00
initBufLength := len ( buf )
if initBufLength < 2 {
2021-06-16 16:19:45 -04:00
return 0 , 0 , nil , ErrTooShort
}
var err error
var code , length uint64
code , buf , err = uvarint ( buf )
if err != nil {
return 0 , 0 , nil , err
}
length , buf , err = uvarint ( buf )
if err != nil {
return 0 , 0 , nil , err
}
if length > math . MaxInt32 {
return 0 , 0 , nil , errors . New ( "digest too long, supporting only <= 2^31-1" )
}
if int ( length ) > len ( buf ) {
return 0 , 0 , nil , errors . New ( "length greater than remaining number of bytes in buffer" )
}
2022-11-04 09:57:20 -04:00
// rlen is the advertised size of the CID
rlen := ( initBufLength - len ( buf ) ) + int ( length )
2021-06-16 16:19:45 -04:00
return rlen , code , buf [ : length ] , nil
}
// MHFromBytes parses one multihash from the front of buf. It returns how
// many bytes the multihash occupies and the multihash itself, which is a
// slice over buf. Bytes after the multihash are ignored.
func MHFromBytes(buf []byte) (int, Multihash, error) {
	n, _, _, err := readMultihashFromBuf(buf)
	if err == nil {
		return n, Multihash(buf[:n]), nil
	}
	return 0, nil, err
}