diff --git a/go.mod b/go.mod index 2ab5451a5..d6d393907 100644 --- a/go.mod +++ b/go.mod @@ -81,6 +81,7 @@ require ( github.com/Masterminds/squirrel v1.5.4 github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5 github.com/andybalholm/brotli v1.0.5 + github.com/bits-and-blooms/bloom/v3 v3.7.0 github.com/cenkalti/backoff/v4 v4.2.1 github.com/gorilla/sessions v1.2.1 github.com/ipfs/go-log/v2 v2.5.1 @@ -131,7 +132,7 @@ require ( github.com/benbjohnson/clock v1.3.5 // indirect github.com/benbjohnson/immutable v0.3.0 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/bits-and-blooms/bitset v1.2.0 // indirect + github.com/bits-and-blooms/bitset v1.13.0 // indirect github.com/bradfitz/iter v0.0.0-20191230175014-e8f45d346db8 // indirect github.com/btcsuite/btcd v0.22.1 // indirect github.com/btcsuite/btcd/btcec/v2 v2.3.2 // indirect diff --git a/go.sum b/go.sum index b31902829..43e769cea 100644 --- a/go.sum +++ b/go.sum @@ -411,8 +411,12 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= -github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= +github.com/bits-and-blooms/bitset v1.10.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bits-and-blooms/bitset v1.13.0 h1:bAQ9OPNFYbGHV6Nez0tmNI0RiEu7/hxlYJRUA0wFAVE= +github.com/bits-and-blooms/bitset v1.13.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bits-and-blooms/bloom/v3 v3.7.0 h1:VfknkqV4xI+PsaDIsoHueyxVDZrfvMn56jeWUzvzdls= +github.com/bits-and-blooms/bloom/v3 v3.7.0/go.mod h1:VKlUSvp0lFIYqxJjzdnSsZEw4iHb1kOL2tfHTgyJBHg= github.com/bkaradzic/go-lz4 v1.0.0/go.mod h1:0YdlkowM3VswSROI7qDxhRvJ3sLhlFrRRwjwegp5jy4= github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= github.com/blang/semver v3.1.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= @@ -2109,6 +2113,8 @@ github.com/tsenart/tb v0.0.0-20181025101425-0d2499c8b6e9 h1:kjbwitOGH46vD01f2s3l github.com/tsenart/tb v0.0.0-20181025101425-0d2499c8b6e9/go.mod h1:EcGP24b8DY+bWHnpfJDP7fM+o8Nmz4fYH0l2xTtNr3I= github.com/ttacon/chalk v0.0.0-20160626202418-22c06c80ed31/go.mod h1:onvgF043R+lC5RZ8IT9rBXDaEDnpnw/Cl+HFiw+v/7Q= github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM= +github.com/twmb/murmur3 v1.1.6 h1:mqrRot1BRxm+Yct+vavLMou2/iJt0tNVTTC0QoIjaZg= +github.com/twmb/murmur3 v1.1.6/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= github.com/tyler-smith/go-bip39 v1.0.1-0.20181017060643-dbb3b84ba2ef/go.mod h1:sJ5fKU0s6JVwZjjcUEX2zFOnvq0ASQ2K9Zr6cf67kNs= github.com/tyler-smith/go-bip39 v1.1.0 h1:5eUemwrMargf3BSLRRCalXT93Ns6pQJIjYQN2nyfOP8= github.com/tyler-smith/go-bip39 v1.1.0/go.mod h1:gUYDtqQw1JS3ZJ8UWVcGTGqqr6YIN3CWg+kkNaLt55U= diff --git a/vendor/github.com/bits-and-blooms/bitset/README.md b/vendor/github.com/bits-and-blooms/bitset/README.md index 97e83071e..fe7bca65e 100644 --- a/vendor/github.com/bits-and-blooms/bitset/README.md +++ b/vendor/github.com/bits-and-blooms/bitset/README.md @@ -7,6 +7,15 @@ [![PkgGoDev](https://pkg.go.dev/badge/github.com/bits-and-blooms/bitset?tab=doc)](https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc) +This library is part of the [awesome go collection](https://github.com/avelino/awesome-go). It is used in production by several important systems: + +* [beego](https://github.com/beego/beego) +* [CubeFS](https://github.com/cubefs/cubefs) +* [Amazon EKS Distro](https://github.com/aws/eks-distro) +* [sourcegraph](https://github.com/sourcegraph/sourcegraph) +* [torrent](https://github.com/anacrolix/torrent) + + ## Description Package bitset implements bitsets, a mapping between non-negative integers and boolean values. @@ -60,19 +69,76 @@ func main() { } ``` -As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets. Package documentation is at: https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc +## Serialization + + +You may serialize a bitset safely and portably to a stream +of bytes as follows: +```Go + const length = 9585 + const oneEvery = 97 + bs := bitset.New(length) + // Add some bits + for i := uint(0); i < length; i += oneEvery { + bs = bs.Set(i) + } + + var buf bytes.Buffer + n, err := bs.WriteTo(&buf) + if err != nil { + // failure + } + // Here n == buf.Len() +``` +You can later deserialize the result as follows: + +```Go + // Read back from buf + bs = bitset.New() + n, err = bs.ReadFrom(&buf) + if err != nil { + // error + } + // n is the number of bytes read +``` + +The `ReadFrom` function attempts to read the data into the existing +BitSet instance, to minimize memory allocations. + + +*Performance tip*: +When reading and writing to a file or a network connection, you may get better performance by +wrapping your streams with `bufio` instances. + +E.g., +```Go + f, err := os.Create("myfile") + w := bufio.NewWriter(f) +``` +```Go + f, err := os.Open("myfile") + r := bufio.NewReader(f) +``` + ## Memory Usage -The memory usage of a bitset using N bits is at least N/8 bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring). +The memory usage of a bitset using `N` bits is at least `N/8` bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring). + +The `roaring` library allows you to go back and forth between compressed Roaring bitmaps and the conventional bitset instances: +```Go + mybitset := roaringbitmap.ToBitSet() + newroaringbitmap := roaring.FromBitSet(mybitset) +``` + ## Implementation Note Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed. -It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `unit64`). If so, the version will be bumped. +It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `uint64`). If so, the version will be bumped. ## Installation diff --git a/vendor/github.com/bits-and-blooms/bitset/SECURITY.md b/vendor/github.com/bits-and-blooms/bitset/SECURITY.md new file mode 100644 index 000000000..f888420c3 --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bitset/SECURITY.md @@ -0,0 +1,5 @@ +# Security Policy + +## Reporting a Vulnerability + +You can report privately a vulnerability by email at daniel@lemire.me (current maintainer). diff --git a/vendor/github.com/bits-and-blooms/bitset/bitset.go b/vendor/github.com/bits-and-blooms/bitset/bitset.go index d688806a5..9f38ed3a9 100644 --- a/vendor/github.com/bits-and-blooms/bitset/bitset.go +++ b/vendor/github.com/bits-and-blooms/bitset/bitset.go @@ -33,12 +33,10 @@ Example use: As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets. - */ package bitset import ( - "bufio" "bytes" "encoding/base64" "encoding/binary" @@ -52,6 +50,9 @@ import ( // the wordSize of a bit set const wordSize = uint(64) +// the wordSize of a bit set in bytes +const wordBytes = wordSize / 8 + // log2WordSize is lg(wordSize) const log2WordSize = uint(6) @@ -87,12 +88,23 @@ func (b *BitSet) safeSet() []uint64 { return b.set } -// From is a constructor used to create a BitSet from an array of integers -func From(buf []uint64) *BitSet { - return &BitSet{uint(len(buf)) * 64, buf} +// SetBitsetFrom fills the bitset with an array of integers without creating a new BitSet instance +func (b *BitSet) SetBitsetFrom(buf []uint64) { + b.length = uint(len(buf)) * 64 + b.set = buf } -// Bytes returns the bitset as array of integers +// From is a constructor used to create a BitSet from an array of words +func From(buf []uint64) *BitSet { + return FromWithLength(uint(len(buf))*64, buf) +} + +// FromWithLength constructs from an array of words and length. +func FromWithLength(len uint, set []uint64) *BitSet { + return &BitSet{len, set} +} + +// Bytes returns the bitset as array of words func (b *BitSet) Bytes() []uint64 { return b.set } @@ -105,6 +117,17 @@ func wordsNeeded(i uint) int { return int((i + (wordSize - 1)) >> log2WordSize) } +// wordsNeededUnbound calculates the number of words needed for i bits, possibly exceeding the capacity. +// This function is useful if you know that the capacity cannot be exceeded (e.g., you have an existing bitmap). +func wordsNeededUnbound(i uint) int { + return int((i + (wordSize - 1)) >> log2WordSize) +} + +// wordsIndex calculates the index of words in a `uint64` +func wordsIndex(i uint) uint { + return i & (wordSize - 1) +} + // New creates a new BitSet with a hint that length bits will be required func New(length uint) (bset *BitSet) { defer func() { @@ -135,24 +158,22 @@ func (b *BitSet) Len() uint { return b.length } -// extendSetMaybe adds additional words to incorporate new bits if needed -func (b *BitSet) extendSetMaybe(i uint) { - if i >= b.length { // if we need more bits, make 'em - if i >= Cap() { - panic("You are exceeding the capacity") - } - nsize := wordsNeeded(i + 1) - if b.set == nil { - b.set = make([]uint64, nsize) - } else if cap(b.set) >= nsize { - b.set = b.set[:nsize] // fast resize - } else if len(b.set) < nsize { - newset := make([]uint64, nsize, 2*nsize) // increase capacity 2x - copy(newset, b.set) - b.set = newset - } - b.length = i + 1 +// extendSet adds additional words to incorporate new bits if needed +func (b *BitSet) extendSet(i uint) { + if i >= Cap() { + panic("You are exceeding the capacity") } + nsize := wordsNeeded(i + 1) + if b.set == nil { + b.set = make([]uint64, nsize) + } else if cap(b.set) >= nsize { + b.set = b.set[:nsize] // fast resize + } else if len(b.set) < nsize { + newset := make([]uint64, nsize, 2*nsize) // increase capacity 2x + copy(newset, b.set) + b.set = newset + } + b.length = i + 1 } // Test whether bit i is set. @@ -160,7 +181,7 @@ func (b *BitSet) Test(i uint) bool { if i >= b.length { return false } - return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0 + return b.set[i>>log2WordSize]&(1<>log2WordSize] |= 1 << (i & (wordSize - 1)) + if i >= b.length { // if we need more bits, make 'em + b.extendSet(i) + } + b.set[i>>log2WordSize] |= 1 << wordsIndex(i) return b } @@ -180,7 +203,7 @@ func (b *BitSet) Clear(i uint) *BitSet { if i >= b.length { return b } - b.set[i>>log2WordSize] &^= 1 << (i & (wordSize - 1)) + b.set[i>>log2WordSize] &^= 1 << wordsIndex(i) return b } @@ -205,7 +228,7 @@ func (b *BitSet) Flip(i uint) *BitSet { if i >= b.length { return b.Set(i) } - b.set[i>>log2WordSize] ^= 1 << (i & (wordSize - 1)) + b.set[i>>log2WordSize] ^= 1 << wordsIndex(i) return b } @@ -218,15 +241,23 @@ func (b *BitSet) FlipRange(start, end uint) *BitSet { if start >= end { return b } - - b.extendSetMaybe(end - 1) + if end-1 >= b.length { // if we need more bits, make 'em + b.extendSet(end - 1) + } var startWord uint = start >> log2WordSize var endWord uint = end >> log2WordSize - b.set[startWord] ^= ^(^uint64(0) << (start & (wordSize - 1))) - for i := startWord; i < endWord; i++ { - b.set[i] = ^b.set[i] + b.set[startWord] ^= ^(^uint64(0) << wordsIndex(start)) + if endWord > 0 { + // bounds check elimination + data := b.set + _ = data[endWord-1] + for i := startWord; i < endWord; i++ { + data[i] = ^data[i] + } + } + if end&(wordSize-1) != 0 { + b.set[endWord] ^= ^uint64(0) >> wordsIndex(-end) } - b.set[endWord] ^= ^uint64(0) >> (-end & (wordSize - 1)) return b } @@ -254,7 +285,10 @@ func (b *BitSet) Shrink(lastbitindex uint) *BitSet { copy(shrunk, b.set[:idx]) b.set = shrunk b.length = length - b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)))) + lastWordUsedBits := length % 64 + if lastWordUsedBits != 0 { + b.set[idx-1] &= allBits >> uint64(64-wordsIndex(lastWordUsedBits)) + } return b } @@ -283,7 +317,7 @@ func (b *BitSet) Compact() *BitSet { // this method could be extremely slow and in some cases might cause the entire BitSet // to be recopied. func (b *BitSet) InsertAt(idx uint) *BitSet { - insertAtElement := (idx >> log2WordSize) + insertAtElement := idx >> log2WordSize // if length of set is a multiple of wordSize we need to allocate more space first if b.isLenExactMultiple() { @@ -302,13 +336,13 @@ func (b *BitSet) InsertAt(idx uint) *BitSet { // generate a mask to extract the data that we need to shift left // within the element where we insert a bit - dataMask := ^(uint64(1)<> (i & (wordSize - 1)) + w = w >> wordsIndex(i) if w != 0 { return i + trailingZeroes64(w), true } - x = x + 1 + x++ + // bounds check elimination in the loop + if x < 0 { + return 0, false + } for x < len(b.set) { if b.set[x] != 0 { return uint(x)*wordSize + trailingZeroes64(b.set[x]), true } - x = x + 1 + x++ } return 0, false @@ -413,21 +451,20 @@ func (b *BitSet) NextSet(i uint) (uint, bool) { // including possibly the current index and up to cap(buffer). // If the returned slice has len zero, then no more set bits were found // -// buffer := make([]uint, 256) // this should be reused -// j := uint(0) -// j, buffer = bitmap.NextSetMany(j, buffer) -// for ; len(buffer) > 0; j, buffer = bitmap.NextSetMany(j,buffer) { -// for k := range buffer { -// do something with buffer[k] -// } -// j += 1 -// } -// +// buffer := make([]uint, 256) // this should be reused +// j := uint(0) +// j, buffer = bitmap.NextSetMany(j, buffer) +// for ; len(buffer) > 0; j, buffer = bitmap.NextSetMany(j,buffer) { +// for k := range buffer { +// do something with buffer[k] +// } +// j += 1 +// } // // It is possible to retrieve all set bits as follow: // -// indices := make([]uint, bitmap.Count()) -// bitmap.NextSetMany(0, indices) +// indices := make([]uint, bitmap.Count()) +// bitmap.NextSetMany(0, indices) // // However if bitmap.Count() is large, it might be preferable to // use several calls to NextSetMany, for performance reasons. @@ -438,7 +475,7 @@ func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) { if x >= len(b.set) || capacity == 0 { return 0, myanswer[:0] } - skip := i & (wordSize - 1) + skip := wordsIndex(i) word := b.set[x] >> skip myanswer = myanswer[:capacity] size := int(0) @@ -481,17 +518,23 @@ func (b *BitSet) NextClear(i uint) (uint, bool) { return 0, false } w := b.set[x] - w = w >> (i & (wordSize - 1)) - wA := allBits >> (i & (wordSize - 1)) + w = w >> wordsIndex(i) + wA := allBits >> wordsIndex(i) index := i + trailingZeroes64(^w) if w != wA && index < b.length { return index, true } x++ + // bounds check elimination in the loop + if x < 0 { + return 0, false + } for x < len(b.set) { - index = uint(x)*wordSize + trailingZeroes64(^b.set[x]) - if b.set[x] != allBits && index < b.length { - return index, true + if b.set[x] != allBits { + index = uint(x)*wordSize + trailingZeroes64(^b.set[x]) + if index < b.length { + return index, true + } } x++ } @@ -508,9 +551,21 @@ func (b *BitSet) ClearAll() *BitSet { return b } +// SetAll sets the entire BitSet +func (b *BitSet) SetAll() *BitSet { + if b != nil && b.set != nil { + for i := range b.set { + b.set[i] = allBits + } + + b.cleanLastWord() + } + return b +} + // wordCount returns the number of words used in a bit set func (b *BitSet) wordCount() int { - return len(b.set) + return wordsNeededUnbound(b.length) } // Clone this BitSet @@ -522,9 +577,10 @@ func (b *BitSet) Clone() *BitSet { return c } -// Copy into a destination BitSet -// Returning the size of the destination BitSet -// like array copy +// Copy into a destination BitSet using the Go array copy semantics: +// the number of bits copied is the minimum of the number of bits in the current +// BitSet (Len()) and the destination Bitset. +// We return the number of bits copied in the destination BitSet. func (b *BitSet) Copy(c *BitSet) (count uint) { if c == nil { return @@ -536,9 +592,33 @@ func (b *BitSet) Copy(c *BitSet) (count uint) { if b.length < c.length { count = b.length } + // Cleaning the last word is needed to keep the invariant that other functions, such as Count, require + // that any bits in the last word that would exceed the length of the bitmask are set to 0. + c.cleanLastWord() return } +// CopyFull copies into a destination BitSet such that the destination is +// identical to the source after the operation, allocating memory if necessary. +func (b *BitSet) CopyFull(c *BitSet) { + if c == nil { + return + } + c.length = b.length + if len(b.set) == 0 { + if c.set != nil { + c.set = c.set[:0] + } + } else { + if cap(c.set) < len(b.set) { + c.set = make([]uint64, len(b.set)) + } else { + c.set = c.set[:len(b.set)] + } + copy(c.set, b.set) + } +} + // Count (number of set bits). // Also known as "popcount" or "population count". func (b *BitSet) Count() uint { @@ -561,10 +641,15 @@ func (b *BitSet) Equal(c *BitSet) bool { if b.length == 0 { // if they have both length == 0, then could have nil set return true } - // testing for equality shoud not transform the bitset (no call to safeSet) - - for p, v := range b.set { - if c.set[p] != v { + wn := b.wordCount() + // bounds check elimination + if wn <= 0 { + return true + } + _ = b.set[wn-1] + _ = c.set[wn-1] + for p := 0; p < wn; p++ { + if c.set[p] != b.set[p] { return false } } @@ -583,9 +668,9 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { panicIfNull(b) panicIfNull(compare) result = b.Clone() // clone b (in case b is bigger than compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } for i := 0; i < l; i++ { result.set[i] = b.set[i] &^ compare.set[i] @@ -597,9 +682,9 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } cnt := uint64(0) cnt += popcntMaskSlice(b.set[:l], compare.set[:l]) @@ -612,12 +697,19 @@ func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceDifference(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } + if l <= 0 { + return + } + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] for i := 0; i < l; i++ { - b.set[i] &^= compare.set[i] + data[i] &^= cmpData[i] } } @@ -660,18 +752,29 @@ func (b *BitSet) IntersectionCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceIntersection(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } - for i := 0; i < l; i++ { - b.set[i] &= compare.set[i] + if l > 0 { + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] + + for i := 0; i < l; i++ { + data[i] &= cmpData[i] + } } - for i := l; i < len(b.set); i++ { - b.set[i] = 0 + if l >= 0 { + for i := l; i < len(b.set); i++ { + b.set[i] = 0 + } } if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) + if compare.length-1 >= b.length { + b.extendSet(compare.length - 1) + } } } @@ -706,15 +809,22 @@ func (b *BitSet) UnionCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceUnion(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } - if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) + if compare.length > 0 && compare.length-1 >= b.length { + b.extendSet(compare.length - 1) } - for i := 0; i < l; i++ { - b.set[i] |= compare.set[i] + if l > 0 { + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] + + for i := 0; i < l; i++ { + data[i] |= cmpData[i] + } } if len(compare.set) > l { for i := l; i < len(compare.set); i++ { @@ -754,15 +864,21 @@ func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } - if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) + if compare.length > 0 && compare.length-1 >= b.length { + b.extendSet(compare.length - 1) } - for i := 0; i < l; i++ { - b.set[i] ^= compare.set[i] + if l > 0 { + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] + for i := 0; i < l; i++ { + data[i] ^= cmpData[i] + } } if len(compare.set) > l { for i := l; i < len(compare.set); i++ { @@ -773,17 +889,17 @@ func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { // Is the length an exact multiple of word sizes? func (b *BitSet) isLenExactMultiple() bool { - return b.length%wordSize == 0 + return wordsIndex(b.length) == 0 } // Clean last word by setting unused bits to 0 func (b *BitSet) cleanLastWord() { if !b.isLenExactMultiple() { - b.set[len(b.set)-1] &= allBits >> (wordSize - b.length%wordSize) + b.set[len(b.set)-1] &= allBits >> (wordSize - wordsIndex(b.length)) } } -// Complement computes the (local) complement of a biset (up to length bits) +// Complement computes the (local) complement of a bitset (up to length bits) func (b *BitSet) Complement() (result *BitSet) { panicIfNull(b) result = New(b.length) @@ -811,7 +927,6 @@ func (b *BitSet) None() bool { return false } } - return true } return true } @@ -824,12 +939,16 @@ func (b *BitSet) Any() bool { // IsSuperSet returns true if this is a superset of the other set func (b *BitSet) IsSuperSet(other *BitSet) bool { - for i, e := other.NextSet(0); e; i, e = other.NextSet(i + 1) { - if !b.Test(i) { + l := other.wordCount() + if b.wordCount() < l { + l = b.wordCount() + } + for i, word := range other.set[:l] { + if b.set[i]&word != word { return false } } - return true + return popcntSlice(other.set[l:]) == 0 } // IsStrictSuperSet returns true if this is a strict superset of the other set @@ -837,7 +956,8 @@ func (b *BitSet) IsStrictSuperSet(other *BitSet) bool { return b.Count() > other.Count() && b.IsSuperSet(other) } -// DumpAsBits dumps a bit set as a string of bits +// DumpAsBits dumps a bit set as a string of bits. Following the usual convention in Go, +// the least significant bits are printed last (index 0 is at the end of the string). func (b *BitSet) DumpAsBits() string { if b.set == nil { return "." @@ -850,78 +970,156 @@ func (b *BitSet) DumpAsBits() string { return buffer.String() } -// BinaryStorageSize returns the binary storage requirements +// BinaryStorageSize returns the binary storage requirements (see WriteTo) in bytes. func (b *BitSet) BinaryStorageSize() int { - return binary.Size(uint64(0)) + binary.Size(b.set) + return int(wordBytes + wordBytes*uint(b.wordCount())) } -// WriteTo writes a BitSet to a stream +func readUint64Array(reader io.Reader, data []uint64) error { + length := len(data) + bufferSize := 128 + buffer := make([]byte, bufferSize*int(wordBytes)) + for i := 0; i < length; i += bufferSize { + end := i + bufferSize + if end > length { + end = length + buffer = buffer[:wordBytes*uint(end-i)] + } + chunk := data[i:end] + if _, err := io.ReadFull(reader, buffer); err != nil { + return err + } + for i := range chunk { + chunk[i] = uint64(binaryOrder.Uint64(buffer[8*i:])) + } + } + return nil +} + +func writeUint64Array(writer io.Writer, data []uint64) error { + bufferSize := 128 + buffer := make([]byte, bufferSize*int(wordBytes)) + for i := 0; i < len(data); i += bufferSize { + end := i + bufferSize + if end > len(data) { + end = len(data) + buffer = buffer[:wordBytes*uint(end-i)] + } + chunk := data[i:end] + for i, x := range chunk { + binaryOrder.PutUint64(buffer[8*i:], x) + } + _, err := writer.Write(buffer) + if err != nil { + return err + } + } + return nil +} + +// WriteTo writes a BitSet to a stream. The format is: +// 1. uint64 length +// 2. []uint64 set +// Upon success, the number of bytes written is returned. +// +// Performance: if this function is used to write to a disk or network +// connection, it might be beneficial to wrap the stream in a bufio.Writer. +// E.g., +// +// f, err := os.Create("myfile") +// w := bufio.NewWriter(f) func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { length := uint64(b.length) - // Write length - err := binary.Write(stream, binaryOrder, length) + err := binary.Write(stream, binaryOrder, &length) if err != nil { - return 0, err + // Upon failure, we do not guarantee that we + // return the number of bytes written. + return int64(0), err } - - // Write set - err = binary.Write(stream, binaryOrder, b.set) - return int64(b.BinaryStorageSize()), err + err = writeUint64Array(stream, b.set[:b.wordCount()]) + if err != nil { + // Upon failure, we do not guarantee that we + // return the number of bytes written. + return int64(wordBytes), err + } + return int64(b.BinaryStorageSize()), nil } // ReadFrom reads a BitSet from a stream written using WriteTo +// The format is: +// 1. uint64 length +// 2. []uint64 set +// Upon success, the number of bytes read is returned. +// If the current BitSet is not large enough to hold the data, +// it is extended. In case of error, the BitSet is either +// left unchanged or made empty if the error occurs too late +// to preserve the content. +// +// Performance: if this function is used to read from a disk or network +// connection, it might be beneficial to wrap the stream in a bufio.Reader. +// E.g., +// +// f, err := os.Open("myfile") +// r := bufio.NewReader(f) func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { var length uint64 - - // Read length first err := binary.Read(stream, binaryOrder, &length) if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } return 0, err } - newset := New(uint(length)) + newlength := uint(length) - if uint64(newset.length) != length { + if uint64(newlength) != length { return 0, errors.New("unmarshalling error: type mismatch") } + nWords := wordsNeeded(uint(newlength)) + if cap(b.set) >= nWords { + b.set = b.set[:nWords] + } else { + b.set = make([]uint64, nWords) + } - // Read remaining bytes as set - err = binary.Read(stream, binaryOrder, newset.set) + b.length = newlength + + err = readUint64Array(stream, b.set) if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + // We do not want to leave the BitSet partially filled as + // it is error prone. + b.set = b.set[:0] + b.length = 0 return 0, err } - *b = *newset return int64(b.BinaryStorageSize()), nil } // MarshalBinary encodes a BitSet into a binary form and returns the result. func (b *BitSet) MarshalBinary() ([]byte, error) { var buf bytes.Buffer - writer := bufio.NewWriter(&buf) - - _, err := b.WriteTo(writer) + _, err := b.WriteTo(&buf) if err != nil { return []byte{}, err } - err = writer.Flush() - return buf.Bytes(), err } // UnmarshalBinary decodes the binary form generated by MarshalBinary. func (b *BitSet) UnmarshalBinary(data []byte) error { buf := bytes.NewReader(data) - reader := bufio.NewReader(buf) - - _, err := b.ReadFrom(reader) - + _, err := b.ReadFrom(buf) return err } // MarshalJSON marshals a BitSet as a JSON structure -func (b *BitSet) MarshalJSON() ([]byte, error) { +func (b BitSet) MarshalJSON() ([]byte, error) { buffer := bytes.NewBuffer(make([]byte, 0, b.BinaryStorageSize())) _, err := b.WriteTo(buffer) if err != nil { @@ -950,3 +1148,37 @@ func (b *BitSet) UnmarshalJSON(data []byte) error { _, err = b.ReadFrom(bytes.NewReader(buf)) return err } + +// Rank returns the nunber of set bits up to and including the index +// that are set in the bitset. +// See https://en.wikipedia.org/wiki/Ranking#Ranking_in_statistics +func (b *BitSet) Rank(index uint) uint { + if index >= b.length { + return b.Count() + } + leftover := (index + 1) & 63 + answer := uint(popcntSlice(b.set[:(index+1)>>6])) + if leftover != 0 { + answer += uint(popcount(b.set[(index+1)>>6] << (64 - leftover))) + } + return answer +} + +// Select returns the index of the jth set bit, where j is the argument. +// The caller is responsible to ensure that 0 <= j < Count(): when j is +// out of range, the function returns the length of the bitset (b.length). +// +// Note that this function differs in convention from the Rank function which +// returns 1 when ranking the smallest value. We follow the conventional +// textbook definition of Select and Rank. +func (b *BitSet) Select(index uint) uint { + leftover := index + for idx, word := range b.set { + w := uint(popcount(word)) + if w > leftover { + return uint(idx)*64 + select64(word, leftover) + } + leftover -= w + } + return b.length +} diff --git a/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go b/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go index fc8ff4f36..7855c04b5 100644 --- a/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go +++ b/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go @@ -1,3 +1,4 @@ +//go:build go1.9 // +build go1.9 package bitset @@ -14,6 +15,10 @@ func popcntSlice(s []uint64) uint64 { func popcntMaskSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] &^ m[i]) } @@ -22,6 +27,10 @@ func popcntMaskSlice(s, m []uint64) uint64 { func popcntAndSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] & m[i]) } @@ -30,6 +39,10 @@ func popcntAndSlice(s, m []uint64) uint64 { func popcntOrSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] | m[i]) } @@ -38,6 +51,10 @@ func popcntOrSlice(s, m []uint64) uint64 { func popcntXorSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] ^ m[i]) } diff --git a/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go b/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go index 4cf64f24a..116e04440 100644 --- a/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go +++ b/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go @@ -1,5 +1,5 @@ -// +build !go1.9 -// +build amd64,!appengine +//go:build !go1.9 && amd64 && !appengine +// +build !go1.9,amd64,!appengine package bitset diff --git a/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go b/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go index 21e0ff7b4..9e0ad464e 100644 --- a/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go +++ b/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go @@ -1,3 +1,4 @@ +//go:build !go1.9 && (!amd64 || appengine) // +build !go1.9 // +build !amd64 appengine diff --git a/vendor/github.com/bits-and-blooms/bitset/select.go b/vendor/github.com/bits-and-blooms/bitset/select.go new file mode 100644 index 000000000..f15e74a2c --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bitset/select.go @@ -0,0 +1,45 @@ +package bitset + +func select64(w uint64, j uint) uint { + seen := 0 + // Divide 64bit + part := w & 0xFFFFFFFF + n := uint(popcount(part)) + if n <= j { + part = w >> 32 + seen += 32 + j -= n + } + ww := part + + // Divide 32bit + part = ww & 0xFFFF + + n = uint(popcount(part)) + if n <= j { + part = ww >> 16 + seen += 16 + j -= n + } + ww = part + + // Divide 16bit + part = ww & 0xFF + n = uint(popcount(part)) + if n <= j { + part = ww >> 8 + seen += 8 + j -= n + } + ww = part + + // Lookup in final byte + counter := 0 + for ; counter < 8; counter++ { + j -= uint((ww >> counter) & 1) + if j+1 == 0 { + break + } + } + return uint(seen + counter) +} diff --git a/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go b/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go index c52b61be9..12336e76a 100644 --- a/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go +++ b/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go @@ -1,3 +1,4 @@ +//go:build !go1.9 // +build !go1.9 package bitset diff --git a/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go b/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go index 36a988e71..cfb0a8409 100644 --- a/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go +++ b/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go @@ -1,3 +1,4 @@ +//go:build go1.9 // +build go1.9 package bitset diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/.gitignore b/vendor/github.com/bits-and-blooms/bloom/v3/.gitignore new file mode 100644 index 000000000..e71973974 --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bloom/v3/.gitignore @@ -0,0 +1,27 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof + +target +.idea diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/.travis.yml b/vendor/github.com/bits-and-blooms/bloom/v3/.travis.yml new file mode 100644 index 000000000..7b8fd3006 --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bloom/v3/.travis.yml @@ -0,0 +1,38 @@ +language: go + +sudo: false + +branches: + except: + - release + +branches: + only: + - master + - develop + - travis + +go: + - 1.8 + - tip + +matrix: + allow_failures: + - go: tip + +before_install: + - if [ -n "$GH_USER" ]; then git config --global github.user ${GH_USER}; fi; + - if [ -n "$GH_TOKEN" ]; then git config --global github.token ${GH_TOKEN}; fi; + - go get github.com/mattn/goveralls + +before_script: + - make deps + +script: + - make qa + +after_failure: + - cat ./target/test/report.xml + +after_success: + - if [ "$TRAVIS_GO_VERSION" = "1.8" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi; diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/LICENSE b/vendor/github.com/bits-and-blooms/bloom/v3/LICENSE new file mode 100644 index 000000000..3b9d36aa6 --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bloom/v3/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2014 Will Fitzgerald. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/Makefile b/vendor/github.com/bits-and-blooms/bloom/v3/Makefile new file mode 100644 index 000000000..0fcbdcb2d --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bloom/v3/Makefile @@ -0,0 +1,197 @@ +# MAKEFILE +# +# @author Nicola Asuni +# @link https://github.com/bits-and-blooms/bloom +# ------------------------------------------------------------------------------ + +# List special make targets that are not associated with files +.PHONY: help all test format fmtcheck vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan qa deps clean nuke + +# Use bash as shell (Note: Ubuntu now uses dash which doesn't support PIPESTATUS). +SHELL=/bin/bash + +# CVS path (path to the parent dir containing the project) +CVSPATH=github.com/bits-and-blooms + +# Project owner +OWNER=bits-and-blooms + +# Project vendor +VENDOR=bits-and-blooms + +# Project name +PROJECT=bloom + +# Project version +VERSION=$(shell cat VERSION) + +# Name of RPM or DEB package +PKGNAME=${VENDOR}-${PROJECT} + +# Current directory +CURRENTDIR=$(shell pwd) + +# GO lang path +ifneq ($(GOPATH),) + ifeq ($(findstring $(GOPATH),$(CURRENTDIR)),) + # the defined GOPATH is not valid + GOPATH= + endif +endif +ifeq ($(GOPATH),) + # extract the GOPATH + GOPATH=$(firstword $(subst /src/, ,$(CURRENTDIR))) +endif + +# --- MAKE TARGETS --- + +# Display general help about this command +help: + @echo "" + @echo "$(PROJECT) Makefile." + @echo "GOPATH=$(GOPATH)" + @echo "The following commands are available:" + @echo "" + @echo " make qa : Run all the tests" + @echo " make test : Run the unit tests" + @echo "" + @echo " make format : Format the source code" + @echo " make fmtcheck : Check if the source code has been formatted" + @echo " make vet : Check for suspicious constructs" + @echo " make lint : Check for style errors" + @echo " make coverage : Generate the coverage report" + @echo " make cyclo : Generate the cyclomatic complexity report" + @echo " make ineffassign : Detect ineffectual assignments" + @echo " make misspell : Detect commonly misspelled words in source files" + @echo " make structcheck : Find unused struct fields" + @echo " make varcheck : Find unused global variables and constants" + @echo " make errcheck : Check that error return values are used" + @echo " make gosimple : Suggest code simplifications" + @echo " make astscan : GO AST scanner" + @echo "" + @echo " make docs : Generate source code documentation" + @echo "" + @echo " make deps : Get the dependencies" + @echo " make clean : Remove any build artifact" + @echo " make nuke : Deletes any intermediate file" + @echo "" + +# Alias for help target +all: help + +# Run the unit tests +test: + @mkdir -p target/test + @mkdir -p target/report + GOPATH=$(GOPATH) \ + go test \ + -covermode=atomic \ + -bench=. \ + -race \ + -cpuprofile=target/report/cpu.out \ + -memprofile=target/report/mem.out \ + -mutexprofile=target/report/mutex.out \ + -coverprofile=target/report/coverage.out \ + -v ./... | \ + tee >(PATH=$(GOPATH)/bin:$(PATH) go-junit-report > target/test/report.xml); \ + test $${PIPESTATUS[0]} -eq 0 + +# Format the source code +format: + @find . -type f -name "*.go" -exec gofmt -s -w {} \; + +# Check if the source code has been formatted +fmtcheck: + @mkdir -p target + @find . -type f -name "*.go" -exec gofmt -s -d {} \; | tee target/format.diff + @test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; } + +# Check for syntax errors +vet: + GOPATH=$(GOPATH) go vet . + +# Check for style errors +lint: + GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint . + +# Generate the coverage report +coverage: + @mkdir -p target/report + GOPATH=$(GOPATH) \ + go tool cover -html=target/report/coverage.out -o target/report/coverage.html + +# Report cyclomatic complexity +cyclo: + @mkdir -p target/report + GOPATH=$(GOPATH) gocyclo -avg ./ | tee target/report/cyclo.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Detect ineffectual assignments +ineffassign: + @mkdir -p target/report + GOPATH=$(GOPATH) ineffassign ./ | tee target/report/ineffassign.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Detect commonly misspelled words in source files +misspell: + @mkdir -p target/report + GOPATH=$(GOPATH) misspell -error ./ | tee target/report/misspell.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Find unused struct fields +structcheck: + @mkdir -p target/report + GOPATH=$(GOPATH) structcheck -a ./ | tee target/report/structcheck.txt + +# Find unused global variables and constants +varcheck: + @mkdir -p target/report + GOPATH=$(GOPATH) varcheck -e ./ | tee target/report/varcheck.txt + +# Check that error return values are used +errcheck: + @mkdir -p target/report + GOPATH=$(GOPATH) errcheck ./ | tee target/report/errcheck.txt + +# Suggest code simplifications +gosimple: + @mkdir -p target/report + GOPATH=$(GOPATH) gosimple ./ | tee target/report/gosimple.txt + +# AST scanner +astscan: + @mkdir -p target/report + GOPATH=$(GOPATH) gas .//*.go | tee target/report/astscan.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Generate source docs +docs: + @mkdir -p target/docs + nohup sh -c 'GOPATH=$(GOPATH) godoc -http=127.0.0.1:6060' > target/godoc_server.log 2>&1 & + wget --directory-prefix=target/docs/ --execute robots=off --retry-connrefused --recursive --no-parent --adjust-extension --page-requisites --convert-links http://127.0.0.1:6060/pkg/github.com/${VENDOR}/${PROJECT}/ ; kill -9 `lsof -ti :6060` + @echo ''${PKGNAME}' Documentation ...' > target/docs/index.html + +# Alias to run all quality-assurance checks +qa: fmtcheck test vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan + +# --- INSTALL --- + +# Get the dependencies +deps: + GOPATH=$(GOPATH) go get ./... + GOPATH=$(GOPATH) go get github.com/golang/lint/golint + GOPATH=$(GOPATH) go get github.com/jstemmer/go-junit-report + GOPATH=$(GOPATH) go get github.com/axw/gocov/gocov + GOPATH=$(GOPATH) go get github.com/fzipp/gocyclo + GOPATH=$(GOPATH) go get github.com/gordonklaus/ineffassign + GOPATH=$(GOPATH) go get github.com/client9/misspell/cmd/misspell + GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/structcheck + GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/varcheck + GOPATH=$(GOPATH) go get github.com/kisielk/errcheck + GOPATH=$(GOPATH) go get honnef.co/go/tools/cmd/gosimple + GOPATH=$(GOPATH) go get github.com/securego/gosec + +# Remove any build artifact +clean: + GOPATH=$(GOPATH) go clean ./... + +# Deletes any intermediate file +nuke: + rm -rf ./target + GOPATH=$(GOPATH) go clean -i ./... diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/README.md b/vendor/github.com/bits-and-blooms/bloom/v3/README.md new file mode 100644 index 000000000..dc472ef23 --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bloom/v3/README.md @@ -0,0 +1,153 @@ +Bloom filters +------------- +[![Test](https://github.com/bits-and-blooms/bloom/actions/workflows/test.yml/badge.svg)](https://github.com/bits-and-blooms/bloom/actions/workflows/test.yml) +[![Go Report Card](https://goreportcard.com/badge/github.com/bits-and-blooms/bloom)](https://goreportcard.com/report/github.com/bits-and-blooms/bloom) +[![Go Reference](https://pkg.go.dev/badge/github.com/bits-and-blooms/bloom.svg)](https://pkg.go.dev/github.com/bits-and-blooms/bloom/v3) + +This library is used by popular systems such as [Milvus](https://github.com/milvus-io/milvus) and [beego](https://github.com/beego/Beego). + +A Bloom filter is a concise/compressed representation of a set, where the main +requirement is to make membership queries; _i.e._, whether an item is a +member of a set. A Bloom filter will always correctly report the presence +of an element in the set when the element is indeed present. A Bloom filter +can use much less storage than the original set, but it allows for some 'false positives': +it may sometimes report that an element is in the set whereas it is not. + +When you construct, you need to know how many elements you have (the desired capacity), and what is the desired false positive rate you are willing to tolerate. A common false-positive rate is 1%. The +lower the false-positive rate, the more memory you are going to require. Similarly, the higher the +capacity, the more memory you will use. +You may construct the Bloom filter capable of receiving 1 million elements with a false-positive +rate of 1% in the following manner. + +```Go + filter := bloom.NewWithEstimates(1000000, 0.01) +``` + +You should call `NewWithEstimates` conservatively: if you specify a number of elements that it is +too small, the false-positive bound might be exceeded. A Bloom filter is not a dynamic data structure: +you must know ahead of time what your desired capacity is. + +Our implementation accepts keys for setting and testing as `[]byte`. Thus, to +add a string item, `"Love"`: + +```Go + filter.Add([]byte("Love")) +``` + +Similarly, to test if `"Love"` is in bloom: + +```Go + if filter.Test([]byte("Love")) +``` + +For numerical data, we recommend that you look into the encoding/binary library. But, for example, to add a `uint32` to the filter: + +```Go + i := uint32(100) + n1 := make([]byte, 4) + binary.BigEndian.PutUint32(n1, i) + filter.Add(n1) +``` + +Godoc documentation: https://pkg.go.dev/github.com/bits-and-blooms/bloom/v3 + + +## Installation + +```bash +go get -u github.com/bits-and-blooms/bloom/v3 +``` + +## Verifying the False Positive Rate + + +Sometimes, the actual false positive rate may differ (slightly) from the +theoretical false positive rate. We have a function to estimate the false positive rate of a +Bloom filter with _m_ bits and _k_ hashing functions for a set of size _n_: + +```Go + if bloom.EstimateFalsePositiveRate(20*n, 5, n) > 0.001 ... +``` + +You can use it to validate the computed m, k parameters: + +```Go + m, k := bloom.EstimateParameters(n, fp) + ActualfpRate := bloom.EstimateFalsePositiveRate(m, k, n) +``` + +or + +```Go + f := bloom.NewWithEstimates(n, fp) + ActualfpRate := bloom.EstimateFalsePositiveRate(f.m, f.k, n) +``` + +You would expect `ActualfpRate` to be close to the desired false-positive rate `fp` in these cases. + +The `EstimateFalsePositiveRate` function creates a temporary Bloom filter. It is +also relatively expensive and only meant for validation. + +## Serialization + +You can read and write the Bloom filters as follows: + + +```Go + f := New(1000, 4) + var buf bytes.Buffer + bytesWritten, err := f.WriteTo(&buf) + if err != nil { + t.Fatal(err.Error()) + } + var g BloomFilter + bytesRead, err := g.ReadFrom(&buf) + if err != nil { + t.Fatal(err.Error()) + } + if bytesRead != bytesWritten { + t.Errorf("read unexpected number of bytes %d != %d", bytesRead, bytesWritten) + } +``` + +*Performance tip*: +When reading and writing to a file or a network connection, you may get better performance by +wrapping your streams with `bufio` instances. + +E.g., +```Go + f, err := os.Create("myfile") + w := bufio.NewWriter(f) +``` +```Go + f, err := os.Open("myfile") + r := bufio.NewReader(f) +``` + +## Contributing + +If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)") + +This project includes a Makefile that allows you to test and build the project with simple commands. +To see all available options: +```bash +make help +``` + +## Running all tests + +Before committing the code, please check if it passes all tests using (note: this will install some dependencies): +```bash +make deps +make qa +``` + +## Design + +A Bloom filter has two parameters: _m_, the number of bits used in storage, and _k_, the number of hashing functions on elements of the set. (The actual hashing functions are important, too, but this is not a parameter for this implementation). A Bloom filter is backed by a [BitSet](https://github.com/bits-and-blooms/bitset); a key is represented in the filter by setting the bits at each value of the hashing functions (modulo _m_). Set membership is done by _testing_ whether the bits at each value of the hashing functions (again, modulo _m_) are set. If so, the item is in the set. If the item is actually in the set, a Bloom filter will never fail (the true positive rate is 1.0); but it is susceptible to false positives. The art is to choose _k_ and _m_ correctly. + +In this implementation, the hashing functions used is [murmurhash](github.com/twmb/murmur3), a non-cryptographic hashing function. + + +Given the particular hashing scheme, it's best to be empirical about this. Note +that estimating the FP rate will clear the Bloom filter. diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/SECURITY.md b/vendor/github.com/bits-and-blooms/bloom/v3/SECURITY.md new file mode 100644 index 000000000..f888420c3 --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bloom/v3/SECURITY.md @@ -0,0 +1,5 @@ +# Security Policy + +## Reporting a Vulnerability + +You can report privately a vulnerability by email at daniel@lemire.me (current maintainer). diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/bloom.go b/vendor/github.com/bits-and-blooms/bloom/v3/bloom.go new file mode 100644 index 000000000..89dbe24d2 --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bloom/v3/bloom.go @@ -0,0 +1,453 @@ +/* +Package bloom provides data structures and methods for creating Bloom filters. + +A Bloom filter is a representation of a set of _n_ items, where the main +requirement is to make membership queries; _i.e._, whether an item is a +member of a set. + +A Bloom filter has two parameters: _m_, a maximum size (typically a reasonably large +multiple of the cardinality of the set to represent) and _k_, the number of hashing +functions on elements of the set. (The actual hashing functions are important, too, +but this is not a parameter for this implementation). A Bloom filter is backed by +a BitSet; a key is represented in the filter by setting the bits at each value of the +hashing functions (modulo _m_). Set membership is done by _testing_ whether the +bits at each value of the hashing functions (again, modulo _m_) are set. If so, +the item is in the set. If the item is actually in the set, a Bloom filter will +never fail (the true positive rate is 1.0); but it is susceptible to false +positives. The art is to choose _k_ and _m_ correctly. + +In this implementation, the hashing functions used is murmurhash, +a non-cryptographic hashing function. + +This implementation accepts keys for setting as testing as []byte. Thus, to +add a string item, "Love": + + uint n = 1000 + filter := bloom.New(20*n, 5) // load of 20, 5 keys + filter.Add([]byte("Love")) + +Similarly, to test if "Love" is in bloom: + + if filter.Test([]byte("Love")) + +For numeric data, I recommend that you look into the binary/encoding library. But, +for example, to add a uint32 to the filter: + + i := uint32(100) + n1 := make([]byte,4) + binary.BigEndian.PutUint32(n1,i) + f.Add(n1) + +Finally, there is a method to estimate the false positive rate of a +Bloom filter with _m_ bits and _k_ hashing functions for a set of size _n_: + + if bloom.EstimateFalsePositiveRate(20*n, 5, n) > 0.001 ... + +You can use it to validate the computed m, k parameters: + + m, k := bloom.EstimateParameters(n, fp) + ActualfpRate := bloom.EstimateFalsePositiveRate(m, k, n) + +or + + f := bloom.NewWithEstimates(n, fp) + ActualfpRate := bloom.EstimateFalsePositiveRate(f.m, f.k, n) + +You would expect ActualfpRate to be close to the desired fp in these cases. + +The EstimateFalsePositiveRate function creates a temporary Bloom filter. It is +also relatively expensive and only meant for validation. +*/ +package bloom + +import ( + "bytes" + "encoding/binary" + "encoding/json" + "fmt" + "io" + "math" + + "github.com/bits-and-blooms/bitset" +) + +// A BloomFilter is a representation of a set of _n_ items, where the main +// requirement is to make membership queries; _i.e._, whether an item is a +// member of a set. +type BloomFilter struct { + m uint + k uint + b *bitset.BitSet +} + +func max(x, y uint) uint { + if x > y { + return x + } + return y +} + +// New creates a new Bloom filter with _m_ bits and _k_ hashing functions +// We force _m_ and _k_ to be at least one to avoid panics. +func New(m uint, k uint) *BloomFilter { + return &BloomFilter{max(1, m), max(1, k), bitset.New(m)} +} + +// From creates a new Bloom filter with len(_data_) * 64 bits and _k_ hashing +// functions. The data slice is not going to be reset. +func From(data []uint64, k uint) *BloomFilter { + m := uint(len(data) * 64) + return FromWithM(data, m, k) +} + +// FromWithM creates a new Bloom filter with _m_ length, _k_ hashing functions. +// The data slice is not going to be reset. +func FromWithM(data []uint64, m, k uint) *BloomFilter { + return &BloomFilter{m, k, bitset.From(data)} +} + +// baseHashes returns the four hash values of data that are used to create k +// hashes +func baseHashes(data []byte) [4]uint64 { + var d digest128 // murmur hashing + hash1, hash2, hash3, hash4 := d.sum256(data) + return [4]uint64{ + hash1, hash2, hash3, hash4, + } +} + +// location returns the ith hashed location using the four base hash values +func location(h [4]uint64, i uint) uint64 { + ii := uint64(i) + return h[ii%2] + ii*h[2+(((ii+(ii%2))%4)/2)] +} + +// location returns the ith hashed location using the four base hash values +func (f *BloomFilter) location(h [4]uint64, i uint) uint { + return uint(location(h, i) % uint64(f.m)) +} + +// EstimateParameters estimates requirements for m and k. +// Based on https://bitbucket.org/ww/bloom/src/829aa19d01d9/bloom.go +// used with permission. +func EstimateParameters(n uint, p float64) (m uint, k uint) { + m = uint(math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(math.Log(2), 2))) + k = uint(math.Ceil(math.Log(2) * float64(m) / float64(n))) + return +} + +// NewWithEstimates creates a new Bloom filter for about n items with fp +// false positive rate +func NewWithEstimates(n uint, fp float64) *BloomFilter { + m, k := EstimateParameters(n, fp) + return New(m, k) +} + +// Cap returns the capacity, _m_, of a Bloom filter +func (f *BloomFilter) Cap() uint { + return f.m +} + +// K returns the number of hash functions used in the BloomFilter +func (f *BloomFilter) K() uint { + return f.k +} + +// BitSet returns the underlying bitset for this filter. +func (f *BloomFilter) BitSet() *bitset.BitSet { + return f.b +} + +// Add data to the Bloom Filter. Returns the filter (allows chaining) +func (f *BloomFilter) Add(data []byte) *BloomFilter { + h := baseHashes(data) + for i := uint(0); i < f.k; i++ { + f.b.Set(f.location(h, i)) + } + return f +} + +// Merge the data from two Bloom Filters. +func (f *BloomFilter) Merge(g *BloomFilter) error { + // Make sure the m's and k's are the same, otherwise merging has no real use. + if f.m != g.m { + return fmt.Errorf("m's don't match: %d != %d", f.m, g.m) + } + + if f.k != g.k { + return fmt.Errorf("k's don't match: %d != %d", f.m, g.m) + } + + f.b.InPlaceUnion(g.b) + return nil +} + +// Copy creates a copy of a Bloom filter. +func (f *BloomFilter) Copy() *BloomFilter { + fc := New(f.m, f.k) + fc.Merge(f) // #nosec + return fc +} + +// AddString to the Bloom Filter. Returns the filter (allows chaining) +func (f *BloomFilter) AddString(data string) *BloomFilter { + return f.Add([]byte(data)) +} + +// Test returns true if the data is in the BloomFilter, false otherwise. +// If true, the result might be a false positive. If false, the data +// is definitely not in the set. +func (f *BloomFilter) Test(data []byte) bool { + h := baseHashes(data) + for i := uint(0); i < f.k; i++ { + if !f.b.Test(f.location(h, i)) { + return false + } + } + return true +} + +// TestString returns true if the string is in the BloomFilter, false otherwise. +// If true, the result might be a false positive. If false, the data +// is definitely not in the set. +func (f *BloomFilter) TestString(data string) bool { + return f.Test([]byte(data)) +} + +// TestLocations returns true if all locations are set in the BloomFilter, false +// otherwise. +func (f *BloomFilter) TestLocations(locs []uint64) bool { + for i := 0; i < len(locs); i++ { + if !f.b.Test(uint(locs[i] % uint64(f.m))) { + return false + } + } + return true +} + +// TestAndAdd is equivalent to calling Test(data) then Add(data). +// The filter is written to unconditionnally: even if the element is present, +// the corresponding bits are still set. See also TestOrAdd. +// Returns the result of Test. +func (f *BloomFilter) TestAndAdd(data []byte) bool { + present := true + h := baseHashes(data) + for i := uint(0); i < f.k; i++ { + l := f.location(h, i) + if !f.b.Test(l) { + present = false + } + f.b.Set(l) + } + return present +} + +// TestAndAddString is the equivalent to calling Test(string) then Add(string). +// The filter is written to unconditionnally: even if the string is present, +// the corresponding bits are still set. See also TestOrAdd. +// Returns the result of Test. +func (f *BloomFilter) TestAndAddString(data string) bool { + return f.TestAndAdd([]byte(data)) +} + +// TestOrAdd is equivalent to calling Test(data) then if not present Add(data). +// If the element is already in the filter, then the filter is unchanged. +// Returns the result of Test. +func (f *BloomFilter) TestOrAdd(data []byte) bool { + present := true + h := baseHashes(data) + for i := uint(0); i < f.k; i++ { + l := f.location(h, i) + if !f.b.Test(l) { + present = false + f.b.Set(l) + } + } + return present +} + +// TestOrAddString is the equivalent to calling Test(string) then if not present Add(string). +// If the string is already in the filter, then the filter is unchanged. +// Returns the result of Test. +func (f *BloomFilter) TestOrAddString(data string) bool { + return f.TestOrAdd([]byte(data)) +} + +// ClearAll clears all the data in a Bloom filter, removing all keys +func (f *BloomFilter) ClearAll() *BloomFilter { + f.b.ClearAll() + return f +} + +// EstimateFalsePositiveRate returns, for a BloomFilter of m bits +// and k hash functions, an estimation of the false positive rate when +// +// storing n entries. This is an empirical, relatively slow +// +// test using integers as keys. +// This function is useful to validate the implementation. +func EstimateFalsePositiveRate(m, k, n uint) (fpRate float64) { + rounds := uint32(100000) + // We construct a new filter. + f := New(m, k) + n1 := make([]byte, 4) + // We populate the filter with n values. + for i := uint32(0); i < uint32(n); i++ { + binary.BigEndian.PutUint32(n1, i) + f.Add(n1) + } + fp := 0 + // test for number of rounds + for i := uint32(0); i < rounds; i++ { + binary.BigEndian.PutUint32(n1, i+uint32(n)+1) + if f.Test(n1) { + fp++ + } + } + fpRate = float64(fp) / (float64(rounds)) + return +} + +// Approximating the number of items +// https://en.wikipedia.org/wiki/Bloom_filter#Approximating_the_number_of_items_in_a_Bloom_filter +func (f *BloomFilter) ApproximatedSize() uint32 { + x := float64(f.b.Count()) + m := float64(f.Cap()) + k := float64(f.K()) + size := -1 * m / k * math.Log(1-x/m) / math.Log(math.E) + return uint32(math.Floor(size + 0.5)) // round +} + +// bloomFilterJSON is an unexported type for marshaling/unmarshaling BloomFilter struct. +type bloomFilterJSON struct { + M uint `json:"m"` + K uint `json:"k"` + B *bitset.BitSet `json:"b"` +} + +// MarshalJSON implements json.Marshaler interface. +func (f BloomFilter) MarshalJSON() ([]byte, error) { + return json.Marshal(bloomFilterJSON{f.m, f.k, f.b}) +} + +// UnmarshalJSON implements json.Unmarshaler interface. +func (f *BloomFilter) UnmarshalJSON(data []byte) error { + var j bloomFilterJSON + err := json.Unmarshal(data, &j) + if err != nil { + return err + } + f.m = j.M + f.k = j.K + f.b = j.B + return nil +} + +// WriteTo writes a binary representation of the BloomFilter to an i/o stream. +// It returns the number of bytes written. +// +// Performance: if this function is used to write to a disk or network +// connection, it might be beneficial to wrap the stream in a bufio.Writer. +// E.g., +// +// f, err := os.Create("myfile") +// w := bufio.NewWriter(f) +func (f *BloomFilter) WriteTo(stream io.Writer) (int64, error) { + err := binary.Write(stream, binary.BigEndian, uint64(f.m)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(f.k)) + if err != nil { + return 0, err + } + numBytes, err := f.b.WriteTo(stream) + return numBytes + int64(2*binary.Size(uint64(0))), err +} + +// ReadFrom reads a binary representation of the BloomFilter (such as might +// have been written by WriteTo()) from an i/o stream. It returns the number +// of bytes read. +// +// Performance: if this function is used to read from a disk or network +// connection, it might be beneficial to wrap the stream in a bufio.Reader. +// E.g., +// +// f, err := os.Open("myfile") +// r := bufio.NewReader(f) +func (f *BloomFilter) ReadFrom(stream io.Reader) (int64, error) { + var m, k uint64 + err := binary.Read(stream, binary.BigEndian, &m) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &k) + if err != nil { + return 0, err + } + b := &bitset.BitSet{} + numBytes, err := b.ReadFrom(stream) + if err != nil { + return 0, err + } + f.m = uint(m) + f.k = uint(k) + f.b = b + return numBytes + int64(2*binary.Size(uint64(0))), nil +} + +// GobEncode implements gob.GobEncoder interface. +func (f *BloomFilter) GobEncode() ([]byte, error) { + var buf bytes.Buffer + _, err := f.WriteTo(&buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +// GobDecode implements gob.GobDecoder interface. +func (f *BloomFilter) GobDecode(data []byte) error { + buf := bytes.NewBuffer(data) + _, err := f.ReadFrom(buf) + + return err +} + +// MarshalBinary implements binary.BinaryMarshaler interface. +func (f *BloomFilter) MarshalBinary() ([]byte, error) { + var buf bytes.Buffer + _, err := f.WriteTo(&buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +// UnmarshalBinary implements binary.BinaryUnmarshaler interface. +func (f *BloomFilter) UnmarshalBinary(data []byte) error { + buf := bytes.NewBuffer(data) + _, err := f.ReadFrom(buf) + + return err +} + +// Equal tests for the equality of two Bloom filters +func (f *BloomFilter) Equal(g *BloomFilter) bool { + return f.m == g.m && f.k == g.k && f.b.Equal(g.b) +} + +// Locations returns a list of hash locations representing a data item. +func Locations(data []byte, k uint) []uint64 { + locs := make([]uint64, k) + + // calculate locations + h := baseHashes(data) + for i := uint(0); i < k; i++ { + locs[i] = location(h, i) + } + + return locs +} diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/murmur.go b/vendor/github.com/bits-and-blooms/bloom/v3/murmur.go new file mode 100644 index 000000000..c93b1ba99 --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bloom/v3/murmur.go @@ -0,0 +1,289 @@ +/* +The bloom library relied on the excellent murmur library +by Sébastien Paolacci. Unfortunately, it involved some heap +allocation. We want to avoid any heap allocation whatsoever +in the hashing process. To preserve backward compatibility, we roll +our own hashing functions. They are designed to be strictly equivalent +to Paolacci's implementation. + +License on original code: + + +Copyright 2013, Sébastien Paolacci. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the library nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package bloom + +import ( + "encoding/binary" + "math/bits" + "unsafe" +) + +const ( + c1_128 = 0x87c37b91114253d5 + c2_128 = 0x4cf5ad432745937f + block_size = 16 +) + +// digest128 represents a partial evaluation of a 128 bites hash. +type digest128 struct { + h1 uint64 // Unfinalized running hash part 1. + h2 uint64 // Unfinalized running hash part 2. +} + +// bmix will hash blocks (16 bytes) +func (d *digest128) bmix(p []byte) { + nblocks := len(p) / block_size + for i := 0; i < nblocks; i++ { + b := (*[16]byte)(unsafe.Pointer(&p[i*block_size])) + k1, k2 := binary.LittleEndian.Uint64(b[:8]), binary.LittleEndian.Uint64(b[8:]) + d.bmix_words(k1, k2) + } +} + +// bmix_words will hash two 64-bit words (16 bytes) +func (d *digest128) bmix_words(k1, k2 uint64) { + h1, h2 := d.h1, d.h2 + + k1 *= c1_128 + k1 = bits.RotateLeft64(k1, 31) + k1 *= c2_128 + h1 ^= k1 + + h1 = bits.RotateLeft64(h1, 27) + h1 += h2 + h1 = h1*5 + 0x52dce729 + + k2 *= c2_128 + k2 = bits.RotateLeft64(k2, 33) + k2 *= c1_128 + h2 ^= k2 + + h2 = bits.RotateLeft64(h2, 31) + h2 += h1 + h2 = h2*5 + 0x38495ab5 + d.h1, d.h2 = h1, h2 +} + +// sum128 computers two 64-bit hash value. It is assumed that +// bmix was first called on the data to process complete blocks +// of 16 bytes. The 'tail' is a slice representing the 'tail' (leftover +// elements, fewer than 16). If pad_tail is true, we make it seem like +// there is an extra element with value 1 appended to the tail. +// The length parameter represents the full length of the data (including +// the blocks of 16 bytes, and, if pad_tail is true, an extra byte). +func (d *digest128) sum128(pad_tail bool, length uint, tail []byte) (h1, h2 uint64) { + h1, h2 = d.h1, d.h2 + + var k1, k2 uint64 + if pad_tail { + switch (len(tail) + 1) & 15 { + case 15: + k2 ^= uint64(1) << 48 + break + case 14: + k2 ^= uint64(1) << 40 + break + case 13: + k2 ^= uint64(1) << 32 + break + case 12: + k2 ^= uint64(1) << 24 + break + case 11: + k2 ^= uint64(1) << 16 + break + case 10: + k2 ^= uint64(1) << 8 + break + case 9: + k2 ^= uint64(1) << 0 + + k2 *= c2_128 + k2 = bits.RotateLeft64(k2, 33) + k2 *= c1_128 + h2 ^= k2 + + break + + case 8: + k1 ^= uint64(1) << 56 + break + case 7: + k1 ^= uint64(1) << 48 + break + case 6: + k1 ^= uint64(1) << 40 + break + case 5: + k1 ^= uint64(1) << 32 + break + case 4: + k1 ^= uint64(1) << 24 + break + case 3: + k1 ^= uint64(1) << 16 + break + case 2: + k1 ^= uint64(1) << 8 + break + case 1: + k1 ^= uint64(1) << 0 + k1 *= c1_128 + k1 = bits.RotateLeft64(k1, 31) + k1 *= c2_128 + h1 ^= k1 + } + + } + switch len(tail) & 15 { + case 15: + k2 ^= uint64(tail[14]) << 48 + fallthrough + case 14: + k2 ^= uint64(tail[13]) << 40 + fallthrough + case 13: + k2 ^= uint64(tail[12]) << 32 + fallthrough + case 12: + k2 ^= uint64(tail[11]) << 24 + fallthrough + case 11: + k2 ^= uint64(tail[10]) << 16 + fallthrough + case 10: + k2 ^= uint64(tail[9]) << 8 + fallthrough + case 9: + k2 ^= uint64(tail[8]) << 0 + + k2 *= c2_128 + k2 = bits.RotateLeft64(k2, 33) + k2 *= c1_128 + h2 ^= k2 + + fallthrough + + case 8: + k1 ^= uint64(tail[7]) << 56 + fallthrough + case 7: + k1 ^= uint64(tail[6]) << 48 + fallthrough + case 6: + k1 ^= uint64(tail[5]) << 40 + fallthrough + case 5: + k1 ^= uint64(tail[4]) << 32 + fallthrough + case 4: + k1 ^= uint64(tail[3]) << 24 + fallthrough + case 3: + k1 ^= uint64(tail[2]) << 16 + fallthrough + case 2: + k1 ^= uint64(tail[1]) << 8 + fallthrough + case 1: + k1 ^= uint64(tail[0]) << 0 + k1 *= c1_128 + k1 = bits.RotateLeft64(k1, 31) + k1 *= c2_128 + h1 ^= k1 + } + + h1 ^= uint64(length) + h2 ^= uint64(length) + + h1 += h2 + h2 += h1 + + h1 = fmix64(h1) + h2 = fmix64(h2) + + h1 += h2 + h2 += h1 + + return h1, h2 +} + +func fmix64(k uint64) uint64 { + k ^= k >> 33 + k *= 0xff51afd7ed558ccd + k ^= k >> 33 + k *= 0xc4ceb9fe1a85ec53 + k ^= k >> 33 + return k +} + +// sum256 will compute 4 64-bit hash values from the input. +// It is designed to never allocate memory on the heap. So it +// works without any byte buffer whatsoever. +// It is designed to be strictly equivalent to +// +// a1 := []byte{1} +// hasher := murmur3.New128() +// hasher.Write(data) // #nosec +// v1, v2 := hasher.Sum128() +// hasher.Write(a1) // #nosec +// v3, v4 := hasher.Sum128() +// +// See TestHashRandom. +func (d *digest128) sum256(data []byte) (hash1, hash2, hash3, hash4 uint64) { + // We always start from zero. + d.h1, d.h2 = 0, 0 + // Process as many bytes as possible. + d.bmix(data) + // We have enough to compute the first two 64-bit numbers + length := uint(len(data)) + tail_length := length % block_size + tail := data[length-tail_length:] + hash1, hash2 = d.sum128(false, length, tail) + // Next we want to 'virtually' append 1 to the input, but, + // we do not want to append to an actual array!!! + if tail_length+1 == block_size { + // We are left with no tail!!! + word1 := binary.LittleEndian.Uint64(tail[:8]) + word2 := uint64(binary.LittleEndian.Uint32(tail[8 : 8+4])) + word2 = word2 | (uint64(tail[12]) << 32) | (uint64(tail[13]) << 40) | (uint64(tail[14]) << 48) + // We append 1. + word2 = word2 | (uint64(1) << 56) + // We process the resulting 2 words. + d.bmix_words(word1, word2) + tail := data[length:] // empty slice, deliberate. + hash3, hash4 = d.sum128(false, length+1, tail) + } else { + // We still have a tail (fewer than 15 bytes) but we + // need to append '1' to it. + hash3, hash4 = d.sum128(true, length+1, tail) + } + + return hash1, hash2, hash3, hash4 +} diff --git a/vendor/modules.txt b/vendor/modules.txt index ec587762d..bb9aefe2a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -137,9 +137,12 @@ github.com/benbjohnson/immutable # github.com/beorn7/perks v1.0.1 ## explicit; go 1.11 github.com/beorn7/perks/quantile -# github.com/bits-and-blooms/bitset v1.2.0 -## explicit; go 1.14 +# github.com/bits-and-blooms/bitset v1.13.0 +## explicit; go 1.16 github.com/bits-and-blooms/bitset +# github.com/bits-and-blooms/bloom/v3 v3.7.0 +## explicit; go 1.16 +github.com/bits-and-blooms/bloom/v3 # github.com/bradfitz/iter v0.0.0-20191230175014-e8f45d346db8 ## explicit; go 1.11 github.com/bradfitz/iter