chore_: add `bits-and-blooms/bloom/v3` module

This commit is contained in:
Patryk Osmaczko 2024-06-27 17:11:35 +02:00 committed by osmaczko
parent d69b3e5cc9
commit 1715defec8
20 changed files with 1709 additions and 145 deletions

3
go.mod
View File

@ -81,6 +81,7 @@ require (
github.com/Masterminds/squirrel v1.5.4
github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5
github.com/andybalholm/brotli v1.0.5
github.com/bits-and-blooms/bloom/v3 v3.7.0
github.com/cenkalti/backoff/v4 v4.2.1
github.com/gorilla/sessions v1.2.1
github.com/ipfs/go-log/v2 v2.5.1
@ -131,7 +132,7 @@ require (
github.com/benbjohnson/clock v1.3.5 // indirect
github.com/benbjohnson/immutable v0.3.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bits-and-blooms/bitset v1.2.0 // indirect
github.com/bits-and-blooms/bitset v1.13.0 // indirect
github.com/bradfitz/iter v0.0.0-20191230175014-e8f45d346db8 // indirect
github.com/btcsuite/btcd v0.22.1 // indirect
github.com/btcsuite/btcd/btcec/v2 v2.3.2 // indirect

8
go.sum
View File

@ -411,8 +411,12 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k=
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/bits-and-blooms/bitset v1.10.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bits-and-blooms/bitset v1.13.0 h1:bAQ9OPNFYbGHV6Nez0tmNI0RiEu7/hxlYJRUA0wFAVE=
github.com/bits-and-blooms/bitset v1.13.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bits-and-blooms/bloom/v3 v3.7.0 h1:VfknkqV4xI+PsaDIsoHueyxVDZrfvMn56jeWUzvzdls=
github.com/bits-and-blooms/bloom/v3 v3.7.0/go.mod h1:VKlUSvp0lFIYqxJjzdnSsZEw4iHb1kOL2tfHTgyJBHg=
github.com/bkaradzic/go-lz4 v1.0.0/go.mod h1:0YdlkowM3VswSROI7qDxhRvJ3sLhlFrRRwjwegp5jy4=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/blang/semver v3.1.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
@ -2109,6 +2113,8 @@ github.com/tsenart/tb v0.0.0-20181025101425-0d2499c8b6e9 h1:kjbwitOGH46vD01f2s3l
github.com/tsenart/tb v0.0.0-20181025101425-0d2499c8b6e9/go.mod h1:EcGP24b8DY+bWHnpfJDP7fM+o8Nmz4fYH0l2xTtNr3I=
github.com/ttacon/chalk v0.0.0-20160626202418-22c06c80ed31/go.mod h1:onvgF043R+lC5RZ8IT9rBXDaEDnpnw/Cl+HFiw+v/7Q=
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM=
github.com/twmb/murmur3 v1.1.6 h1:mqrRot1BRxm+Yct+vavLMou2/iJt0tNVTTC0QoIjaZg=
github.com/twmb/murmur3 v1.1.6/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
github.com/tyler-smith/go-bip39 v1.0.1-0.20181017060643-dbb3b84ba2ef/go.mod h1:sJ5fKU0s6JVwZjjcUEX2zFOnvq0ASQ2K9Zr6cf67kNs=
github.com/tyler-smith/go-bip39 v1.1.0 h1:5eUemwrMargf3BSLRRCalXT93Ns6pQJIjYQN2nyfOP8=
github.com/tyler-smith/go-bip39 v1.1.0/go.mod h1:gUYDtqQw1JS3ZJ8UWVcGTGqqr6YIN3CWg+kkNaLt55U=

View File

@ -7,6 +7,15 @@
[![PkgGoDev](https://pkg.go.dev/badge/github.com/bits-and-blooms/bitset?tab=doc)](https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc)
This library is part of the [awesome go collection](https://github.com/avelino/awesome-go). It is used in production by several important systems:
* [beego](https://github.com/beego/beego)
* [CubeFS](https://github.com/cubefs/cubefs)
* [Amazon EKS Distro](https://github.com/aws/eks-distro)
* [sourcegraph](https://github.com/sourcegraph/sourcegraph)
* [torrent](https://github.com/anacrolix/torrent)
## Description
Package bitset implements bitsets, a mapping between non-negative integers and boolean values.
@ -60,19 +69,76 @@ func main() {
}
```
As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets.
Package documentation is at: https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc
## Serialization
You may serialize a bitset safely and portably to a stream
of bytes as follows:
```Go
const length = 9585
const oneEvery = 97
bs := bitset.New(length)
// Add some bits
for i := uint(0); i < length; i += oneEvery {
bs = bs.Set(i)
}
var buf bytes.Buffer
n, err := bs.WriteTo(&buf)
if err != nil {
// failure
}
// Here n == buf.Len()
```
You can later deserialize the result as follows:
```Go
// Read back from buf
bs = bitset.New()
n, err = bs.ReadFrom(&buf)
if err != nil {
// error
}
// n is the number of bytes read
```
The `ReadFrom` function attempts to read the data into the existing
BitSet instance, to minimize memory allocations.
*Performance tip*:
When reading and writing to a file or a network connection, you may get better performance by
wrapping your streams with `bufio` instances.
E.g.,
```Go
f, err := os.Create("myfile")
w := bufio.NewWriter(f)
```
```Go
f, err := os.Open("myfile")
r := bufio.NewReader(f)
```
## Memory Usage
The memory usage of a bitset using N bits is at least N/8 bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring).
The memory usage of a bitset using `N` bits is at least `N/8` bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring).
The `roaring` library allows you to go back and forth between compressed Roaring bitmaps and the conventional bitset instances:
```Go
mybitset := roaringbitmap.ToBitSet()
newroaringbitmap := roaring.FromBitSet(mybitset)
```
## Implementation Note
Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed.
It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `unit64`). If so, the version will be bumped.
It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `uint64`). If so, the version will be bumped.
## Installation

5
vendor/github.com/bits-and-blooms/bitset/SECURITY.md generated vendored Normal file
View File

@ -0,0 +1,5 @@
# Security Policy
## Reporting a Vulnerability
You can report privately a vulnerability by email at daniel@lemire.me (current maintainer).

View File

@ -33,12 +33,10 @@ Example use:
As an alternative to BitSets, one should check out the 'big' package,
which provides a (less set-theoretical) view of bitsets.
*/
package bitset
import (
"bufio"
"bytes"
"encoding/base64"
"encoding/binary"
@ -52,6 +50,9 @@ import (
// the wordSize of a bit set
const wordSize = uint(64)
// the wordSize of a bit set in bytes
const wordBytes = wordSize / 8
// log2WordSize is lg(wordSize)
const log2WordSize = uint(6)
@ -87,12 +88,23 @@ func (b *BitSet) safeSet() []uint64 {
return b.set
}
// From is a constructor used to create a BitSet from an array of integers
func From(buf []uint64) *BitSet {
return &BitSet{uint(len(buf)) * 64, buf}
// SetBitsetFrom fills the bitset with an array of integers without creating a new BitSet instance
func (b *BitSet) SetBitsetFrom(buf []uint64) {
b.length = uint(len(buf)) * 64
b.set = buf
}
// Bytes returns the bitset as array of integers
// From is a constructor used to create a BitSet from an array of words
func From(buf []uint64) *BitSet {
return FromWithLength(uint(len(buf))*64, buf)
}
// FromWithLength constructs from an array of words and length.
func FromWithLength(len uint, set []uint64) *BitSet {
return &BitSet{len, set}
}
// Bytes returns the bitset as array of words
func (b *BitSet) Bytes() []uint64 {
return b.set
}
@ -105,6 +117,17 @@ func wordsNeeded(i uint) int {
return int((i + (wordSize - 1)) >> log2WordSize)
}
// wordsNeededUnbound calculates the number of words needed for i bits, possibly exceeding the capacity.
// This function is useful if you know that the capacity cannot be exceeded (e.g., you have an existing bitmap).
func wordsNeededUnbound(i uint) int {
return int((i + (wordSize - 1)) >> log2WordSize)
}
// wordsIndex calculates the index of words in a `uint64`
func wordsIndex(i uint) uint {
return i & (wordSize - 1)
}
// New creates a new BitSet with a hint that length bits will be required
func New(length uint) (bset *BitSet) {
defer func() {
@ -135,9 +158,8 @@ func (b *BitSet) Len() uint {
return b.length
}
// extendSetMaybe adds additional words to incorporate new bits if needed
func (b *BitSet) extendSetMaybe(i uint) {
if i >= b.length { // if we need more bits, make 'em
// extendSet adds additional words to incorporate new bits if needed
func (b *BitSet) extendSet(i uint) {
if i >= Cap() {
panic("You are exceeding the capacity")
}
@ -153,14 +175,13 @@ func (b *BitSet) extendSetMaybe(i uint) {
}
b.length = i + 1
}
}
// Test whether bit i is set.
func (b *BitSet) Test(i uint) bool {
if i >= b.length {
return false
}
return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0
return b.set[i>>log2WordSize]&(1<<wordsIndex(i)) != 0
}
// Set bit i to 1, the capacity of the bitset is automatically
@ -170,8 +191,10 @@ func (b *BitSet) Test(i uint) bool {
// may lead to a memory shortage and a panic: the caller is responsible
// for providing sensible parameters in line with their memory capacity.
func (b *BitSet) Set(i uint) *BitSet {
b.extendSetMaybe(i)
b.set[i>>log2WordSize] |= 1 << (i & (wordSize - 1))
if i >= b.length { // if we need more bits, make 'em
b.extendSet(i)
}
b.set[i>>log2WordSize] |= 1 << wordsIndex(i)
return b
}
@ -180,7 +203,7 @@ func (b *BitSet) Clear(i uint) *BitSet {
if i >= b.length {
return b
}
b.set[i>>log2WordSize] &^= 1 << (i & (wordSize - 1))
b.set[i>>log2WordSize] &^= 1 << wordsIndex(i)
return b
}
@ -205,7 +228,7 @@ func (b *BitSet) Flip(i uint) *BitSet {
if i >= b.length {
return b.Set(i)
}
b.set[i>>log2WordSize] ^= 1 << (i & (wordSize - 1))
b.set[i>>log2WordSize] ^= 1 << wordsIndex(i)
return b
}
@ -218,15 +241,23 @@ func (b *BitSet) FlipRange(start, end uint) *BitSet {
if start >= end {
return b
}
b.extendSetMaybe(end - 1)
if end-1 >= b.length { // if we need more bits, make 'em
b.extendSet(end - 1)
}
var startWord uint = start >> log2WordSize
var endWord uint = end >> log2WordSize
b.set[startWord] ^= ^(^uint64(0) << (start & (wordSize - 1)))
b.set[startWord] ^= ^(^uint64(0) << wordsIndex(start))
if endWord > 0 {
// bounds check elimination
data := b.set
_ = data[endWord-1]
for i := startWord; i < endWord; i++ {
b.set[i] = ^b.set[i]
data[i] = ^data[i]
}
}
if end&(wordSize-1) != 0 {
b.set[endWord] ^= ^uint64(0) >> wordsIndex(-end)
}
b.set[endWord] ^= ^uint64(0) >> (-end & (wordSize - 1))
return b
}
@ -254,7 +285,10 @@ func (b *BitSet) Shrink(lastbitindex uint) *BitSet {
copy(shrunk, b.set[:idx])
b.set = shrunk
b.length = length
b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1))))
lastWordUsedBits := length % 64
if lastWordUsedBits != 0 {
b.set[idx-1] &= allBits >> uint64(64-wordsIndex(lastWordUsedBits))
}
return b
}
@ -283,7 +317,7 @@ func (b *BitSet) Compact() *BitSet {
// this method could be extremely slow and in some cases might cause the entire BitSet
// to be recopied.
func (b *BitSet) InsertAt(idx uint) *BitSet {
insertAtElement := (idx >> log2WordSize)
insertAtElement := idx >> log2WordSize
// if length of set is a multiple of wordSize we need to allocate more space first
if b.isLenExactMultiple() {
@ -302,13 +336,13 @@ func (b *BitSet) InsertAt(idx uint) *BitSet {
// generate a mask to extract the data that we need to shift left
// within the element where we insert a bit
dataMask := ^(uint64(1)<<uint64(idx&(wordSize-1)) - 1)
dataMask := uint64(1)<<uint64(wordsIndex(idx)) - 1
// extract that data that we'll shift
data := b.set[i] & dataMask
data := b.set[i] & (^dataMask)
// set the positions of the data mask to 0 in the element where we insert
b.set[i] &= ^dataMask
b.set[i] &= dataMask
// shift data mask to the left and insert its data to the slice element
b.set[i] |= data << 1
@ -356,7 +390,7 @@ func (b *BitSet) DeleteAt(i uint) *BitSet {
// generate a mask for the data that needs to be shifted right
// within that slice element that gets modified
dataMask := ^((uint64(1) << (i & (wordSize - 1))) - 1)
dataMask := ^((uint64(1) << wordsIndex(i)) - 1)
// extract the data that we'll shift right from the slice element
data := b.set[deleteAtElement] & dataMask
@ -394,16 +428,20 @@ func (b *BitSet) NextSet(i uint) (uint, bool) {
return 0, false
}
w := b.set[x]
w = w >> (i & (wordSize - 1))
w = w >> wordsIndex(i)
if w != 0 {
return i + trailingZeroes64(w), true
}
x = x + 1
x++
// bounds check elimination in the loop
if x < 0 {
return 0, false
}
for x < len(b.set) {
if b.set[x] != 0 {
return uint(x)*wordSize + trailingZeroes64(b.set[x]), true
}
x = x + 1
x++
}
return 0, false
@ -423,7 +461,6 @@ func (b *BitSet) NextSet(i uint) (uint, bool) {
// j += 1
// }
//
//
// It is possible to retrieve all set bits as follow:
//
// indices := make([]uint, bitmap.Count())
@ -438,7 +475,7 @@ func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) {
if x >= len(b.set) || capacity == 0 {
return 0, myanswer[:0]
}
skip := i & (wordSize - 1)
skip := wordsIndex(i)
word := b.set[x] >> skip
myanswer = myanswer[:capacity]
size := int(0)
@ -481,18 +518,24 @@ func (b *BitSet) NextClear(i uint) (uint, bool) {
return 0, false
}
w := b.set[x]
w = w >> (i & (wordSize - 1))
wA := allBits >> (i & (wordSize - 1))
w = w >> wordsIndex(i)
wA := allBits >> wordsIndex(i)
index := i + trailingZeroes64(^w)
if w != wA && index < b.length {
return index, true
}
x++
// bounds check elimination in the loop
if x < 0 {
return 0, false
}
for x < len(b.set) {
if b.set[x] != allBits {
index = uint(x)*wordSize + trailingZeroes64(^b.set[x])
if b.set[x] != allBits && index < b.length {
if index < b.length {
return index, true
}
}
x++
}
return 0, false
@ -508,9 +551,21 @@ func (b *BitSet) ClearAll() *BitSet {
return b
}
// SetAll sets the entire BitSet
func (b *BitSet) SetAll() *BitSet {
if b != nil && b.set != nil {
for i := range b.set {
b.set[i] = allBits
}
b.cleanLastWord()
}
return b
}
// wordCount returns the number of words used in a bit set
func (b *BitSet) wordCount() int {
return len(b.set)
return wordsNeededUnbound(b.length)
}
// Clone this BitSet
@ -522,9 +577,10 @@ func (b *BitSet) Clone() *BitSet {
return c
}
// Copy into a destination BitSet
// Returning the size of the destination BitSet
// like array copy
// Copy into a destination BitSet using the Go array copy semantics:
// the number of bits copied is the minimum of the number of bits in the current
// BitSet (Len()) and the destination Bitset.
// We return the number of bits copied in the destination BitSet.
func (b *BitSet) Copy(c *BitSet) (count uint) {
if c == nil {
return
@ -536,9 +592,33 @@ func (b *BitSet) Copy(c *BitSet) (count uint) {
if b.length < c.length {
count = b.length
}
// Cleaning the last word is needed to keep the invariant that other functions, such as Count, require
// that any bits in the last word that would exceed the length of the bitmask are set to 0.
c.cleanLastWord()
return
}
// CopyFull copies into a destination BitSet such that the destination is
// identical to the source after the operation, allocating memory if necessary.
func (b *BitSet) CopyFull(c *BitSet) {
if c == nil {
return
}
c.length = b.length
if len(b.set) == 0 {
if c.set != nil {
c.set = c.set[:0]
}
} else {
if cap(c.set) < len(b.set) {
c.set = make([]uint64, len(b.set))
} else {
c.set = c.set[:len(b.set)]
}
copy(c.set, b.set)
}
}
// Count (number of set bits).
// Also known as "popcount" or "population count".
func (b *BitSet) Count() uint {
@ -561,10 +641,15 @@ func (b *BitSet) Equal(c *BitSet) bool {
if b.length == 0 { // if they have both length == 0, then could have nil set
return true
}
// testing for equality shoud not transform the bitset (no call to safeSet)
for p, v := range b.set {
if c.set[p] != v {
wn := b.wordCount()
// bounds check elimination
if wn <= 0 {
return true
}
_ = b.set[wn-1]
_ = c.set[wn-1]
for p := 0; p < wn; p++ {
if c.set[p] != b.set[p] {
return false
}
}
@ -583,9 +668,9 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) {
panicIfNull(b)
panicIfNull(compare)
result = b.Clone() // clone b (in case b is bigger than compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
l := compare.wordCount()
if l > b.wordCount() {
l = b.wordCount()
}
for i := 0; i < l; i++ {
result.set[i] = b.set[i] &^ compare.set[i]
@ -597,9 +682,9 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) {
func (b *BitSet) DifferenceCardinality(compare *BitSet) uint {
panicIfNull(b)
panicIfNull(compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
l := compare.wordCount()
if l > b.wordCount() {
l = b.wordCount()
}
cnt := uint64(0)
cnt += popcntMaskSlice(b.set[:l], compare.set[:l])
@ -612,12 +697,19 @@ func (b *BitSet) DifferenceCardinality(compare *BitSet) uint {
func (b *BitSet) InPlaceDifference(compare *BitSet) {
panicIfNull(b)
panicIfNull(compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
l := compare.wordCount()
if l > b.wordCount() {
l = b.wordCount()
}
if l <= 0 {
return
}
// bounds check elimination
data, cmpData := b.set, compare.set
_ = data[l-1]
_ = cmpData[l-1]
for i := 0; i < l; i++ {
b.set[i] &^= compare.set[i]
data[i] &^= cmpData[i]
}
}
@ -660,18 +752,29 @@ func (b *BitSet) IntersectionCardinality(compare *BitSet) uint {
func (b *BitSet) InPlaceIntersection(compare *BitSet) {
panicIfNull(b)
panicIfNull(compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
l := compare.wordCount()
if l > b.wordCount() {
l = b.wordCount()
}
if l > 0 {
// bounds check elimination
data, cmpData := b.set, compare.set
_ = data[l-1]
_ = cmpData[l-1]
for i := 0; i < l; i++ {
b.set[i] &= compare.set[i]
data[i] &= cmpData[i]
}
}
if l >= 0 {
for i := l; i < len(b.set); i++ {
b.set[i] = 0
}
}
if compare.length > 0 {
b.extendSetMaybe(compare.length - 1)
if compare.length-1 >= b.length {
b.extendSet(compare.length - 1)
}
}
}
@ -706,15 +809,22 @@ func (b *BitSet) UnionCardinality(compare *BitSet) uint {
func (b *BitSet) InPlaceUnion(compare *BitSet) {
panicIfNull(b)
panicIfNull(compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
l := compare.wordCount()
if l > b.wordCount() {
l = b.wordCount()
}
if compare.length > 0 {
b.extendSetMaybe(compare.length - 1)
if compare.length > 0 && compare.length-1 >= b.length {
b.extendSet(compare.length - 1)
}
if l > 0 {
// bounds check elimination
data, cmpData := b.set, compare.set
_ = data[l-1]
_ = cmpData[l-1]
for i := 0; i < l; i++ {
b.set[i] |= compare.set[i]
data[i] |= cmpData[i]
}
}
if len(compare.set) > l {
for i := l; i < len(compare.set); i++ {
@ -754,15 +864,21 @@ func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint {
func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) {
panicIfNull(b)
panicIfNull(compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
l := compare.wordCount()
if l > b.wordCount() {
l = b.wordCount()
}
if compare.length > 0 {
b.extendSetMaybe(compare.length - 1)
if compare.length > 0 && compare.length-1 >= b.length {
b.extendSet(compare.length - 1)
}
if l > 0 {
// bounds check elimination
data, cmpData := b.set, compare.set
_ = data[l-1]
_ = cmpData[l-1]
for i := 0; i < l; i++ {
b.set[i] ^= compare.set[i]
data[i] ^= cmpData[i]
}
}
if len(compare.set) > l {
for i := l; i < len(compare.set); i++ {
@ -773,17 +889,17 @@ func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) {
// Is the length an exact multiple of word sizes?
func (b *BitSet) isLenExactMultiple() bool {
return b.length%wordSize == 0
return wordsIndex(b.length) == 0
}
// Clean last word by setting unused bits to 0
func (b *BitSet) cleanLastWord() {
if !b.isLenExactMultiple() {
b.set[len(b.set)-1] &= allBits >> (wordSize - b.length%wordSize)
b.set[len(b.set)-1] &= allBits >> (wordSize - wordsIndex(b.length))
}
}
// Complement computes the (local) complement of a biset (up to length bits)
// Complement computes the (local) complement of a bitset (up to length bits)
func (b *BitSet) Complement() (result *BitSet) {
panicIfNull(b)
result = New(b.length)
@ -811,7 +927,6 @@ func (b *BitSet) None() bool {
return false
}
}
return true
}
return true
}
@ -824,12 +939,16 @@ func (b *BitSet) Any() bool {
// IsSuperSet returns true if this is a superset of the other set
func (b *BitSet) IsSuperSet(other *BitSet) bool {
for i, e := other.NextSet(0); e; i, e = other.NextSet(i + 1) {
if !b.Test(i) {
l := other.wordCount()
if b.wordCount() < l {
l = b.wordCount()
}
for i, word := range other.set[:l] {
if b.set[i]&word != word {
return false
}
}
return true
return popcntSlice(other.set[l:]) == 0
}
// IsStrictSuperSet returns true if this is a strict superset of the other set
@ -837,7 +956,8 @@ func (b *BitSet) IsStrictSuperSet(other *BitSet) bool {
return b.Count() > other.Count() && b.IsSuperSet(other)
}
// DumpAsBits dumps a bit set as a string of bits
// DumpAsBits dumps a bit set as a string of bits. Following the usual convention in Go,
// the least significant bits are printed last (index 0 is at the end of the string).
func (b *BitSet) DumpAsBits() string {
if b.set == nil {
return "."
@ -850,78 +970,156 @@ func (b *BitSet) DumpAsBits() string {
return buffer.String()
}
// BinaryStorageSize returns the binary storage requirements
// BinaryStorageSize returns the binary storage requirements (see WriteTo) in bytes.
func (b *BitSet) BinaryStorageSize() int {
return binary.Size(uint64(0)) + binary.Size(b.set)
return int(wordBytes + wordBytes*uint(b.wordCount()))
}
// WriteTo writes a BitSet to a stream
func readUint64Array(reader io.Reader, data []uint64) error {
length := len(data)
bufferSize := 128
buffer := make([]byte, bufferSize*int(wordBytes))
for i := 0; i < length; i += bufferSize {
end := i + bufferSize
if end > length {
end = length
buffer = buffer[:wordBytes*uint(end-i)]
}
chunk := data[i:end]
if _, err := io.ReadFull(reader, buffer); err != nil {
return err
}
for i := range chunk {
chunk[i] = uint64(binaryOrder.Uint64(buffer[8*i:]))
}
}
return nil
}
func writeUint64Array(writer io.Writer, data []uint64) error {
bufferSize := 128
buffer := make([]byte, bufferSize*int(wordBytes))
for i := 0; i < len(data); i += bufferSize {
end := i + bufferSize
if end > len(data) {
end = len(data)
buffer = buffer[:wordBytes*uint(end-i)]
}
chunk := data[i:end]
for i, x := range chunk {
binaryOrder.PutUint64(buffer[8*i:], x)
}
_, err := writer.Write(buffer)
if err != nil {
return err
}
}
return nil
}
// WriteTo writes a BitSet to a stream. The format is:
// 1. uint64 length
// 2. []uint64 set
// Upon success, the number of bytes written is returned.
//
// Performance: if this function is used to write to a disk or network
// connection, it might be beneficial to wrap the stream in a bufio.Writer.
// E.g.,
//
// f, err := os.Create("myfile")
// w := bufio.NewWriter(f)
func (b *BitSet) WriteTo(stream io.Writer) (int64, error) {
length := uint64(b.length)
// Write length
err := binary.Write(stream, binaryOrder, length)
err := binary.Write(stream, binaryOrder, &length)
if err != nil {
return 0, err
// Upon failure, we do not guarantee that we
// return the number of bytes written.
return int64(0), err
}
// Write set
err = binary.Write(stream, binaryOrder, b.set)
return int64(b.BinaryStorageSize()), err
err = writeUint64Array(stream, b.set[:b.wordCount()])
if err != nil {
// Upon failure, we do not guarantee that we
// return the number of bytes written.
return int64(wordBytes), err
}
return int64(b.BinaryStorageSize()), nil
}
// ReadFrom reads a BitSet from a stream written using WriteTo
// The format is:
// 1. uint64 length
// 2. []uint64 set
// Upon success, the number of bytes read is returned.
// If the current BitSet is not large enough to hold the data,
// it is extended. In case of error, the BitSet is either
// left unchanged or made empty if the error occurs too late
// to preserve the content.
//
// Performance: if this function is used to read from a disk or network
// connection, it might be beneficial to wrap the stream in a bufio.Reader.
// E.g.,
//
// f, err := os.Open("myfile")
// r := bufio.NewReader(f)
func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) {
var length uint64
// Read length first
err := binary.Read(stream, binaryOrder, &length)
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return 0, err
}
newset := New(uint(length))
newlength := uint(length)
if uint64(newset.length) != length {
if uint64(newlength) != length {
return 0, errors.New("unmarshalling error: type mismatch")
}
nWords := wordsNeeded(uint(newlength))
if cap(b.set) >= nWords {
b.set = b.set[:nWords]
} else {
b.set = make([]uint64, nWords)
}
// Read remaining bytes as set
err = binary.Read(stream, binaryOrder, newset.set)
b.length = newlength
err = readUint64Array(stream, b.set)
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
// We do not want to leave the BitSet partially filled as
// it is error prone.
b.set = b.set[:0]
b.length = 0
return 0, err
}
*b = *newset
return int64(b.BinaryStorageSize()), nil
}
// MarshalBinary encodes a BitSet into a binary form and returns the result.
func (b *BitSet) MarshalBinary() ([]byte, error) {
var buf bytes.Buffer
writer := bufio.NewWriter(&buf)
_, err := b.WriteTo(writer)
_, err := b.WriteTo(&buf)
if err != nil {
return []byte{}, err
}
err = writer.Flush()
return buf.Bytes(), err
}
// UnmarshalBinary decodes the binary form generated by MarshalBinary.
func (b *BitSet) UnmarshalBinary(data []byte) error {
buf := bytes.NewReader(data)
reader := bufio.NewReader(buf)
_, err := b.ReadFrom(reader)
_, err := b.ReadFrom(buf)
return err
}
// MarshalJSON marshals a BitSet as a JSON structure
func (b *BitSet) MarshalJSON() ([]byte, error) {
func (b BitSet) MarshalJSON() ([]byte, error) {
buffer := bytes.NewBuffer(make([]byte, 0, b.BinaryStorageSize()))
_, err := b.WriteTo(buffer)
if err != nil {
@ -950,3 +1148,37 @@ func (b *BitSet) UnmarshalJSON(data []byte) error {
_, err = b.ReadFrom(bytes.NewReader(buf))
return err
}
// Rank returns the nunber of set bits up to and including the index
// that are set in the bitset.
// See https://en.wikipedia.org/wiki/Ranking#Ranking_in_statistics
func (b *BitSet) Rank(index uint) uint {
if index >= b.length {
return b.Count()
}
leftover := (index + 1) & 63
answer := uint(popcntSlice(b.set[:(index+1)>>6]))
if leftover != 0 {
answer += uint(popcount(b.set[(index+1)>>6] << (64 - leftover)))
}
return answer
}
// Select returns the index of the jth set bit, where j is the argument.
// The caller is responsible to ensure that 0 <= j < Count(): when j is
// out of range, the function returns the length of the bitset (b.length).
//
// Note that this function differs in convention from the Rank function which
// returns 1 when ranking the smallest value. We follow the conventional
// textbook definition of Select and Rank.
func (b *BitSet) Select(index uint) uint {
leftover := index
for idx, word := range b.set {
w := uint(popcount(word))
if w > leftover {
return uint(idx)*64 + select64(word, leftover)
}
leftover -= w
}
return b.length
}

View File

@ -1,3 +1,4 @@
//go:build go1.9
// +build go1.9
package bitset
@ -14,6 +15,10 @@ func popcntSlice(s []uint64) uint64 {
func popcntMaskSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
for i := range s {
cnt += bits.OnesCount64(s[i] &^ m[i])
}
@ -22,6 +27,10 @@ func popcntMaskSlice(s, m []uint64) uint64 {
func popcntAndSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
for i := range s {
cnt += bits.OnesCount64(s[i] & m[i])
}
@ -30,6 +39,10 @@ func popcntAndSlice(s, m []uint64) uint64 {
func popcntOrSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
for i := range s {
cnt += bits.OnesCount64(s[i] | m[i])
}
@ -38,6 +51,10 @@ func popcntOrSlice(s, m []uint64) uint64 {
func popcntXorSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
for i := range s {
cnt += bits.OnesCount64(s[i] ^ m[i])
}

View File

@ -1,5 +1,5 @@
// +build !go1.9
// +build amd64,!appengine
//go:build !go1.9 && amd64 && !appengine
// +build !go1.9,amd64,!appengine
package bitset

View File

@ -1,3 +1,4 @@
//go:build !go1.9 && (!amd64 || appengine)
// +build !go1.9
// +build !amd64 appengine

45
vendor/github.com/bits-and-blooms/bitset/select.go generated vendored Normal file
View File

@ -0,0 +1,45 @@
package bitset
func select64(w uint64, j uint) uint {
seen := 0
// Divide 64bit
part := w & 0xFFFFFFFF
n := uint(popcount(part))
if n <= j {
part = w >> 32
seen += 32
j -= n
}
ww := part
// Divide 32bit
part = ww & 0xFFFF
n = uint(popcount(part))
if n <= j {
part = ww >> 16
seen += 16
j -= n
}
ww = part
// Divide 16bit
part = ww & 0xFF
n = uint(popcount(part))
if n <= j {
part = ww >> 8
seen += 8
j -= n
}
ww = part
// Lookup in final byte
counter := 0
for ; counter < 8; counter++ {
j -= uint((ww >> counter) & 1)
if j+1 == 0 {
break
}
}
return uint(seen + counter)
}

View File

@ -1,3 +1,4 @@
//go:build !go1.9
// +build !go1.9
package bitset

View File

@ -1,3 +1,4 @@
//go:build go1.9
// +build go1.9
package bitset

27
vendor/github.com/bits-and-blooms/bloom/v3/.gitignore generated vendored Normal file
View File

@ -0,0 +1,27 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
target
.idea

38
vendor/github.com/bits-and-blooms/bloom/v3/.travis.yml generated vendored Normal file
View File

@ -0,0 +1,38 @@
language: go
sudo: false
branches:
except:
- release
branches:
only:
- master
- develop
- travis
go:
- 1.8
- tip
matrix:
allow_failures:
- go: tip
before_install:
- if [ -n "$GH_USER" ]; then git config --global github.user ${GH_USER}; fi;
- if [ -n "$GH_TOKEN" ]; then git config --global github.token ${GH_TOKEN}; fi;
- go get github.com/mattn/goveralls
before_script:
- make deps
script:
- make qa
after_failure:
- cat ./target/test/report.xml
after_success:
- if [ "$TRAVIS_GO_VERSION" = "1.8" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi;

24
vendor/github.com/bits-and-blooms/bloom/v3/LICENSE generated vendored Normal file
View File

@ -0,0 +1,24 @@
Copyright (c) 2014 Will Fitzgerald. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

197
vendor/github.com/bits-and-blooms/bloom/v3/Makefile generated vendored Normal file
View File

@ -0,0 +1,197 @@
# MAKEFILE
#
# @author Nicola Asuni <info@tecnick.com>
# @link https://github.com/bits-and-blooms/bloom
# ------------------------------------------------------------------------------
# List special make targets that are not associated with files
.PHONY: help all test format fmtcheck vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan qa deps clean nuke
# Use bash as shell (Note: Ubuntu now uses dash which doesn't support PIPESTATUS).
SHELL=/bin/bash
# CVS path (path to the parent dir containing the project)
CVSPATH=github.com/bits-and-blooms
# Project owner
OWNER=bits-and-blooms
# Project vendor
VENDOR=bits-and-blooms
# Project name
PROJECT=bloom
# Project version
VERSION=$(shell cat VERSION)
# Name of RPM or DEB package
PKGNAME=${VENDOR}-${PROJECT}
# Current directory
CURRENTDIR=$(shell pwd)
# GO lang path
ifneq ($(GOPATH),)
ifeq ($(findstring $(GOPATH),$(CURRENTDIR)),)
# the defined GOPATH is not valid
GOPATH=
endif
endif
ifeq ($(GOPATH),)
# extract the GOPATH
GOPATH=$(firstword $(subst /src/, ,$(CURRENTDIR)))
endif
# --- MAKE TARGETS ---
# Display general help about this command
help:
@echo ""
@echo "$(PROJECT) Makefile."
@echo "GOPATH=$(GOPATH)"
@echo "The following commands are available:"
@echo ""
@echo " make qa : Run all the tests"
@echo " make test : Run the unit tests"
@echo ""
@echo " make format : Format the source code"
@echo " make fmtcheck : Check if the source code has been formatted"
@echo " make vet : Check for suspicious constructs"
@echo " make lint : Check for style errors"
@echo " make coverage : Generate the coverage report"
@echo " make cyclo : Generate the cyclomatic complexity report"
@echo " make ineffassign : Detect ineffectual assignments"
@echo " make misspell : Detect commonly misspelled words in source files"
@echo " make structcheck : Find unused struct fields"
@echo " make varcheck : Find unused global variables and constants"
@echo " make errcheck : Check that error return values are used"
@echo " make gosimple : Suggest code simplifications"
@echo " make astscan : GO AST scanner"
@echo ""
@echo " make docs : Generate source code documentation"
@echo ""
@echo " make deps : Get the dependencies"
@echo " make clean : Remove any build artifact"
@echo " make nuke : Deletes any intermediate file"
@echo ""
# Alias for help target
all: help
# Run the unit tests
test:
@mkdir -p target/test
@mkdir -p target/report
GOPATH=$(GOPATH) \
go test \
-covermode=atomic \
-bench=. \
-race \
-cpuprofile=target/report/cpu.out \
-memprofile=target/report/mem.out \
-mutexprofile=target/report/mutex.out \
-coverprofile=target/report/coverage.out \
-v ./... | \
tee >(PATH=$(GOPATH)/bin:$(PATH) go-junit-report > target/test/report.xml); \
test $${PIPESTATUS[0]} -eq 0
# Format the source code
format:
@find . -type f -name "*.go" -exec gofmt -s -w {} \;
# Check if the source code has been formatted
fmtcheck:
@mkdir -p target
@find . -type f -name "*.go" -exec gofmt -s -d {} \; | tee target/format.diff
@test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; }
# Check for syntax errors
vet:
GOPATH=$(GOPATH) go vet .
# Check for style errors
lint:
GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint .
# Generate the coverage report
coverage:
@mkdir -p target/report
GOPATH=$(GOPATH) \
go tool cover -html=target/report/coverage.out -o target/report/coverage.html
# Report cyclomatic complexity
cyclo:
@mkdir -p target/report
GOPATH=$(GOPATH) gocyclo -avg ./ | tee target/report/cyclo.txt ; test $${PIPESTATUS[0]} -eq 0
# Detect ineffectual assignments
ineffassign:
@mkdir -p target/report
GOPATH=$(GOPATH) ineffassign ./ | tee target/report/ineffassign.txt ; test $${PIPESTATUS[0]} -eq 0
# Detect commonly misspelled words in source files
misspell:
@mkdir -p target/report
GOPATH=$(GOPATH) misspell -error ./ | tee target/report/misspell.txt ; test $${PIPESTATUS[0]} -eq 0
# Find unused struct fields
structcheck:
@mkdir -p target/report
GOPATH=$(GOPATH) structcheck -a ./ | tee target/report/structcheck.txt
# Find unused global variables and constants
varcheck:
@mkdir -p target/report
GOPATH=$(GOPATH) varcheck -e ./ | tee target/report/varcheck.txt
# Check that error return values are used
errcheck:
@mkdir -p target/report
GOPATH=$(GOPATH) errcheck ./ | tee target/report/errcheck.txt
# Suggest code simplifications
gosimple:
@mkdir -p target/report
GOPATH=$(GOPATH) gosimple ./ | tee target/report/gosimple.txt
# AST scanner
astscan:
@mkdir -p target/report
GOPATH=$(GOPATH) gas .//*.go | tee target/report/astscan.txt ; test $${PIPESTATUS[0]} -eq 0
# Generate source docs
docs:
@mkdir -p target/docs
nohup sh -c 'GOPATH=$(GOPATH) godoc -http=127.0.0.1:6060' > target/godoc_server.log 2>&1 &
wget --directory-prefix=target/docs/ --execute robots=off --retry-connrefused --recursive --no-parent --adjust-extension --page-requisites --convert-links http://127.0.0.1:6060/pkg/github.com/${VENDOR}/${PROJECT}/ ; kill -9 `lsof -ti :6060`
@echo '<html><head><meta http-equiv="refresh" content="0;./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html"/></head><a href="./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html">'${PKGNAME}' Documentation ...</a></html>' > target/docs/index.html
# Alias to run all quality-assurance checks
qa: fmtcheck test vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan
# --- INSTALL ---
# Get the dependencies
deps:
GOPATH=$(GOPATH) go get ./...
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
GOPATH=$(GOPATH) go get github.com/jstemmer/go-junit-report
GOPATH=$(GOPATH) go get github.com/axw/gocov/gocov
GOPATH=$(GOPATH) go get github.com/fzipp/gocyclo
GOPATH=$(GOPATH) go get github.com/gordonklaus/ineffassign
GOPATH=$(GOPATH) go get github.com/client9/misspell/cmd/misspell
GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/structcheck
GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/varcheck
GOPATH=$(GOPATH) go get github.com/kisielk/errcheck
GOPATH=$(GOPATH) go get honnef.co/go/tools/cmd/gosimple
GOPATH=$(GOPATH) go get github.com/securego/gosec
# Remove any build artifact
clean:
GOPATH=$(GOPATH) go clean ./...
# Deletes any intermediate file
nuke:
rm -rf ./target
GOPATH=$(GOPATH) go clean -i ./...

153
vendor/github.com/bits-and-blooms/bloom/v3/README.md generated vendored Normal file
View File

@ -0,0 +1,153 @@
Bloom filters
-------------
[![Test](https://github.com/bits-and-blooms/bloom/actions/workflows/test.yml/badge.svg)](https://github.com/bits-and-blooms/bloom/actions/workflows/test.yml)
[![Go Report Card](https://goreportcard.com/badge/github.com/bits-and-blooms/bloom)](https://goreportcard.com/report/github.com/bits-and-blooms/bloom)
[![Go Reference](https://pkg.go.dev/badge/github.com/bits-and-blooms/bloom.svg)](https://pkg.go.dev/github.com/bits-and-blooms/bloom/v3)
This library is used by popular systems such as [Milvus](https://github.com/milvus-io/milvus) and [beego](https://github.com/beego/Beego).
A Bloom filter is a concise/compressed representation of a set, where the main
requirement is to make membership queries; _i.e._, whether an item is a
member of a set. A Bloom filter will always correctly report the presence
of an element in the set when the element is indeed present. A Bloom filter
can use much less storage than the original set, but it allows for some 'false positives':
it may sometimes report that an element is in the set whereas it is not.
When you construct, you need to know how many elements you have (the desired capacity), and what is the desired false positive rate you are willing to tolerate. A common false-positive rate is 1%. The
lower the false-positive rate, the more memory you are going to require. Similarly, the higher the
capacity, the more memory you will use.
You may construct the Bloom filter capable of receiving 1 million elements with a false-positive
rate of 1% in the following manner.
```Go
filter := bloom.NewWithEstimates(1000000, 0.01)
```
You should call `NewWithEstimates` conservatively: if you specify a number of elements that it is
too small, the false-positive bound might be exceeded. A Bloom filter is not a dynamic data structure:
you must know ahead of time what your desired capacity is.
Our implementation accepts keys for setting and testing as `[]byte`. Thus, to
add a string item, `"Love"`:
```Go
filter.Add([]byte("Love"))
```
Similarly, to test if `"Love"` is in bloom:
```Go
if filter.Test([]byte("Love"))
```
For numerical data, we recommend that you look into the encoding/binary library. But, for example, to add a `uint32` to the filter:
```Go
i := uint32(100)
n1 := make([]byte, 4)
binary.BigEndian.PutUint32(n1, i)
filter.Add(n1)
```
Godoc documentation: https://pkg.go.dev/github.com/bits-and-blooms/bloom/v3
## Installation
```bash
go get -u github.com/bits-and-blooms/bloom/v3
```
## Verifying the False Positive Rate
Sometimes, the actual false positive rate may differ (slightly) from the
theoretical false positive rate. We have a function to estimate the false positive rate of a
Bloom filter with _m_ bits and _k_ hashing functions for a set of size _n_:
```Go
if bloom.EstimateFalsePositiveRate(20*n, 5, n) > 0.001 ...
```
You can use it to validate the computed m, k parameters:
```Go
m, k := bloom.EstimateParameters(n, fp)
ActualfpRate := bloom.EstimateFalsePositiveRate(m, k, n)
```
or
```Go
f := bloom.NewWithEstimates(n, fp)
ActualfpRate := bloom.EstimateFalsePositiveRate(f.m, f.k, n)
```
You would expect `ActualfpRate` to be close to the desired false-positive rate `fp` in these cases.
The `EstimateFalsePositiveRate` function creates a temporary Bloom filter. It is
also relatively expensive and only meant for validation.
## Serialization
You can read and write the Bloom filters as follows:
```Go
f := New(1000, 4)
var buf bytes.Buffer
bytesWritten, err := f.WriteTo(&buf)
if err != nil {
t.Fatal(err.Error())
}
var g BloomFilter
bytesRead, err := g.ReadFrom(&buf)
if err != nil {
t.Fatal(err.Error())
}
if bytesRead != bytesWritten {
t.Errorf("read unexpected number of bytes %d != %d", bytesRead, bytesWritten)
}
```
*Performance tip*:
When reading and writing to a file or a network connection, you may get better performance by
wrapping your streams with `bufio` instances.
E.g.,
```Go
f, err := os.Create("myfile")
w := bufio.NewWriter(f)
```
```Go
f, err := os.Open("myfile")
r := bufio.NewReader(f)
```
## Contributing
If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)")
This project includes a Makefile that allows you to test and build the project with simple commands.
To see all available options:
```bash
make help
```
## Running all tests
Before committing the code, please check if it passes all tests using (note: this will install some dependencies):
```bash
make deps
make qa
```
## Design
A Bloom filter has two parameters: _m_, the number of bits used in storage, and _k_, the number of hashing functions on elements of the set. (The actual hashing functions are important, too, but this is not a parameter for this implementation). A Bloom filter is backed by a [BitSet](https://github.com/bits-and-blooms/bitset); a key is represented in the filter by setting the bits at each value of the hashing functions (modulo _m_). Set membership is done by _testing_ whether the bits at each value of the hashing functions (again, modulo _m_) are set. If so, the item is in the set. If the item is actually in the set, a Bloom filter will never fail (the true positive rate is 1.0); but it is susceptible to false positives. The art is to choose _k_ and _m_ correctly.
In this implementation, the hashing functions used is [murmurhash](github.com/twmb/murmur3), a non-cryptographic hashing function.
Given the particular hashing scheme, it's best to be empirical about this. Note
that estimating the FP rate will clear the Bloom filter.

View File

@ -0,0 +1,5 @@
# Security Policy
## Reporting a Vulnerability
You can report privately a vulnerability by email at daniel@lemire.me (current maintainer).

453
vendor/github.com/bits-and-blooms/bloom/v3/bloom.go generated vendored Normal file
View File

@ -0,0 +1,453 @@
/*
Package bloom provides data structures and methods for creating Bloom filters.
A Bloom filter is a representation of a set of _n_ items, where the main
requirement is to make membership queries; _i.e._, whether an item is a
member of a set.
A Bloom filter has two parameters: _m_, a maximum size (typically a reasonably large
multiple of the cardinality of the set to represent) and _k_, the number of hashing
functions on elements of the set. (The actual hashing functions are important, too,
but this is not a parameter for this implementation). A Bloom filter is backed by
a BitSet; a key is represented in the filter by setting the bits at each value of the
hashing functions (modulo _m_). Set membership is done by _testing_ whether the
bits at each value of the hashing functions (again, modulo _m_) are set. If so,
the item is in the set. If the item is actually in the set, a Bloom filter will
never fail (the true positive rate is 1.0); but it is susceptible to false
positives. The art is to choose _k_ and _m_ correctly.
In this implementation, the hashing functions used is murmurhash,
a non-cryptographic hashing function.
This implementation accepts keys for setting as testing as []byte. Thus, to
add a string item, "Love":
uint n = 1000
filter := bloom.New(20*n, 5) // load of 20, 5 keys
filter.Add([]byte("Love"))
Similarly, to test if "Love" is in bloom:
if filter.Test([]byte("Love"))
For numeric data, I recommend that you look into the binary/encoding library. But,
for example, to add a uint32 to the filter:
i := uint32(100)
n1 := make([]byte,4)
binary.BigEndian.PutUint32(n1,i)
f.Add(n1)
Finally, there is a method to estimate the false positive rate of a
Bloom filter with _m_ bits and _k_ hashing functions for a set of size _n_:
if bloom.EstimateFalsePositiveRate(20*n, 5, n) > 0.001 ...
You can use it to validate the computed m, k parameters:
m, k := bloom.EstimateParameters(n, fp)
ActualfpRate := bloom.EstimateFalsePositiveRate(m, k, n)
or
f := bloom.NewWithEstimates(n, fp)
ActualfpRate := bloom.EstimateFalsePositiveRate(f.m, f.k, n)
You would expect ActualfpRate to be close to the desired fp in these cases.
The EstimateFalsePositiveRate function creates a temporary Bloom filter. It is
also relatively expensive and only meant for validation.
*/
package bloom
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"math"
"github.com/bits-and-blooms/bitset"
)
// A BloomFilter is a representation of a set of _n_ items, where the main
// requirement is to make membership queries; _i.e._, whether an item is a
// member of a set.
type BloomFilter struct {
m uint
k uint
b *bitset.BitSet
}
func max(x, y uint) uint {
if x > y {
return x
}
return y
}
// New creates a new Bloom filter with _m_ bits and _k_ hashing functions
// We force _m_ and _k_ to be at least one to avoid panics.
func New(m uint, k uint) *BloomFilter {
return &BloomFilter{max(1, m), max(1, k), bitset.New(m)}
}
// From creates a new Bloom filter with len(_data_) * 64 bits and _k_ hashing
// functions. The data slice is not going to be reset.
func From(data []uint64, k uint) *BloomFilter {
m := uint(len(data) * 64)
return FromWithM(data, m, k)
}
// FromWithM creates a new Bloom filter with _m_ length, _k_ hashing functions.
// The data slice is not going to be reset.
func FromWithM(data []uint64, m, k uint) *BloomFilter {
return &BloomFilter{m, k, bitset.From(data)}
}
// baseHashes returns the four hash values of data that are used to create k
// hashes
func baseHashes(data []byte) [4]uint64 {
var d digest128 // murmur hashing
hash1, hash2, hash3, hash4 := d.sum256(data)
return [4]uint64{
hash1, hash2, hash3, hash4,
}
}
// location returns the ith hashed location using the four base hash values
func location(h [4]uint64, i uint) uint64 {
ii := uint64(i)
return h[ii%2] + ii*h[2+(((ii+(ii%2))%4)/2)]
}
// location returns the ith hashed location using the four base hash values
func (f *BloomFilter) location(h [4]uint64, i uint) uint {
return uint(location(h, i) % uint64(f.m))
}
// EstimateParameters estimates requirements for m and k.
// Based on https://bitbucket.org/ww/bloom/src/829aa19d01d9/bloom.go
// used with permission.
func EstimateParameters(n uint, p float64) (m uint, k uint) {
m = uint(math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(math.Log(2), 2)))
k = uint(math.Ceil(math.Log(2) * float64(m) / float64(n)))
return
}
// NewWithEstimates creates a new Bloom filter for about n items with fp
// false positive rate
func NewWithEstimates(n uint, fp float64) *BloomFilter {
m, k := EstimateParameters(n, fp)
return New(m, k)
}
// Cap returns the capacity, _m_, of a Bloom filter
func (f *BloomFilter) Cap() uint {
return f.m
}
// K returns the number of hash functions used in the BloomFilter
func (f *BloomFilter) K() uint {
return f.k
}
// BitSet returns the underlying bitset for this filter.
func (f *BloomFilter) BitSet() *bitset.BitSet {
return f.b
}
// Add data to the Bloom Filter. Returns the filter (allows chaining)
func (f *BloomFilter) Add(data []byte) *BloomFilter {
h := baseHashes(data)
for i := uint(0); i < f.k; i++ {
f.b.Set(f.location(h, i))
}
return f
}
// Merge the data from two Bloom Filters.
func (f *BloomFilter) Merge(g *BloomFilter) error {
// Make sure the m's and k's are the same, otherwise merging has no real use.
if f.m != g.m {
return fmt.Errorf("m's don't match: %d != %d", f.m, g.m)
}
if f.k != g.k {
return fmt.Errorf("k's don't match: %d != %d", f.m, g.m)
}
f.b.InPlaceUnion(g.b)
return nil
}
// Copy creates a copy of a Bloom filter.
func (f *BloomFilter) Copy() *BloomFilter {
fc := New(f.m, f.k)
fc.Merge(f) // #nosec
return fc
}
// AddString to the Bloom Filter. Returns the filter (allows chaining)
func (f *BloomFilter) AddString(data string) *BloomFilter {
return f.Add([]byte(data))
}
// Test returns true if the data is in the BloomFilter, false otherwise.
// If true, the result might be a false positive. If false, the data
// is definitely not in the set.
func (f *BloomFilter) Test(data []byte) bool {
h := baseHashes(data)
for i := uint(0); i < f.k; i++ {
if !f.b.Test(f.location(h, i)) {
return false
}
}
return true
}
// TestString returns true if the string is in the BloomFilter, false otherwise.
// If true, the result might be a false positive. If false, the data
// is definitely not in the set.
func (f *BloomFilter) TestString(data string) bool {
return f.Test([]byte(data))
}
// TestLocations returns true if all locations are set in the BloomFilter, false
// otherwise.
func (f *BloomFilter) TestLocations(locs []uint64) bool {
for i := 0; i < len(locs); i++ {
if !f.b.Test(uint(locs[i] % uint64(f.m))) {
return false
}
}
return true
}
// TestAndAdd is equivalent to calling Test(data) then Add(data).
// The filter is written to unconditionnally: even if the element is present,
// the corresponding bits are still set. See also TestOrAdd.
// Returns the result of Test.
func (f *BloomFilter) TestAndAdd(data []byte) bool {
present := true
h := baseHashes(data)
for i := uint(0); i < f.k; i++ {
l := f.location(h, i)
if !f.b.Test(l) {
present = false
}
f.b.Set(l)
}
return present
}
// TestAndAddString is the equivalent to calling Test(string) then Add(string).
// The filter is written to unconditionnally: even if the string is present,
// the corresponding bits are still set. See also TestOrAdd.
// Returns the result of Test.
func (f *BloomFilter) TestAndAddString(data string) bool {
return f.TestAndAdd([]byte(data))
}
// TestOrAdd is equivalent to calling Test(data) then if not present Add(data).
// If the element is already in the filter, then the filter is unchanged.
// Returns the result of Test.
func (f *BloomFilter) TestOrAdd(data []byte) bool {
present := true
h := baseHashes(data)
for i := uint(0); i < f.k; i++ {
l := f.location(h, i)
if !f.b.Test(l) {
present = false
f.b.Set(l)
}
}
return present
}
// TestOrAddString is the equivalent to calling Test(string) then if not present Add(string).
// If the string is already in the filter, then the filter is unchanged.
// Returns the result of Test.
func (f *BloomFilter) TestOrAddString(data string) bool {
return f.TestOrAdd([]byte(data))
}
// ClearAll clears all the data in a Bloom filter, removing all keys
func (f *BloomFilter) ClearAll() *BloomFilter {
f.b.ClearAll()
return f
}
// EstimateFalsePositiveRate returns, for a BloomFilter of m bits
// and k hash functions, an estimation of the false positive rate when
//
// storing n entries. This is an empirical, relatively slow
//
// test using integers as keys.
// This function is useful to validate the implementation.
func EstimateFalsePositiveRate(m, k, n uint) (fpRate float64) {
rounds := uint32(100000)
// We construct a new filter.
f := New(m, k)
n1 := make([]byte, 4)
// We populate the filter with n values.
for i := uint32(0); i < uint32(n); i++ {
binary.BigEndian.PutUint32(n1, i)
f.Add(n1)
}
fp := 0
// test for number of rounds
for i := uint32(0); i < rounds; i++ {
binary.BigEndian.PutUint32(n1, i+uint32(n)+1)
if f.Test(n1) {
fp++
}
}
fpRate = float64(fp) / (float64(rounds))
return
}
// Approximating the number of items
// https://en.wikipedia.org/wiki/Bloom_filter#Approximating_the_number_of_items_in_a_Bloom_filter
func (f *BloomFilter) ApproximatedSize() uint32 {
x := float64(f.b.Count())
m := float64(f.Cap())
k := float64(f.K())
size := -1 * m / k * math.Log(1-x/m) / math.Log(math.E)
return uint32(math.Floor(size + 0.5)) // round
}
// bloomFilterJSON is an unexported type for marshaling/unmarshaling BloomFilter struct.
type bloomFilterJSON struct {
M uint `json:"m"`
K uint `json:"k"`
B *bitset.BitSet `json:"b"`
}
// MarshalJSON implements json.Marshaler interface.
func (f BloomFilter) MarshalJSON() ([]byte, error) {
return json.Marshal(bloomFilterJSON{f.m, f.k, f.b})
}
// UnmarshalJSON implements json.Unmarshaler interface.
func (f *BloomFilter) UnmarshalJSON(data []byte) error {
var j bloomFilterJSON
err := json.Unmarshal(data, &j)
if err != nil {
return err
}
f.m = j.M
f.k = j.K
f.b = j.B
return nil
}
// WriteTo writes a binary representation of the BloomFilter to an i/o stream.
// It returns the number of bytes written.
//
// Performance: if this function is used to write to a disk or network
// connection, it might be beneficial to wrap the stream in a bufio.Writer.
// E.g.,
//
// f, err := os.Create("myfile")
// w := bufio.NewWriter(f)
func (f *BloomFilter) WriteTo(stream io.Writer) (int64, error) {
err := binary.Write(stream, binary.BigEndian, uint64(f.m))
if err != nil {
return 0, err
}
err = binary.Write(stream, binary.BigEndian, uint64(f.k))
if err != nil {
return 0, err
}
numBytes, err := f.b.WriteTo(stream)
return numBytes + int64(2*binary.Size(uint64(0))), err
}
// ReadFrom reads a binary representation of the BloomFilter (such as might
// have been written by WriteTo()) from an i/o stream. It returns the number
// of bytes read.
//
// Performance: if this function is used to read from a disk or network
// connection, it might be beneficial to wrap the stream in a bufio.Reader.
// E.g.,
//
// f, err := os.Open("myfile")
// r := bufio.NewReader(f)
func (f *BloomFilter) ReadFrom(stream io.Reader) (int64, error) {
var m, k uint64
err := binary.Read(stream, binary.BigEndian, &m)
if err != nil {
return 0, err
}
err = binary.Read(stream, binary.BigEndian, &k)
if err != nil {
return 0, err
}
b := &bitset.BitSet{}
numBytes, err := b.ReadFrom(stream)
if err != nil {
return 0, err
}
f.m = uint(m)
f.k = uint(k)
f.b = b
return numBytes + int64(2*binary.Size(uint64(0))), nil
}
// GobEncode implements gob.GobEncoder interface.
func (f *BloomFilter) GobEncode() ([]byte, error) {
var buf bytes.Buffer
_, err := f.WriteTo(&buf)
if err != nil {
return nil, err
}
return buf.Bytes(), nil
}
// GobDecode implements gob.GobDecoder interface.
func (f *BloomFilter) GobDecode(data []byte) error {
buf := bytes.NewBuffer(data)
_, err := f.ReadFrom(buf)
return err
}
// MarshalBinary implements binary.BinaryMarshaler interface.
func (f *BloomFilter) MarshalBinary() ([]byte, error) {
var buf bytes.Buffer
_, err := f.WriteTo(&buf)
if err != nil {
return nil, err
}
return buf.Bytes(), nil
}
// UnmarshalBinary implements binary.BinaryUnmarshaler interface.
func (f *BloomFilter) UnmarshalBinary(data []byte) error {
buf := bytes.NewBuffer(data)
_, err := f.ReadFrom(buf)
return err
}
// Equal tests for the equality of two Bloom filters
func (f *BloomFilter) Equal(g *BloomFilter) bool {
return f.m == g.m && f.k == g.k && f.b.Equal(g.b)
}
// Locations returns a list of hash locations representing a data item.
func Locations(data []byte, k uint) []uint64 {
locs := make([]uint64, k)
// calculate locations
h := baseHashes(data)
for i := uint(0); i < k; i++ {
locs[i] = location(h, i)
}
return locs
}

289
vendor/github.com/bits-and-blooms/bloom/v3/murmur.go generated vendored Normal file
View File

@ -0,0 +1,289 @@
/*
The bloom library relied on the excellent murmur library
by Sébastien Paolacci. Unfortunately, it involved some heap
allocation. We want to avoid any heap allocation whatsoever
in the hashing process. To preserve backward compatibility, we roll
our own hashing functions. They are designed to be strictly equivalent
to Paolacci's implementation.
License on original code:
Copyright 2013, Sébastien Paolacci.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the library nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package bloom
import (
"encoding/binary"
"math/bits"
"unsafe"
)
const (
c1_128 = 0x87c37b91114253d5
c2_128 = 0x4cf5ad432745937f
block_size = 16
)
// digest128 represents a partial evaluation of a 128 bites hash.
type digest128 struct {
h1 uint64 // Unfinalized running hash part 1.
h2 uint64 // Unfinalized running hash part 2.
}
// bmix will hash blocks (16 bytes)
func (d *digest128) bmix(p []byte) {
nblocks := len(p) / block_size
for i := 0; i < nblocks; i++ {
b := (*[16]byte)(unsafe.Pointer(&p[i*block_size]))
k1, k2 := binary.LittleEndian.Uint64(b[:8]), binary.LittleEndian.Uint64(b[8:])
d.bmix_words(k1, k2)
}
}
// bmix_words will hash two 64-bit words (16 bytes)
func (d *digest128) bmix_words(k1, k2 uint64) {
h1, h2 := d.h1, d.h2
k1 *= c1_128
k1 = bits.RotateLeft64(k1, 31)
k1 *= c2_128
h1 ^= k1
h1 = bits.RotateLeft64(h1, 27)
h1 += h2
h1 = h1*5 + 0x52dce729
k2 *= c2_128
k2 = bits.RotateLeft64(k2, 33)
k2 *= c1_128
h2 ^= k2
h2 = bits.RotateLeft64(h2, 31)
h2 += h1
h2 = h2*5 + 0x38495ab5
d.h1, d.h2 = h1, h2
}
// sum128 computers two 64-bit hash value. It is assumed that
// bmix was first called on the data to process complete blocks
// of 16 bytes. The 'tail' is a slice representing the 'tail' (leftover
// elements, fewer than 16). If pad_tail is true, we make it seem like
// there is an extra element with value 1 appended to the tail.
// The length parameter represents the full length of the data (including
// the blocks of 16 bytes, and, if pad_tail is true, an extra byte).
func (d *digest128) sum128(pad_tail bool, length uint, tail []byte) (h1, h2 uint64) {
h1, h2 = d.h1, d.h2
var k1, k2 uint64
if pad_tail {
switch (len(tail) + 1) & 15 {
case 15:
k2 ^= uint64(1) << 48
break
case 14:
k2 ^= uint64(1) << 40
break
case 13:
k2 ^= uint64(1) << 32
break
case 12:
k2 ^= uint64(1) << 24
break
case 11:
k2 ^= uint64(1) << 16
break
case 10:
k2 ^= uint64(1) << 8
break
case 9:
k2 ^= uint64(1) << 0
k2 *= c2_128
k2 = bits.RotateLeft64(k2, 33)
k2 *= c1_128
h2 ^= k2
break
case 8:
k1 ^= uint64(1) << 56
break
case 7:
k1 ^= uint64(1) << 48
break
case 6:
k1 ^= uint64(1) << 40
break
case 5:
k1 ^= uint64(1) << 32
break
case 4:
k1 ^= uint64(1) << 24
break
case 3:
k1 ^= uint64(1) << 16
break
case 2:
k1 ^= uint64(1) << 8
break
case 1:
k1 ^= uint64(1) << 0
k1 *= c1_128
k1 = bits.RotateLeft64(k1, 31)
k1 *= c2_128
h1 ^= k1
}
}
switch len(tail) & 15 {
case 15:
k2 ^= uint64(tail[14]) << 48
fallthrough
case 14:
k2 ^= uint64(tail[13]) << 40
fallthrough
case 13:
k2 ^= uint64(tail[12]) << 32
fallthrough
case 12:
k2 ^= uint64(tail[11]) << 24
fallthrough
case 11:
k2 ^= uint64(tail[10]) << 16
fallthrough
case 10:
k2 ^= uint64(tail[9]) << 8
fallthrough
case 9:
k2 ^= uint64(tail[8]) << 0
k2 *= c2_128
k2 = bits.RotateLeft64(k2, 33)
k2 *= c1_128
h2 ^= k2
fallthrough
case 8:
k1 ^= uint64(tail[7]) << 56
fallthrough
case 7:
k1 ^= uint64(tail[6]) << 48
fallthrough
case 6:
k1 ^= uint64(tail[5]) << 40
fallthrough
case 5:
k1 ^= uint64(tail[4]) << 32
fallthrough
case 4:
k1 ^= uint64(tail[3]) << 24
fallthrough
case 3:
k1 ^= uint64(tail[2]) << 16
fallthrough
case 2:
k1 ^= uint64(tail[1]) << 8
fallthrough
case 1:
k1 ^= uint64(tail[0]) << 0
k1 *= c1_128
k1 = bits.RotateLeft64(k1, 31)
k1 *= c2_128
h1 ^= k1
}
h1 ^= uint64(length)
h2 ^= uint64(length)
h1 += h2
h2 += h1
h1 = fmix64(h1)
h2 = fmix64(h2)
h1 += h2
h2 += h1
return h1, h2
}
func fmix64(k uint64) uint64 {
k ^= k >> 33
k *= 0xff51afd7ed558ccd
k ^= k >> 33
k *= 0xc4ceb9fe1a85ec53
k ^= k >> 33
return k
}
// sum256 will compute 4 64-bit hash values from the input.
// It is designed to never allocate memory on the heap. So it
// works without any byte buffer whatsoever.
// It is designed to be strictly equivalent to
//
// a1 := []byte{1}
// hasher := murmur3.New128()
// hasher.Write(data) // #nosec
// v1, v2 := hasher.Sum128()
// hasher.Write(a1) // #nosec
// v3, v4 := hasher.Sum128()
//
// See TestHashRandom.
func (d *digest128) sum256(data []byte) (hash1, hash2, hash3, hash4 uint64) {
// We always start from zero.
d.h1, d.h2 = 0, 0
// Process as many bytes as possible.
d.bmix(data)
// We have enough to compute the first two 64-bit numbers
length := uint(len(data))
tail_length := length % block_size
tail := data[length-tail_length:]
hash1, hash2 = d.sum128(false, length, tail)
// Next we want to 'virtually' append 1 to the input, but,
// we do not want to append to an actual array!!!
if tail_length+1 == block_size {
// We are left with no tail!!!
word1 := binary.LittleEndian.Uint64(tail[:8])
word2 := uint64(binary.LittleEndian.Uint32(tail[8 : 8+4]))
word2 = word2 | (uint64(tail[12]) << 32) | (uint64(tail[13]) << 40) | (uint64(tail[14]) << 48)
// We append 1.
word2 = word2 | (uint64(1) << 56)
// We process the resulting 2 words.
d.bmix_words(word1, word2)
tail := data[length:] // empty slice, deliberate.
hash3, hash4 = d.sum128(false, length+1, tail)
} else {
// We still have a tail (fewer than 15 bytes) but we
// need to append '1' to it.
hash3, hash4 = d.sum128(true, length+1, tail)
}
return hash1, hash2, hash3, hash4
}

7
vendor/modules.txt vendored
View File

@ -137,9 +137,12 @@ github.com/benbjohnson/immutable
# github.com/beorn7/perks v1.0.1
## explicit; go 1.11
github.com/beorn7/perks/quantile
# github.com/bits-and-blooms/bitset v1.2.0
## explicit; go 1.14
# github.com/bits-and-blooms/bitset v1.13.0
## explicit; go 1.16
github.com/bits-and-blooms/bitset
# github.com/bits-and-blooms/bloom/v3 v3.7.0
## explicit; go 1.16
github.com/bits-and-blooms/bloom/v3
# github.com/bradfitz/iter v0.0.0-20191230175014-e8f45d346db8
## explicit; go 1.11
github.com/bradfitz/iter