mirror of https://github.com/status-im/op-geth.git
core, eth, trie: bloom filter for trie node dedup during fast sync (#19489)
* core, eth, trie: bloom filter for trie node dedup during fast sync * eth/downloader, trie: address review comments * core, ethdb, trie: restart fast-sync bloom construction now and again * eth/downloader: initialize fast sync bloom on startup * eth: reenable eth/62 until we properly remove it
This commit is contained in:
parent
40cdcf8c47
commit
9effd64290
|
@ -35,6 +35,7 @@ import (
|
|||
"github.com/ethereum/go-ethereum/eth/downloader"
|
||||
"github.com/ethereum/go-ethereum/event"
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/trie"
|
||||
"gopkg.in/urfave/cli.v1"
|
||||
)
|
||||
|
||||
|
@ -375,11 +376,16 @@ func copyDb(ctx *cli.Context) error {
|
|||
defer stack.Close()
|
||||
|
||||
chain, chainDb := utils.MakeChain(ctx, stack)
|
||||
syncmode := *utils.GlobalTextMarshaler(ctx, utils.SyncModeFlag.Name).(*downloader.SyncMode)
|
||||
dl := downloader.New(syncmode, 0, chainDb, new(event.TypeMux), chain, nil, nil)
|
||||
syncMode := *utils.GlobalTextMarshaler(ctx, utils.SyncModeFlag.Name).(*downloader.SyncMode)
|
||||
|
||||
var syncBloom *trie.SyncBloom
|
||||
if syncMode == downloader.FastSync {
|
||||
syncBloom = trie.NewSyncBloom(uint64(ctx.GlobalInt(utils.CacheFlag.Name)/2), chainDb)
|
||||
}
|
||||
dl := downloader.New(0, chainDb, syncBloom, new(event.TypeMux), chain, nil, nil)
|
||||
|
||||
// Create a source peer to satisfy downloader requests from
|
||||
db, err := rawdb.NewLevelDBDatabase(ctx.Args().First(), ctx.GlobalInt(utils.CacheFlag.Name), 256, "")
|
||||
db, err := rawdb.NewLevelDBDatabase(ctx.Args().First(), ctx.GlobalInt(utils.CacheFlag.Name)/2, 256, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -395,7 +401,7 @@ func copyDb(ctx *cli.Context) error {
|
|||
start := time.Now()
|
||||
|
||||
currentHeader := hc.CurrentHeader()
|
||||
if err = dl.Synchronise("local", currentHeader.Hash(), hc.GetTd(currentHeader.Hash(), currentHeader.Number.Uint64()), syncmode); err != nil {
|
||||
if err = dl.Synchronise("local", currentHeader.Hash(), hc.GetTd(currentHeader.Hash(), currentHeader.Number.Uint64()), syncMode); err != nil {
|
||||
return err
|
||||
}
|
||||
for dl.Synchronising() {
|
||||
|
|
|
@ -67,6 +67,13 @@ func (t *table) NewIterator() ethdb.Iterator {
|
|||
return t.NewIteratorWithPrefix(nil)
|
||||
}
|
||||
|
||||
// NewIteratorWithStart creates a binary-alphabetical iterator over a subset of
|
||||
// database content starting at a particular initial key (or after, if it does
|
||||
// not exist).
|
||||
func (t *table) NewIteratorWithStart(start []byte) ethdb.Iterator {
|
||||
return t.db.NewIteratorWithStart(start)
|
||||
}
|
||||
|
||||
// NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset
|
||||
// of database content with a particular key prefix.
|
||||
func (t *table) NewIteratorWithPrefix(prefix []byte) ethdb.Iterator {
|
||||
|
|
|
@ -26,7 +26,7 @@ import (
|
|||
)
|
||||
|
||||
// NewStateSync create a new state trie download scheduler.
|
||||
func NewStateSync(root common.Hash, database ethdb.Reader) *trie.Sync {
|
||||
func NewStateSync(root common.Hash, database ethdb.Reader, bloom *trie.SyncBloom) *trie.Sync {
|
||||
var syncer *trie.Sync
|
||||
callback := func(leaf []byte, parent common.Hash) error {
|
||||
var obj Account
|
||||
|
@ -37,6 +37,6 @@ func NewStateSync(root common.Hash, database ethdb.Reader) *trie.Sync {
|
|||
syncer.AddRawEntry(common.BytesToHash(obj.CodeHash), 64, parent)
|
||||
return nil
|
||||
}
|
||||
syncer = trie.NewSync(root, database, callback)
|
||||
syncer = trie.NewSync(root, database, callback, bloom)
|
||||
return syncer
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import (
|
|||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
"github.com/ethereum/go-ethereum/crypto"
|
||||
"github.com/ethereum/go-ethereum/ethdb"
|
||||
"github.com/ethereum/go-ethereum/ethdb/memorydb"
|
||||
"github.com/ethereum/go-ethereum/trie"
|
||||
)
|
||||
|
||||
|
@ -125,7 +126,7 @@ func checkStateConsistency(db ethdb.Database, root common.Hash) error {
|
|||
// Tests that an empty state is not scheduled for syncing.
|
||||
func TestEmptyStateSync(t *testing.T) {
|
||||
empty := common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
|
||||
if req := NewStateSync(empty, rawdb.NewMemoryDatabase()).Missing(1); len(req) != 0 {
|
||||
if req := NewStateSync(empty, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New())).Missing(1); len(req) != 0 {
|
||||
t.Errorf("content requested for empty state: %v", req)
|
||||
}
|
||||
}
|
||||
|
@ -141,7 +142,7 @@ func testIterativeStateSync(t *testing.T, batch int) {
|
|||
|
||||
// Create a destination state and sync with the scheduler
|
||||
dstDb := rawdb.NewMemoryDatabase()
|
||||
sched := NewStateSync(srcRoot, dstDb)
|
||||
sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
|
||||
|
||||
queue := append([]common.Hash{}, sched.Missing(batch)...)
|
||||
for len(queue) > 0 {
|
||||
|
@ -173,7 +174,7 @@ func TestIterativeDelayedStateSync(t *testing.T) {
|
|||
|
||||
// Create a destination state and sync with the scheduler
|
||||
dstDb := rawdb.NewMemoryDatabase()
|
||||
sched := NewStateSync(srcRoot, dstDb)
|
||||
sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
|
||||
|
||||
queue := append([]common.Hash{}, sched.Missing(0)...)
|
||||
for len(queue) > 0 {
|
||||
|
@ -210,7 +211,7 @@ func testIterativeRandomStateSync(t *testing.T, batch int) {
|
|||
|
||||
// Create a destination state and sync with the scheduler
|
||||
dstDb := rawdb.NewMemoryDatabase()
|
||||
sched := NewStateSync(srcRoot, dstDb)
|
||||
sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
|
||||
|
||||
queue := make(map[common.Hash]struct{})
|
||||
for _, hash := range sched.Missing(batch) {
|
||||
|
@ -250,7 +251,7 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) {
|
|||
|
||||
// Create a destination state and sync with the scheduler
|
||||
dstDb := rawdb.NewMemoryDatabase()
|
||||
sched := NewStateSync(srcRoot, dstDb)
|
||||
sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
|
||||
|
||||
queue := make(map[common.Hash]struct{})
|
||||
for _, hash := range sched.Missing(0) {
|
||||
|
@ -297,7 +298,7 @@ func TestIncompleteStateSync(t *testing.T) {
|
|||
|
||||
// Create a destination state and sync with the scheduler
|
||||
dstDb := rawdb.NewMemoryDatabase()
|
||||
sched := NewStateSync(srcRoot, dstDb)
|
||||
sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
|
||||
|
||||
added := []common.Hash{}
|
||||
queue := append([]common.Hash{}, sched.Missing(1)...)
|
||||
|
|
|
@ -190,10 +190,11 @@ func New(ctx *node.ServiceContext, config *Config) (*Ethereum, error) {
|
|||
}
|
||||
eth.txPool = core.NewTxPool(config.TxPool, chainConfig, eth.blockchain)
|
||||
|
||||
if eth.protocolManager, err = NewProtocolManager(chainConfig, config.SyncMode, config.NetworkId, eth.eventMux, eth.txPool, eth.engine, eth.blockchain, chainDb, config.Whitelist); err != nil {
|
||||
// Permit the downloader to use the trie cache allowance during fast sync
|
||||
cacheLimit := cacheConfig.TrieCleanLimit + cacheConfig.TrieDirtyLimit
|
||||
if eth.protocolManager, err = NewProtocolManager(chainConfig, config.SyncMode, config.NetworkId, eth.eventMux, eth.txPool, eth.engine, eth.blockchain, chainDb, cacheLimit, config.Whitelist); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
eth.miner = miner.New(eth, &config.Miner, chainConfig, eth.EventMux(), eth.engine, eth.isLocalBlock)
|
||||
eth.miner.SetExtra(makeExtraData(config.Miner.ExtraData))
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ import (
|
|||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/metrics"
|
||||
"github.com/ethereum/go-ethereum/params"
|
||||
"github.com/ethereum/go-ethereum/trie"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -104,7 +105,9 @@ type Downloader struct {
|
|||
genesis uint64 // Genesis block number to limit sync to (e.g. light client CHT)
|
||||
queue *queue // Scheduler for selecting the hashes to download
|
||||
peers *peerSet // Set of active peers from which download can proceed
|
||||
stateDB ethdb.Database
|
||||
|
||||
stateDB ethdb.Database // Database to state sync into (and deduplicate via)
|
||||
stateBloom *trie.SyncBloom // Bloom filter for fast trie node existence checks
|
||||
|
||||
rttEstimate uint64 // Round trip time to target for download requests
|
||||
rttConfidence uint64 // Confidence in the estimated RTT (unit: millionths to allow atomic ops)
|
||||
|
@ -207,13 +210,13 @@ type BlockChain interface {
|
|||
}
|
||||
|
||||
// New creates a new downloader to fetch hashes and blocks from remote peers.
|
||||
func New(mode SyncMode, checkpoint uint64, stateDb ethdb.Database, mux *event.TypeMux, chain BlockChain, lightchain LightChain, dropPeer peerDropFn) *Downloader {
|
||||
func New(checkpoint uint64, stateDb ethdb.Database, stateBloom *trie.SyncBloom, mux *event.TypeMux, chain BlockChain, lightchain LightChain, dropPeer peerDropFn) *Downloader {
|
||||
if lightchain == nil {
|
||||
lightchain = chain
|
||||
}
|
||||
dl := &Downloader{
|
||||
mode: mode,
|
||||
stateDB: stateDb,
|
||||
stateBloom: stateBloom,
|
||||
mux: mux,
|
||||
checkpoint: checkpoint,
|
||||
queue: newQueue(),
|
||||
|
@ -255,13 +258,15 @@ func (d *Downloader) Progress() ethereum.SyncProgress {
|
|||
defer d.syncStatsLock.RUnlock()
|
||||
|
||||
current := uint64(0)
|
||||
switch d.mode {
|
||||
case FullSync:
|
||||
switch {
|
||||
case d.blockchain != nil && d.mode == FullSync:
|
||||
current = d.blockchain.CurrentBlock().NumberU64()
|
||||
case FastSync:
|
||||
case d.blockchain != nil && d.mode == FastSync:
|
||||
current = d.blockchain.CurrentFastBlock().NumberU64()
|
||||
case LightSync:
|
||||
case d.lightchain != nil:
|
||||
current = d.lightchain.CurrentHeader().Number.Uint64()
|
||||
default:
|
||||
log.Error("Unknown downloader chain/mode combo", "light", d.lightchain != nil, "full", d.blockchain != nil, "mode", d.mode)
|
||||
}
|
||||
return ethereum.SyncProgress{
|
||||
StartingBlock: d.syncStatsChainOrigin,
|
||||
|
@ -363,6 +368,12 @@ func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int, mode
|
|||
if atomic.CompareAndSwapInt32(&d.notified, 0, 1) {
|
||||
log.Info("Block synchronisation started")
|
||||
}
|
||||
// If we are already full syncing, but have a fast-sync bloom filter laying
|
||||
// around, make sure it does't use memory any more. This is a special case
|
||||
// when the user attempts to fast sync a new empty network.
|
||||
if mode == FullSync && d.stateBloom != nil {
|
||||
d.stateBloom.Close()
|
||||
}
|
||||
// Reset the queue, peer set and wake channels to clean any internal leftover state
|
||||
d.queue.Reset()
|
||||
d.peers.Reset()
|
||||
|
@ -1662,6 +1673,8 @@ func (d *Downloader) commitFastSyncData(results []*fetchResult, stateSync *state
|
|||
func (d *Downloader) commitPivotBlock(result *fetchResult) error {
|
||||
block := types.NewBlockWithHeader(result.Header).WithBody(result.Transactions, result.Uncles)
|
||||
log.Debug("Committing fast sync pivot as new head", "number", block.Number(), "hash", block.Hash())
|
||||
|
||||
// Commit the pivot block as the new head, will require full sync from here on
|
||||
if _, err := d.blockchain.InsertReceiptChain([]*types.Block{block}, []types.Receipts{result.Receipts}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -1669,6 +1682,15 @@ func (d *Downloader) commitPivotBlock(result *fetchResult) error {
|
|||
return err
|
||||
}
|
||||
atomic.StoreInt32(&d.committed, 1)
|
||||
|
||||
// If we had a bloom filter for the state sync, deallocate it now. Note, we only
|
||||
// deallocate internally, but keep the empty wrapper. This ensures that if we do
|
||||
// a rollback after committing the pivot and restarting fast sync, we don't end
|
||||
// up using a nil bloom. Empty bloom is fine, it just returns that it does not
|
||||
// have the info we need, so reach down to the database instead.
|
||||
if d.stateBloom != nil {
|
||||
d.stateBloom.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ func newTester() *downloadTester {
|
|||
tester.stateDb = rawdb.NewMemoryDatabase()
|
||||
tester.stateDb.Put(testGenesis.Root().Bytes(), []byte{0x00})
|
||||
|
||||
tester.downloader = New(FullSync, 0, tester.stateDb, new(event.TypeMux), tester, nil, tester.dropPeer)
|
||||
tester.downloader = New(0, tester.stateDb, trie.NewSyncBloom(1, tester.stateDb), new(event.TypeMux), tester, nil, tester.dropPeer)
|
||||
return tester
|
||||
}
|
||||
|
||||
|
|
|
@ -59,6 +59,7 @@ type stateSyncStats struct {
|
|||
|
||||
// syncState starts downloading state with the given root hash.
|
||||
func (d *Downloader) syncState(root common.Hash) *stateSync {
|
||||
// Create the state sync
|
||||
s := newStateSync(d, root)
|
||||
select {
|
||||
case d.stateSyncStart <- s:
|
||||
|
@ -239,7 +240,7 @@ type stateTask struct {
|
|||
func newStateSync(d *Downloader, root common.Hash) *stateSync {
|
||||
return &stateSync{
|
||||
d: d,
|
||||
sched: state.NewStateSync(root, d.stateDB),
|
||||
sched: state.NewStateSync(root, d.stateDB, d.stateBloom),
|
||||
keccak: sha3.NewLegacyKeccak256(),
|
||||
tasks: make(map[common.Hash]*stateTask),
|
||||
deliver: make(chan *stateReq),
|
||||
|
|
|
@ -39,6 +39,7 @@ import (
|
|||
"github.com/ethereum/go-ethereum/p2p/enode"
|
||||
"github.com/ethereum/go-ethereum/params"
|
||||
"github.com/ethereum/go-ethereum/rlp"
|
||||
"github.com/ethereum/go-ethereum/trie"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -105,7 +106,7 @@ type ProtocolManager struct {
|
|||
|
||||
// NewProtocolManager returns a new Ethereum sub protocol manager. The Ethereum sub protocol manages peers capable
|
||||
// with the Ethereum network.
|
||||
func NewProtocolManager(config *params.ChainConfig, mode downloader.SyncMode, networkID uint64, mux *event.TypeMux, txpool txPool, engine consensus.Engine, blockchain *core.BlockChain, chaindb ethdb.Database, whitelist map[uint64]common.Hash) (*ProtocolManager, error) {
|
||||
func NewProtocolManager(config *params.ChainConfig, mode downloader.SyncMode, networkID uint64, mux *event.TypeMux, txpool txPool, engine consensus.Engine, blockchain *core.BlockChain, chaindb ethdb.Database, cacheLimit int, whitelist map[uint64]common.Hash) (*ProtocolManager, error) {
|
||||
// Create the protocol manager with the base fields
|
||||
manager := &ProtocolManager{
|
||||
networkID: networkID,
|
||||
|
@ -120,12 +121,8 @@ func NewProtocolManager(config *params.ChainConfig, mode downloader.SyncMode, ne
|
|||
txsyncCh: make(chan *txsync),
|
||||
quitSync: make(chan struct{}),
|
||||
}
|
||||
// Figure out whether to allow fast sync or not
|
||||
if mode == downloader.FastSync && blockchain.CurrentBlock().NumberU64() > 0 {
|
||||
log.Warn("Blockchain not empty, fast sync disabled")
|
||||
mode = downloader.FullSync
|
||||
}
|
||||
if mode == downloader.FastSync {
|
||||
// If fast sync was requested and our database is empty, grant it
|
||||
if mode == downloader.FastSync && blockchain.CurrentBlock().NumberU64() == 0 {
|
||||
manager.fastSync = uint32(1)
|
||||
}
|
||||
// If we have trusted checkpoints, enforce them on the chain
|
||||
|
@ -137,7 +134,8 @@ func NewProtocolManager(config *params.ChainConfig, mode downloader.SyncMode, ne
|
|||
manager.SubProtocols = make([]p2p.Protocol, 0, len(ProtocolVersions))
|
||||
for i, version := range ProtocolVersions {
|
||||
// Skip protocol version if incompatible with the mode of operation
|
||||
if mode == downloader.FastSync && version < eth63 {
|
||||
// TODO(karalabe): hard-drop eth/62 from the code base
|
||||
if atomic.LoadUint32(&manager.fastSync) == 1 && version < eth63 {
|
||||
continue
|
||||
}
|
||||
// Compatible; initialise the sub-protocol
|
||||
|
@ -171,9 +169,16 @@ func NewProtocolManager(config *params.ChainConfig, mode downloader.SyncMode, ne
|
|||
if len(manager.SubProtocols) == 0 {
|
||||
return nil, errIncompatibleConfig
|
||||
}
|
||||
// Construct the different synchronisation mechanisms
|
||||
manager.downloader = downloader.New(mode, manager.checkpointNumber, chaindb, manager.eventMux, blockchain, nil, manager.removePeer)
|
||||
// Construct the downloader (long sync) and its backing state bloom if fast
|
||||
// sync is requested. The downloader is responsible for deallocating the state
|
||||
// bloom when it's done.
|
||||
var stateBloom *trie.SyncBloom
|
||||
if atomic.LoadUint32(&manager.fastSync) == 1 {
|
||||
stateBloom = trie.NewSyncBloom(uint64(cacheLimit), chaindb)
|
||||
}
|
||||
manager.downloader = downloader.New(manager.checkpointNumber, chaindb, stateBloom, manager.eventMux, blockchain, nil, manager.removePeer)
|
||||
|
||||
// Construct the fetcher (short sync)
|
||||
validator := func(header *types.Header) error {
|
||||
return engine.VerifyHeader(blockchain, header, true)
|
||||
}
|
||||
|
|
|
@ -528,7 +528,7 @@ func testCheckpointChallenge(t *testing.T, syncmode downloader.SyncMode, checkpo
|
|||
if err != nil {
|
||||
t.Fatalf("failed to create new blockchain: %v", err)
|
||||
}
|
||||
pm, err := NewProtocolManager(config, syncmode, DefaultConfig.NetworkId, new(event.TypeMux), new(testTxPool), ethash.NewFaker(), blockchain, db, nil)
|
||||
pm, err := NewProtocolManager(config, syncmode, DefaultConfig.NetworkId, new(event.TypeMux), new(testTxPool), ethash.NewFaker(), blockchain, db, 1, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to start test protocol manager: %v", err)
|
||||
}
|
||||
|
@ -615,7 +615,7 @@ func testBroadcastBlock(t *testing.T, totalPeers, broadcastExpected int) {
|
|||
if err != nil {
|
||||
t.Fatalf("failed to create new blockchain: %v", err)
|
||||
}
|
||||
pm, err := NewProtocolManager(config, downloader.FullSync, DefaultConfig.NetworkId, evmux, new(testTxPool), pow, blockchain, db, nil)
|
||||
pm, err := NewProtocolManager(config, downloader.FullSync, DefaultConfig.NetworkId, evmux, new(testTxPool), pow, blockchain, db, 1, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to start test protocol manager: %v", err)
|
||||
}
|
||||
|
|
|
@ -66,8 +66,7 @@ func newTestProtocolManager(mode downloader.SyncMode, blocks int, generator func
|
|||
if _, err := blockchain.InsertChain(chain); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
pm, err := NewProtocolManager(gspec.Config, mode, DefaultConfig.NetworkId, evmux, &testTxPool{added: newtx}, engine, blockchain, db, nil)
|
||||
pm, err := NewProtocolManager(gspec.Config, mode, DefaultConfig.NetworkId, evmux, &testTxPool{added: newtx}, engine, blockchain, db, 1, nil)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
|
|
@ -55,6 +55,11 @@ type Iteratee interface {
|
|||
// contained within the key-value database.
|
||||
NewIterator() Iterator
|
||||
|
||||
// NewIteratorWithStart creates a binary-alphabetical iterator over a subset of
|
||||
// database content starting at a particular initial key (or after, if it does
|
||||
// not exist).
|
||||
NewIteratorWithStart(start []byte) Iterator
|
||||
|
||||
// NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset
|
||||
// of database content with a particular key prefix.
|
||||
NewIteratorWithPrefix(prefix []byte) Iterator
|
||||
|
|
|
@ -175,7 +175,14 @@ func (db *Database) NewBatch() ethdb.Batch {
|
|||
// NewIterator creates a binary-alphabetical iterator over the entire keyspace
|
||||
// contained within the leveldb database.
|
||||
func (db *Database) NewIterator() ethdb.Iterator {
|
||||
return db.NewIteratorWithPrefix(nil)
|
||||
return db.db.NewIterator(new(util.Range), nil)
|
||||
}
|
||||
|
||||
// NewIteratorWithStart creates a binary-alphabetical iterator over a subset of
|
||||
// database content starting at a particular initial key (or after, if it does
|
||||
// not exist).
|
||||
func (db *Database) NewIteratorWithStart(start []byte) ethdb.Iterator {
|
||||
return db.db.NewIterator(&util.Range{Start: start}, nil)
|
||||
}
|
||||
|
||||
// NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset
|
||||
|
|
|
@ -132,7 +132,36 @@ func (db *Database) NewBatch() ethdb.Batch {
|
|||
// NewIterator creates a binary-alphabetical iterator over the entire keyspace
|
||||
// contained within the memory database.
|
||||
func (db *Database) NewIterator() ethdb.Iterator {
|
||||
return db.NewIteratorWithPrefix(nil)
|
||||
return db.NewIteratorWithStart(nil)
|
||||
}
|
||||
|
||||
// NewIteratorWithStart creates a binary-alphabetical iterator over a subset of
|
||||
// database content starting at a particular initial key (or after, if it does
|
||||
// not exist).
|
||||
func (db *Database) NewIteratorWithStart(start []byte) ethdb.Iterator {
|
||||
db.lock.RLock()
|
||||
defer db.lock.RUnlock()
|
||||
|
||||
var (
|
||||
st = string(start)
|
||||
keys = make([]string, 0, len(db.db))
|
||||
values = make([][]byte, 0, len(db.db))
|
||||
)
|
||||
// Collect the keys from the memory database corresponding to the given start
|
||||
for key := range db.db {
|
||||
if key >= st {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
}
|
||||
// Sort the items and retrieve the associated values
|
||||
sort.Strings(keys)
|
||||
for _, key := range keys {
|
||||
values = append(values, db.db[key])
|
||||
}
|
||||
return &iterator{
|
||||
keys: keys,
|
||||
values: values,
|
||||
}
|
||||
}
|
||||
|
||||
// NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset
|
||||
|
|
|
@ -179,7 +179,7 @@ func NewProtocolManager(
|
|||
if cht, ok := params.TrustedCheckpoints[blockchain.Genesis().Hash()]; ok {
|
||||
checkpoint = (cht.SectionIndex+1)*params.CHTFrequency - 1
|
||||
}
|
||||
manager.downloader = downloader.New(downloader.LightSync, checkpoint, chainDb, manager.eventMux, nil, blockchain, removePeer)
|
||||
manager.downloader = downloader.New(checkpoint, chainDb, nil, manager.eventMux, nil, blockchain, removePeer)
|
||||
manager.peers.notify((*downloaderPeerNotify)(manager))
|
||||
manager.fetcher = newLightFetcher(manager)
|
||||
}
|
||||
|
|
37
trie/sync.go
37
trie/sync.go
|
@ -76,15 +76,17 @@ type Sync struct {
|
|||
membatch *syncMemBatch // Memory buffer to avoid frequent database writes
|
||||
requests map[common.Hash]*request // Pending requests pertaining to a key hash
|
||||
queue *prque.Prque // Priority queue with the pending requests
|
||||
bloom *SyncBloom // Bloom filter for fast node existence checks
|
||||
}
|
||||
|
||||
// NewSync creates a new trie data download scheduler.
|
||||
func NewSync(root common.Hash, database ethdb.Reader, callback LeafCallback) *Sync {
|
||||
func NewSync(root common.Hash, database ethdb.Reader, callback LeafCallback, bloom *SyncBloom) *Sync {
|
||||
ts := &Sync{
|
||||
database: database,
|
||||
membatch: newSyncMemBatch(),
|
||||
requests: make(map[common.Hash]*request),
|
||||
queue: prque.New(nil),
|
||||
bloom: bloom,
|
||||
}
|
||||
ts.AddSubTrie(root, 0, common.Hash{}, callback)
|
||||
return ts
|
||||
|
@ -99,10 +101,14 @@ func (s *Sync) AddSubTrie(root common.Hash, depth int, parent common.Hash, callb
|
|||
if _, ok := s.membatch.batch[root]; ok {
|
||||
return
|
||||
}
|
||||
key := root.Bytes()
|
||||
blob, _ := s.database.Get(key)
|
||||
if local, err := decodeNode(key, blob); local != nil && err == nil {
|
||||
return
|
||||
if s.bloom.Contains(root[:]) {
|
||||
// Bloom filter says this might be a duplicate, double check
|
||||
blob, _ := s.database.Get(root[:])
|
||||
if local, err := decodeNode(root[:], blob); local != nil && err == nil {
|
||||
return
|
||||
}
|
||||
// False positive, bump fault meter
|
||||
bloomFaultMeter.Mark(1)
|
||||
}
|
||||
// Assemble the new sub-trie sync request
|
||||
req := &request{
|
||||
|
@ -134,8 +140,13 @@ func (s *Sync) AddRawEntry(hash common.Hash, depth int, parent common.Hash) {
|
|||
if _, ok := s.membatch.batch[hash]; ok {
|
||||
return
|
||||
}
|
||||
if ok, _ := s.database.Has(hash.Bytes()); ok {
|
||||
return
|
||||
if s.bloom.Contains(hash[:]) {
|
||||
// Bloom filter says this might be a duplicate, double check
|
||||
if ok, _ := s.database.Has(hash[:]); ok {
|
||||
return
|
||||
}
|
||||
// False positive, bump fault meter
|
||||
bloomFaultMeter.Mark(1)
|
||||
}
|
||||
// Assemble the new sub-trie sync request
|
||||
req := &request{
|
||||
|
@ -219,8 +230,9 @@ func (s *Sync) Commit(dbw ethdb.Writer) (int, error) {
|
|||
if err := dbw.Put(key[:], s.membatch.batch[key]); err != nil {
|
||||
return i, err
|
||||
}
|
||||
s.bloom.Add(key[:])
|
||||
}
|
||||
written := len(s.membatch.order)
|
||||
written := len(s.membatch.order) // TODO(karalabe): could an order change improve write performance?
|
||||
|
||||
// Drop the membatch data and return
|
||||
s.membatch = newSyncMemBatch()
|
||||
|
@ -292,8 +304,13 @@ func (s *Sync) children(req *request, object node) ([]*request, error) {
|
|||
if _, ok := s.membatch.batch[hash]; ok {
|
||||
continue
|
||||
}
|
||||
if ok, _ := s.database.Has(node); ok {
|
||||
continue
|
||||
if s.bloom.Contains(node) {
|
||||
// Bloom filter says this might be a duplicate, double check
|
||||
if ok, _ := s.database.Has(node); ok {
|
||||
continue
|
||||
}
|
||||
// False positive, bump fault meter
|
||||
bloomFaultMeter.Mark(1)
|
||||
}
|
||||
// Locally unknown node, schedule for retrieval
|
||||
requests = append(requests, &request{
|
||||
|
|
|
@ -0,0 +1,207 @@
|
|||
// Copyright 2019 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package trie
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/ethdb"
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/metrics"
|
||||
"github.com/steakknife/bloomfilter"
|
||||
)
|
||||
|
||||
var (
|
||||
bloomAddMeter = metrics.NewRegisteredMeter("trie/bloom/add", nil)
|
||||
bloomLoadMeter = metrics.NewRegisteredMeter("trie/bloom/load", nil)
|
||||
bloomTestMeter = metrics.NewRegisteredMeter("trie/bloom/test", nil)
|
||||
bloomMissMeter = metrics.NewRegisteredMeter("trie/bloom/miss", nil)
|
||||
bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil)
|
||||
bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil)
|
||||
)
|
||||
|
||||
// syncBloomHasher is a wrapper around a byte blob to satisfy the interface API
|
||||
// requirements of the bloom library used. It's used to convert a trie hash into
|
||||
// a 64 bit mini hash.
|
||||
type syncBloomHasher []byte
|
||||
|
||||
func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
|
||||
func (f syncBloomHasher) Sum(b []byte) []byte { panic("not implemented") }
|
||||
func (f syncBloomHasher) Reset() { panic("not implemented") }
|
||||
func (f syncBloomHasher) BlockSize() int { panic("not implemented") }
|
||||
func (f syncBloomHasher) Size() int { return 8 }
|
||||
func (f syncBloomHasher) Sum64() uint64 { return binary.BigEndian.Uint64(f) }
|
||||
|
||||
// SyncBloom is a bloom filter used during fast sync to quickly decide if a trie
|
||||
// node already exists on disk or not. It self populates from the provided disk
|
||||
// database on creation in a background thread and will only start returning live
|
||||
// results once that's finished.
|
||||
type SyncBloom struct {
|
||||
bloom *bloomfilter.Filter
|
||||
inited uint32
|
||||
closer sync.Once
|
||||
closed uint32
|
||||
pend sync.WaitGroup
|
||||
}
|
||||
|
||||
// NewSyncBloom creates a new bloom filter of the given size (in megabytes) and
|
||||
// initializes it from the database. The bloom is hard coded to use 3 filters.
|
||||
func NewSyncBloom(memory uint64, database ethdb.Iteratee) *SyncBloom {
|
||||
// Create the bloom filter to track known trie nodes
|
||||
bloom, err := bloomfilter.New(memory*1024*1024*8, 3)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("failed to create bloom: %v", err)) // Can't happen, here for sanity
|
||||
}
|
||||
log.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024))
|
||||
|
||||
// Assemble the fast sync bloom and init it from previous sessions
|
||||
b := &SyncBloom{
|
||||
bloom: bloom,
|
||||
}
|
||||
b.pend.Add(2)
|
||||
go func() {
|
||||
defer b.pend.Done()
|
||||
b.init(database)
|
||||
}()
|
||||
go func() {
|
||||
defer b.pend.Done()
|
||||
b.meter()
|
||||
}()
|
||||
return b
|
||||
}
|
||||
|
||||
// init iterates over the database, pushing every trie hash into the bloom filter.
|
||||
func (b *SyncBloom) init(database ethdb.Iteratee) {
|
||||
// Iterate over the database, but restart every now and again to avoid holding
|
||||
// a persistent snapshot since fast sync can push a ton of data concurrently,
|
||||
// bloating the disk.
|
||||
//
|
||||
// Note, this is fine, because everything inserted into leveldb by fast sync is
|
||||
// also pushed into the bloom directly, so we're not missing anything when the
|
||||
// iterator is swapped out for a new one.
|
||||
it := database.NewIterator()
|
||||
|
||||
var (
|
||||
start = time.Now()
|
||||
swap = time.Now()
|
||||
)
|
||||
for it.Next() && atomic.LoadUint32(&b.closed) == 0 {
|
||||
// If the database entry is a trie node, add it to the bloom
|
||||
if key := it.Key(); len(key) == common.HashLength {
|
||||
b.bloom.Add(syncBloomHasher(key))
|
||||
bloomLoadMeter.Mark(1)
|
||||
}
|
||||
// If enough time elapsed since the last iterator swap, restart
|
||||
if time.Since(swap) > 8*time.Second {
|
||||
key := common.CopyBytes(it.Key())
|
||||
|
||||
it.Release()
|
||||
it = database.NewIteratorWithStart(key)
|
||||
|
||||
log.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", time.Since(start))
|
||||
swap = time.Now()
|
||||
}
|
||||
}
|
||||
it.Release()
|
||||
|
||||
// Mark the bloom filter inited and return
|
||||
log.Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", time.Since(start))
|
||||
atomic.StoreUint32(&b.inited, 1)
|
||||
}
|
||||
|
||||
// meter periodically recalculates the false positive error rate of the bloom
|
||||
// filter and reports it in a metric.
|
||||
func (b *SyncBloom) meter() {
|
||||
for {
|
||||
// Report the current error ration. No floats, lame, scale it up.
|
||||
bloomErrorGauge.Update(int64(b.errorRate() * 100000))
|
||||
|
||||
// Wait one second, but check termination more frequently
|
||||
for i := 0; i < 10; i++ {
|
||||
if atomic.LoadUint32(&b.closed) == 1 {
|
||||
return
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close terminates any background initializer still running and releases all the
|
||||
// memory allocated for the bloom.
|
||||
func (b *SyncBloom) Close() error {
|
||||
b.closer.Do(func() {
|
||||
// Ensure the initializer is stopped
|
||||
atomic.StoreUint32(&b.closed, 1)
|
||||
b.pend.Wait()
|
||||
|
||||
// Wipe the bloom, but mark it "uninited" just in case someone attempts an access
|
||||
log.Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate())
|
||||
|
||||
atomic.StoreUint32(&b.inited, 0)
|
||||
b.bloom = nil
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add inserts a new trie node hash into the bloom filter.
|
||||
func (b *SyncBloom) Add(hash []byte) {
|
||||
if atomic.LoadUint32(&b.closed) == 1 {
|
||||
return
|
||||
}
|
||||
b.bloom.Add(syncBloomHasher(hash))
|
||||
bloomAddMeter.Mark(1)
|
||||
}
|
||||
|
||||
// Contains tests if the bloom filter contains the given hash:
|
||||
// - false: the bloom definitely does not contain hash
|
||||
// - true: the bloom maybe contains hash
|
||||
//
|
||||
// While the bloom is being initialized, any query will return true.
|
||||
func (b *SyncBloom) Contains(hash []byte) bool {
|
||||
bloomTestMeter.Mark(1)
|
||||
if atomic.LoadUint32(&b.inited) == 0 {
|
||||
// We didn't load all the trie nodes from the previous run of Geth yet. As
|
||||
// such, we can't say for sure if a hash is not present for anything. Until
|
||||
// the init is done, we're faking "possible presence" for everything.
|
||||
return true
|
||||
}
|
||||
// Bloom initialized, check the real one and report any successful misses
|
||||
maybe := b.bloom.Contains(syncBloomHasher(hash))
|
||||
if !maybe {
|
||||
bloomMissMeter.Mark(1)
|
||||
}
|
||||
return maybe
|
||||
}
|
||||
|
||||
// errorRate calculates the probability of a random containment test returning a
|
||||
// false positive.
|
||||
//
|
||||
// We're calculating it ourselves because the bloom library we used missed a
|
||||
// parentheses in the formula and calculates it wrong. And it's discontinued...
|
||||
func (b *SyncBloom) errorRate() float64 {
|
||||
k := float64(b.bloom.K())
|
||||
n := float64(b.bloom.N())
|
||||
m := float64(b.bloom.M())
|
||||
|
||||
return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)
|
||||
}
|
|
@ -94,7 +94,7 @@ func TestEmptySync(t *testing.T) {
|
|||
emptyB, _ := New(emptyRoot, dbB)
|
||||
|
||||
for i, trie := range []*Trie{emptyA, emptyB} {
|
||||
if req := NewSync(trie.Hash(), memorydb.New(), nil).Missing(1); len(req) != 0 {
|
||||
if req := NewSync(trie.Hash(), memorydb.New(), nil, NewSyncBloom(1, memorydb.New())).Missing(1); len(req) != 0 {
|
||||
t.Errorf("test %d: content requested for empty trie: %v", i, req)
|
||||
}
|
||||
}
|
||||
|
@ -112,7 +112,7 @@ func testIterativeSync(t *testing.T, batch int) {
|
|||
// Create a destination trie and sync with the scheduler
|
||||
diskdb := memorydb.New()
|
||||
triedb := NewDatabase(diskdb)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
|
||||
|
||||
queue := append([]common.Hash{}, sched.Missing(batch)...)
|
||||
for len(queue) > 0 {
|
||||
|
@ -145,7 +145,7 @@ func TestIterativeDelayedSync(t *testing.T) {
|
|||
// Create a destination trie and sync with the scheduler
|
||||
diskdb := memorydb.New()
|
||||
triedb := NewDatabase(diskdb)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
|
||||
|
||||
queue := append([]common.Hash{}, sched.Missing(10000)...)
|
||||
for len(queue) > 0 {
|
||||
|
@ -183,7 +183,7 @@ func testIterativeRandomSync(t *testing.T, batch int) {
|
|||
// Create a destination trie and sync with the scheduler
|
||||
diskdb := memorydb.New()
|
||||
triedb := NewDatabase(diskdb)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
|
||||
|
||||
queue := make(map[common.Hash]struct{})
|
||||
for _, hash := range sched.Missing(batch) {
|
||||
|
@ -224,7 +224,7 @@ func TestIterativeRandomDelayedSync(t *testing.T) {
|
|||
// Create a destination trie and sync with the scheduler
|
||||
diskdb := memorydb.New()
|
||||
triedb := NewDatabase(diskdb)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
|
||||
|
||||
queue := make(map[common.Hash]struct{})
|
||||
for _, hash := range sched.Missing(10000) {
|
||||
|
@ -271,7 +271,7 @@ func TestDuplicateAvoidanceSync(t *testing.T) {
|
|||
// Create a destination trie and sync with the scheduler
|
||||
diskdb := memorydb.New()
|
||||
triedb := NewDatabase(diskdb)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
|
||||
|
||||
queue := append([]common.Hash{}, sched.Missing(0)...)
|
||||
requested := make(map[common.Hash]struct{})
|
||||
|
@ -311,7 +311,7 @@ func TestIncompleteSync(t *testing.T) {
|
|||
// Create a destination trie and sync with the scheduler
|
||||
diskdb := memorydb.New()
|
||||
triedb := NewDatabase(diskdb)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil)
|
||||
sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
|
||||
|
||||
var added []common.Hash
|
||||
queue := append([]common.Hash{}, sched.Missing(1)...)
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
The MIT License (MIT)
|
||||
Copyright © 2014, 2015 Barry Allard
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,123 @@
|
|||
**Important**: Zeroth, [consider](https://bdupras.github.io/filter-tutorial/) if a [Cuckoo filter](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) could be [right for your use-case](https://github.com/seiflotfy/cuckoofilter).
|
||||
|
||||
|
||||
[![GoDoc](https://godoc.org/github.com/steakknife/bloomfilter?status.png)](https://godoc.org/github.com/steakknife/bloomfilter) [![travis](https://img.shields.io/travis/steakknife/bloomfilter.svg)](https://travis-ci.org/steakknife/bloomfilter)
|
||||
|
||||
# Face-meltingly fast, thread-safe, marshalable, unionable, probability- and optimal-size-calculating Bloom filter in go
|
||||
|
||||
Copyright © 2014-2016,2018 Barry Allard
|
||||
|
||||
[MIT license](MIT-LICENSE.txt)
|
||||
|
||||
## WTF is a bloom filter
|
||||
|
||||
**TL;DR: **Probabilistic, extra lookup table to track a set of elements kept elsewhere to reduce expensive, unnecessary set element retrieval and/or iterator operations **when an element is not present in the set.** It's a classic time-storage tradeoff algoritm.
|
||||
|
||||
### Properties
|
||||
|
||||
#### [See wikipedia](https://en.wikipedia.org/wiki/Bloom_filter) for algorithm details
|
||||
|
||||
|Impact|What|Description|
|
||||
|---|---|---|
|
||||
|Good|No false negatives|know for certain if a given element is definitely NOT in the set|
|
||||
|Bad|False positives|uncertain if a given element is in the set|
|
||||
|Bad|Theoretical potential for hash collisions|in very large systems and/or badly hash.Hash64-conforming implementations|
|
||||
|Bad|Add only|Cannot remove an element, it would destroy information about other elements|
|
||||
|Good|Constant storage|uses only a fixed amount of memory|
|
||||
|
||||
## Naming conventions
|
||||
|
||||
(Similar to algorithm)
|
||||
|
||||
|Variable/function|Description|Range|
|
||||
|---|---|---|
|
||||
|m/M()|number of bits in the bloom filter (memory representation is about m/8 bytes in size)|>=2|
|
||||
|n/N()|number of elements present|>=0|
|
||||
|k/K()|number of keys to use (keys are kept private to user code but are de/serialized to Marshal and file I/O)|>=0|
|
||||
|maxN|maximum capacity of intended structure|>0|
|
||||
|p|maximum allowed probability of collision (for computing m and k for optimal sizing)|>0..<1|
|
||||
|
||||
- Memory representation should be exactly `24 + 8*(k + (m+63)/64) + unsafe.Sizeof(RWMutex)` bytes.
|
||||
- Serialized (`BinaryMarshaler`) representation should be exactly `72 + 8*(k + (m+63)/64)` bytes. (Disk format is less due to compression.)
|
||||
|
||||
## Binary serialization format
|
||||
|
||||
All values in Little-endian format
|
||||
|
||||
|Offset|Offset (Hex)|Length (bytes)|Name|Type|
|
||||
|---|---|---|---|---|
|
||||
|0|00|8|k|`uint64`|
|
||||
|8|08|8|n|`uint64`|
|
||||
|16|10|8|m|`uint64`|
|
||||
|24|18|k|(keys)|`[k]uint64`|
|
||||
|24+8*k|...|(m+63)/64|(bloom filter)|`[(m+63)/64]uint64`|
|
||||
|24+8\*k+8\*((m+63)/64)|...|48|(SHA384 of all previous fields, hashed in order)|`[48]byte`|
|
||||
|
||||
- `bloomfilter.Filter` conforms to `encoding.BinaryMarshaler` and `encoding.BinaryUnmarshaler'
|
||||
|
||||
## Usage
|
||||
|
||||
```go
|
||||
|
||||
import "github.com/steakknife/bloomfilter"
|
||||
|
||||
const (
|
||||
maxElements = 100000
|
||||
probCollide = 0.0000001
|
||||
)
|
||||
|
||||
bf, err := bloomfilter.NewOptimal(maxElements, probCollide)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
someValue := ... // must conform to hash.Hash64
|
||||
|
||||
bf.Add(someValue)
|
||||
if bf.Contains(someValue) { // probably true, could be false
|
||||
// whatever
|
||||
}
|
||||
|
||||
anotherValue := ... // must also conform to hash.Hash64
|
||||
|
||||
if bf.Contains(anotherValue) {
|
||||
panic("This should never happen")
|
||||
}
|
||||
|
||||
err := bf.WriteFile("1.bf.gz") // saves this BF to a file
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
bf2, err := bloomfilter.ReadFile("1.bf.gz") // read the BF to another var
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Design
|
||||
|
||||
Where possible, branch-free operations are used to avoid deep pipeline / execution unit stalls on branch-misses.
|
||||
|
||||
## Get
|
||||
|
||||
go get -u github.com/steakknife/bloomfilter # master is always stable
|
||||
|
||||
## Source
|
||||
|
||||
- On the web: [https://github.com/steakknife/bloomfilter](https://github.com/steakknife/bloomfilter)
|
||||
|
||||
- Git: `git clone https://github.com/steakknife/bloomfilter`
|
||||
|
||||
## Contact
|
||||
|
||||
- [Feedback](mailto:barry.allard@gmail.com)
|
||||
|
||||
- [Issues](https://github.com/steakknife/bloomfilter/issues)
|
||||
|
||||
## License
|
||||
|
||||
[MIT license](MIT-LICENSE.txt)
|
||||
|
||||
Copyright © 2014-2016 Barry Allard
|
|
@ -0,0 +1,87 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha512"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// conforms to encoding.BinaryMarshaler
|
||||
|
||||
// marshalled binary layout (Little Endian):
|
||||
//
|
||||
// k 1 uint64
|
||||
// n 1 uint64
|
||||
// m 1 uint64
|
||||
// keys [k]uint64
|
||||
// bits [(m+63)/64]uint64
|
||||
// hash sha384 (384 bits == 48 bytes)
|
||||
//
|
||||
// size = (3 + k + (m+63)/64) * 8 bytes
|
||||
//
|
||||
|
||||
func (f *Filter) marshal() (buf *bytes.Buffer,
|
||||
hash [sha512.Size384]byte,
|
||||
err error,
|
||||
) {
|
||||
f.lock.RLock()
|
||||
defer f.lock.RUnlock()
|
||||
|
||||
debug("write bf k=%d n=%d m=%d\n", f.K(), f.n, f.m)
|
||||
|
||||
buf = new(bytes.Buffer)
|
||||
|
||||
err = binary.Write(buf, binary.LittleEndian, f.K())
|
||||
if err != nil {
|
||||
return nil, hash, err
|
||||
}
|
||||
|
||||
err = binary.Write(buf, binary.LittleEndian, f.n)
|
||||
if err != nil {
|
||||
return nil, hash, err
|
||||
}
|
||||
|
||||
err = binary.Write(buf, binary.LittleEndian, f.m)
|
||||
if err != nil {
|
||||
return nil, hash, err
|
||||
}
|
||||
|
||||
err = binary.Write(buf, binary.LittleEndian, f.keys)
|
||||
if err != nil {
|
||||
return nil, hash, err
|
||||
}
|
||||
|
||||
err = binary.Write(buf, binary.LittleEndian, f.bits)
|
||||
if err != nil {
|
||||
return nil, hash, err
|
||||
}
|
||||
|
||||
hash = sha512.Sum384(buf.Bytes())
|
||||
err = binary.Write(buf, binary.LittleEndian, hash)
|
||||
return buf, hash, err
|
||||
}
|
||||
|
||||
// MarshalBinary converts a Filter into []bytes
|
||||
func (f *Filter) MarshalBinary() (data []byte, err error) {
|
||||
buf, hash, err := f.marshal()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
debug(
|
||||
"bloomfilter.MarshalBinary: Successfully wrote %d byte(s), sha384 %v",
|
||||
buf.Len(), hash,
|
||||
)
|
||||
data = buf.Bytes()
|
||||
return data, nil
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/hmac"
|
||||
"crypto/sha512"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
)
|
||||
|
||||
func unmarshalBinaryHeader(r io.Reader) (k, n, m uint64, err error) {
|
||||
err = binary.Read(r, binary.LittleEndian, &k)
|
||||
if err != nil {
|
||||
return k, n, m, err
|
||||
}
|
||||
|
||||
if k < KMin {
|
||||
return k, n, m, errK()
|
||||
}
|
||||
|
||||
err = binary.Read(r, binary.LittleEndian, &n)
|
||||
if err != nil {
|
||||
return k, n, m, err
|
||||
}
|
||||
|
||||
err = binary.Read(r, binary.LittleEndian, &m)
|
||||
if err != nil {
|
||||
return k, n, m, err
|
||||
}
|
||||
|
||||
if m < MMin {
|
||||
return k, n, m, errM()
|
||||
}
|
||||
|
||||
debug("read bf k=%d n=%d m=%d\n", k, n, m)
|
||||
|
||||
return k, n, m, err
|
||||
}
|
||||
|
||||
func unmarshalBinaryBits(r io.Reader, m uint64) (bits []uint64, err error) {
|
||||
bits, err = newBits(m)
|
||||
if err != nil {
|
||||
return bits, err
|
||||
}
|
||||
err = binary.Read(r, binary.LittleEndian, bits)
|
||||
return bits, err
|
||||
|
||||
}
|
||||
|
||||
func unmarshalBinaryKeys(r io.Reader, k uint64) (keys []uint64, err error) {
|
||||
keys = make([]uint64, k)
|
||||
err = binary.Read(r, binary.LittleEndian, keys)
|
||||
return keys, err
|
||||
}
|
||||
|
||||
func checkBinaryHash(r io.Reader, data []byte) (err error) {
|
||||
expectedHash := make([]byte, sha512.Size384)
|
||||
err = binary.Read(r, binary.LittleEndian, expectedHash)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
actualHash := sha512.Sum384(data[:len(data)-sha512.Size384])
|
||||
|
||||
if !hmac.Equal(expectedHash, actualHash[:]) {
|
||||
debug("bloomfilter.UnmarshalBinary() sha384 hash failed:"+
|
||||
" actual %v expected %v", actualHash, expectedHash)
|
||||
return errHash()
|
||||
}
|
||||
|
||||
debug("bloomfilter.UnmarshalBinary() successfully read"+
|
||||
" %d byte(s), sha384 %v", len(data), actualHash)
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalBinary converts []bytes into a Filter
|
||||
// conforms to encoding.BinaryUnmarshaler
|
||||
func (f *Filter) UnmarshalBinary(data []byte) (err error) {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
buf := bytes.NewBuffer(data)
|
||||
|
||||
var k uint64
|
||||
k, f.n, f.m, err = unmarshalBinaryHeader(buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f.keys, err = unmarshalBinaryKeys(buf, k)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f.bits, err = unmarshalBinaryBits(buf, f.m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return checkBinaryHash(buf, data)
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import (
|
||||
"hash"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Filter is an opaque Bloom filter type
|
||||
type Filter struct {
|
||||
lock sync.RWMutex
|
||||
bits []uint64
|
||||
keys []uint64
|
||||
m uint64 // number of bits the "bits" field should recognize
|
||||
n uint64 // number of inserted elements
|
||||
}
|
||||
|
||||
// Hashable -> hashes
|
||||
func (f *Filter) hash(v hash.Hash64) []uint64 {
|
||||
rawHash := v.Sum64()
|
||||
n := len(f.keys)
|
||||
hashes := make([]uint64, n)
|
||||
for i := 0; i < n; i++ {
|
||||
hashes[i] = rawHash ^ f.keys[i]
|
||||
}
|
||||
return hashes
|
||||
}
|
||||
|
||||
// M is the size of Bloom filter, in bits
|
||||
func (f *Filter) M() uint64 {
|
||||
return f.m
|
||||
}
|
||||
|
||||
// K is the count of keys
|
||||
func (f *Filter) K() uint64 {
|
||||
return uint64(len(f.keys))
|
||||
}
|
||||
|
||||
// Add a hashable item, v, to the filter
|
||||
func (f *Filter) Add(v hash.Hash64) {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
for _, i := range f.hash(v) {
|
||||
// f.setBit(i)
|
||||
i %= f.m
|
||||
f.bits[i>>6] |= 1 << uint(i&0x3f)
|
||||
}
|
||||
f.n++
|
||||
}
|
||||
|
||||
// Contains tests if f contains v
|
||||
// false: f definitely does not contain value v
|
||||
// true: f maybe contains value v
|
||||
func (f *Filter) Contains(v hash.Hash64) bool {
|
||||
f.lock.RLock()
|
||||
defer f.lock.RUnlock()
|
||||
|
||||
r := uint64(1)
|
||||
for _, i := range f.hash(v) {
|
||||
// r |= f.getBit(k)
|
||||
i %= f.m
|
||||
r &= (f.bits[i>>6] >> uint(i&0x3f)) & 1
|
||||
}
|
||||
return uint64ToBool(r)
|
||||
}
|
||||
|
||||
// Copy f to a new Bloom filter
|
||||
func (f *Filter) Copy() (*Filter, error) {
|
||||
f.lock.RLock()
|
||||
defer f.lock.RUnlock()
|
||||
|
||||
out, err := f.NewCompatible()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
copy(out.bits, f.bits)
|
||||
out.n = f.n
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// UnionInPlace merges Bloom filter f2 into f
|
||||
func (f *Filter) UnionInPlace(f2 *Filter) error {
|
||||
if !f.IsCompatible(f2) {
|
||||
return errIncompatibleBloomFilters()
|
||||
}
|
||||
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
for i, bitword := range f2.bits {
|
||||
f.bits[i] |= bitword
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Union merges f2 and f2 into a new Filter out
|
||||
func (f *Filter) Union(f2 *Filter) (out *Filter, err error) {
|
||||
if !f.IsCompatible(f2) {
|
||||
return nil, errIncompatibleBloomFilters()
|
||||
}
|
||||
|
||||
f.lock.RLock()
|
||||
defer f.lock.RUnlock()
|
||||
|
||||
out, err = f.NewCompatible()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for i, bitword := range f2.bits {
|
||||
out.bits[i] = f.bits[i] | bitword
|
||||
}
|
||||
return out, nil
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import (
|
||||
"encoding"
|
||||
"encoding/gob"
|
||||
"io"
|
||||
)
|
||||
|
||||
// compile-time conformance tests
|
||||
var (
|
||||
_ encoding.BinaryMarshaler = (*Filter)(nil)
|
||||
_ encoding.BinaryUnmarshaler = (*Filter)(nil)
|
||||
_ encoding.TextMarshaler = (*Filter)(nil)
|
||||
_ encoding.TextUnmarshaler = (*Filter)(nil)
|
||||
_ io.ReaderFrom = (*Filter)(nil)
|
||||
_ io.WriterTo = (*Filter)(nil)
|
||||
_ gob.GobDecoder = (*Filter)(nil)
|
||||
_ gob.GobEncoder = (*Filter)(nil)
|
||||
)
|
|
@ -0,0 +1,37 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
const debugVar = "GOLANG_STEAKKNIFE_BLOOMFILTER_DEBUG"
|
||||
|
||||
// EnableDebugging permits debug() logging of details to stderr
|
||||
func EnableDebugging() {
|
||||
err := os.Setenv(debugVar, "1")
|
||||
if err != nil {
|
||||
panic("Unable to Setenv " + debugVar)
|
||||
}
|
||||
}
|
||||
|
||||
func debugging() bool {
|
||||
return os.Getenv(debugVar) != ""
|
||||
}
|
||||
|
||||
// debug printing when debugging() is true
|
||||
func debug(format string, a ...interface{}) {
|
||||
if debugging() {
|
||||
log.Printf(format, a...)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import "fmt"
|
||||
|
||||
func errHash() error {
|
||||
return fmt.Errorf(
|
||||
"Hash mismatch, the Bloom filter is probably corrupt")
|
||||
}
|
||||
func errK() error {
|
||||
return fmt.Errorf(
|
||||
"keys must have length %d or greater", KMin)
|
||||
}
|
||||
func errM() error {
|
||||
return fmt.Errorf(
|
||||
"m (number of bits in the Bloom filter) must be >= %d", MMin)
|
||||
}
|
||||
func errUniqueKeys() error {
|
||||
return fmt.Errorf(
|
||||
"Bloom filter keys must be unique")
|
||||
}
|
||||
func errIncompatibleBloomFilters() error {
|
||||
return fmt.Errorf(
|
||||
"Cannot perform union on two incompatible Bloom filters")
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
)
|
||||
|
||||
// ReadFrom r and overwrite f with new Bloom filter data
|
||||
func (f *Filter) ReadFrom(r io.Reader) (n int64, err error) {
|
||||
f2, n, err := ReadFrom(r)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
f.m = f2.m
|
||||
f.n = f2.n
|
||||
f.bits = f2.bits
|
||||
f.keys = f2.keys
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// ReadFrom Reader r into a lossless-compressed Bloom filter f
|
||||
func ReadFrom(r io.Reader) (f *Filter, n int64, err error) {
|
||||
rawR, err := gzip.NewReader(r)
|
||||
if err != nil {
|
||||
return nil, -1, err
|
||||
}
|
||||
defer func() {
|
||||
err = rawR.Close()
|
||||
}()
|
||||
|
||||
content, err := ioutil.ReadAll(rawR)
|
||||
if err != nil {
|
||||
return nil, -1, err
|
||||
}
|
||||
|
||||
f = new(Filter)
|
||||
n = int64(len(content))
|
||||
err = f.UnmarshalBinary(content)
|
||||
if err != nil {
|
||||
return nil, -1, err
|
||||
}
|
||||
return f, n, nil
|
||||
}
|
||||
|
||||
// ReadFile from filename into a lossless-compressed Bloom Filter f
|
||||
// Suggested file extension: .bf.gz
|
||||
func ReadFile(filename string) (f *Filter, n int64, err error) {
|
||||
r, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return nil, -1, err
|
||||
}
|
||||
defer func() {
|
||||
err = r.Close()
|
||||
}()
|
||||
|
||||
return ReadFrom(r)
|
||||
}
|
||||
|
||||
// WriteTo a Writer w from lossless-compressed Bloom Filter f
|
||||
func (f *Filter) WriteTo(w io.Writer) (n int64, err error) {
|
||||
f.lock.RLock()
|
||||
defer f.lock.RUnlock()
|
||||
|
||||
rawW := gzip.NewWriter(w)
|
||||
defer func() {
|
||||
err = rawW.Close()
|
||||
}()
|
||||
|
||||
content, err := f.MarshalBinary()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
intN, err := rawW.Write(content)
|
||||
n = int64(intN)
|
||||
return n, err
|
||||
}
|
||||
|
||||
// WriteFile filename from a a lossless-compressed Bloom Filter f
|
||||
// Suggested file extension: .bf.gz
|
||||
func (f *Filter) WriteFile(filename string) (n int64, err error) {
|
||||
w, err := os.Create(filename)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
defer func() {
|
||||
err = w.Close()
|
||||
}()
|
||||
|
||||
return f.WriteTo(w)
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import _ "encoding/gob" // make sure gob is available
|
||||
|
||||
// GobDecode conforms to interface gob.GobDecoder
|
||||
func (f *Filter) GobDecode(data []byte) error {
|
||||
return f.UnmarshalBinary(data)
|
||||
}
|
||||
|
||||
// GobEncode conforms to interface gob.GobEncoder
|
||||
func (f *Filter) GobEncode() ([]byte, error) {
|
||||
return f.MarshalBinary()
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import "unsafe"
|
||||
|
||||
func uint64ToBool(x uint64) bool {
|
||||
return *(*bool)(unsafe.Pointer(&x)) // #nosec
|
||||
}
|
||||
|
||||
// returns 0 if equal, does not compare len(b0) with len(b1)
|
||||
func noBranchCompareUint64s(b0, b1 []uint64) uint64 {
|
||||
r := uint64(0)
|
||||
for i, b0i := range b0 {
|
||||
r |= b0i ^ b1[i]
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// IsCompatible is true if f and f2 can be Union()ed together
|
||||
func (f *Filter) IsCompatible(f2 *Filter) bool {
|
||||
f.lock.RLock()
|
||||
defer f.lock.RUnlock()
|
||||
|
||||
f.lock.RLock()
|
||||
defer f2.lock.RUnlock()
|
||||
|
||||
// 0 is true, non-0 is false
|
||||
compat := f.M() ^ f2.M()
|
||||
compat |= f.K() ^ f2.K()
|
||||
compat |= noBranchCompareUint64s(f.keys, f2.keys)
|
||||
return uint64ToBool(^compat)
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/binary"
|
||||
"log"
|
||||
)
|
||||
|
||||
const (
|
||||
// MMin is the minimum Bloom filter bits count
|
||||
MMin = 2
|
||||
// KMin is the minimum number of keys
|
||||
KMin = 1
|
||||
// Uint64Bytes is the number of bytes in type uint64
|
||||
Uint64Bytes = 8
|
||||
)
|
||||
|
||||
// New Filter with CSPRNG keys
|
||||
//
|
||||
// m is the size of the Bloom filter, in bits, >= 2
|
||||
//
|
||||
// k is the number of random keys, >= 1
|
||||
func New(m, k uint64) (*Filter, error) {
|
||||
return NewWithKeys(m, newRandKeys(k))
|
||||
}
|
||||
|
||||
func newRandKeys(k uint64) []uint64 {
|
||||
keys := make([]uint64, k)
|
||||
err := binary.Read(rand.Reader, binary.LittleEndian, keys)
|
||||
if err != nil {
|
||||
log.Panicf(
|
||||
"Cannot read %d bytes from CSRPNG crypto/rand.Read (err=%v)",
|
||||
Uint64Bytes, err,
|
||||
)
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
// NewCompatible Filter compatible with f
|
||||
func (f *Filter) NewCompatible() (*Filter, error) {
|
||||
return NewWithKeys(f.m, f.keys)
|
||||
}
|
||||
|
||||
// NewOptimal Bloom filter with random CSPRNG keys
|
||||
func NewOptimal(maxN uint64, p float64) (*Filter, error) {
|
||||
m := OptimalM(maxN, p)
|
||||
k := OptimalK(m, maxN)
|
||||
debug("New optimal bloom filter ::"+
|
||||
" requested max elements (n):%d,"+
|
||||
" probability of collision (p):%1.10f "+
|
||||
"-> recommends -> bits (m): %d (%f GiB), "+
|
||||
"number of keys (k): %d",
|
||||
maxN, p, m, float64(m)/(gigabitsPerGiB), k)
|
||||
return New(m, k)
|
||||
}
|
||||
|
||||
// UniqueKeys is true if all keys are unique
|
||||
func UniqueKeys(keys []uint64) bool {
|
||||
for j := 0; j < len(keys)-1; j++ {
|
||||
elem := keys[j]
|
||||
for i := 1; i < j; i++ {
|
||||
if keys[i] == elem {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// NewWithKeys creates a new Filter from user-supplied origKeys
|
||||
func NewWithKeys(m uint64, origKeys []uint64) (f *Filter, err error) {
|
||||
bits, err := newBits(m)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
keys, err := newKeysCopy(origKeys)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Filter{
|
||||
m: m,
|
||||
n: 0,
|
||||
bits: bits,
|
||||
keys: keys,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func newBits(m uint64) ([]uint64, error) {
|
||||
if m < MMin {
|
||||
return nil, errM()
|
||||
}
|
||||
return make([]uint64, (m+63)/64), nil
|
||||
}
|
||||
|
||||
func newKeysBlank(k uint64) ([]uint64, error) {
|
||||
if k < KMin {
|
||||
return nil, errK()
|
||||
}
|
||||
return make([]uint64, k), nil
|
||||
}
|
||||
|
||||
func newKeysCopy(origKeys []uint64) (keys []uint64, err error) {
|
||||
if !UniqueKeys(origKeys) {
|
||||
return nil, errUniqueKeys()
|
||||
}
|
||||
keys, err = newKeysBlank(uint64(len(origKeys)))
|
||||
if err != nil {
|
||||
return keys, err
|
||||
}
|
||||
copy(keys, origKeys)
|
||||
return keys, err
|
||||
}
|
||||
|
||||
func newWithKeysAndBits(m uint64, keys []uint64, bits []uint64, n uint64) (
|
||||
f *Filter, err error,
|
||||
) {
|
||||
f, err = NewWithKeys(m, keys)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
copy(f.bits, bits)
|
||||
f.n = n
|
||||
return f, nil
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import "math"
|
||||
|
||||
const gigabitsPerGiB float64 = 8.0 * 1024 * 1024 * 1024
|
||||
|
||||
// OptimalK calculates the optimal k value for creating a new Bloom filter
|
||||
// maxn is the maximum anticipated number of elements
|
||||
func OptimalK(m, maxN uint64) uint64 {
|
||||
return uint64(math.Ceil(float64(m) * math.Ln2 / float64(maxN)))
|
||||
}
|
||||
|
||||
// OptimalM calculates the optimal m value for creating a new Bloom filter
|
||||
// p is the desired false positive probability
|
||||
// optimal m = ceiling( - n * ln(p) / ln(2)**2 )
|
||||
func OptimalM(maxN uint64, p float64) uint64 {
|
||||
return uint64(math.Ceil(-float64(maxN) * math.Log(p) / (math.Ln2 * math.Ln2)))
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/steakknife/hamming"
|
||||
)
|
||||
|
||||
// PreciseFilledRatio is an exhaustive count # of 1's
|
||||
func (f *Filter) PreciseFilledRatio() float64 {
|
||||
f.lock.RLock()
|
||||
defer f.lock.RUnlock()
|
||||
|
||||
return float64(hamming.CountBitsUint64s(f.bits)) / float64(f.M())
|
||||
}
|
||||
|
||||
// N is how many elements have been inserted
|
||||
// (actually, how many Add()s have been performed?)
|
||||
func (f *Filter) N() uint64 {
|
||||
f.lock.RLock()
|
||||
defer f.lock.RUnlock()
|
||||
|
||||
return f.n
|
||||
}
|
||||
|
||||
// FalsePosititveProbability is the upper-bound probability of false positives
|
||||
// (1 - exp(-k*(n+0.5)/(m-1))) ** k
|
||||
func (f *Filter) FalsePosititveProbability() float64 {
|
||||
k := float64(f.K())
|
||||
n := float64(f.N())
|
||||
m := float64(f.M())
|
||||
return math.Pow(1.0-math.Exp(-k)*(n+0.5)/(m-1), k)
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import "fmt"
|
||||
|
||||
// MarshalText conforms to encoding.TextMarshaler
|
||||
func (f *Filter) MarshalText() (text []byte, err error) {
|
||||
f.lock.RLock()
|
||||
defer f.lock.RUnlock()
|
||||
|
||||
s := fmt.Sprintln("k")
|
||||
s += fmt.Sprintln(f.K())
|
||||
s += fmt.Sprintln("n")
|
||||
s += fmt.Sprintln(f.n)
|
||||
s += fmt.Sprintln("m")
|
||||
s += fmt.Sprintln(f.m)
|
||||
|
||||
s += fmt.Sprintln("keys")
|
||||
for key := range f.keys {
|
||||
s += fmt.Sprintf(keyFormat, key) + nl()
|
||||
}
|
||||
|
||||
s += fmt.Sprintln("bits")
|
||||
for w := range f.bits {
|
||||
s += fmt.Sprintf(bitsFormat, w) + nl()
|
||||
}
|
||||
|
||||
_, hash, err := f.marshal()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s += fmt.Sprintln("sha384")
|
||||
for b := range hash {
|
||||
s += fmt.Sprintf("%02x", b)
|
||||
}
|
||||
s += nl()
|
||||
|
||||
text = []byte(s)
|
||||
return text, nil
|
||||
}
|
|
@ -0,0 +1,150 @@
|
|||
// Package bloomfilter is face-meltingly fast, thread-safe,
|
||||
// marshalable, unionable, probability- and
|
||||
// optimal-size-calculating Bloom filter in go
|
||||
//
|
||||
// https://github.com/steakknife/bloomfilter
|
||||
//
|
||||
// Copyright © 2014, 2015, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package bloomfilter
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/hmac"
|
||||
"crypto/sha512"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
const (
|
||||
keyFormat = "%016x"
|
||||
bitsFormat = "%016x"
|
||||
)
|
||||
|
||||
func nl() string {
|
||||
return fmt.Sprintln()
|
||||
}
|
||||
|
||||
func unmarshalTextHeader(r io.Reader) (k, n, m uint64, err error) {
|
||||
format := "k" + nl() + "%d" + nl()
|
||||
format += "n" + nl() + "%d" + nl()
|
||||
format += "m" + nl() + "%d" + nl()
|
||||
format += "keys" + nl()
|
||||
|
||||
_, err = fmt.Fscanf(r, format, k, n, m)
|
||||
return k, n, m, err
|
||||
}
|
||||
|
||||
func unmarshalTextKeys(r io.Reader, keys []uint64) (err error) {
|
||||
for i := range keys {
|
||||
_, err = fmt.Fscanf(r, keyFormat, keys[i])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func unmarshalTextBits(r io.Reader, bits []uint64) (err error) {
|
||||
_, err = fmt.Fscanf(r, "bits")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for i := range bits {
|
||||
_, err = fmt.Fscanf(r, bitsFormat, bits[i])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func unmarshalAndCheckTextHash(r io.Reader, f *Filter) (err error) {
|
||||
_, err = fmt.Fscanf(r, "sha384")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
actualHash := [sha512.Size384]byte{}
|
||||
|
||||
for i := range actualHash {
|
||||
_, err = fmt.Fscanf(r, "%02x", actualHash[i])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
_, expectedHash, err := f.marshal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !hmac.Equal(expectedHash[:], actualHash[:]) {
|
||||
return errHash()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalText conforms to TextUnmarshaler
|
||||
func UnmarshalText(text []byte) (f *Filter, err error) {
|
||||
r := bytes.NewBuffer(text)
|
||||
k, n, m, err := unmarshalTextHeader(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
keys, err := newKeysBlank(k)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = unmarshalTextKeys(r, keys)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bits, err := newBits(m)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = unmarshalTextBits(r, bits)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
f, err = newWithKeysAndBits(m, keys, bits, n)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = unmarshalAndCheckTextHash(r, f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// UnmarshalText method overwrites f with data decoded from text
|
||||
func (f *Filter) UnmarshalText(text []byte) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
f2, err := UnmarshalText(text)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f.m = f2.m
|
||||
f.n = f2.n
|
||||
copy(f.bits, f2.bits)
|
||||
copy(f.keys, f2.keys)
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
The MIT License (MIT)
|
||||
Copyright © 2014, 2015, 2016 Barry Allard
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,82 @@
|
|||
[![GoDoc](https://godoc.org/github.com/steakknife/hamming?status.png)](https://godoc.org/github.com/steakknife/hamming) [![Build Status](https://travis-ci.org/steakknife/hamming.svg?branch=master)](https://travis-ci.org/steakknife/hamming)
|
||||
|
||||
|
||||
# hamming distance calculations in Go
|
||||
|
||||
Copyright © 2014, 2015, 2016, 2018 Barry Allard
|
||||
|
||||
[MIT license](MIT-LICENSE.txt)
|
||||
|
||||
## Performance
|
||||
|
||||
```
|
||||
$ go test -bench=.
|
||||
BenchmarkCountBitsInt8PopCnt-4 300000000 4.30 ns/op
|
||||
BenchmarkCountBitsInt16PopCnt-4 300000000 3.83 ns/op
|
||||
BenchmarkCountBitsInt32PopCnt-4 300000000 3.64 ns/op
|
||||
BenchmarkCountBitsInt64PopCnt-4 500000000 3.60 ns/op
|
||||
BenchmarkCountBitsIntPopCnt-4 300000000 5.72 ns/op
|
||||
BenchmarkCountBitsUint8PopCnt-4 1000000000 2.98 ns/op
|
||||
BenchmarkCountBitsUint16PopCnt-4 500000000 3.23 ns/op
|
||||
BenchmarkCountBitsUint32PopCnt-4 500000000 3.00 ns/op
|
||||
BenchmarkCountBitsUint64PopCnt-4 1000000000 2.94 ns/op
|
||||
BenchmarkCountBitsUintPopCnt-4 300000000 5.04 ns/op
|
||||
BenchmarkCountBitsBytePopCnt-4 300000000 3.99 ns/op
|
||||
BenchmarkCountBitsRunePopCnt-4 300000000 3.83 ns/op
|
||||
BenchmarkCountBitsInt8-4 2000000000 0.74 ns/op
|
||||
BenchmarkCountBitsInt16-4 2000000000 1.54 ns/op
|
||||
BenchmarkCountBitsInt32-4 1000000000 2.63 ns/op
|
||||
BenchmarkCountBitsInt64-4 1000000000 2.56 ns/op
|
||||
BenchmarkCountBitsInt-4 200000000 7.23 ns/op
|
||||
BenchmarkCountBitsUint16-4 2000000000 1.51 ns/op
|
||||
BenchmarkCountBitsUint32-4 500000000 4.00 ns/op
|
||||
BenchmarkCountBitsUint64-4 1000000000 2.64 ns/op
|
||||
BenchmarkCountBitsUint64Alt-4 200000000 7.60 ns/op
|
||||
BenchmarkCountBitsUint-4 300000000 5.48 ns/op
|
||||
BenchmarkCountBitsUintReference-4 100000000 19.2 ns/op
|
||||
BenchmarkCountBitsByte-4 2000000000 0.75 ns/op
|
||||
BenchmarkCountBitsByteAlt-4 1000000000 2.37 ns/op
|
||||
BenchmarkCountBitsRune-4 500000000 2.85 ns/op
|
||||
PASS
|
||||
ok _/Users/bmf/Projects/hamming 58.305s
|
||||
$
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```go
|
||||
import 'github.com/steakknife/hamming'
|
||||
|
||||
// ...
|
||||
|
||||
// hamming distance between values
|
||||
hamming.Byte(0xFF, 0x00) // 8
|
||||
hamming.Byte(0x00, 0x00) // 0
|
||||
|
||||
// just count bits in a byte
|
||||
hamming.CountBitsByte(0xA5), // 4
|
||||
```
|
||||
|
||||
See help in the [docs](https://godoc.org/github.com/steakknife/hamming)
|
||||
|
||||
## Get
|
||||
|
||||
go get -u github.com/steakknife/hamming # master is always stable
|
||||
|
||||
## Source
|
||||
|
||||
- On the web: https://github.com/steakknife/hamming
|
||||
|
||||
- Git: `git clone https://github.com/steakknife/hamming`
|
||||
|
||||
## Contact
|
||||
|
||||
- [Feedback](mailto:barry.allard@gmail.com)
|
||||
|
||||
- [Issues](https://github.com/steakknife/hamming/issues)
|
||||
|
||||
## License
|
||||
|
||||
[MIT license](MIT-LICENSE.txt)
|
||||
|
||||
Copyright © 2014, 2015, 2016 Barry Allard
|
|
@ -0,0 +1,35 @@
|
|||
//
|
||||
// Package hamming distance calculations in Go
|
||||
//
|
||||
// https://github.com/steakknife/hamming
|
||||
//
|
||||
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
//
|
||||
// Usage
|
||||
//
|
||||
// For functions named CountBits.+s?. The plural forms are for slices.
|
||||
// The CountBits.+ forms are Population Count only, where the bare-type
|
||||
// forms are Hamming distance (number of bits different) between two values.
|
||||
//
|
||||
// Optimized assembly .+PopCnt forms are available on amd64, and operate just
|
||||
// like the regular forms (Must check and guard on HasPopCnt() first before
|
||||
// trying to call .+PopCnt functions).
|
||||
//
|
||||
// import 'github.com/steakknife/hamming'
|
||||
//
|
||||
// // ...
|
||||
//
|
||||
// // hamming distance between values
|
||||
// hamming.Byte(0xFF, 0x00) // 8
|
||||
// hamming.Byte(0x00, 0x00) // 0
|
||||
//
|
||||
// // just count bits in a byte
|
||||
// hamming.CountBitsByte(0xA5), // 4
|
||||
//
|
||||
// Got rune? use int32
|
||||
// Got uint8? use byte
|
||||
//
|
||||
package hamming
|
|
@ -0,0 +1,70 @@
|
|||
//
|
||||
// Package hamming distance calculations in Go
|
||||
//
|
||||
// https://github.com/steakknife/hamming
|
||||
//
|
||||
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package hamming
|
||||
|
||||
// Int8 hamming distance of two int8's
|
||||
func Int8(x, y int8) int {
|
||||
return CountBitsInt8(x ^ y)
|
||||
}
|
||||
|
||||
// Int16 hamming distance of two int16's
|
||||
func Int16(x, y int16) int {
|
||||
return CountBitsInt16(x ^ y)
|
||||
}
|
||||
|
||||
// Int32 hamming distance of two int32's
|
||||
func Int32(x, y int32) int {
|
||||
return CountBitsInt32(x ^ y)
|
||||
}
|
||||
|
||||
// Int64 hamming distance of two int64's
|
||||
func Int64(x, y int64) int {
|
||||
return CountBitsInt64(x ^ y)
|
||||
}
|
||||
|
||||
// Int hamming distance of two ints
|
||||
func Int(x, y int) int {
|
||||
return CountBitsInt(x ^ y)
|
||||
}
|
||||
|
||||
// Uint8 hamming distance of two uint8's
|
||||
func Uint8(x, y uint8) int {
|
||||
return CountBitsUint8(x ^ y)
|
||||
}
|
||||
|
||||
// Uint16 hamming distance of two uint16's
|
||||
func Uint16(x, y uint16) int {
|
||||
return CountBitsUint16(x ^ y)
|
||||
}
|
||||
|
||||
// Uint32 hamming distance of two uint32's
|
||||
func Uint32(x, y uint32) int {
|
||||
return CountBitsUint32(x ^ y)
|
||||
}
|
||||
|
||||
// Uint64 hamming distance of two uint64's
|
||||
func Uint64(x, y uint64) int {
|
||||
return CountBitsUint64(x ^ y)
|
||||
}
|
||||
|
||||
// Uint hamming distance of two uint's
|
||||
func Uint(x, y uint) int {
|
||||
return CountBitsUint(x ^ y)
|
||||
}
|
||||
|
||||
// Byte hamming distance of two bytes
|
||||
func Byte(x, y byte) int {
|
||||
return CountBitsByte(x ^ y)
|
||||
}
|
||||
|
||||
// Rune hamming distance of two runes
|
||||
func Rune(x, y rune) int {
|
||||
return CountBitsRune(x ^ y)
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
//
|
||||
// Package hamming distance calculations in Go
|
||||
//
|
||||
// https://github.com/steakknife/hamming
|
||||
//
|
||||
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package hamming
|
||||
|
||||
import "strconv"
|
||||
|
||||
// HasPopCnt returns true if *PopCnt functions are callable
|
||||
func HasPopCnt() (ret bool)
|
||||
|
||||
// CountBitsInt8PopCnt count 1's in x
|
||||
func CountBitsInt8PopCnt(x int8) (ret int)
|
||||
|
||||
// CountBitsInt16PopCnt count 1's in x
|
||||
func CountBitsInt16PopCnt(x int16) (ret int)
|
||||
|
||||
// CountBitsInt32PopCnt count 1's in x
|
||||
func CountBitsInt32PopCnt(x int32) (ret int)
|
||||
|
||||
// CountBitsInt64PopCnt count 1's in x
|
||||
func CountBitsInt64PopCnt(x int64) (ret int)
|
||||
|
||||
// CountBitsIntPopCnt count 1's in x
|
||||
func CountBitsIntPopCnt(x int) int {
|
||||
if strconv.IntSize == 64 {
|
||||
return CountBitsInt64PopCnt(int64(x))
|
||||
} else if strconv.IntSize == 32 {
|
||||
return CountBitsInt32PopCnt(int32(x))
|
||||
}
|
||||
panic("strconv.IntSize must be 32 or 64")
|
||||
}
|
||||
|
||||
// CountBitsUint8PopCnt count 1's in x
|
||||
func CountBitsUint8PopCnt(x uint8) (ret int)
|
||||
|
||||
// CountBitsUint16PopCnt count 1's in x
|
||||
func CountBitsUint16PopCnt(x uint16) (ret int)
|
||||
|
||||
// CountBitsUint32PopCnt count 1's in x
|
||||
func CountBitsUint32PopCnt(x uint32) (ret int)
|
||||
|
||||
// CountBitsUint64PopCnt count 1's in x
|
||||
func CountBitsUint64PopCnt(x uint64) (ret int)
|
||||
|
||||
// CountBitsUintPopCnt count 1's in x
|
||||
func CountBitsUintPopCnt(x uint) int {
|
||||
if strconv.IntSize == 64 {
|
||||
return CountBitsUint64PopCnt(uint64(x))
|
||||
} else if strconv.IntSize == 32 {
|
||||
return CountBitsUint32PopCnt(uint32(x))
|
||||
}
|
||||
panic("strconv.IntSize must be 32 or 64")
|
||||
}
|
||||
|
||||
// CountBitsBytePopCnt count 1's in x
|
||||
func CountBitsBytePopCnt(x byte) (ret int)
|
||||
|
||||
// CountBitsRunePopCnt count 1's in x
|
||||
func CountBitsRunePopCnt(x rune) (ret int)
|
|
@ -0,0 +1,64 @@
|
|||
//
|
||||
// hamming distance calculations in Go
|
||||
//
|
||||
// https://github.com/steakknife/hamming
|
||||
//
|
||||
// Copyright © 2014, 2015, 2016 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
TEXT ·CountBitsInt8PopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsBytePopCnt(SB)
|
||||
|
||||
TEXT ·CountBitsInt16PopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint16PopCnt(SB)
|
||||
|
||||
TEXT ·CountBitsInt32PopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint32PopCnt(SB)
|
||||
|
||||
TEXT ·CountBitsInt64PopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint64PopCnt(SB)
|
||||
|
||||
TEXT ·CountBitsBytePopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint8PopCnt(SB)
|
||||
|
||||
TEXT ·CountBitsRunePopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint32PopCnt(SB)
|
||||
|
||||
TEXT ·CountBitsUint8PopCnt(SB),NOSPLIT,$0
|
||||
XORQ AX, AX
|
||||
MOVB x+0(FP), AX
|
||||
POPCNTQ AX, AX
|
||||
MOVQ AX, ret+8(FP)
|
||||
RET
|
||||
|
||||
TEXT ·CountBitsUint16PopCnt(SB),NOSPLIT,$0
|
||||
XORQ AX, AX
|
||||
MOVW x+0(FP), AX
|
||||
POPCNTQ AX, AX
|
||||
MOVQ AX, ret+8(FP)
|
||||
RET
|
||||
|
||||
TEXT ·CountBitsUint32PopCnt(SB),NOSPLIT,$0
|
||||
XORQ AX, AX
|
||||
MOVL x+0(FP), AX
|
||||
POPCNTQ AX, AX
|
||||
MOVQ AX, ret+8(FP)
|
||||
RET
|
||||
|
||||
TEXT ·CountBitsUint64PopCnt(SB),NOSPLIT,$0
|
||||
POPCNTQ x+0(FP), AX
|
||||
MOVQ AX, ret+8(FP)
|
||||
RET
|
||||
|
||||
// func hasPopCnt() (ret bool)
|
||||
TEXT ·HasPopCnt(SB),NOSPLIT,$0
|
||||
MOVL $1, AX
|
||||
CPUID
|
||||
SHRL $23, CX // bit 23: Advanced Bit Manipulation Bit (ABM) -> POPCNTQ
|
||||
ANDL $1, CX
|
||||
MOVB CX, ret+0(FP)
|
||||
RET
|
|
@ -0,0 +1,134 @@
|
|||
//
|
||||
// Package hamming distance calculations in Go
|
||||
//
|
||||
// https://github.com/steakknife/hamming
|
||||
//
|
||||
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package hamming
|
||||
|
||||
import "strconv"
|
||||
|
||||
// References: check out Hacker's Delight, about p. 70
|
||||
|
||||
func table() [256]uint8 {
|
||||
return [256]uint8{
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
|
||||
}
|
||||
}
|
||||
|
||||
// CountBitsByteAlt table-less, branch-free implementation
|
||||
func CountBitsByteAlt(x byte) int {
|
||||
x = (x & 0x55) + ((x >> 1) & 0x55)
|
||||
x = (x & 0x33) + ((x >> 2) & 0x33)
|
||||
return int((x & 0x0f) + ((x >> 4) & 0x0f))
|
||||
}
|
||||
|
||||
// CountBitsInt8 count 1's in x
|
||||
func CountBitsInt8(x int8) int { return CountBitsByte(byte(x)) }
|
||||
|
||||
// CountBitsInt16 count 1's in x
|
||||
func CountBitsInt16(x int16) int { return CountBitsUint16(uint16(x)) }
|
||||
|
||||
// CountBitsInt32 count 1's in x
|
||||
func CountBitsInt32(x int32) int { return CountBitsUint32(uint32(x)) }
|
||||
|
||||
// CountBitsInt64 count 1's in x
|
||||
func CountBitsInt64(x int64) int { return CountBitsUint64(uint64(x)) }
|
||||
|
||||
// CountBitsInt count 1's in x
|
||||
func CountBitsInt(x int) int { return CountBitsUint(uint(x)) }
|
||||
|
||||
// CountBitsByte count 1's in x
|
||||
func CountBitsByte(x byte) int { return CountBitsUint8(x) }
|
||||
|
||||
// CountBitsRune count 1's in x
|
||||
func CountBitsRune(x rune) int { return CountBitsInt32(x) }
|
||||
|
||||
// CountBitsUint8 count 1's in x
|
||||
func CountBitsUint8(x uint8) int { return int(table()[x]) }
|
||||
|
||||
// CountBitsUint16 count 1's in x
|
||||
func CountBitsUint16(x uint16) int {
|
||||
return int(table()[x&0xFF] + table()[(x>>8)&0xFF])
|
||||
}
|
||||
|
||||
const (
|
||||
m1d uint32 = 0x55555555
|
||||
m2d = 0x33333333
|
||||
m4d = 0x0f0f0f0f
|
||||
)
|
||||
|
||||
// CountBitsUint32 count 1's in x
|
||||
func CountBitsUint32(x uint32) int {
|
||||
x -= ((x >> 1) & m1d)
|
||||
x = (x & m2d) + ((x >> 2) & m2d)
|
||||
x = (x + (x >> 4)) & m4d
|
||||
x += x >> 8
|
||||
x += x >> 16
|
||||
return int(x & 0x3f)
|
||||
}
|
||||
|
||||
const (
|
||||
m1q uint64 = 0x5555555555555555
|
||||
m2q = 0x3333333333333333
|
||||
m4q = 0x0f0f0f0f0f0f0f0f
|
||||
hq = 0x0101010101010101
|
||||
)
|
||||
|
||||
// CountBitsUint64 count 1's in x
|
||||
func CountBitsUint64(x uint64) int {
|
||||
// put count of each 2 bits into those 2 bits
|
||||
x -= (x >> 1) & m1q
|
||||
|
||||
// put count of each 4 bits into those 4 bits
|
||||
x = (x & m2q) + ((x >> 2) & m2q)
|
||||
|
||||
// put count of each 8 bits into those 8 bits
|
||||
x = (x + (x >> 4)) & m4q
|
||||
|
||||
// returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
|
||||
return int((x * hq) >> 56)
|
||||
}
|
||||
|
||||
// CountBitsUint64Alt count 1's in x
|
||||
func CountBitsUint64Alt(x uint64) int {
|
||||
return CountBitsUint32(uint32(x>>32)) + CountBitsUint32(uint32(x))
|
||||
}
|
||||
|
||||
// CountBitsUintReference count 1's in x
|
||||
func CountBitsUintReference(x uint) int {
|
||||
c := 0
|
||||
for x != 0 {
|
||||
x &= x - 1
|
||||
c++
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsUint count 1's in x
|
||||
func CountBitsUint(x uint) int {
|
||||
if strconv.IntSize == 64 {
|
||||
return CountBitsUint64(uint64(x))
|
||||
} else if strconv.IntSize == 32 {
|
||||
return CountBitsUint32(uint32(x))
|
||||
}
|
||||
panic("strconv.IntSize must be 32 or 64 bits")
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
//
|
||||
// Package hamming distance calculations in Go
|
||||
//
|
||||
// https://github.com/steakknife/hamming
|
||||
//
|
||||
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package hamming
|
||||
|
||||
// CountBitsInt8s count 1's in b
|
||||
func CountBitsInt8s(b []int8) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsInt8(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsInt16s count 1's in b
|
||||
func CountBitsInt16s(b []int16) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsInt16(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsInt32s count 1's in b
|
||||
func CountBitsInt32s(b []int32) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsInt32(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsInt64s count 1's in b
|
||||
func CountBitsInt64s(b []int64) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsInt64(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsInts count 1's in b
|
||||
func CountBitsInts(b []int) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsInt(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsUint8s count 1's in b
|
||||
func CountBitsUint8s(b []uint8) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsUint8(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsUint16s count 1's in b
|
||||
func CountBitsUint16s(b []uint16) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsUint16(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsUint32s count 1's in b
|
||||
func CountBitsUint32s(b []uint32) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsUint32(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsUint64s count 1's in b
|
||||
func CountBitsUint64s(b []uint64) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsUint64(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsUints count 1's in b
|
||||
func CountBitsUints(b []uint) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsUint(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsBytes count 1's in b
|
||||
func CountBitsBytes(b []byte) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsByte(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsRunes count 1's in b
|
||||
func CountBitsRunes(b []rune) int {
|
||||
c := 0
|
||||
for _, x := range b {
|
||||
c += CountBitsRune(x)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// CountBitsString count 1's in s
|
||||
func CountBitsString(s string) int {
|
||||
return CountBitsBytes([]byte(s))
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
//
|
||||
// Package hamming distance calculations in Go
|
||||
//
|
||||
// https://github.com/steakknife/hamming
|
||||
//
|
||||
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package hamming
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// CountBitsInt8sPopCnt count 1's in x
|
||||
func CountBitsInt8sPopCnt(x []int8) (ret int)
|
||||
|
||||
// CountBitsInt16sPopCnt count 1's in x
|
||||
func CountBitsInt16sPopCnt(x []int16) (ret int)
|
||||
|
||||
// CountBitsInt32sPopCnt count 1's in x
|
||||
func CountBitsInt32sPopCnt(x []int32) (ret int)
|
||||
|
||||
// CountBitsInt64sPopCnt count 1's in x
|
||||
func CountBitsInt64sPopCnt(x []int64) (ret int)
|
||||
|
||||
// CountBitsIntsPopCnt count 1's in x
|
||||
func CountBitsIntsPopCnt(x []int) int {
|
||||
if strconv.IntSize == 64 {
|
||||
y := (*[]int64)(unsafe.Pointer(&x)) // #nosec G103
|
||||
return CountBitsInt64sPopCnt(*y)
|
||||
} else if strconv.IntSize == 32 {
|
||||
y := (*[]int32)(unsafe.Pointer(&x)) // #nosec G103
|
||||
return CountBitsInt32sPopCnt(*y)
|
||||
}
|
||||
panic("strconv.IntSize must be 32 or 64 bits")
|
||||
}
|
||||
|
||||
// CountBitsUint8sPopCnt count 1's in x
|
||||
func CountBitsUint8sPopCnt(x []uint8) (ret int)
|
||||
|
||||
// CountBitsUint16sPopCnt count 1's in x
|
||||
func CountBitsUint16sPopCnt(x []uint16) (ret int)
|
||||
|
||||
// CountBitsUint32sPopCnt count 1's in x
|
||||
func CountBitsUint32sPopCnt(x []uint32) (ret int)
|
||||
|
||||
// CountBitsUint64sPopCnt count 1's in x
|
||||
func CountBitsUint64sPopCnt(x []uint64) (ret int)
|
||||
|
||||
// CountBitsUintsPopCnt count 1's in x
|
||||
func CountBitsUintsPopCnt(x []uint) int {
|
||||
if strconv.IntSize == 64 {
|
||||
y := (*[]uint64)(unsafe.Pointer(&x)) // #nosec G103
|
||||
return CountBitsUint64sPopCnt(*y)
|
||||
} else if strconv.IntSize == 32 {
|
||||
y := (*[]uint32)(unsafe.Pointer(&x)) // #nosec G103
|
||||
return CountBitsUint32sPopCnt(*y)
|
||||
}
|
||||
panic("strconv.IntSize must be 32 or 64 bits")
|
||||
}
|
||||
|
||||
// CountBitsBytesPopCnt count 1's in x
|
||||
func CountBitsBytesPopCnt(x []byte) (ret int)
|
||||
|
||||
// CountBitsRunesPopCnt count 1's in x
|
||||
func CountBitsRunesPopCnt(x []rune) (ret int)
|
||||
|
||||
// CountBitsStringPopCnt count 1's in s
|
||||
func CountBitsStringPopCnt(s string) (ret int)
|
|
@ -0,0 +1,370 @@
|
|||
//
|
||||
// hamming distance calculations in Go
|
||||
//
|
||||
// https://github.com/steakknife/hamming
|
||||
//
|
||||
// Copyright © 2014, 2015, 2016 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// type SliceHeader struct {
|
||||
// Data uintptr 0
|
||||
// Len int 8
|
||||
// Cap int 16
|
||||
// }
|
||||
|
||||
// 0 x.Data
|
||||
// 8 x.Len
|
||||
// 16 x.Cap
|
||||
// 24 ret
|
||||
|
||||
// type StringHeader struct {
|
||||
// Data uintptr 0
|
||||
// Len int 8
|
||||
// }
|
||||
|
||||
// 0 x.Data
|
||||
// 8 x.Len
|
||||
// 16 ret
|
||||
|
||||
// func CountBitsInt8sPopCnt(x []int8) (ret int)
|
||||
TEXT ·CountBitsInt8sPopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint8sPopCnt(SB)
|
||||
|
||||
// func CountBitsInt16sPopCnt(x []int16) (ret int)
|
||||
TEXT ·CountBitsInt16sPopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint16sPopCnt(SB)
|
||||
|
||||
// func CountBitsInt32sPopCnt(x []int32) (ret int)
|
||||
TEXT ·CountBitsInt32sPopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint32sPopCnt(SB)
|
||||
|
||||
// func CountBitsInt64sPopCnt(x []int64) (ret int)
|
||||
TEXT ·CountBitsInt64sPopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint64sPopCnt(SB)
|
||||
|
||||
// func CountBitsUint8sPopCnt(x []uint8) (ret int)
|
||||
TEXT ·CountBitsUint8sPopCnt(SB),NOSPLIT,$0
|
||||
XORQ AX, AX // ret = 0
|
||||
MOVQ x+8(FP), CX // x.Len -> CX
|
||||
|
||||
test_negative_slice_len:
|
||||
MOVQ CX, BX // x.Len < 0 ---> x.Len[63] != 0
|
||||
SHRQ $63, BX
|
||||
JNZ done
|
||||
|
||||
MOVQ x+0(FP), DI // x.Data -> DI
|
||||
|
||||
CMPQ CX, $32 // x.Len >= 32
|
||||
JL unrolled_loop_skip
|
||||
|
||||
unrolled_loop_setup:
|
||||
XORQ R9, R9
|
||||
XORQ BX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
unrolled_loop: // 4 unrolled loops of POPCNTQ (4 quad words at a time)
|
||||
SUBQ $32, CX
|
||||
|
||||
POPCNTQ 0(DI), R10
|
||||
ADDQ R10, R9
|
||||
POPCNTQ 8(DI), R11
|
||||
ADDQ R11, AX
|
||||
POPCNTQ 16(DI), R12
|
||||
ADDQ R12, BX
|
||||
POPCNTQ 24(DI), R13
|
||||
ADDQ R13, DX
|
||||
|
||||
ADDQ $32, DI
|
||||
CMPQ CX, $32 // x.Len >= 32
|
||||
JGE unrolled_loop
|
||||
|
||||
unrolled_loop_done:
|
||||
ADDQ R9, AX
|
||||
ADDQ BX, DX
|
||||
ADDQ DX, AX
|
||||
|
||||
XORQ BX, BX
|
||||
|
||||
unrolled_loop_skip:
|
||||
CMPQ CX, $0
|
||||
JZ done
|
||||
|
||||
XORQ DX, DX
|
||||
|
||||
remainder_loop:
|
||||
MOVB 0(DI), DL
|
||||
POPCNTQ DX, BX
|
||||
ADDQ BX, AX
|
||||
|
||||
INCQ DI
|
||||
DECQ CX
|
||||
JNZ remainder_loop
|
||||
|
||||
done:
|
||||
MOVQ AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func CountBitsUint16sPopCnt(x []uint16) (ret int)
|
||||
TEXT ·CountBitsUint16sPopCnt(SB),NOSPLIT,$0
|
||||
XORQ AX, AX // ret = 0
|
||||
MOVQ x+8(FP), CX // x.Len -> CX
|
||||
|
||||
test_negative_slice_len:
|
||||
MOVQ CX, BX // x.Len*2 < 0 ---> x.Len[63:62] != 0
|
||||
SHLQ $1, CX
|
||||
SHRQ $62, BX
|
||||
JNZ done
|
||||
|
||||
MOVQ x+0(FP), DI // x.Data -> DI
|
||||
|
||||
|
||||
CMPQ CX, $32 // x.Len*2 >= 32
|
||||
JL unrolled_loop_skip
|
||||
|
||||
unrolled_loop_setup:
|
||||
XORQ R9, R9
|
||||
XORQ BX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
unrolled_loop: // 4 unrolled loops of POPCNTQ (4 quad words at a time)
|
||||
SUBQ $32, CX
|
||||
|
||||
POPCNTQ 0(DI), R10
|
||||
ADDQ R10, R9
|
||||
POPCNTQ 8(DI), R11
|
||||
ADDQ R11, AX
|
||||
POPCNTQ 16(DI), R12
|
||||
ADDQ R12, BX
|
||||
POPCNTQ 24(DI), R13
|
||||
ADDQ R13, DX
|
||||
|
||||
ADDQ $32, DI
|
||||
CMPQ CX, $32 // x.Len*2 >= 32
|
||||
JGE unrolled_loop
|
||||
|
||||
unrolled_loop_done:
|
||||
ADDQ R9, AX
|
||||
ADDQ BX, DX
|
||||
ADDQ DX, AX
|
||||
|
||||
XORQ BX, BX
|
||||
|
||||
unrolled_loop_skip:
|
||||
CMPQ CX, $0
|
||||
JZ done
|
||||
|
||||
XORQ DX, DX
|
||||
|
||||
remainder_loop:
|
||||
MOVW 0(DI), DX
|
||||
POPCNTQ DX, BX
|
||||
ADDQ BX, AX
|
||||
|
||||
ADDQ $2, DI
|
||||
SUBQ $2, CX
|
||||
JNZ remainder_loop
|
||||
|
||||
done:
|
||||
MOVQ AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func CountBitsUint32sPopCnt(x []uint32) (ret int)
|
||||
TEXT ·CountBitsUint32sPopCnt(SB),NOSPLIT,$0
|
||||
XORQ AX, AX // ret = 0
|
||||
MOVQ x+8(FP), CX // x.Len -> CX
|
||||
MOVQ CX, BX
|
||||
MOVQ x+0(FP), DI // x.Data -> DI
|
||||
|
||||
test_negative_slice_len:
|
||||
SHLQ $2, CX // x.Len*4 < 0 ---> x.Len[63:61] != 0
|
||||
SHRQ $61, BX
|
||||
JNZ done
|
||||
|
||||
|
||||
|
||||
CMPQ CX, $32 // x.Len*4 >= 32
|
||||
JL unrolled_loop_skip
|
||||
|
||||
unrolled_loop_setup:
|
||||
XORQ R9, R9
|
||||
XORQ BX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
unrolled_loop: // 4 unrolled loops of POPCNTQ (4 quad words at a time)
|
||||
SUBQ $32, CX
|
||||
|
||||
POPCNTQ 0(DI), R10 // r9 += popcntq(QW DI+0)
|
||||
ADDQ R10, R9
|
||||
POPCNTQ 8(DI), R11 // ax += popcntq(QW DI+8)
|
||||
ADDQ R11, AX
|
||||
POPCNTQ 16(DI), R12 // bx += popcntq(QW DI+16)
|
||||
ADDQ R12, BX
|
||||
POPCNTQ 24(DI), R13 // dx += popcntq(QW DI+24)
|
||||
ADDQ R13, DX
|
||||
|
||||
ADDQ $32, DI
|
||||
CMPQ CX, $32 // x.Len*4 >= 32
|
||||
JGE unrolled_loop
|
||||
|
||||
unrolled_loop_done:
|
||||
ADDQ R9, AX // ax = (ax + r9) + (bx + dx)
|
||||
ADDQ BX, DX
|
||||
ADDQ DX, AX
|
||||
|
||||
XORQ BX, BX
|
||||
|
||||
unrolled_loop_skip:
|
||||
CMPQ CX, $0
|
||||
JZ done
|
||||
|
||||
XORQ DX, DX
|
||||
remainder_loop:
|
||||
MOVB (DI), DX // ax += popcnt(DB 0(DI))
|
||||
POPCNTQ DX, BX
|
||||
ADDQ BX, AX
|
||||
|
||||
INCQ DI
|
||||
DECQ CX
|
||||
JNZ remainder_loop
|
||||
|
||||
done:
|
||||
MOVQ AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func CountBitsUint64sPopCnt(x []uint64) (ret int)
|
||||
TEXT ·CountBitsUint64sPopCnt(SB),NOSPLIT,$0
|
||||
XORQ AX, AX // ret = 0
|
||||
MOVQ x+8(FP), CX // x.Len -> CX
|
||||
|
||||
test_negative_slice_len:
|
||||
MOVQ CX, BX // x.Len*8 < 0 ---> x.Len[63:60] != 0
|
||||
SHLQ $3, CX
|
||||
SHRQ $60, BX
|
||||
JNZ done
|
||||
|
||||
MOVQ x+0(FP), DI // x.Data -> DI
|
||||
|
||||
|
||||
CMPQ CX, $32 // x.Len*8 >= 32
|
||||
JL unrolled_loop_skip
|
||||
|
||||
unrolled_loop_setup:
|
||||
XORQ R9, R9
|
||||
XORQ BX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
unrolled_loop: // 4 unrolled loops of POPCNTQ (4 quad words at a time)
|
||||
SUBQ $32, CX
|
||||
|
||||
POPCNTQ 0(DI), R10
|
||||
ADDQ R10, R9
|
||||
POPCNTQ 8(DI), R11
|
||||
ADDQ R11, AX
|
||||
POPCNTQ 16(DI), R12
|
||||
ADDQ R12, BX
|
||||
POPCNTQ 24(DI), R13
|
||||
ADDQ R13, DX
|
||||
|
||||
ADDQ $32, DI
|
||||
CMPQ CX, $32 // x.Len*4 >= 32
|
||||
JGE unrolled_loop
|
||||
|
||||
unrolled_loop_done:
|
||||
ADDQ R9, AX
|
||||
ADDQ BX, DX
|
||||
ADDQ DX, AX
|
||||
|
||||
XORQ BX, BX
|
||||
|
||||
unrolled_loop_skip:
|
||||
CMPQ CX, $0
|
||||
JZ done
|
||||
|
||||
XORQ DX, DX
|
||||
|
||||
remainder_loop:
|
||||
MOVQ 0(DI), DX
|
||||
POPCNTQ DX, BX
|
||||
ADDQ BX, AX
|
||||
|
||||
ADDQ $8, DI
|
||||
SUBQ $8, CX
|
||||
JNZ remainder_loop
|
||||
|
||||
done:
|
||||
MOVQ AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func CountBitsBytesPopCnt(x []byte) (ret int)
|
||||
TEXT ·CountBitsBytesPopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint8sPopCnt(SB)
|
||||
|
||||
// func CountBitsRunesPopCnt(x []rune) (ret int)
|
||||
TEXT ·CountBitsRunesPopCnt(SB),NOSPLIT,$0
|
||||
JMP ·CountBitsUint32sPopCnt(SB)
|
||||
|
||||
// func CountBitsStringPopCnt(s string) (ret int)
|
||||
TEXT ·CountBitsStringPopCnt(SB),NOSPLIT,$0
|
||||
XORQ AX, AX // ret = 0
|
||||
MOVQ x+8(FP), CX // x.Len -> CX
|
||||
|
||||
test_negative_slice_len:
|
||||
MOVQ CX, BX // x.Len < 0 ---> x.Len[63] != 0
|
||||
SHRQ $63, BX
|
||||
JNZ done
|
||||
|
||||
MOVQ x+0(FP), DI // x.Data -> DI
|
||||
|
||||
CMPQ CX, $32 // x.Len >= 32
|
||||
JL unrolled_loop_skip
|
||||
|
||||
unrolled_loop_setup:
|
||||
XORQ R9, R9
|
||||
XORQ BX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
unrolled_loop: // 4 unrolled loops of POPCNTQ (4 quad words at a time)
|
||||
SUBQ $32, CX
|
||||
|
||||
POPCNTQ 0(DI), R10
|
||||
ADDQ R10, R9
|
||||
POPCNTQ 8(DI), R11
|
||||
ADDQ R11, AX
|
||||
POPCNTQ 16(DI), R12
|
||||
ADDQ R12, BX
|
||||
POPCNTQ 24(DI), R13
|
||||
ADDQ R13, DX
|
||||
|
||||
ADDQ $32, DI
|
||||
CMPQ CX, $32 // x.Len >= 32
|
||||
JGE unrolled_loop
|
||||
|
||||
unrolled_loop_done:
|
||||
ADDQ R9, AX
|
||||
ADDQ BX, DX
|
||||
ADDQ DX, AX
|
||||
|
||||
XORQ BX, BX
|
||||
|
||||
unrolled_loop_skip:
|
||||
CMPQ CX, $0
|
||||
JZ done
|
||||
|
||||
XORQ DX, DX
|
||||
|
||||
remainder_loop:
|
||||
MOVB 0(DI), DL
|
||||
POPCNTQ DX, BX
|
||||
ADDQ BX, AX
|
||||
|
||||
INCQ DI
|
||||
DECQ CX
|
||||
JNZ remainder_loop
|
||||
|
||||
done:
|
||||
MOVQ AX, ret+16(FP)
|
||||
RET
|
|
@ -0,0 +1,144 @@
|
|||
//
|
||||
// Package hamming distance calculations in Go
|
||||
//
|
||||
// https://github.com/steakknife/hamming
|
||||
//
|
||||
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
|
||||
//
|
||||
// MIT license
|
||||
//
|
||||
package hamming
|
||||
|
||||
// Int8s hamming distance of two int8 buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Int8s(b0, b1 []int8) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Int8(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Int16s hamming distance of two int16 buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Int16s(b0, b1 []int16) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Int16(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Int32s hamming distance of two int32 buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Int32s(b0, b1 []int32) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Int32(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Int64s hamming distance of two int64 buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Int64s(b0, b1 []int64) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Int64(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Ints hamming distance of two int buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Ints(b0, b1 []int) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Int(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Uint8s hamming distance of two uint8 buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Uint8s(b0, b1 []uint8) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Uint8(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Uint16s hamming distance of two uint16 buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Uint16s(b0, b1 []uint16) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Uint16(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Uint32s hamming distance of two uint32 buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Uint32s(b0, b1 []uint32) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Uint32(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Uint64s hamming distance of two uint64 buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Uint64s(b0, b1 []uint64) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Uint64(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Uints hamming distance of two uint buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Uints(b0, b1 []uint) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Uint(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Bytes hamming distance of two byte buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Bytes(b0, b1 []byte) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Byte(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Runes hamming distance of two rune buffers, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Runes(b0, b1 []rune) int {
|
||||
d := 0
|
||||
for i, x := range b0 {
|
||||
d += Rune(x, b1[i])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Strings hamming distance of two strings, of which the size of b0
|
||||
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
|
||||
func Strings(b0, b1 string) int {
|
||||
return Runes(runes(b0), runes(b1))
|
||||
}
|
||||
|
||||
// runize string
|
||||
func runes(s string) (r []rune) {
|
||||
for _, ch := range s {
|
||||
r = append(r, ch)
|
||||
}
|
||||
return
|
||||
}
|
|
@ -436,6 +436,18 @@
|
|||
"revision": "8537d3370df43a30a3d450c023783d2e43432b89",
|
||||
"revisionTime": "2019-03-16T09:03:35Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "1mI7DMaBgFAsU0aCrW5yCKyAPdM=",
|
||||
"path": "github.com/steakknife/bloomfilter",
|
||||
"revision": "6819c0d2a57025e1700378ee59b7382d71987f14",
|
||||
"revisionTime": "2018-09-22T17:46:46Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "uuF97bplG/+iQ/nfNSQGZOmTKBE=",
|
||||
"path": "github.com/steakknife/hamming",
|
||||
"revision": "c99c65617cd3d686aea8365fe563d6542f01d940",
|
||||
"revisionTime": "2018-09-06T05:59:17Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "mGbTYZ8dHVTiPTTJu3ktp+84pPI=",
|
||||
"path": "github.com/stretchr/testify/assert",
|
||||
|
|
Loading…
Reference in New Issue