From 9b6e8f6195da1d52ba35295d9d8a3a9f24ec30b0 Mon Sep 17 00:00:00 2001 From: obscuren Date: Thu, 30 Apr 2015 12:38:16 +0200 Subject: [PATCH 01/15] eth, eth/downloader: remove bad peers from peer set Peers in the eth protocol handler are now being ignored for catch up. --- eth/downloader/downloader.go | 18 ++++++++---------- eth/handler.go | 28 +++++++++++++++++++++------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index 4cd927fd5..a48b60716 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -30,7 +30,7 @@ var ( errLowTd = errors.New("peer's TD is too low") errBusy = errors.New("busy") errUnknownPeer = errors.New("peer's unknown or unhealthy") - errBadPeer = errors.New("action from bad peer ignored") + ErrBadPeer = errors.New("action from bad peer ignored") errTimeout = errors.New("timeout") errEmptyHashSet = errors.New("empty hash set by peer") errPeersUnavailable = errors.New("no peers available or all peers tried for block download process") @@ -376,7 +376,7 @@ func (d *Downloader) AddBlock(id string, block *types.Block, td *big.Int) error // and add the block. Otherwise just ignore it if peer == nil { glog.V(logger.Detail).Infof("Ignored block from bad peer %s\n", id) - return errBadPeer + return ErrBadPeer } peer.mu.Lock() @@ -422,10 +422,7 @@ func (d *Downloader) process(peer *peer) error { return nil } - var ( - blocks = d.queue.blocks - err error - ) + var blocks = d.queue.blocks glog.V(logger.Debug).Infof("Inserting chain with %d blocks (#%v - #%v)\n", len(blocks), blocks[0].Number(), blocks[len(blocks)-1].Number()) // Loop untill we're out of blocks @@ -435,7 +432,7 @@ func (d *Downloader) process(peer *peer) error { // processing and start requesting the `block.hash` so that it's parent and // grandparents can be requested and queued. var i int - i, err = d.insertChain(blocks[:max]) + i, err := d.insertChain(blocks[:max]) if err != nil && core.IsParentErr(err) { // Ignore the missing blocks. Handler should take care of anything that's missing. glog.V(logger.Debug).Infof("Ignored block with missing parent (%d)\n", i) @@ -447,8 +444,9 @@ func (d *Downloader) process(peer *peer) error { d.UnregisterPeer(d.activePeer) // Reset chain completely. This needs much, much improvement. // instead: check all blocks leading down to this block false block and remove it - blocks = nil - break + d.queue.blocks = nil + + return ErrBadPeer } blocks = blocks[max:] } @@ -459,7 +457,7 @@ func (d *Downloader) process(peer *peer) error { } else { d.queue.blocks = blocks } - return err + return nil } func (d *Downloader) isFetchingHashes() bool { diff --git a/eth/handler.go b/eth/handler.go index fecd71632..6cc69aa26 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -122,6 +122,12 @@ func NewProtocolManager(protocolVersion, networkId int, mux *event.TypeMux, txpo return manager } +func (pm *ProtocolManager) removePeer(peer *peer) { + pm.pmu.Lock() + defer pm.pmu.Unlock() + delete(pm.peers, peer.id) +} + func (pm *ProtocolManager) syncHandler() { // itimer is used to determine when to start ignoring `minDesiredPeerCount` itimer := time.NewTimer(peerCountTimeout) @@ -172,7 +178,10 @@ func (pm *ProtocolManager) synchronise(peer *peer) { glog.V(logger.Info).Infof("Synchronisation attempt using %s TD=%v\n", peer.id, peer.td) // Get the hashes from the peer (synchronously) err := pm.downloader.Synchronise(peer.id, peer.recentHash) - if err != nil { + if err != nil && err == downloader.ErrBadPeer { + glog.V(logger.Debug).Infoln("removed peer from peer set due to bad action") + pm.removePeer(peer) + } else if err != nil { // handle error glog.V(logger.Debug).Infoln("error downloading:", err) } @@ -214,10 +223,8 @@ func (pm *ProtocolManager) handle(p *peer) error { pm.downloader.RegisterPeer(p.id, p.recentHash, p.requestHashes, p.requestBlocks) defer func() { - pm.pmu.Lock() - defer pm.pmu.Unlock() - delete(pm.peers, p.id) pm.downloader.UnregisterPeer(p.id) + pm.removePeer(p) }() // propagate existing transactions. new transactions appearing @@ -379,16 +386,23 @@ func (self *ProtocolManager) handleMsg(p *peer) error { // if the parent does not exists we delegate to the downloader. if self.chainman.HasBlock(request.Block.ParentHash()) { if _, err := self.chainman.InsertChain(types.Blocks{request.Block}); err != nil { - // handle error + glog.V(logger.Error).Infoln("removed peer (", p.id, ") due to block error") + + self.removePeer(p) + return nil } self.BroadcastBlock(hash, request.Block) } else { // adding blocks is synchronous go func() { - // TODO check parent error err := self.downloader.AddBlock(p.id, request.Block, request.TD) - if err != nil { + if err != nil && err == downloader.ErrBadPeer { + glog.V(logger.Error).Infoln("removed peer (", p.id, ") with err:", err) + + self.removePeer(p) + return + } else if err != nil { glog.V(logger.Detail).Infoln("downloader err:", err) return } From 28b39267d990352883df1c093fd6c36cd532cfdf Mon Sep 17 00:00:00 2001 From: obscuren Date: Thu, 30 Apr 2015 13:41:47 +0200 Subject: [PATCH 02/15] core, eth: verify td of received blocks --- core/chain_manager.go | 6 ++++-- eth/handler.go | 22 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/core/chain_manager.go b/core/chain_manager.go index 4fdb2edce..e97ed307c 100644 --- a/core/chain_manager.go +++ b/core/chain_manager.go @@ -511,6 +511,10 @@ func (self *ChainManager) InsertChain(chain types.Blocks) (int, error) { if block == nil { continue } + // Setting block.Td regardless of error (known for example) prevents errors down the line + // in the protocol handler + block.Td = new(big.Int).Set(CalculateTD(block, self.GetBlock(block.ParentHash()))) + // Call in to the block processor and check for errors. It's likely that if one block fails // all others will fail too (unless a known block is returned). logs, err := self.processor.Process(block) @@ -545,8 +549,6 @@ func (self *ChainManager) InsertChain(chain types.Blocks) (int, error) { return i, err } - block.Td = new(big.Int).Set(CalculateTD(block, self.GetBlock(block.ParentHash()))) - self.mu.Lock() { cblock := self.currentBlock diff --git a/eth/handler.go b/eth/handler.go index 6cc69aa26..6c1a92c77 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -125,6 +125,7 @@ func NewProtocolManager(protocolVersion, networkId int, mux *event.TypeMux, txpo func (pm *ProtocolManager) removePeer(peer *peer) { pm.pmu.Lock() defer pm.pmu.Unlock() + pm.downloader.UnregisterPeer(peer.id) delete(pm.peers, peer.id) } @@ -223,7 +224,6 @@ func (pm *ProtocolManager) handle(p *peer) error { pm.downloader.RegisterPeer(p.id, p.recentHash, p.requestHashes, p.requestBlocks) defer func() { - pm.downloader.UnregisterPeer(p.id) pm.removePeer(p) }() @@ -392,6 +392,12 @@ func (self *ProtocolManager) handleMsg(p *peer) error { return nil } + + if err := self.verifyTd(p, request); err != nil { + glog.V(logger.Error).Infoln(err) + // XXX for now return nil so it won't disconnect (we should in the future) + return nil + } self.BroadcastBlock(hash, request.Block) } else { // adding blocks is synchronous @@ -406,6 +412,10 @@ func (self *ProtocolManager) handleMsg(p *peer) error { glog.V(logger.Detail).Infoln("downloader err:", err) return } + if err := self.verifyTd(p, request); err != nil { + glog.V(logger.Error).Infoln(err) + return + } self.BroadcastBlock(hash, request.Block) }() } @@ -415,6 +425,16 @@ func (self *ProtocolManager) handleMsg(p *peer) error { return nil } +func (pm *ProtocolManager) verifyTd(peer *peer, request newBlockMsgData) error { + if request.Block.Td.Cmp(request.TD) != 0 { + glog.V(logger.Detail).Infoln(peer) + + return fmt.Errorf("invalid TD on block(%v) from peer(%s): block.td=%v, request.td=%v", request.Block.Number(), peer.id, request.Block.Td, request.TD) + } + + return nil +} + // BroadcastBlock will propagate the block to its connected peers. It will sort // out which peers do not contain the block in their block set and will do a // sqrt(peers) to determine the amount of peers we broadcast to. From e4dba36892477f3ef614dd4e4f1234ae4a4e26d0 Mon Sep 17 00:00:00 2001 From: obscuren Date: Thu, 30 Apr 2015 14:55:21 +0200 Subject: [PATCH 03/15] core: check for parent in calc TD. TD = (N != 0 == parent.TD) || (== D) --- core/chain_manager.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/chain_manager.go b/core/chain_manager.go index e97ed307c..9f62d3b47 100644 --- a/core/chain_manager.go +++ b/core/chain_manager.go @@ -49,6 +49,10 @@ func CalcDifficulty(block, parent *types.Header) *big.Int { } func CalculateTD(block, parent *types.Block) *big.Int { + if parent == nil { + return block.Difficulty() + } + td := new(big.Int).Add(parent.Td, block.Header().Difficulty) return td From 15873fafc02e444c38de722277ab2461cb9b82c5 Mon Sep 17 00:00:00 2001 From: obscuren Date: Thu, 30 Apr 2015 17:50:23 +0200 Subject: [PATCH 04/15] core: added a wait group to chain manager for graceful shutdown --- core/chain_manager.go | 8 ++++++++ eth/backend.go | 1 + 2 files changed, 9 insertions(+) diff --git a/core/chain_manager.go b/core/chain_manager.go index 9f62d3b47..80fd6cd71 100644 --- a/core/chain_manager.go +++ b/core/chain_manager.go @@ -93,6 +93,7 @@ type ChainManager struct { futureBlocks *BlockCache quit chan struct{} + wg sync.WaitGroup } func NewChainManager(blockDb, stateDb common.Database, mux *event.TypeMux) *ChainManager { @@ -482,6 +483,10 @@ func (self *ChainManager) CalcTotalDiff(block *types.Block) (*big.Int, error) { func (bc *ChainManager) Stop() { close(bc.quit) + + bc.wg.Wait() + + glog.V(logger.Info).Infoln("Chain manager stopped") } type queueEvent struct { @@ -504,6 +509,9 @@ func (self *ChainManager) procFutureBlocks() { // InsertChain will attempt to insert the given chain in to the canonical chain or, otherwise, create a fork. It an error is returned // it will return the index number of the failing block as well an error describing what went wrong (for possible errors see core/errors.go). func (self *ChainManager) InsertChain(chain types.Blocks) (int, error) { + self.wg.Add(1) + defer self.wg.Done() + // A queued approach to delivering events. This is generally faster than direct delivery and requires much less mutex acquiring. var ( queue = make([]interface{}, len(chain)) diff --git a/eth/backend.go b/eth/backend.go index c5fa328b0..11a63cca9 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -454,6 +454,7 @@ func (self *Ethereum) SuggestPeer(nodeURL string) error { func (s *Ethereum) Stop() { s.txSub.Unsubscribe() // quits txBroadcastLoop + s.chainManager.Stop() s.protocolManager.Stop() s.txPool.Stop() s.eventMux.Stop() From 8595198c1b56364bb9589a912d2a9797b93a3357 Mon Sep 17 00:00:00 2001 From: obscuren Date: Thu, 30 Apr 2015 17:51:47 +0200 Subject: [PATCH 05/15] eth/downloader: delete blocks from queue --- eth/downloader/downloader.go | 16 ++++++++++++++-- eth/handler.go | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index a48b60716..016f290fc 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -354,6 +354,11 @@ func (d *Downloader) AddHashes(id string, hashes []common.Hash) error { return fmt.Errorf("received hashes from %s while active peer is %s", id, d.activePeer) } + if glog.V(logger.Detail) && len(hashes) != 0 { + from, to := hashes[0], hashes[len(hashes)-1] + glog.Infof("adding %d (T=%d) hashes [ %x / %x ] from: %s\n", len(hashes), d.queue.hashPool.Size(), from[:4], to[:4], id) + } + d.hashCh <- hashes return nil @@ -448,10 +453,17 @@ func (d *Downloader) process(peer *peer) error { return ErrBadPeer } - blocks = blocks[max:] + + // delete the blocks from the slice and let them be garbage collected + // without this slice trick the blocks would stay in memory until nil + // would be assigned to d.queue.blocks + copy(blocks, blocks[max:]) + for k, n := len(blocks)-max, len(blocks); k < n; k++ { + blocks[k] = nil + } + blocks = blocks[:len(blocks)-max] } - // This will allow the GC to remove the in memory blocks if len(blocks) == 0 { d.queue.blocks = nil } else { diff --git a/eth/handler.go b/eth/handler.go index 6c1a92c77..2b432f95d 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -455,7 +455,7 @@ func (pm *ProtocolManager) BroadcastBlock(hash common.Hash, block *types.Block) for _, peer := range peers { peer.sendNewBlock(block) } - glog.V(logger.Detail).Infoln("broadcast block to", len(peers), "peers. Total propagation time:", time.Since(block.ReceivedAt)) + glog.V(logger.Detail).Infoln("broadcast block to", len(peers), "peers. Total processing time:", time.Since(block.ReceivedAt)) } // BroadcastTx will propagate the block to its connected peers. It will sort From 016f152b36106130fa42514ef6cfacc09dfc3142 Mon Sep 17 00:00:00 2001 From: obscuren Date: Fri, 1 May 2015 00:23:51 +0200 Subject: [PATCH 06/15] eth, eth/downloader: Moved block processing & graceful shutdown The downloader is no longer responsible for processing blocks. The eth-protocol handler now takes care of this instead. Added graceful shutdown during block processing. Closes #846 --- eth/backend.go | 4 +- eth/downloader/downloader.go | 281 ++++++++++++++---------------- eth/downloader/downloader_test.go | 55 +++--- eth/downloader/queue.go | 58 +++++- eth/handler.go | 139 +++------------ eth/sync.go | 101 +++++++++++ 6 files changed, 347 insertions(+), 291 deletions(-) create mode 100644 eth/sync.go diff --git a/eth/backend.go b/eth/backend.go index 11a63cca9..a8b0fae50 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -219,7 +219,7 @@ func New(config *Config) (*Ethereum, error) { } eth.chainManager = core.NewChainManager(blockDb, stateDb, eth.EventMux()) - eth.downloader = downloader.New(eth.chainManager.HasBlock, eth.chainManager.InsertChain) + eth.downloader = downloader.New(eth.chainManager.HasBlock, eth.chainManager.GetBlock) eth.pow = ethash.New(eth.chainManager) eth.txPool = core.NewTxPool(eth.EventMux(), eth.chainManager.State, eth.chainManager.GasLimit) eth.blockProcessor = core.NewBlockProcessor(stateDb, extraDb, eth.pow, eth.txPool, eth.chainManager, eth.EventMux()) @@ -454,8 +454,8 @@ func (self *Ethereum) SuggestPeer(nodeURL string) error { func (s *Ethereum) Stop() { s.txSub.Unsubscribe() // quits txBroadcastLoop - s.chainManager.Stop() s.protocolManager.Stop() + s.chainManager.Stop() s.txPool.Stop() s.eventMux.Stop() if s.whisper != nil { diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index 016f290fc..c1dc9d3a8 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -3,14 +3,11 @@ package downloader import ( "errors" "fmt" - "math" - "math/big" "sync" "sync/atomic" "time" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/logger" "github.com/ethereum/go-ethereum/logger/glog" @@ -27,16 +24,21 @@ var ( minDesiredPeerCount = 5 // Amount of peers desired to start syncing blockTtl = 20 * time.Second // The amount of time it takes for a block request to time out - errLowTd = errors.New("peer's TD is too low") - errBusy = errors.New("busy") - errUnknownPeer = errors.New("peer's unknown or unhealthy") - ErrBadPeer = errors.New("action from bad peer ignored") - errTimeout = errors.New("timeout") - errEmptyHashSet = errors.New("empty hash set by peer") - errPeersUnavailable = errors.New("no peers available or all peers tried for block download process") + errLowTd = errors.New("peer's TD is too low") + errBusy = errors.New("busy") + errUnknownPeer = errors.New("peer's unknown or unhealthy") + ErrBadPeer = errors.New("action from bad peer ignored") + errNoPeers = errors.New("no peers to keep download active") + errPendingQueue = errors.New("pending items in queue") + errTimeout = errors.New("timeout") + errEmptyHashSet = errors.New("empty hash set by peer") + errPeersUnavailable = errors.New("no peers available or all peers tried for block download process") + errAlreadyInPool = errors.New("hash already in pool") + errBlockNumberOverflow = errors.New("received block which overflows") ) type hashCheckFn func(common.Hash) bool +type getBlockFn func(common.Hash) *types.Block type chainInsertFn func(types.Blocks) (int, error) type hashIterFn func() (common.Hash, error) @@ -58,13 +60,12 @@ type Downloader struct { activePeer string // Callbacks - hasBlock hashCheckFn - insertChain chainInsertFn + hasBlock hashCheckFn + getBlock getBlockFn // Status fetchingHashes int32 downloadingBlocks int32 - processingBlocks int32 // Channels newPeerCh chan *peer @@ -72,15 +73,15 @@ type Downloader struct { blockCh chan blockPack } -func New(hasBlock hashCheckFn, insertChain chainInsertFn) *Downloader { +func New(hasBlock hashCheckFn, getBlock getBlockFn) *Downloader { downloader := &Downloader{ - queue: newqueue(), - peers: make(peers), - hasBlock: hasBlock, - insertChain: insertChain, - newPeerCh: make(chan *peer, 1), - hashCh: make(chan []common.Hash, 1), - blockCh: make(chan blockPack, 1), + queue: newqueue(), + peers: make(peers), + hasBlock: hasBlock, + getBlock: getBlock, + newPeerCh: make(chan *peer, 1), + hashCh: make(chan []common.Hash, 1), + blockCh: make(chan blockPack, 1), } return downloader @@ -126,6 +127,12 @@ func (d *Downloader) Synchronise(id string, hash common.Hash) error { return errBusy } + // When a synchronisation attempt is made while the queue stil + // contains items we abort the sync attempt + if d.queue.size() > 0 { + return errPendingQueue + } + // Fetch the peer using the id or throw an error if the peer couldn't be found p := d.peers[id] if p == nil { @@ -138,30 +145,87 @@ func (d *Downloader) Synchronise(id string, hash common.Hash) error { return err } - return d.process(p) + return nil } -func (d *Downloader) getFromPeer(p *peer, hash common.Hash, ignoreInitial bool) error { +// Done lets the downloader know that whatever previous hashes were taken +// are processed. If the block count reaches zero and done is called +// we reset the queue for the next batch of incoming hashes and blocks. +func (d *Downloader) Done() { + d.queue.mu.Lock() + defer d.queue.mu.Unlock() + + if len(d.queue.blocks) == 0 { + d.queue.resetNoTS() + } +} + +// TakeBlocks takes blocks from the queue and yields them to the blockTaker handler +// it's possible it yields no blocks +func (d *Downloader) TakeBlocks() types.Blocks { + d.queue.mu.Lock() + defer d.queue.mu.Unlock() + + var blocks types.Blocks + if len(d.queue.blocks) > 0 { + // Make sure the parent hash is known + if d.queue.blocks[0] != nil && !d.hasBlock(d.queue.blocks[0].ParentHash()) { + return nil + } + + for _, block := range d.queue.blocks { + if block == nil { + break + } + + blocks = append(blocks, block) + } + d.queue.blockOffset += len(blocks) + // delete the blocks from the slice and let them be garbage collected + // without this slice trick the blocks would stay in memory until nil + // would be assigned to d.queue.blocks + copy(d.queue.blocks, d.queue.blocks[len(blocks):]) + for k, n := len(d.queue.blocks)-len(blocks), len(d.queue.blocks); k < n; k++ { + d.queue.blocks[k] = nil + } + d.queue.blocks = d.queue.blocks[:len(d.queue.blocks)-len(blocks)] + + //d.queue.blocks = d.queue.blocks[len(blocks):] + if len(d.queue.blocks) == 0 { + d.queue.blocks = nil + } + + } + + return blocks +} + +func (d *Downloader) Has(hash common.Hash) bool { + return d.queue.has(hash) +} + +func (d *Downloader) getFromPeer(p *peer, hash common.Hash, ignoreInitial bool) (err error) { d.activePeer = p.id + defer func() { + // reset on error + if err != nil { + d.queue.reset() + } + }() glog.V(logger.Detail).Infoln("Synchronising with the network using:", p.id) // Start the fetcher. This will block the update entirely // interupts need to be send to the appropriate channels // respectively. - if err := d.startFetchingHashes(p, hash, ignoreInitial); err != nil { - // handle error - glog.V(logger.Debug).Infoln("Error fetching hashes:", err) - // XXX Reset + if err = d.startFetchingHashes(p, hash, ignoreInitial); err != nil { return err } // Start fetching blocks in paralel. The strategy is simple // take any available peers, seserve a chunk for each peer available, // let the peer deliver the chunkn and periodically check if a peer - // has timedout. When done downloading, process blocks. - if err := d.startFetchingBlocks(p); err != nil { - glog.V(logger.Debug).Infoln("Error downloading blocks:", err) - // XXX reset + // has timedout. + if err = d.startFetchingBlocks(p); err != nil { return err } @@ -175,6 +239,10 @@ func (d *Downloader) startFetchingHashes(p *peer, hash common.Hash, ignoreInitia atomic.StoreInt32(&d.fetchingHashes, 1) defer atomic.StoreInt32(&d.fetchingHashes, 0) + if d.queue.has(hash) { + return errAlreadyInPool + } + glog.V(logger.Debug).Infof("Downloading hashes (%x) from %s", hash.Bytes()[:4], p.id) start := time.Now() @@ -196,10 +264,13 @@ out: case hashes := <-d.hashCh: failureResponseTimer.Reset(hashTtl) - var done bool // determines whether we're done fetching hashes (i.e. common hash found) + var ( + done bool // determines whether we're done fetching hashes (i.e. common hash found) + hash common.Hash // current and common hash + ) hashSet := set.New() - for _, hash := range hashes { - if d.hasBlock(hash) { + for _, hash = range hashes { + if d.hasBlock(hash) || d.queue.has(hash) { glog.V(logger.Debug).Infof("Found common hash %x\n", hash[:4]) done = true @@ -220,6 +291,14 @@ out: // Get the next set of hashes p.getHashes(hashes[len(hashes)-1]) } else { // we're done + // The offset of the queue is determined by the highest known block + var offset int + if block := d.getBlock(hash); block != nil { + offset = int(block.NumberU64() + 1) + } + // allocate proper size for the queueue + d.queue.alloc(offset, d.queue.hashPool.Size()) + break out } case <-failureResponseTimer.C: @@ -257,11 +336,27 @@ out: // If the peer was previously banned and failed to deliver it's pack // in a reasonable time frame, ignore it's message. if d.peers[blockPack.peerId] != nil { + err := d.queue.deliver(blockPack.peerId, blockPack.blocks) + if err != nil { + glog.V(logger.Debug).Infof("deliver failed for peer %s: %v\n", blockPack.peerId, err) + // FIXME d.UnregisterPeer(blockPack.peerId) + break + } + + if glog.V(logger.Debug) { + glog.Infof("adding %d blocks from: %s\n", len(blockPack.blocks), blockPack.peerId) + } d.peers[blockPack.peerId].promote() - d.queue.deliver(blockPack.peerId, blockPack.blocks) d.peers.setState(blockPack.peerId, idleState) } case <-ticker.C: + // after removing bad peers make sure we actually have suffucient peer left to keep downlading + if len(d.peers) == 0 { + d.queue.reset() + + return errNoPeers + } + // If there are unrequested hashes left start fetching // from the available peers. if d.queue.hashPool.Size() > 0 { @@ -310,7 +405,7 @@ out: if time.Since(chunk.itime) > blockTtl { badPeers = append(badPeers, pid) // remove peer as good peer from peer list - //d.UnregisterPeer(pid) + // FIXME d.UnregisterPeer(pid) } } d.queue.mu.Unlock() @@ -364,114 +459,6 @@ func (d *Downloader) AddHashes(id string, hashes []common.Hash) error { return nil } -// Add an (unrequested) block to the downloader. This is usually done through the -// NewBlockMsg by the protocol handler. -// Adding blocks is done synchronously. if there are missing blocks, blocks will be -// fetched first. If the downloader is busy or if some other processed failed an error -// will be returned. -func (d *Downloader) AddBlock(id string, block *types.Block, td *big.Int) error { - hash := block.Hash() - - if d.hasBlock(hash) { - return fmt.Errorf("known block %x", hash.Bytes()[:4]) - } - - peer := d.peers.getPeer(id) - // if the peer is in our healthy list of peers; update the td - // and add the block. Otherwise just ignore it - if peer == nil { - glog.V(logger.Detail).Infof("Ignored block from bad peer %s\n", id) - return ErrBadPeer - } - - peer.mu.Lock() - peer.recentHash = block.Hash() - peer.mu.Unlock() - peer.promote() - - glog.V(logger.Detail).Infoln("Inserting new block from:", id) - d.queue.addBlock(id, block) - - // if neither go ahead to process - if d.isBusy() { - return errBusy - } - - // Check if the parent of the received block is known. - // If the block is not know, request it otherwise, request. - phash := block.ParentHash() - if !d.hasBlock(phash) { - glog.V(logger.Detail).Infof("Missing parent %x, requires fetching\n", phash.Bytes()[:4]) - - // Get the missing hashes from the peer (synchronously) - err := d.getFromPeer(peer, peer.recentHash, true) - if err != nil { - return err - } - } - - return d.process(peer) -} - -func (d *Downloader) process(peer *peer) error { - atomic.StoreInt32(&d.processingBlocks, 1) - defer atomic.StoreInt32(&d.processingBlocks, 0) - - // XXX this will move when optimised - // Sort the blocks by number. This bit needs much improvement. Right now - // it assumes full honesty form peers (i.e. it's not checked when the blocks - // link). We should at least check whihc queue match. This code could move - // to a seperate goroutine where it periodically checks for linked pieces. - types.BlockBy(types.Number).Sort(d.queue.blocks) - if len(d.queue.blocks) == 0 { - return nil - } - - var blocks = d.queue.blocks - glog.V(logger.Debug).Infof("Inserting chain with %d blocks (#%v - #%v)\n", len(blocks), blocks[0].Number(), blocks[len(blocks)-1].Number()) - - // Loop untill we're out of blocks - for len(blocks) != 0 { - max := int(math.Min(float64(len(blocks)), 256)) - // TODO check for parent error. When there's a parent error we should stop - // processing and start requesting the `block.hash` so that it's parent and - // grandparents can be requested and queued. - var i int - i, err := d.insertChain(blocks[:max]) - if err != nil && core.IsParentErr(err) { - // Ignore the missing blocks. Handler should take care of anything that's missing. - glog.V(logger.Debug).Infof("Ignored block with missing parent (%d)\n", i) - blocks = blocks[i+1:] - - continue - } else if err != nil { - // immediatly unregister the false peer but do not disconnect - d.UnregisterPeer(d.activePeer) - // Reset chain completely. This needs much, much improvement. - // instead: check all blocks leading down to this block false block and remove it - d.queue.blocks = nil - - return ErrBadPeer - } - - // delete the blocks from the slice and let them be garbage collected - // without this slice trick the blocks would stay in memory until nil - // would be assigned to d.queue.blocks - copy(blocks, blocks[max:]) - for k, n := len(blocks)-max, len(blocks); k < n; k++ { - blocks[k] = nil - } - blocks = blocks[:len(blocks)-max] - } - - if len(blocks) == 0 { - d.queue.blocks = nil - } else { - d.queue.blocks = blocks - } - return nil -} - func (d *Downloader) isFetchingHashes() bool { return atomic.LoadInt32(&d.fetchingHashes) == 1 } @@ -480,12 +467,8 @@ func (d *Downloader) isDownloadingBlocks() bool { return atomic.LoadInt32(&d.downloadingBlocks) == 1 } -func (d *Downloader) isProcessing() bool { - return atomic.LoadInt32(&d.processingBlocks) == 1 -} - func (d *Downloader) isBusy() bool { - return d.isFetchingHashes() || d.isDownloadingBlocks() || d.isProcessing() + return d.isFetchingHashes() || d.isDownloadingBlocks() } func (d *Downloader) IsBusy() bool { diff --git a/eth/downloader/downloader_test.go b/eth/downloader/downloader_test.go index 5518163ca..d13818b37 100644 --- a/eth/downloader/downloader_test.go +++ b/eth/downloader/downloader_test.go @@ -8,8 +8,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/logger" - "github.com/ethereum/go-ethereum/logger/glog" ) var knownHash = common.Hash{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} @@ -28,7 +26,7 @@ func createHashes(start, amount int) (hashes []common.Hash) { func createBlocksFromHashes(hashes []common.Hash) map[common.Hash]*types.Block { blocks := make(map[common.Hash]*types.Block) for i, hash := range hashes { - header := &types.Header{Number: big.NewInt(int64(i))} + header := &types.Header{Number: big.NewInt(int64(len(hashes) - i))} blocks[hash] = types.NewBlockWithHeader(header) blocks[hash].HeaderHash = hash } @@ -43,13 +41,11 @@ type downloadTester struct { t *testing.T pcount int done chan bool - - insertedBlocks int } func newTester(t *testing.T, hashes []common.Hash, blocks map[common.Hash]*types.Block) *downloadTester { tester := &downloadTester{t: t, hashes: hashes, blocks: blocks, done: make(chan bool)} - downloader := New(tester.hasBlock, tester.insertChain) + downloader := New(tester.hasBlock, tester.getBlock) tester.downloader = downloader return tester @@ -62,10 +58,8 @@ func (dl *downloadTester) hasBlock(hash common.Hash) bool { return false } -func (dl *downloadTester) insertChain(blocks types.Blocks) (int, error) { - dl.insertedBlocks += len(blocks) - - return 0, nil +func (dl *downloadTester) getBlock(hash common.Hash) *types.Block { + return dl.blocks[knownHash] } func (dl *downloadTester) getHashes(hash common.Hash) error { @@ -102,9 +96,6 @@ func (dl *downloadTester) badBlocksPeer(id string, td *big.Int, hash common.Hash } func TestDownload(t *testing.T) { - glog.SetV(logger.Detail) - glog.SetToStderr(true) - minDesiredPeerCount = 4 blockTtl = 1 * time.Second @@ -123,15 +114,13 @@ func TestDownload(t *testing.T) { t.Error("download error", err) } - if tester.insertedBlocks != targetBlocks { - t.Error("expected", targetBlocks, "have", tester.insertedBlocks) + inqueue := len(tester.downloader.queue.blocks) + if inqueue != targetBlocks { + t.Error("expected", targetBlocks, "have", inqueue) } } func TestMissing(t *testing.T) { - glog.SetV(logger.Detail) - glog.SetToStderr(true) - targetBlocks := 1000 hashes := createHashes(0, 1000) extraHashes := createHashes(1001, 1003) @@ -148,7 +137,33 @@ func TestMissing(t *testing.T) { t.Error("download error", err) } - if tester.insertedBlocks != targetBlocks { - t.Error("expected", targetBlocks, "have", tester.insertedBlocks) + inqueue := len(tester.downloader.queue.blocks) + if inqueue != targetBlocks { + t.Error("expected", targetBlocks, "have", inqueue) + } +} + +func TestTaking(t *testing.T) { + minDesiredPeerCount = 4 + blockTtl = 1 * time.Second + + targetBlocks := 1000 + hashes := createHashes(0, targetBlocks) + blocks := createBlocksFromHashes(hashes) + tester := newTester(t, hashes, blocks) + + tester.newPeer("peer1", big.NewInt(10000), hashes[0]) + tester.newPeer("peer2", big.NewInt(0), common.Hash{}) + tester.badBlocksPeer("peer3", big.NewInt(0), common.Hash{}) + tester.badBlocksPeer("peer4", big.NewInt(0), common.Hash{}) + + err := tester.downloader.Synchronise("peer1", hashes[0]) + if err != nil { + t.Error("download error", err) + } + + bs1 := tester.downloader.TakeBlocks(1000) + if len(bs1) != 1000 { + t.Error("expected to take 1000, got", len(bs1)) } } diff --git a/eth/downloader/queue.go b/eth/downloader/queue.go index a21a44706..13768229f 100644 --- a/eth/downloader/queue.go +++ b/eth/downloader/queue.go @@ -18,7 +18,9 @@ type queue struct { mu sync.Mutex fetching map[string]*chunk - blocks []*types.Block + + blockOffset int + blocks []*types.Block } func newqueue() *queue { @@ -34,6 +36,10 @@ func (c *queue) reset() { c.mu.Lock() defer c.mu.Unlock() + c.resetNoTS() +} +func (c *queue) resetNoTS() { + c.blockOffset = 0 c.hashPool.Clear() c.fetchPool.Clear() c.blockHashes.Clear() @@ -41,6 +47,10 @@ func (c *queue) reset() { c.fetching = make(map[string]*chunk) } +func (c *queue) size() int { + return c.hashPool.Size() + c.blockHashes.Size() + c.fetchPool.Size() +} + // reserve a `max` set of hashes for `p` peer. func (c *queue) get(p *peer, max int) *chunk { c.mu.Lock() @@ -89,7 +99,7 @@ func (c *queue) get(p *peer, max int) *chunk { } func (c *queue) has(hash common.Hash) bool { - return c.hashPool.Has(hash) || c.fetchPool.Has(hash) + return c.hashPool.Has(hash) || c.fetchPool.Has(hash) || c.blockHashes.Has(hash) } func (c *queue) addBlock(id string, block *types.Block) { @@ -103,8 +113,24 @@ func (c *queue) addBlock(id string, block *types.Block) { } } +func (c *queue) getBlock(hash common.Hash) *types.Block { + c.mu.Lock() + defer c.mu.Unlock() + + if !c.blockHashes.Has(hash) { + return nil + } + + for _, block := range c.blocks { + if block.Hash() == hash { + return block + } + } + return nil +} + // deliver delivers a chunk to the queue that was requested of the peer -func (c *queue) deliver(id string, blocks []*types.Block) { +func (c *queue) deliver(id string, blocks []*types.Block) error { c.mu.Lock() defer c.mu.Unlock() @@ -124,11 +150,35 @@ func (c *queue) deliver(id string, blocks []*types.Block) { // merge block hashes c.blockHashes.Merge(blockHashes) // Add the blocks - c.blocks = append(c.blocks, blocks...) + for _, block := range blocks { + // See (1) for future limitation + n := int(block.NumberU64()) - c.blockOffset + if n > len(c.blocks) || n < 0 { + return errBlockNumberOverflow + } + c.blocks[n] = block + } // Add back whatever couldn't be delivered c.hashPool.Merge(chunk.hashes) c.fetchPool.Separate(chunk.hashes) } + + return nil +} + +func (c *queue) alloc(offset, size int) { + c.mu.Lock() + defer c.mu.Unlock() + + if c.blockOffset < offset { + c.blockOffset = offset + } + + // (1) XXX at some point we could limit allocation to memory and use the disk + // to store future blocks. + if len(c.blocks) < size { + c.blocks = append(c.blocks, make([]*types.Block, size)...) + } } // puts puts sets of hashes on to the queue for fetching diff --git a/eth/handler.go b/eth/handler.go index 2b432f95d..1e0663816 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -1,39 +1,5 @@ package eth -// XXX Fair warning, most of the code is re-used from the old protocol. Please be aware that most of this will actually change -// The idea is that most of the calls within the protocol will become synchronous. -// Block downloading and block processing will be complete seperate processes -/* -# Possible scenarios - -// Synching scenario -// Use the best peer to synchronise -blocks, err := pm.downloader.Synchronise() -if err != nil { - // handle - break -} -pm.chainman.InsertChain(blocks) - -// Receiving block with known parent -if parent_exist { - if err := pm.chainman.InsertChain(block); err != nil { - // handle - break - } - pm.BroadcastBlock(block) -} - -// Receiving block with unknown parent -blocks, err := pm.downloader.SynchroniseWithPeer(peer) -if err != nil { - // handle - break -} -pm.chainman.InsertChain(blocks) - -*/ - import ( "fmt" "math" @@ -54,7 +20,9 @@ import ( const ( peerCountTimeout = 12 * time.Second // Amount of time it takes for the peer handler to ignore minDesiredPeerCount - minDesiredPeerCount = 5 // Amount of peers desired to start syncing + blockProcTimer = 500 * time.Millisecond + minDesiredPeerCount = 5 // Amount of peers desired to start syncing + blockProcAmount = 256 ) func errResp(code errCode, format string, v ...interface{}) error { @@ -91,6 +59,10 @@ type ProtocolManager struct { newPeerCh chan *peer quitSync chan struct{} + // wait group is used for graceful shutdowns during downloading + // and processing + wg sync.WaitGroup + quit bool } // NewProtocolManager returns a new ethereum sub protocol manager. The Ethereum sub protocol manages peers capable @@ -129,65 +101,6 @@ func (pm *ProtocolManager) removePeer(peer *peer) { delete(pm.peers, peer.id) } -func (pm *ProtocolManager) syncHandler() { - // itimer is used to determine when to start ignoring `minDesiredPeerCount` - itimer := time.NewTimer(peerCountTimeout) -out: - for { - select { - case <-pm.newPeerCh: - // Meet the `minDesiredPeerCount` before we select our best peer - if len(pm.peers) < minDesiredPeerCount { - break - } - - // Find the best peer - peer := getBestPeer(pm.peers) - if peer == nil { - glog.V(logger.Debug).Infoln("Sync attempt cancelled. No peers available") - } - - itimer.Stop() - go pm.synchronise(peer) - case <-itimer.C: - // The timer will make sure that the downloader keeps an active state - // in which it attempts to always check the network for highest td peers - // Either select the peer or restart the timer if no peers could - // be selected. - if peer := getBestPeer(pm.peers); peer != nil { - go pm.synchronise(peer) - } else { - itimer.Reset(5 * time.Second) - } - case <-pm.quitSync: - break out - } - } -} - -func (pm *ProtocolManager) synchronise(peer *peer) { - // Make sure the peer's TD is higher than our own. If not drop. - if peer.td.Cmp(pm.chainman.Td()) <= 0 { - return - } - // Check downloader if it's busy so it doesn't show the sync message - // for every attempty - if pm.downloader.IsBusy() { - return - } - - glog.V(logger.Info).Infof("Synchronisation attempt using %s TD=%v\n", peer.id, peer.td) - // Get the hashes from the peer (synchronously) - err := pm.downloader.Synchronise(peer.id, peer.recentHash) - if err != nil && err == downloader.ErrBadPeer { - glog.V(logger.Debug).Infoln("removed peer from peer set due to bad action") - pm.removePeer(peer) - } else if err != nil { - // handle error - glog.V(logger.Debug).Infoln("error downloading:", err) - } -} - func (pm *ProtocolManager) Start() { // broadcast transactions pm.txSub = pm.eventMux.Subscribe(core.TxPreEvent{}) @@ -197,18 +110,26 @@ func (pm *ProtocolManager) Start() { pm.minedBlockSub = pm.eventMux.Subscribe(core.NewMinedBlockEvent{}) go pm.minedBroadcastLoop() - // sync handler - go pm.syncHandler() + go pm.update() } func (pm *ProtocolManager) Stop() { + // Showing a log message. During download / process this could actually + // take between 5 to 10 seconds and therefor feedback is required. + glog.V(logger.Info).Infoln("Stopping ethereum protocol handler...") + + pm.quit = true pm.txSub.Unsubscribe() // quits txBroadcastLoop pm.minedBlockSub.Unsubscribe() // quits blockBroadcastLoop close(pm.quitSync) // quits the sync handler + + // Wait for any process action + pm.wg.Wait() + + glog.V(logger.Info).Infoln("Ethereum protocol handler stopped") } func (pm *ProtocolManager) newPeer(pv, nv int, p *p2p.Peer, rw p2p.MsgReadWriter) *peer { - td, current, genesis := pm.chainman.Status() return newPeer(pv, nv, genesis, current, td, p, rw) @@ -359,6 +280,9 @@ func (self *ProtocolManager) handleMsg(p *peer) error { // Add the block hash as a known hash to the peer. This will later be used to determine // who should receive this. p.blockHashes.Add(hash) + // update the peer info + p.recentHash = hash + p.td = request.TD _, chainHead, _ := self.chainman.Status() @@ -383,7 +307,7 @@ func (self *ProtocolManager) handleMsg(p *peer) error { // Attempt to insert the newly received by checking if the parent exists. // if the parent exists we process the block and propagate to our peers - // if the parent does not exists we delegate to the downloader. + // otherwise synchronise with the peer if self.chainman.HasBlock(request.Block.ParentHash()) { if _, err := self.chainman.InsertChain(types.Blocks{request.Block}); err != nil { glog.V(logger.Error).Infoln("removed peer (", p.id, ") due to block error") @@ -400,24 +324,7 @@ func (self *ProtocolManager) handleMsg(p *peer) error { } self.BroadcastBlock(hash, request.Block) } else { - // adding blocks is synchronous - go func() { - err := self.downloader.AddBlock(p.id, request.Block, request.TD) - if err != nil && err == downloader.ErrBadPeer { - glog.V(logger.Error).Infoln("removed peer (", p.id, ") with err:", err) - - self.removePeer(p) - return - } else if err != nil { - glog.V(logger.Detail).Infoln("downloader err:", err) - return - } - if err := self.verifyTd(p, request); err != nil { - glog.V(logger.Error).Infoln(err) - return - } - self.BroadcastBlock(hash, request.Block) - }() + go self.synchronise(p) } default: return errResp(ErrInvalidMsgCode, "%v", msg.Code) diff --git a/eth/sync.go b/eth/sync.go new file mode 100644 index 000000000..bcf4b231f --- /dev/null +++ b/eth/sync.go @@ -0,0 +1,101 @@ +package eth + +import ( + "math" + "time" + + "github.com/ethereum/go-ethereum/eth/downloader" + "github.com/ethereum/go-ethereum/logger" + "github.com/ethereum/go-ethereum/logger/glog" +) + +// Sync contains all synchronisation code for the eth protocol + +func (pm *ProtocolManager) update() { + // itimer is used to determine when to start ignoring `minDesiredPeerCount` + itimer := time.NewTimer(peerCountTimeout) + // btimer is used for picking of blocks from the downloader + btimer := time.NewTicker(blockProcTimer) +out: + for { + select { + case <-pm.newPeerCh: + // Meet the `minDesiredPeerCount` before we select our best peer + if len(pm.peers) < minDesiredPeerCount { + break + } + + // Find the best peer + peer := getBestPeer(pm.peers) + if peer == nil { + glog.V(logger.Debug).Infoln("Sync attempt cancelled. No peers available") + } + + itimer.Stop() + go pm.synchronise(peer) + case <-itimer.C: + // The timer will make sure that the downloader keeps an active state + // in which it attempts to always check the network for highest td peers + // Either select the peer or restart the timer if no peers could + // be selected. + if peer := getBestPeer(pm.peers); peer != nil { + go pm.synchronise(peer) + } else { + itimer.Reset(5 * time.Second) + } + case <-btimer.C: + go pm.processBlocks() + case <-pm.quitSync: + break out + } + } +} + +// processBlocks will attempt to reconstruct a chain by checking the first item and check if it's +// a known parent. The first block in the chain may be unknown during downloading. When the +// downloader isn't downloading blocks will be dropped with an unknown parent until either it +// has depleted the list or found a known parent. +func (pm *ProtocolManager) processBlocks() error { + pm.wg.Add(1) + defer pm.wg.Done() + + blocks := pm.downloader.TakeBlocks() + if len(blocks) == 0 { + return nil + } + defer pm.downloader.Done() + + glog.V(logger.Debug).Infof("Inserting chain with %d blocks (#%v - #%v)\n", len(blocks), blocks[0].Number(), blocks[len(blocks)-1].Number()) + + for len(blocks) != 0 && !pm.quit { + max := int(math.Min(float64(len(blocks)), float64(blockProcAmount))) + _, err := pm.chainman.InsertChain(blocks[:max]) + if err != nil { + return err + } + blocks = blocks[max:] + } + return nil +} + +func (pm *ProtocolManager) synchronise(peer *peer) { + // Make sure the peer's TD is higher than our own. If not drop. + if peer.td.Cmp(pm.chainman.Td()) <= 0 { + return + } + // Check downloader if it's busy so it doesn't show the sync message + // for every attempty + if pm.downloader.IsBusy() { + return + } + + // Get the hashes from the peer (synchronously) + err := pm.downloader.Synchronise(peer.id, peer.recentHash) + if err != nil && err == downloader.ErrBadPeer { + glog.V(logger.Debug).Infoln("removed peer from peer set due to bad action") + pm.removePeer(peer) + } else if err != nil { + // handle error + glog.V(logger.Detail).Infoln("error downloading:", err) + } +} From b298928c49d1af3dc1165b295fe43fc8d0d6061d Mon Sep 17 00:00:00 2001 From: obscuren Date: Fri, 1 May 2015 16:00:30 +0200 Subject: [PATCH 07/15] core: added 'ignored' status --- core/chain_manager.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/chain_manager.go b/core/chain_manager.go index 80fd6cd71..e172c5cdb 100644 --- a/core/chain_manager.go +++ b/core/chain_manager.go @@ -516,7 +516,7 @@ func (self *ChainManager) InsertChain(chain types.Blocks) (int, error) { var ( queue = make([]interface{}, len(chain)) queueEvent = queueEvent{queue: queue} - stats struct{ queued, processed int } + stats struct{ queued, processed, ignored int } tstart = time.Now() ) for i, block := range chain { @@ -532,6 +532,7 @@ func (self *ChainManager) InsertChain(chain types.Blocks) (int, error) { logs, err := self.processor.Process(block) if err != nil { if IsKnownBlockErr(err) { + stats.ignored++ continue } @@ -624,7 +625,7 @@ func (self *ChainManager) InsertChain(chain types.Blocks) (int, error) { if (stats.queued > 0 || stats.processed > 0) && bool(glog.V(logger.Info)) { tend := time.Since(tstart) start, end := chain[0], chain[len(chain)-1] - glog.Infof("imported %d block(s) %d queued in %v. #%v [%x / %x]\n", stats.processed, stats.queued, tend, end.Number(), start.Hash().Bytes()[:4], end.Hash().Bytes()[:4]) + glog.Infof("imported %d block(s) (%d queued %d ignored) in %v. #%v [%x / %x]\n", stats.processed, stats.queued, stats.ignored, tend, end.Number(), start.Hash().Bytes()[:4], end.Hash().Bytes()[:4]) } go self.eventMux.Post(queueEvent) From c6ad3aec05e1c42c3e4a222d1e8306598d5254f3 Mon Sep 17 00:00:00 2001 From: obscuren Date: Fri, 1 May 2015 16:30:02 +0200 Subject: [PATCH 08/15] eth,core: changed NewTicker to Tick --- core/chain_manager.go | 4 ++-- eth/sync.go | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/chain_manager.go b/core/chain_manager.go index e172c5cdb..4dfa67b8d 100644 --- a/core/chain_manager.go +++ b/core/chain_manager.go @@ -669,7 +669,7 @@ func (self *ChainManager) merge(oldBlock, newBlock *types.Block) { func (self *ChainManager) update() { events := self.eventMux.Subscribe(queueEvent{}) - futureTimer := time.NewTicker(5 * time.Second) + futureTimer := time.Tick(5 * time.Second) out: for { select { @@ -696,7 +696,7 @@ out: self.eventMux.Post(event) } } - case <-futureTimer.C: + case <-futureTimer: self.procFutureBlocks() case <-self.quit: break out diff --git a/eth/sync.go b/eth/sync.go index bcf4b231f..16d85d1f1 100644 --- a/eth/sync.go +++ b/eth/sync.go @@ -15,8 +15,8 @@ func (pm *ProtocolManager) update() { // itimer is used to determine when to start ignoring `minDesiredPeerCount` itimer := time.NewTimer(peerCountTimeout) // btimer is used for picking of blocks from the downloader - btimer := time.NewTicker(blockProcTimer) -out: + btimer := time.Tick(blockProcTimer) + for { select { case <-pm.newPeerCh: @@ -43,10 +43,10 @@ out: } else { itimer.Reset(5 * time.Second) } - case <-btimer.C: + case <-btimer: go pm.processBlocks() case <-pm.quitSync: - break out + return } } } From 25bc88113f8d78a265e03be670a4c9ae4ee0bdbe Mon Sep 17 00:00:00 2001 From: obscuren Date: Sun, 3 May 2015 14:09:33 +0200 Subject: [PATCH 09/15] eth/downloader: added additional tests --- eth/downloader/downloader_test.go | 15 ++++++-- eth/downloader/queue_test.go | 62 +++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 eth/downloader/queue_test.go diff --git a/eth/downloader/downloader_test.go b/eth/downloader/downloader_test.go index d13818b37..fe68ea914 100644 --- a/eth/downloader/downloader_test.go +++ b/eth/downloader/downloader_test.go @@ -23,12 +23,19 @@ func createHashes(start, amount int) (hashes []common.Hash) { return } +func createBlock(i int, prevHash, hash common.Hash) *types.Block { + header := &types.Header{Number: big.NewInt(int64(i))} + block := types.NewBlockWithHeader(header) + block.HeaderHash = hash + block.ParentHeaderHash = knownHash + return block +} + func createBlocksFromHashes(hashes []common.Hash) map[common.Hash]*types.Block { blocks := make(map[common.Hash]*types.Block) + for i, hash := range hashes { - header := &types.Header{Number: big.NewInt(int64(len(hashes) - i))} - blocks[hash] = types.NewBlockWithHeader(header) - blocks[hash].HeaderHash = hash + blocks[hash] = createBlock(len(hashes)-i, knownHash, hash) } return blocks @@ -162,7 +169,7 @@ func TestTaking(t *testing.T) { t.Error("download error", err) } - bs1 := tester.downloader.TakeBlocks(1000) + bs1 := tester.downloader.TakeBlocks() if len(bs1) != 1000 { t.Error("expected to take 1000, got", len(bs1)) } diff --git a/eth/downloader/queue_test.go b/eth/downloader/queue_test.go new file mode 100644 index 000000000..b163bd9c7 --- /dev/null +++ b/eth/downloader/queue_test.go @@ -0,0 +1,62 @@ +package downloader + +import ( + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "gopkg.in/fatih/set.v0" +) + +func createHashSet(hashes []common.Hash) *set.Set { + hset := set.New() + + for _, hash := range hashes { + hset.Add(hash) + } + + return hset +} + +func createBlocksFromHashSet(hashes *set.Set) []*types.Block { + blocks := make([]*types.Block, hashes.Size()) + + var i int + hashes.Each(func(v interface{}) bool { + blocks[i] = createBlock(i, common.Hash{}, v.(common.Hash)) + i++ + return true + }) + + return blocks +} + +func TestChunking(t *testing.T) { + queue := newqueue() + peer1 := newPeer("peer1", common.Hash{}, nil, nil) + peer2 := newPeer("peer2", common.Hash{}, nil, nil) + + // 99 + 1 (1 == known genesis hash) + hashes := createHashes(0, 99) + hashSet := createHashSet(hashes) + queue.put(hashSet) + + chunk1 := queue.get(peer1, 99) + if chunk1 == nil { + t.Errorf("chunk1 is nil") + t.FailNow() + } + chunk2 := queue.get(peer2, 99) + if chunk2 == nil { + t.Errorf("chunk2 is nil") + t.FailNow() + } + + if chunk1.hashes.Size() != 99 { + t.Error("expected chunk1 hashes to be 99, got", chunk1.hashes.Size()) + } + + if chunk2.hashes.Size() != 1 { + t.Error("expected chunk1 hashes to be 1, got", chunk2.hashes.Size()) + } +} From c5b8acbaf0ae662a6b72568d155880d28d1034a1 Mon Sep 17 00:00:00 2001 From: obscuren Date: Sun, 3 May 2015 14:09:50 +0200 Subject: [PATCH 10/15] core: print ignored blocks --- core/chain_manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/chain_manager.go b/core/chain_manager.go index 4dfa67b8d..bde5a71d7 100644 --- a/core/chain_manager.go +++ b/core/chain_manager.go @@ -622,7 +622,7 @@ func (self *ChainManager) InsertChain(chain types.Blocks) (int, error) { } - if (stats.queued > 0 || stats.processed > 0) && bool(glog.V(logger.Info)) { + if (stats.queued > 0 || stats.processed > 0 || stats.ignored > 0) && bool(glog.V(logger.Info)) { tend := time.Since(tstart) start, end := chain[0], chain[len(chain)-1] glog.Infof("imported %d block(s) (%d queued %d ignored) in %v. #%v [%x / %x]\n", stats.processed, stats.queued, stats.ignored, tend, end.Number(), start.Hash().Bytes()[:4], end.Hash().Bytes()[:4]) From 493181ea09e5ac1e8798e4e97260fa5bff84ea1c Mon Sep 17 00:00:00 2001 From: obscuren Date: Sun, 3 May 2015 14:10:31 +0200 Subject: [PATCH 11/15] eth/downloader: changed has to blockHashes for chain linking --- eth/downloader/downloader.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index c1dc9d3a8..a484ce0a7 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -270,7 +270,7 @@ out: ) hashSet := set.New() for _, hash = range hashes { - if d.hasBlock(hash) || d.queue.has(hash) { + if d.hasBlock(hash) || d.queue.blockHashes.Has(hash) { glog.V(logger.Debug).Infof("Found common hash %x\n", hash[:4]) done = true From f2a2b2ac70632b878fea393f698168b65adbac2f Mon Sep 17 00:00:00 2001 From: obscuren Date: Sun, 3 May 2015 14:11:00 +0200 Subject: [PATCH 12/15] eth/downloader: put back hashes on block overflow error --- eth/downloader/queue.go | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/eth/downloader/queue.go b/eth/downloader/queue.go index 13768229f..1b63a5ffb 100644 --- a/eth/downloader/queue.go +++ b/eth/downloader/queue.go @@ -1,6 +1,7 @@ package downloader import ( + "fmt" "math" "sync" "time" @@ -102,17 +103,6 @@ func (c *queue) has(hash common.Hash) bool { return c.hashPool.Has(hash) || c.fetchPool.Has(hash) || c.blockHashes.Has(hash) } -func (c *queue) addBlock(id string, block *types.Block) { - c.mu.Lock() - defer c.mu.Unlock() - - // when adding a block make sure it doesn't already exist - if !c.blockHashes.Has(block.Hash()) { - c.hashPool.Remove(block.Hash()) - c.blocks = append(c.blocks, block) - } -} - func (c *queue) getBlock(hash common.Hash) *types.Block { c.mu.Lock() defer c.mu.Unlock() @@ -130,7 +120,7 @@ func (c *queue) getBlock(hash common.Hash) *types.Block { } // deliver delivers a chunk to the queue that was requested of the peer -func (c *queue) deliver(id string, blocks []*types.Block) error { +func (c *queue) deliver(id string, blocks []*types.Block) (err error) { c.mu.Lock() defer c.mu.Unlock() @@ -145,25 +135,30 @@ func (c *queue) deliver(id string, blocks []*types.Block) error { chunk.peer.ignored.Merge(chunk.hashes) } + // Add the blocks + for i, block := range blocks { + // See (1) for future limitation + n := int(block.NumberU64()) - c.blockOffset + if n > len(c.blocks) || n < 0 { + // set the error and set the blocks which could be processed + // abort the rest of the blocks (FIXME this could be improved) + err = fmt.Errorf("received block which overflow (N=%v O=%v)", block.Number(), c.blockOffset) + blocks = blocks[:i] + break + } + c.blocks[n] = block + } // seperate the blocks and the hashes blockHashes := chunk.fetchedHashes(blocks) // merge block hashes c.blockHashes.Merge(blockHashes) - // Add the blocks - for _, block := range blocks { - // See (1) for future limitation - n := int(block.NumberU64()) - c.blockOffset - if n > len(c.blocks) || n < 0 { - return errBlockNumberOverflow - } - c.blocks[n] = block - } // Add back whatever couldn't be delivered c.hashPool.Merge(chunk.hashes) + // Remove the hashes from the fetch pool c.fetchPool.Separate(chunk.hashes) } - return nil + return } func (c *queue) alloc(offset, size int) { From ba2236fa513e06603d3fa2a6d721be3879d7f50e Mon Sep 17 00:00:00 2001 From: obscuren Date: Sun, 3 May 2015 14:11:47 +0200 Subject: [PATCH 13/15] cmd/geth, eth: bump version & tmp fix for incorrect TD peers --- cmd/geth/main.go | 2 +- eth/sync.go | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cmd/geth/main.go b/cmd/geth/main.go index ef007051c..0f5ebf5be 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -47,7 +47,7 @@ import _ "net/http/pprof" const ( ClientIdentifier = "Geth" - Version = "0.9.14" + Version = "0.9.15" ) var ( diff --git a/eth/sync.go b/eth/sync.go index 16d85d1f1..9e8b21a7c 100644 --- a/eth/sync.go +++ b/eth/sync.go @@ -89,6 +89,13 @@ func (pm *ProtocolManager) synchronise(peer *peer) { return } + // FIXME if we have the hash in our chain and the TD of the peer is + // much higher than ours, something is wrong with us or the peer. + // Check if the hash is on our own chain + if pm.chainman.HasBlock(peer.recentHash) { + return + } + // Get the hashes from the peer (synchronously) err := pm.downloader.Synchronise(peer.id, peer.recentHash) if err != nil && err == downloader.ErrBadPeer { From 1470b22e9051f48fbbeb136cc4c0be0877e9f9a7 Mon Sep 17 00:00:00 2001 From: obscuren Date: Sun, 3 May 2015 16:09:10 +0200 Subject: [PATCH 14/15] downloader: hash downloading recovery If a peer fails to respond (disconnect, etc) during hash downloading switch to a different peer which has it's current_hash in the queue's peer set. --- eth/downloader/downloader.go | 73 +++++++++++++++++++++++-------- eth/downloader/downloader_test.go | 27 +++++++----- 2 files changed, 71 insertions(+), 29 deletions(-) diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index a484ce0a7..15f4cb0a3 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -53,6 +53,11 @@ type syncPack struct { ignoreInitial bool } +type hashPack struct { + peerId string + hashes []common.Hash +} + type Downloader struct { mu sync.RWMutex queue *queue @@ -69,7 +74,7 @@ type Downloader struct { // Channels newPeerCh chan *peer - hashCh chan []common.Hash + hashCh chan hashPack blockCh chan blockPack } @@ -80,7 +85,7 @@ func New(hasBlock hashCheckFn, getBlock getBlockFn) *Downloader { hasBlock: hasBlock, getBlock: getBlock, newPeerCh: make(chan *peer, 1), - hashCh: make(chan []common.Hash, 1), + hashCh: make(chan hashPack, 1), blockCh: make(chan blockPack, 1), } @@ -235,15 +240,15 @@ func (d *Downloader) getFromPeer(p *peer, hash common.Hash, ignoreInitial bool) } // XXX Make synchronous -func (d *Downloader) startFetchingHashes(p *peer, hash common.Hash, ignoreInitial bool) error { +func (d *Downloader) startFetchingHashes(p *peer, h common.Hash, ignoreInitial bool) error { atomic.StoreInt32(&d.fetchingHashes, 1) defer atomic.StoreInt32(&d.fetchingHashes, 0) - if d.queue.has(hash) { + if d.queue.has(h) { return errAlreadyInPool } - glog.V(logger.Debug).Infof("Downloading hashes (%x) from %s", hash.Bytes()[:4], p.id) + glog.V(logger.Debug).Infof("Downloading hashes (%x) from %s", h[:4], p.id) start := time.Now() @@ -251,22 +256,34 @@ func (d *Downloader) startFetchingHashes(p *peer, hash common.Hash, ignoreInitia // In such circumstances we don't need to download the block so don't add it to the queue. if !ignoreInitial { // Add the hash to the queue first - d.queue.hashPool.Add(hash) + d.queue.hashPool.Add(h) } // Get the first batch of hashes - p.getHashes(hash) + p.getHashes(h) - failureResponseTimer := time.NewTimer(hashTtl) + var ( + failureResponseTimer = time.NewTimer(hashTtl) + attemptedPeers = make(map[string]bool) // attempted peers will help with retries + activePeer = p // active peer will help determine the current active peer + hash common.Hash // common and last hash + ) + attemptedPeers[p.id] = true out: for { select { - case hashes := <-d.hashCh: + case hashPack := <-d.hashCh: + // make sure the active peer is giving us the hashes + if hashPack.peerId != activePeer.id { + glog.V(logger.Debug).Infof("Received hashes from incorrect peer(%s)\n", hashPack.peerId) + break + } + failureResponseTimer.Reset(hashTtl) var ( - done bool // determines whether we're done fetching hashes (i.e. common hash found) - hash common.Hash // current and common hash + hashes = hashPack.hashes + done bool // determines whether we're done fetching hashes (i.e. common hash found) ) hashSet := set.New() for _, hash = range hashes { @@ -283,13 +300,13 @@ out: // Add hashes to the chunk set if len(hashes) == 0 { // Make sure the peer actually gave you something valid - glog.V(logger.Debug).Infof("Peer (%s) responded with empty hash set\n", p.id) + glog.V(logger.Debug).Infof("Peer (%s) responded with empty hash set\n", activePeer.id) d.queue.reset() return errEmptyHashSet } else if !done { // Check if we're done fetching // Get the next set of hashes - p.getHashes(hashes[len(hashes)-1]) + activePeer.getHashes(hash) } else { // we're done // The offset of the queue is determined by the highest known block var offset int @@ -303,12 +320,30 @@ out: } case <-failureResponseTimer.C: glog.V(logger.Debug).Infof("Peer (%s) didn't respond in time for hash request\n", p.id) - // TODO instead of reseting the queue select a new peer from which we can start downloading hashes. - // 1. check for peer's best hash to be included in the current hash set; - // 2. resume from last point (hashes[len(hashes)-1]) using the newly selected peer. - d.queue.reset() - return errTimeout + var p *peer // p will be set if a peer can be found + // Attempt to find a new peer by checking inclusion of peers best hash in our + // already fetched hash list. This can't guarantee 100% correctness but does + // a fair job. This is always either correct or false incorrect. + for id, peer := range d.peers { + if d.queue.hashPool.Has(peer.recentHash) && !attemptedPeers[id] { + p = peer + break + } + } + + // if all peers have been tried, abort the process entirely or if the hash is + // the zero hash. + if p == nil || (hash == common.Hash{}) { + d.queue.reset() + return errTimeout + } + + // set p to the active peer. this will invalidate any hashes that may be returned + // by our previous (delayed) peer. + activePeer = p + p.getHashes(hash) + glog.V(logger.Debug).Infof("Hash fetching switched to new peer(%s)\n", p.id) } } glog.V(logger.Detail).Infof("Downloaded hashes (%d) in %v\n", d.queue.hashPool.Size(), time.Since(start)) @@ -454,7 +489,7 @@ func (d *Downloader) AddHashes(id string, hashes []common.Hash) error { glog.Infof("adding %d (T=%d) hashes [ %x / %x ] from: %s\n", len(hashes), d.queue.hashPool.Size(), from[:4], to[:4], id) } - d.hashCh <- hashes + d.hashCh <- hashPack{id, hashes} return nil } diff --git a/eth/downloader/downloader_test.go b/eth/downloader/downloader_test.go index fe68ea914..872ea02eb 100644 --- a/eth/downloader/downloader_test.go +++ b/eth/downloader/downloader_test.go @@ -42,12 +42,13 @@ func createBlocksFromHashes(hashes []common.Hash) map[common.Hash]*types.Block { } type downloadTester struct { - downloader *Downloader - hashes []common.Hash - blocks map[common.Hash]*types.Block - t *testing.T - pcount int - done chan bool + downloader *Downloader + hashes []common.Hash + blocks map[common.Hash]*types.Block + t *testing.T + pcount int + done chan bool + activePeerId string } func newTester(t *testing.T, hashes []common.Hash, blocks map[common.Hash]*types.Block) *downloadTester { @@ -58,6 +59,11 @@ func newTester(t *testing.T, hashes []common.Hash, blocks map[common.Hash]*types return tester } +func (dl *downloadTester) sync(peerId string, hash common.Hash) error { + dl.activePeerId = peerId + return dl.downloader.Synchronise(peerId, hash) +} + func (dl *downloadTester) hasBlock(hash common.Hash) bool { if knownHash == hash { return true @@ -70,7 +76,7 @@ func (dl *downloadTester) getBlock(hash common.Hash) *types.Block { } func (dl *downloadTester) getHashes(hash common.Hash) error { - dl.downloader.hashCh <- dl.hashes + dl.downloader.AddHashes(dl.activePeerId, dl.hashes) return nil } @@ -115,8 +121,9 @@ func TestDownload(t *testing.T) { tester.newPeer("peer2", big.NewInt(0), common.Hash{}) tester.badBlocksPeer("peer3", big.NewInt(0), common.Hash{}) tester.badBlocksPeer("peer4", big.NewInt(0), common.Hash{}) + tester.activePeerId = "peer1" - err := tester.downloader.Synchronise("peer1", hashes[0]) + err := tester.sync("peer1", hashes[0]) if err != nil { t.Error("download error", err) } @@ -139,7 +146,7 @@ func TestMissing(t *testing.T) { hashes = append(extraHashes, hashes[:len(hashes)-1]...) tester.newPeer("peer2", big.NewInt(0), common.Hash{}) - err := tester.downloader.Synchronise("peer1", hashes[0]) + err := tester.sync("peer1", hashes[0]) if err != nil { t.Error("download error", err) } @@ -164,7 +171,7 @@ func TestTaking(t *testing.T) { tester.badBlocksPeer("peer3", big.NewInt(0), common.Hash{}) tester.badBlocksPeer("peer4", big.NewInt(0), common.Hash{}) - err := tester.downloader.Synchronise("peer1", hashes[0]) + err := tester.sync("peer1", hashes[0]) if err != nil { t.Error("download error", err) } From 37770ed0d37f9c8ece37fecd443e7c70e915a48a Mon Sep 17 00:00:00 2001 From: obscuren Date: Sun, 3 May 2015 21:44:44 +0200 Subject: [PATCH 15/15] core: added unix timestamp to debug output for block proc --- core/chain_manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/chain_manager.go b/core/chain_manager.go index bde5a71d7..29830188e 100644 --- a/core/chain_manager.go +++ b/core/chain_manager.go @@ -604,7 +604,7 @@ func (self *ChainManager) InsertChain(chain types.Blocks) (int, error) { queueEvent.canonicalCount++ if glog.V(logger.Debug) { - glog.Infof("inserted block #%d (%d TXs %d UNCs) (%x...)\n", block.Number(), len(block.Transactions()), len(block.Uncles()), block.Hash().Bytes()[0:4]) + glog.Infof("[%v] inserted block #%d (%d TXs %d UNCs) (%x...)\n", time.Now().UnixNano(), block.Number(), len(block.Transactions()), len(block.Uncles()), block.Hash().Bytes()[0:4]) } } else { if glog.V(logger.Detail) {