// Source: op-geth/swarm/network/stream/visualized_snapshot_sync_si... (343 lines, 9.8 KiB, Go)

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// +build withserver

package stream
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"sync"
"testing"
"time"
"github.com/ethereum/go-ethereum/node"
"github.com/ethereum/go-ethereum/p2p"
"github.com/ethereum/go-ethereum/p2p/enode"
"github.com/ethereum/go-ethereum/p2p/protocols"
"github.com/ethereum/go-ethereum/p2p/simulations"
"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/swarm/log"
"github.com/ethereum/go-ethereum/swarm/network/simulation"
"github.com/ethereum/go-ethereum/swarm/state"
"github.com/ethereum/go-ethereum/swarm/storage"
)
/*
The tests in this file need to be executed with
-tags=withserver
Also, they will stall if executed stand-alone, because they wait
for the visualization frontend to send a POST /runsim message.
*/
// setupSim evaluates the node and chunk counts and creates the simulation for
// the given services. It returns the node count, the chunk count and the
// simulation, in that order.
func setupSim(serviceMap map[string]simulation.ServiceFunc) (int, int, *simulation.Simulation) {
	nodeCount, chunkCount := *nodes, *chunks
	// a zero in either value replaces both of them with the defaults
	if nodeCount == 0 || chunkCount == 0 {
		nodeCount, chunkCount = 32, 1
	}

	// the simulation is set up with a server on :8888, which means the sim
	// won't run until it receives a POST /runsim from the frontend
	sim := simulation.New(serviceMap).WithServer(":8888")

	return nodeCount, chunkCount, sim
}
//This test requests bogus hashes into the network
func TestNonExistingHashesWithServer(t *testing.T) {
nodeCount, _, sim := setupSim(retrievalSimServiceMap)
defer sim.Close()
err := sim.UploadSnapshot(fmt.Sprintf("testing/snapshot_%d.json", nodeCount))
if err != nil {
panic(err)
}
//in order to get some meaningful visualization, it is beneficial
//to define a minimum duration of this test
testDuration := 20 * time.Second
swarm: fix network/stream data races (#19051) * swarm/network/stream: newStreamerTester cleanup only if err is nil * swarm/network/stream: raise newStreamerTester waitForPeers timeout * swarm/network/stream: fix data races in GetPeerSubscriptions * swarm/storage: prevent data race on LDBStore.batchesC https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-461775049 * swarm/network/stream: fix TestGetSubscriptionsRPC data race https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-461768477 * swarm/network/stream: correctly use Simulation.Run callback https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-461783804 * swarm/network: protect addrCountC in Kademlia.AddrCountC function https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-462273444 * p2p/simulations: fix a deadlock calling getRandomNode with lock https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-462317407 * swarm/network/stream: terminate disconnect goruotines in tests * swarm/network/stream: reduce memory consumption when testing data races * swarm/network/stream: add watchDisconnections helper function * swarm/network/stream: add concurrent counter for tests * swarm/network/stream: rename race/norace test files and use const * swarm/network/stream: remove watchSim and its panic * swarm/network/stream: pass context in watchDisconnections * swarm/network/stream: add concurrent safe bool for watchDisconnections * swarm/storage: fix LDBStore.batchesC data race by not closing it
2019-02-13 12:03:23 +00:00
result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) (err error) {
disconnected := watchDisconnections(ctx, sim)
defer func() {
if err != nil {
if yes, ok := disconnected.Load().(bool); ok && yes {
err = errors.New("disconnect events received")
}
}
}()
//check on the node's FileStore (netstore)
id := sim.Net.GetRandomUpNode().ID()
item, ok := sim.NodeItem(id, bucketKeyFileStore)
if !ok {
swarm: fix network/stream data races (#19051) * swarm/network/stream: newStreamerTester cleanup only if err is nil * swarm/network/stream: raise newStreamerTester waitForPeers timeout * swarm/network/stream: fix data races in GetPeerSubscriptions * swarm/storage: prevent data race on LDBStore.batchesC https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-461775049 * swarm/network/stream: fix TestGetSubscriptionsRPC data race https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-461768477 * swarm/network/stream: correctly use Simulation.Run callback https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-461783804 * swarm/network: protect addrCountC in Kademlia.AddrCountC function https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-462273444 * p2p/simulations: fix a deadlock calling getRandomNode with lock https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-462317407 * swarm/network/stream: terminate disconnect goruotines in tests * swarm/network/stream: reduce memory consumption when testing data races * swarm/network/stream: add watchDisconnections helper function * swarm/network/stream: add concurrent counter for tests * swarm/network/stream: rename race/norace test files and use const * swarm/network/stream: remove watchSim and its panic * swarm/network/stream: pass context in watchDisconnections * swarm/network/stream: add concurrent safe bool for watchDisconnections * swarm/storage: fix LDBStore.batchesC data race by not closing it
2019-02-13 12:03:23 +00:00
return errors.New("No filestore")
}
fileStore := item.(*storage.FileStore)
//create a bogus hash
fakeHash := storage.GenerateRandomChunk(1000).Address()
//try to retrieve it - will propagate RetrieveRequestMsg into the network
reader, _ := fileStore.Retrieve(context.TODO(), fakeHash)
if _, err := reader.Size(ctx, nil); err != nil {
log.Debug("expected error for non-existing chunk")
}
//sleep so that the frontend can have something to display
time.Sleep(testDuration)
return nil
})
if result.Error != nil {
sendSimTerminatedEvent(sim)
t.Fatal(result.Error)
}
sendSimTerminatedEvent(sim)
}
// sendSimTerminatedEvent sends an EventTypeSimTerminated event on the event
// feed of the simulation's network, so that the frontend gets notified.
func sendSimTerminatedEvent(sim *simulation.Simulation) {
	sim.Net.Events().Send(&simulations.Event{
		Type:    EventTypeSimTerminated,
		Control: false,
	})
}
//This test is the same as the snapshot sync test,
//but with a HTTP server
//It also sends some custom events so that the frontend
//can visualize messages like SendOfferedMsg, WantedHashesMsg, DeliveryMsg
func TestSnapshotSyncWithServer(t *testing.T) {
//define a wrapper object to be able to pass around data
wrapper := &netWrapper{}
sim := simulation.New(map[string]simulation.ServiceFunc{
"streamer": func(ctx *adapters.ServiceContext, bucket *sync.Map) (s node.Service, cleanup func(), err error) {
addr, netStore, delivery, clean, err := newNetStoreAndDeliveryWithRequestFunc(ctx, bucket, dummyRequestFromPeers)
if err != nil {
return nil, nil, err
}
r := NewRegistry(addr.ID(), delivery, netStore, state.NewInmemoryStore(), &RegistryOptions{
Retrieval: RetrievalDisabled,
Syncing: SyncingAutoSubscribe,
SyncUpdateDelay: 3 * time.Second,
}, nil)
tr := &testRegistry{
Registry: r,
w: wrapper,
}
bucket.Store(bucketKeyRegistry, tr)
cleanup = func() {
tr.Close()
clean()
}
return tr, cleanup, nil
},
}).WithServer(":8888") //start with the HTTP server
swarm/network: Revised depth and health for Kademlia (#18354) * swarm/network: Revised depth calculation with tests * swarm/network: WIP remove redundant "full" function * swarm/network: WIP peerpot refactor * swarm/network: Make test methods submethod of peerpot and embed kad * swarm/network: Remove commented out code * swarm/network: Rename health test functions * swarm/network: Too many n's * swarm/network: Change hive Healthy func to accept addresses * swarm/network: Add Healthy proxy method for api in hive * swarm/network: Skip failing test out of scope for PR * swarm/network: Skip all tests dependent on SuggestPeers * swarm/network: Remove commented code and useless kad Pof member * swarm/network: Remove more unused code, add counter on depth test errors * swarm/network: WIP Create Healthy assertion tests * swarm/network: Roll back health related methods receiver change * swarm/network: Hardwire network minproxbinsize in swarm sim * swarm/network: Rework Health test to strict Pending add test for saturation And add test for as many as possible up to saturation * swarm/network: Skip discovery tests (dependent on SuggestPeer) * swarm/network: Remove useless minProxBinSize in stream * swarm/network: Remove unnecessary testing.T param to assert health * swarm/network: Implement t.Helper() in checkHealth * swarm/network: Rename check back to assert now that we have helper magic * swarm/network: Revert WaitTillHealthy change (deferred to nxt PR) * swarm/network: Kademlia tests GotNN => ConnectNN * swarm/network: Renames and comments * swarm/network: Add comments
2018-12-22 05:53:30 +00:00
nodeCount, chunkCount, sim := setupSim(simServiceMap)
defer sim.Close()
Ci race detector handle failing tests (#19143) * swarm/storage: increase mget timeout in common_test.go TestDbStoreCorrect_1k sometimes timed out with -race on Travis. --- FAIL: TestDbStoreCorrect_1k (24.63s) common_test.go:194: testStore failed: timed out after 10s * swarm: remove unused vars from TestSnapshotSyncWithServer nodeCount and chunkCount is returned from setupSim and those values we use. * swarm: move race/norace helpers from stream to testutil As we will need to use the flag in other packages, too. * swarm: refactor TestSwarmNetwork case Extract long running test cases for better visibility. * swarm/network: skip TestSyncingViaGlobalSync with -race As panics on Travis. panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x7e351b] * swarm: run TestSwarmNetwork with fewer nodes with -race As otherwise we always get test failure with `network_test.go:374: context deadline exceeded` even with raised `Timeout`. * swarm/network: run TestDeliveryFromNodes with fewer nodes with -race Test on Travis times out with 8 or more nodes if -race flag is present. * swarm/network: smaller node count for discovery tests with -race TestDiscoveryPersistenceSimulationSimAdapters failed on Travis with `-race` flag present. The failure was due to extensive memory usage, coming from the CGO runtime. Using a smaller node count resolves the issue. === RUN TestDiscoveryPersistenceSimulationSimAdapter ==7227==ERROR: ThreadSanitizer failed to allocate 0x80000 (524288) bytes of clock allocator (error code: 12) FATAL: ThreadSanitizer CHECK failed: ./gotsan.cc:6976 "((0 && "unable to mmap")) != (0)" (0x0, 0x0) FAIL github.com/ethereum/go-ethereum/swarm/network/simulations/discovery 804.826s * swarm/network: run TestFileRetrieval with fewer nodes with -race Otherwise we get a failure due to extensive memory usage, as the CGO runtime cannot allocate more bytes. 
=== RUN TestFileRetrieval ==7366==ERROR: ThreadSanitizer failed to allocate 0x80000 (524288) bytes of clock allocator (error code: 12) FATAL: ThreadSanitizer CHECK failed: ./gotsan.cc:6976 "((0 && "unable to mmap")) != (0)" (0x0, 0x0) FAIL github.com/ethereum/go-ethereum/swarm/network/stream 155.165s * swarm/network: run TestRetrieval with fewer nodes with -race Otherwise we get a failure due to extensive memory usage, as the CGO runtime cannot allocate more bytes ("ThreadSanitizer failed to allocate"). * swarm/network: skip flaky TestGetSubscriptionsRPC on Travis w/ -race Test fails a lot with something like: streamer_test.go:1332: Real subscriptions and expected amount don't match; real: 0, expected: 20 * swarm/storage: skip TestDB_SubscribePull* tests on Travis w/ -race Travis just hangs... ok github.com/ethereum/go-ethereum/swarm/storage/feed/lookup 1.307s keepalive keepalive keepalive or panics after a while. Without these tests the race detector job is now stable. Let's invetigate these tests in a separate issue: https://github.com/ethersphere/go-ethereum/issues/1245
2019-02-20 21:57:42 +00:00
log.Info(fmt.Sprintf("Running the simulation with %d nodes and %d chunks", nodeCount, chunkCount))
log.Info("Initializing test config")
conf := &synctestConfig{}
//map of discover ID to indexes of chunks expected at that ID
conf.idToChunksMap = make(map[enode.ID][]int)
//map of overlay address to discover ID
conf.addrToIDMap = make(map[string]enode.ID)
//array where the generated chunk hashes will be stored
conf.hashes = make([]storage.Address, 0)
//pass the network to the wrapper object
wrapper.setNetwork(sim.Net)
err := sim.UploadSnapshot(fmt.Sprintf("testing/snapshot_%d.json", nodeCount))
if err != nil {
panic(err)
}
//run the sim
result := runSim(conf, ctx, sim, chunkCount)
//send terminated event
evt := &simulations.Event{
Type: EventTypeSimTerminated,
Control: false,
}
go sim.Net.Events().Send(evt)
if result.Error != nil {
panic(result.Error)
}
log.Info("Simulation ended")
}
// testRegistry embeds a Registry so that the run function of its protocol
// can be replaced.
type testRegistry struct {
	*Registry

	// w is the wrapper object handed on to every testMsgReadWriter
	w *netWrapper
}
// Protocols returns the protocols of the embedded Registry, with the run
// function of the first one replaced.
func (tr *testRegistry) Protocols() []p2p.Protocol {
	protos := tr.Registry.Protocols()

	// the first entry is the `stream` protocol; it gets the testRegistry's
	// run function instead of its own
	protos[0].Run = tr.runProto

	return protos
}
// runProto is the overwritten protocol run function for this test.
// It wraps rw in a custom testMsgReadWriter and then runs the actual upper
// layer `Registry`'s protocol function with it.
func (tr *testRegistry) runProto(p *p2p.Peer, rw p2p.MsgReadWriter) error {
	return tr.runProtocol(p, &testMsgReadWriter{
		Registry:      tr.Registry,
		MsgReadWriter: rw,
		Peer:          p,
		w:             tr.w,
	})
}
// testMsgReadWriter is a custom message ReadWriter.
// It allows a message to be used twice: once by this test and once by the
// protocol handler.
type testMsgReadWriter struct {
	*Registry
	p2p.MsgReadWriter
	*p2p.Peer

	// w gives access to the simulation network
	w *netWrapper
}
// netWrapper is a wrapper object holding the simulation network, so that it
// can be passed around.
type netWrapper struct {
	net *simulations.Network
}
// setNetwork stores the network in the wrapper for later use
// (it is read back inside the custom rw).
func (w *netWrapper) setNetwork(n *simulations.Network) {
	w.net = n
}
// getNetwork returns the network stored in the wrapper
// (used inside the custom rw).
func (w *netWrapper) getNetwork() *simulations.Network {
	return w.net
}
// ReadMsg reads a message from the underlying MsgReadWriter and, for the
// message codes this test is interested in, sends a custom event to the
// event feed of the simulation network.
//
// The Payload of a message is an io.Reader and can only be consumed once.
// To look into it here while still letting the protocol handler consume the
// message as normal, the payload is read through an io.TeeReader and the
// returned message gets the buffered copy as its Payload.
func (ev *testMsgReadWriter) ReadMsg() (p2p.Msg, error) {
	msg, err := ev.MsgReadWriter.ReadMsg()
	if err != nil {
		return msg, err
	}

	// only these message codes are inspected; everything else is returned
	// untouched
	switch msg.Code {
	case 1, 2, 10:
	default:
		return msg, nil
	}

	// everything decoded from peek is copied into replay, which becomes the
	// payload of the message handed back to the caller
	var replay bytes.Buffer
	peek := &p2p.Msg{
		Code:       msg.Code,
		Size:       msg.Size,
		ReceivedAt: msg.ReceivedAt,
		Payload:    io.TeeReader(msg.Payload, &replay),
	}
	msg.Payload = &replay

	// unwrap the message
	var wrapped protocols.WrappedMsg
	if err = peek.Decode(&wrapped); err != nil {
		log.Error(err.Error())
		return msg, err
	}

	// create a new, empty message for the code and decode the payload into it
	decoded, known := ev.Registry.GetSpec().NewMsg(peek.Code)
	if !known {
		return msg, fmt.Errorf("Invalid message code: %v", msg.Code)
	}
	if decodeErr := rlp.DecodeBytes(wrapped.Payload, decoded); decodeErr != nil {
		return msg, fmt.Errorf("Decoding error <= %v: %v", msg, decodeErr)
	}

	// pick the event type and data for the message types of interest
	var evt *simulations.Event
	switch m := decoded.(type) {
	case *OfferedHashesMsg:
		evt = &simulations.Event{Type: EventTypeChunkOffered, Data: m.Hashes}
	case *WantedHashesMsg:
		evt = &simulations.Event{Type: EventTypeChunkWanted}
	case *ChunkDeliveryMsgSyncing:
		evt = &simulations.Event{Type: EventTypeChunkDelivered, Data: m.Addr.String()}
	}
	if evt == nil {
		return msg, nil
	}

	// attach the node looked up by the peer's ID, then send the custom event
	// to the feed; the frontend will listen to it and display
	network := ev.w.getNetwork()
	evt.Node = network.GetNode(ev.ID())
	network.Events().Send(evt)

	return msg, nil
}