## Nim-LibP2P
## Copyright (c) 2019 Status Research & Development GmbH
## Licensed under either of
##  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
##  * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.

import std/[tables, sets, options, sequtils, random, algorithm]
import chronos, chronicles, metrics
import ./pubsub,
       ./floodsub,
       ./pubsubpeer,
       ./peertable,
       ./mcache,
       ./timedcache,
       ./rpc/[messages, message],
       ../protocol,
       ../../stream/connection,
       ../../peerinfo,
       ../../peerid,
       ../../utility

import stew/results
export results

logScope:
  topics = "gossipsub"

const
  GossipSubCodec* = "/meshsub/1.1.0"
  GossipSubCodec_10* = "/meshsub/1.0.0"

# overlay parameters
const
  GossipSubD* = 6
  GossipSubDlo* = 4
  GossipSubDhi* = 12

# gossip parameters
const
  GossipSubHistoryLength* = 5
  GossipSubHistoryGossip* = 3
  GossipBackoffPeriod* = 1.minutes

# heartbeat interval
const
  GossipSubHeartbeatInitialDelay* = 100.millis
  GossipSubHeartbeatInterval* = 1.seconds

# fanout ttl
const
  GossipSubFanoutTTL* = 1.minutes

const
  BackoffSlackTime = 2 # seconds
  IWantPeerBudget = 25 # 25 messages per second ( reset every heartbeat )
  IHavePeerBudget = 10

type
  TopicInfo* = object
    ## Per-peer, per-topic scoring counters (gossip 1.1 related).
    graftTime: Moment
    meshTime: Duration
    inMesh: bool
    meshMessageDeliveriesActive: bool
    firstMessageDeliveries: float64
    meshMessageDeliveries: float64
    meshFailurePenalty: float64
    invalidMessageDeliveries: float64

  TopicParams* = object
    ## Per-topic scoring weights, caps and decay factors.
    topicWeight*: float64

    # p1
    timeInMeshWeight*: float64
    timeInMeshQuantum*: Duration
    timeInMeshCap*: float64

    # p2
    firstMessageDeliveriesWeight*: float64
    firstMessageDeliveriesDecay*: float64
    firstMessageDeliveriesCap*: float64

    # p3
    meshMessageDeliveriesWeight*: float64
    meshMessageDeliveriesDecay*: float64
    meshMessageDeliveriesThreshold*: float64
    meshMessageDeliveriesCap*: float64
    meshMessageDeliveriesActivation*: Duration
    meshMessageDeliveriesWindow*: Duration

    # p3b
    meshFailurePenaltyWeight*: float64
    meshFailurePenaltyDecay*: float64

    # p4
    invalidMessageDeliveriesWeight*: float64
    invalidMessageDeliveriesDecay*: float64

  PeerStats* = object
    topicInfos*: Table[string, TopicInfo]
    expire*: Moment # updated on disconnect, to retain scores until expire

  GossipSubParams* = object
    ## Router-wide tunables; see `init` for defaults and
    ## `validateParameters` for the accepted ranges.
    explicit: bool
    pruneBackoff*: Duration
    floodPublish*: bool
    gossipFactor*: float64
    dScore*: int
    dOut*: int
    dLazy*: int

    gossipThreshold*: float64
    publishThreshold*: float64
    graylistThreshold*: float64
    acceptPXThreshold*: float64
    opportunisticGraftThreshold*: float64
    decayInterval*: Duration
    decayToZero*: float64
    retainScore*: Duration

    appSpecificWeight*: float64
    ipColocationFactorWeight*: float64
    ipColocationFactorThreshold*: float64
    behaviourPenaltyWeight*: float64
    behaviourPenaltyDecay*: float64

    directPeers*: seq[PeerId]

  GossipSub* = ref object of FloodSub
    mesh*: PeerTable                           # peers that we send messages to when we are subscribed to the topic
    fanout*: PeerTable                         # peers that we send messages to when we're not subscribed to the topic
    gossipsub*: PeerTable                      # peers that are subscribed to a topic
    explicit*: PeerTable                       # directpeers that we keep alive explicitly
    backingOff*: Table[PeerID, Moment]         # peers we must not graft yet, with the backoff expiry time
    lastFanoutPubSub*: Table[string, Moment]   # last publish time for fanout topics
    gossip*: Table[string, seq[ControlIHave]]  # pending gossip
    control*: Table[string, ControlMessage]    # pending control messages
    mcache*: MCache                            # messages cache
    heartbeatFut: Future[void]                 # cancellation future for heartbeat interval
    heartbeatRunning: bool

    peerStats: Table[PubSubPeer, PeerStats]
    parameters*: GossipSubParams
    topicParams*: Table[string, TopicParams]
    directPeersLoop: Future[void]
    peersInIP: Table[MultiAddress, HashSet[PubSubPeer]]

    heartbeatEvents*: seq[AsyncEvent]

    when not defined(release):
      prunedPeers: HashSet[PubSubPeer]

when defined(libp2p_expensive_metrics):
  declareGauge(libp2p_gossipsub_peers_per_topic_mesh,
    "gossipsub peers per topic in mesh",
    labels = ["topic"])

  declareGauge(libp2p_gossipsub_peers_per_topic_fanout,
    "gossipsub peers per topic in fanout",
    labels = ["topic"])

  declareGauge(libp2p_gossipsub_peers_per_topic_gossipsub,
    "gossipsub peers per topic in gossipsub",
    labels = ["topic"])

proc init*(_: type[GossipSubParams]): GossipSubParams =
  ## Returns the default router parameters.
  GossipSubParams(
    explicit: true,
    pruneBackoff: 1.minutes,
    floodPublish: true,
    gossipFactor: 0.25,
    dScore: 4,
    dOut: GossipSubDlo - 1,
    dLazy: GossipSubD,
    gossipThreshold: -10,
    publishThreshold: -100,
    graylistThreshold: -10000,
    opportunisticGraftThreshold: 1,
    decayInterval: 1.seconds,
    decayToZero: 0.01,
    retainScore: 10.seconds,
    appSpecificWeight: 1.0,
    ipColocationFactorWeight: 0.0,
    ipColocationFactorThreshold: 1.0,
    behaviourPenaltyWeight: -1.0,
    behaviourPenaltyDecay: 0.999,
  )

proc validateParameters*(parameters: GossipSubParams): Result[void, cstring] =
  ## Checks router parameters; returns `err` describing the first
  ## violated constraint, `ok()` when all checks pass.
  if (parameters.dOut >= GossipSubDlo) or
     (parameters.dOut > (GossipSubD div 2)):
    err("gossipsub: dOut parameter error, Number of outbound connections to keep in the mesh. Must be less than D_lo and at most D/2")
  elif parameters.gossipThreshold >= 0:
    err("gossipsub: gossipThreshold parameter error, Must be < 0")
  elif parameters.publishThreshold >= parameters.gossipThreshold:
    err("gossipsub: publishThreshold parameter error, Must be < gossipThreshold")
  elif parameters.graylistThreshold >= parameters.publishThreshold:
    err("gossipsub: graylistThreshold parameter error, Must be < publishThreshold")
  elif parameters.acceptPXThreshold < 0:
    err("gossipsub: acceptPXThreshold parameter error, Must be >= 0")
  elif parameters.opportunisticGraftThreshold < 0:
    err("gossipsub: opportunisticGraftThreshold parameter error, Must be >= 0")
  elif parameters.decayToZero > 0.5 or parameters.decayToZero <= 0.0:
    err("gossipsub: decayToZero parameter error, Should be close to 0.0")
  elif parameters.appSpecificWeight < 0:
    err("gossipsub: appSpecificWeight parameter error, Must be positive")
  elif parameters.ipColocationFactorWeight > 0:
    err("gossipsub: ipColocationFactorWeight parameter error, Must be negative or 0")
  elif parameters.ipColocationFactorThreshold < 1.0:
    err("gossipsub: ipColocationFactorThreshold parameter error, Must be at least 1")
  elif parameters.behaviourPenaltyWeight >= 0:
    err("gossipsub: behaviourPenaltyWeight parameter error, Must be negative")
  elif parameters.behaviourPenaltyDecay < 0 or parameters.behaviourPenaltyDecay >= 1:
    err("gossipsub: behaviourPenaltyDecay parameter error, Must be between 0 and 1")
  else:
    ok()

proc init*(_: type[TopicParams]): TopicParams =
  ## Returns the default per-topic scoring parameters.
  TopicParams(
    topicWeight: 1.0,
    timeInMeshWeight: 0.01,
    timeInMeshQuantum: 1.seconds,
    timeInMeshCap: 10.0,
    firstMessageDeliveriesWeight: 1.0,
    firstMessageDeliveriesDecay: 0.5,
    firstMessageDeliveriesCap: 10.0,
    meshMessageDeliveriesWeight: -1.0,
    meshMessageDeliveriesDecay: 0.5,
    meshMessageDeliveriesCap: 10,
    meshMessageDeliveriesThreshold: 1,
    meshMessageDeliveriesWindow: 5.milliseconds,
    meshMessageDeliveriesActivation: 10.seconds,
    meshFailurePenaltyWeight: -1.0,
    meshFailurePenaltyDecay: 0.5,
    invalidMessageDeliveriesWeight: -1.0,
    invalidMessageDeliveriesDecay: 0.5
  )

proc validateParameters*(parameters: TopicParams): Result[void, cstring] =
  ## Checks per-topic scoring parameters; returns `err` describing the
  ## first violated constraint, `ok()` when all checks pass.
  if parameters.timeInMeshWeight <= 0.0 or parameters.timeInMeshWeight > 1.0:
    err("gossipsub: timeInMeshWeight parameter error, Must be a small positive value")
  elif parameters.timeInMeshCap <= 0.0:
    err("gossipsub: timeInMeshCap parameter error, Should be a positive value")
  elif parameters.firstMessageDeliveriesWeight <= 0.0:
    err("gossipsub: firstMessageDeliveriesWeight parameter error, Should be a positive value")
  elif parameters.meshMessageDeliveriesWeight >= 0.0:
    err("gossipsub: meshMessageDeliveriesWeight parameter error, Should be a negative value")
  elif parameters.meshMessageDeliveriesThreshold <= 0.0:
    err("gossipsub: meshMessageDeliveriesThreshold parameter error, Should be a positive value")
  elif parameters.meshMessageDeliveriesCap < parameters.meshMessageDeliveriesThreshold:
    err("gossipsub: meshMessageDeliveriesCap parameter error, Should be >= meshMessageDeliveriesThreshold")
  elif parameters.meshMessageDeliveriesWindow > 100.milliseconds:
    err("gossipsub: meshMessageDeliveriesWindow parameter error, Should be small, 1-5ms")
  elif parameters.meshFailurePenaltyWeight >= 0.0:
    err("gossipsub: meshFailurePenaltyWeight parameter error, Should be a negative value")
  elif parameters.invalidMessageDeliveriesWeight >= 0.0:
    err("gossipsub: invalidMessageDeliveriesWeight parameter error, Should be a negative value")
  else:
    ok()

method init*(g: GossipSub) =
  ## Installs the connection handler and registers both gossipsub codecs.
  proc handler(conn: Connection, proto: string) {.async.} =
    ## main protocol handler that gets triggered on every
    ## connection for a protocol string
    ## e.g. ``/floodsub/1.0.0``, etc...
    ##
    try:
      await g.handleConn(conn, proto)
    except CancelledError:
      # This is top-level procedure which will work as separate task, so it
      # do not need to propogate CancelledError.
      trace "Unexpected cancellation in gossipsub handler", conn
    except CatchableError as exc:
      trace "GossipSub handler leaks an error", exc = exc.msg, conn

  g.handler = handler
  g.codecs &= GossipSubCodec
  g.codecs &= GossipSubCodec_10

method onNewPeer(g: GossipSub, peer: PubSubPeer) =
  ## Creates score stats and fresh IWANT/IHAVE budgets for a peer that
  ## has no `peerStats` entry yet; known peers are left untouched.
  if peer notin g.peerStats:
    # new peer
    g.peerStats[peer] = PeerStats()
    peer.iWantBudget = IWantPeerBudget
    peer.iHaveBudget = IHavePeerBudget
    return
  else:
    # we knew this peer
    discard

proc grafted(g: GossipSub, p: PubSubPeer, topic: string) =
  ## Marks `p` as in-mesh for `topic` and restarts its mesh timers.
  ## Asserts when `p` has no `peerStats` entry.
  g.peerStats.withValue(p, stats) do:
    var info = stats.topicInfos.getOrDefault(topic)
    info.graftTime = Moment.now()
    info.meshTime = 0.seconds
    info.inMesh = true
    info.meshMessageDeliveriesActive = false

    # mgetOrPut does not work, so we gotta do this without referencing
    stats.topicInfos[topic] = info
    assert(g.peerStats[p].topicInfos[topic].inMesh == true)

    trace "grafted", p
  do:
    doAssert(false, "grafted: peerStats key not found for " & $p)

proc pruned(g: GossipSub, p: PubSubPeer, topic: string) =
  ## Marks `p` as out of the mesh for `topic`, applying the mesh failure
  ## penalty when it under-delivered. Asserts when `p` has no `peerStats`
  ## entry; `stats.topicInfos[topic]` requires an existing topic entry.
  g.peerStats.withValue(p, stats) do:
    when not defined(release):
      g.prunedPeers.incl(p)

    var info = stats.topicInfos[topic]
    let topicParams = g.topicParams.mgetOrPut(topic, TopicParams.init())

    # penalize a peer that delivered no message
    let threshold = topicParams.meshMessageDeliveriesThreshold
    if info.inMesh and info.meshMessageDeliveriesActive and info.meshMessageDeliveries < threshold:
      let deficit = threshold - info.meshMessageDeliveries
      info.meshFailurePenalty += deficit * deficit

    info.inMesh = false

    # mgetOrPut does not work, so we gotta do this without referencing
    stats.topicInfos[topic] = info

    trace "pruned", p
  do:
    when not defined(release):
      if p in g.prunedPeers:
        doAssert(false, "pruned: Dupe prune " & $p)
      else:
        doAssert(false, "pruned: TopicInfo key not found for " & $p)
    else:
      doAssert(false, "pruned: TopicInfo key not found for " & $p)

proc peerExchangeList(g: GossipSub, topic: string): seq[PeerInfoMsg] =
  ## Returns up to `2 * GossipSubDhi` non-negative-score peers subscribed
  ## to `topic`, to be attached to outgoing PRUNE messages.
  var peers = g.gossipsub.getOrDefault(topic, initHashSet[PubSubPeer]()).toSeq()
  peers.keepIf do (x: PubSubPeer) -> bool:
    x.score >= 0.0
  # by spec, larger then Dhi, but let's put some hard caps
  peers.setLen(min(peers.len, GossipSubDhi * 2))
  peers.map do (x: PubSubPeer) -> PeerInfoMsg:
    PeerInfoMsg(peerID: x.peerId.getBytes())

proc replenishFanout(g: GossipSub, topic: string) =
  ## get fanout peers for a topic
  logScope: topic
  trace "about to replenish fanout"

  if g.fanout.peers(topic) < GossipSubDLo:
    trace "replenishing fanout", peers = g.fanout.peers(topic)
    if topic in g.gossipsub:
      for peer in g.gossipsub[topic]:
        if g.fanout.addPeer(topic, peer):
          if g.fanout.peers(topic) == GossipSubD:
            break

  when defined(libp2p_expensive_metrics):
    libp2p_gossipsub_peers_per_topic_fanout
      .set(g.fanout.peers(topic).int64, labelValues = [topic])

  trace "fanout replenished with peers", peers = g.fanout.peers(topic)

proc byScore(x, y: PubSubPeer): int =
  ## Comparator ordering peers by their current score.
  system.cmp(x.score, y.score)

proc rebalanceMesh(g: GossipSub, topic: string) {.async.} =
  ## Brings the mesh for `topic` back within bounds: grafts when below
  ## `GossipSubDlo`, prunes when above `GossipSubDhi`, applies
  ## opportunistic grafting, then sends GRAFT/PRUNE to exactly the peers
  ## whose mesh membership changed.
  logScope:
    topic
    mesh = g.mesh.peers(topic)
    gossipsub = g.gossipsub.peers(topic)

  trace "rebalancing mesh"

  # create a mesh topic that we're subscribing to

  var
    grafts: seq[PubSubPeer]   # graft candidates of the current step
    prunes: seq[PubSubPeer]   # peers actually removed from the mesh
    grafting: seq[PubSubPeer] # peers actually added to the mesh

  let npeers = g.mesh.peers(topic)
  if npeers < GossipSubDlo:
    trace "replenishing mesh", peers = g.mesh.peers(topic)
    # replenish the mesh if we're below Dlo
    grafts = toSeq(
      g.gossipsub.getOrDefault(topic, initHashSet[PubSubPeer]()) -
      g.mesh.getOrDefault(topic, initHashSet[PubSubPeer]())
    )

    grafts.keepIf do (x: PubSubPeer) -> bool:
      # avoid negative score peers
      x.score >= 0.0 and
      # don't pick explicit peers
      x.peerId notin g.parameters.directPeers and
      # and avoid peers we are backing off
      x.peerId notin g.backingOff

    # sort peers by score, best first: the truncation below keeps the
    # head of the list, so the highest scored candidates must lead
    grafts.sort(byScore, SortOrder.Descending)

    # Graft peers so we reach a count of D
    grafts.setLen(min(grafts.len, GossipSubD - g.mesh.peers(topic)))

    trace "grafting", grafts = grafts.len

    for peer in grafts:
      if g.mesh.addPeer(topic, peer):
        g.grafted(peer, topic)
        g.fanout.removePeer(topic, peer)
        grafting &= peer

  elif npeers < g.parameters.dOut:
    # NOTE(review): `validateParameters` requires dOut < GossipSubDlo, so
    # with validated parameters this branch cannot be reached after the
    # `npeers < GossipSubDlo` test above - confirm intended behaviour.
    trace "replenishing mesh outbound quota", peers = g.mesh.peers(topic)
    # replenish the outbound quota of the mesh
    grafts = toSeq(
      g.gossipsub.getOrDefault(topic, initHashSet[PubSubPeer]()) -
      g.mesh.getOrDefault(topic, initHashSet[PubSubPeer]())
    )

    grafts.keepIf do (x: PubSubPeer) -> bool:
      # get only outbound ones
      x.outbound and
      # avoid negative score peers
      x.score >= 0.0 and
      # don't pick explicit peers
      x.peerId notin g.parameters.directPeers and
      # and avoid peers we are backing off
      x.peerId notin g.backingOff

    # sort peers by score, best first (see above)
    grafts.sort(byScore, SortOrder.Descending)

    # Graft peers so we reach a count of dOut
    grafts.setLen(min(grafts.len, g.parameters.dOut - g.mesh.peers(topic)))

    trace "grafting outbound peers", topic, peers = grafts.len

    for peer in grafts:
      if g.mesh.addPeer(topic, peer):
        g.grafted(peer, topic)
        g.fanout.removePeer(topic, peer)
        grafting &= peer

  if g.mesh.peers(topic) > GossipSubDhi:
    # prune peers if we've gone over Dhi
    var candidates = toSeq(g.mesh[topic])

    # sort peers by score, worst first: the truncation below drops the
    # tail, which must hold the best scored peers we want to keep
    candidates.sort(byScore, SortOrder.Ascending)

    # keep high score peers
    if candidates.len > g.parameters.dScore:
      candidates.setLen(candidates.len - g.parameters.dScore)

      # we must try to keep outbound peers
      # to keep an outbound mesh quota
      # so we try to first prune inbound peers
      # if none we add up some outbound
      var outbound: seq[PubSubPeer]
      var inbound: seq[PubSubPeer]
      for peer in candidates:
        if peer.outbound:
          outbound &= peer
        else:
          inbound &= peer

      let pruneLen = inbound.len - GossipSubD
      if pruneLen > 0:
        # Ok we got some peers to prune,
        # for this heartbeat let's prune those
        shuffle(inbound)
        inbound.setLen(pruneLen)
      else:
        # We could not find any inbound to prune
        # Yet we are on Hi, so we need to cull outbound peers
        let keepDOutLen = outbound.len - g.parameters.dOut
        if keepDOutLen > 0:
          shuffle(outbound)
          outbound.setLen(keepDOutLen)
          inbound &= outbound

      trace "pruning", prunes = inbound.len
      for peer in inbound:
        g.pruned(peer, topic)
        g.mesh.removePeer(topic, peer)

      # only these peers left the mesh, only they must receive a PRUNE
      prunes = inbound

  # opportunistic grafting, by spec mesh should not be empty...
  if g.mesh.peers(topic) > 1:
    var peers = toSeq(g.mesh[topic])
    peers.sort(byScore, SortOrder.Ascending)
    let medianIdx = peers.len div 2
    let median = peers[medianIdx]
    if median.score < g.parameters.opportunisticGraftThreshold:
      trace "median score below opportunistic threshold", score = median.score
      var avail = toSeq(
        g.gossipsub.getOrDefault(topic, initHashSet[PubSubPeer]()) -
        g.mesh.getOrDefault(topic, initHashSet[PubSubPeer]())
      )

      avail.keepIf do (x: PubSubPeer) -> bool:
        # avoid negative score peers
        x.score >= median.score and
        # don't pick explicit peers
        x.peerId notin g.parameters.directPeers and
        # and avoid peers we are backing off
        x.peerId notin g.backingOff

      # by spec, grab only 2
      if avail.len > 2:
        avail.setLen(2)

      for peer in avail:
        if g.mesh.addPeer(topic, peer):
          g.grafted(peer, topic)
          grafting &= peer
          trace "opportunistic grafting", peer = $peer

  when defined(libp2p_expensive_metrics):
    libp2p_gossipsub_peers_per_topic_gossipsub
      .set(g.gossipsub.peers(topic).int64, labelValues = [topic])

    libp2p_gossipsub_peers_per_topic_fanout
      .set(g.fanout.peers(topic).int64, labelValues = [topic])

    libp2p_gossipsub_peers_per_topic_mesh
      .set(g.mesh.peers(topic).int64, labelValues = [topic])

  trace "mesh balanced"

  # Send changes to peers after table updates to avoid stale state
  if grafting.len > 0:
    let graft = RPCMsg(control: some(ControlMessage(graft: @[ControlGraft(topicID: topic)])))
    g.broadcast(grafting, graft)
  if prunes.len > 0:
    let prune = RPCMsg(control: some(ControlMessage(
      prune: @[ControlPrune(
        topicID: topic,
        peers: g.peerExchangeList(topic),
        backoff: g.parameters.pruneBackoff.seconds.uint64)])))
    g.broadcast(prunes, prune)

proc dropFanoutPeers(g: GossipSub) =
  # drop peers that we haven't published to in
  # GossipSubFanoutTTL seconds
  let now = Moment.now()
  for topic in toSeq(g.lastFanoutPubSub.keys):
    let val = g.lastFanoutPubSub[topic]
    if now > val:
      g.fanout.del(topic)
      g.lastFanoutPubSub.del(topic)
      trace "dropping fanout topic", topic

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_fanout
        .set(g.fanout.peers(topic).int64, labelValues = [topic])

proc getGossipPeers(g: GossipSub): Table[PubSubPeer, ControlMessage] {.gcsafe.} =
  ## gossip iHave messages to peers
  ##

  trace "getting gossip peers (iHave)"

  let topics = toHashSet(toSeq(g.mesh.keys)) + toHashSet(toSeq(g.fanout.keys))
  for topic in topics:
    if topic notin g.gossipsub:
      trace "topic not in gossip array, skipping", topicID = topic
      continue

    let mids = g.mcache.window(topic)
    # nothing cached for this topic, nothing to advertise
    # (`not mids.len > 0` would parse as `(not mids.len) > 0`)
    if mids.len == 0:
      continue

    let ihave = ControlIHave(topicID: topic, messageIDs: toSeq(mids))

    let mesh = g.mesh.getOrDefault(topic)
    let fanout = g.fanout.getOrDefault(topic)
    let gossipPeers = mesh + fanout
    var allPeers = toSeq(g.gossipsub.getOrDefault(topic))

    allPeers.keepIf do (x: PubSubPeer) -> bool:
      x.peerId notin g.parameters.directPeers and
      x notin gossipPeers and
      x.score >= g.parameters.gossipThreshold

    var target = g.parameters.dLazy
    let factor = (g.parameters.gossipFactor.float * allPeers.len.float).int
    if factor > target:
      target = min(factor, allPeers.len)

    if target < allPeers.len:
      shuffle(allPeers)
      allPeers.setLen(target)

    for peer in allPeers:
      if peer notin result:
        result[peer] = ControlMessage()
      result[peer].ihave.add(ihave)

func `/`(a, b: Duration): float64 =
  ## Ratio of two durations, computed on their values in seconds.
  let
    fa = float64(a.nanoseconds) / 1000000000
    fb = float64(b.nanoseconds) / 1000000000
  fa / fb

proc colocationFactor(g: GossipSub, peer: PubSubPeer): float64 =
  ## Squared surplus of peers recorded for `peer`'s observed address over
  ## `ipColocationFactorThreshold`; 0.0 when at or below the threshold
  ## (recording `peer` for that address) or when it has no connection.
  if peer.connections.len == 0:
    0.0
  else:
    let
      # TODO, we are just using the first connections for now
      address = peer.connections[0].observedAddr
      ipPeers = g.peersInIP.getOrDefault(address)
      len = ipPeers.len.float64
    if len > g.parameters.ipColocationFactorThreshold:
      let over = len - g.parameters.ipColocationFactorThreshold
      over * over
    else:
      # lazy update peersInIP
      if address notin g.peersInIP:
        g.peersInIP[address] = initHashSet[PubSubPeer]()
      g.peersInIP[address].incl(peer)
      0.0

proc updateScores(g: GossipSub) = # avoid async
  ## Adds this round's per-topic and global score components to every
  ## connected peer, decays the counters, and evicts stats of
  ## disconnected peers whose retention time expired.
  ##
  ## NOTE(review): `peer.score` is only ever incremented here and never
  ## reset, so it accumulates across calls - confirm this is intended.
  trace "updating scores", peers = g.peers.len

  let now = Moment.now()
  var evicting: seq[PubSubPeer]

  for peer, stats in g.peerStats.mpairs:
    trace "updating peer score", peer
    if not peer.connected:
      if now > stats.expire:
        evicting.add(peer)
        trace "evicted peer from memory", peer
      continue

    # Per topic
    for topic, topicParams in g.topicParams:
      var info = stats.topicInfos.getOrDefault(topic)

      # Scoring
      var topicScore = 0'f64

      if info.inMesh:
        info.meshTime = now - info.graftTime
        if info.meshTime > topicParams.meshMessageDeliveriesActivation:
          info.meshMessageDeliveriesActive = true

        # TODO verify this `/` accuracy/correctnes
        var p1 = info.meshTime / topicParams.timeInMeshQuantum
        if p1 > topicParams.timeInMeshCap:
          p1 = topicParams.timeInMeshCap
        trace "p1", peer, p1
        topicScore += p1 * topicParams.timeInMeshWeight
      else:
        info.meshMessageDeliveriesActive = false

      topicScore += info.firstMessageDeliveries * topicParams.firstMessageDeliveriesWeight
      trace "p2", peer, p2 = info.firstMessageDeliveries

      if info.meshMessageDeliveriesActive:
        if info.meshMessageDeliveries < topicParams.meshMessageDeliveriesThreshold:
          let deficit = topicParams.meshMessageDeliveriesThreshold - info.meshMessageDeliveries
          let p3 = deficit * deficit
          trace "p3", peer, p3
          topicScore += p3 * topicParams.meshMessageDeliveriesWeight

      topicScore += info.meshFailurePenalty * topicParams.meshFailurePenaltyWeight
      trace "p3b", peer, p3b = info.meshFailurePenalty

      topicScore += info.invalidMessageDeliveries * info.invalidMessageDeliveries * topicParams.invalidMessageDeliveriesWeight
      trace "p4", p4 = info.invalidMessageDeliveries * info.invalidMessageDeliveries

      trace "updated peer topic's scores", peer, topic, info, topicScore

      peer.score += topicScore * topicParams.topicWeight

      # Score decay
      info.firstMessageDeliveries *= topicParams.firstMessageDeliveriesDecay
      if info.firstMessageDeliveries < g.parameters.decayToZero:
        info.firstMessageDeliveries = 0

      info.meshMessageDeliveries *= topicParams.meshMessageDeliveriesDecay
      if info.meshMessageDeliveries < g.parameters.decayToZero:
        info.meshMessageDeliveries = 0

      info.meshFailurePenalty *= topicParams.meshFailurePenaltyDecay
      if info.meshFailurePenalty < g.parameters.decayToZero:
        info.meshFailurePenalty = 0

      info.invalidMessageDeliveries *= topicParams.invalidMessageDeliveriesDecay
      if info.invalidMessageDeliveries < g.parameters.decayToZero:
        info.invalidMessageDeliveries = 0

      # Wrap up
      # commit our changes, mgetOrPut does NOT work as wanted with value types (lent?)
      stats.topicInfos[topic] = info

    peer.score += peer.appScore * g.parameters.appSpecificWeight

    peer.score += peer.behaviourPenalty * peer.behaviourPenalty * g.parameters.behaviourPenaltyWeight

    peer.score += g.colocationFactor(peer) * g.parameters.ipColocationFactorWeight

    # decay behaviourPenalty
    peer.behaviourPenalty *= g.parameters.behaviourPenaltyDecay
    if peer.behaviourPenalty < g.parameters.decayToZero:
      peer.behaviourPenalty = 0

    trace "updated peer's score", peer, score = peer.score

  for peer in evicting:
    g.peerStats.del(peer)

proc heartbeat(g: GossipSub) {.async.} =
  ## Periodic maintenance loop, running while `heartbeatRunning` is set:
  ## expires backoffs, resets budgets, updates scores, prunes negative
  ## score mesh peers, rebalances meshes and fanout, emits gossip and
  ## shifts the message cache.
  while g.heartbeatRunning:
    try:
      trace "running heartbeat", instance = cast[int](g)

      # remove expired backoffs
      block:
        let now = Moment.now()
        var expired = toSeq(g.backingOff.pairs())
        expired.keepIf do (pair: tuple[peer: PeerID, expire: Moment]) -> bool:
          now >= pair.expire
        for (peer, _) in expired:
          g.backingOff.del(peer)

      # reset IWANT budget
      # reset IHAVE cap
      block:
        for peer in g.peers.values:
          peer.iWantBudget = IWantPeerBudget
          peer.iHaveBudget = IHavePeerBudget

      g.updateScores()

      for t in toSeq(g.topics.keys):
        # prune every negative score peer
        # do this before relance
        # in order to avoid grafted -> pruned in the same cycle
        let meshPeers = g.mesh.getOrDefault(t)
        var prunes: seq[PubSubPeer]
        for peer in meshPeers:
          if peer.score < 0.0:
            g.pruned(peer, t)
            g.mesh.removePeer(t, peer)
            prunes &= peer
        # skip building the peer exchange list when nobody was pruned
        if prunes.len > 0:
          let prune = RPCMsg(control: some(ControlMessage(
            prune: @[ControlPrune(
              topicID: t,
              peers: g.peerExchangeList(t),
              backoff: g.parameters.pruneBackoff.seconds.uint64)])))
          g.broadcast(prunes, prune)

        await g.rebalanceMesh(t)

      g.dropFanoutPeers()

      # replenish known topics to the fanout
      for t in toSeq(g.fanout.keys):
        g.replenishFanout(t)

      let peers = g.getGossipPeers()
      for peer, control in peers:
        g.peers.withValue(peer.peerId, pubsubPeer) do:
          g.send(
            pubsubPeer[],
            RPCMsg(control: some(control)))

      g.mcache.shift() # shift the cache
    except CancelledError as exc:
      raise exc
    except CatchableError as exc:
      warn "exception ocurred in gossipsub heartbeat", exc = exc.msg,
                                                       trace = exc.getStackTrace()
      assert(false, "exception ocurred in gossipsub heartbeat")

    for trigger in g.heartbeatEvents:
      trace "firing heartbeat event", instance = cast[int](g)
      trigger.fire()

    await sleepAsync(GossipSubHeartbeatInterval)

method unsubscribePeer*(g: GossipSub, peer: PeerID) =
  ## handle peer disconnects
  ##

  trace "unsubscribing gossipsub peer", peer
  let pubSubPeer = g.peers.getOrDefault(peer)
  if pubSubPeer.isNil:
    trace "no peer to unsubscribe", peer
    return

  # remove from peer IPs collection too
  if pubSubPeer.connections.len > 0:
    # TODO, we are just using the first connections for now
    g.peersInIP.withValue(pubSubPeer.connections[0].observedAddr, s) do:
      s[].excl(pubSubPeer)

  for t in toSeq(g.gossipsub.keys):
    g.gossipsub.removePeer(t, pubSubPeer)
    # also try to remove from explicit table here
    g.explicit.removePeer(t, pubSubPeer)

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_gossipsub
        .set(g.gossipsub.peers(t).int64, labelValues = [t])

  for t in toSeq(g.mesh.keys):
    if pubSubPeer in g.mesh[t]:
      g.pruned(pubSubPeer, t)
      g.mesh.removePeer(t, pubSubPeer)

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_mesh
        .set(g.mesh.peers(t).int64, labelValues = [t])

  for t in toSeq(g.fanout.keys):
    g.fanout.removePeer(t, pubSubPeer)

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_fanout
        .set(g.fanout.peers(t).int64, labelValues = [t])

  if pubSubPeer.score < 0.0:
    # don't retain bad score peers
    g.peerStats.del(pubSubPeer)
  else:
    # the peer may never have received stats (e.g. it never subscribed),
    # so look the entry up instead of indexing the table
    g.peerStats.withValue(pubSubPeer, stats) do:
      stats[].expire = Moment.now() + g.parameters.retainScore
      for topic, info in stats[].topicInfos.mpairs:
        info.firstMessageDeliveries = 0

  # always let the base class finish its own cleanup
  procCall FloodSub(g).unsubscribePeer(peer)

method subscribeTopic*(g: GossipSub,
                       topic: string,
                       subscribe: bool,
                       peer: PubSubPeer) {.gcsafe.} =
  ## Records a remote peer's (un)subscription to `topic` in the
  ## gossipsub/explicit tables; on unsubscribe also removes it from the
  ## mesh and fanout of that topic.
  # Skip floodsub - we don't want it to add the peer to `g.floodsub`
  procCall PubSub(g).subscribeTopic(topic, subscribe, peer)

  logScope:
    peer
    topic

  g.onNewPeer(peer)

  if subscribe:
    trace "peer subscribed to topic"
    # subscribe remote peer to the topic
    discard g.gossipsub.addPeer(topic, peer)
    if peer.peerId in g.parameters.directPeers:
      discard g.explicit.addPeer(topic, peer)
  else:
    trace "peer unsubscribed from topic"
    # unsubscribe remote peer from the topic
    g.gossipsub.removePeer(topic, peer)
    g.mesh.removePeer(topic, peer)
    g.fanout.removePeer(topic, peer)
    if peer.peerId in g.parameters.directPeers:
      g.explicit.removePeer(topic, peer)

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_mesh
        .set(g.mesh.peers(topic).int64, labelValues = [topic])
      libp2p_gossipsub_peers_per_topic_fanout
        .set(g.fanout.peers(topic).int64, labelValues = [topic])

  when defined(libp2p_expensive_metrics):
    libp2p_gossipsub_peers_per_topic_gossipsub
      .set(g.gossipsub.peers(topic).int64, labelValues = [topic])

  trace "gossip peers", peers = g.gossipsub.peers(topic), topic

proc handleGraft(g: GossipSub,
                 peer: PubSubPeer,
                 grafts: seq[ControlGraft]): seq[ControlPrune] =
  ## Processes GRAFT requests from `peer`; returns the PRUNE replies for
  ## the requests that were rejected.
  for graft in grafts:
    let topic = graft.topicID
    logScope:
      peer
      topic

    trace "peer grafted topic"

    # It is an error to GRAFT on a explicit peer
    if peer.peerId in g.parameters.directPeers:
      trace "attempt to graft an explicit peer", peer=peer.id,
                                                 topicID=graft.topicID
      # and such an attempt should be logged and rejected with a PRUNE
      result.add(ControlPrune(
        topicID: graft.topicID,
        peers: @[], # omitting heavy computation here as the remote did something illegal
        backoff: g.parameters.pruneBackoff.seconds.uint64))
      continue

    if peer.peerId in g.backingOff:
      trace "attempt to graft an backingOff peer", peer=peer.id,
                                                   topicID=graft.topicID,
                                                   expire=g.backingOff[peer.peerId]
      # and such an attempt should be logged and rejected with a PRUNE
      result.add(ControlPrune(
        topicID: graft.topicID,
        peers: @[], # omitting heavy computation here as the remote did something illegal
        backoff: g.parameters.pruneBackoff.seconds.uint64))
      continue

    if peer notin g.peerStats:
      g.peerStats[peer] = PeerStats()

    # If they send us a graft before they send us a subscribe, what should
    # we do? For now, we add them to mesh but don't add them to gossipsub.
    if topic in g.topics:
      if g.mesh.peers(topic) < GossipSubDHi or peer.outbound:
        # In the spec, there's no mention of DHi here, but implicitly, a
        # peer will be removed from the mesh on next rebalance, so we don't want
        # this peer to push someone else out
        if g.mesh.addPeer(topic, peer):
          g.grafted(peer, topic)
          g.fanout.removePeer(topic, peer)
        else:
          trace "peer already in mesh"
      else:
        result.add(ControlPrune(
          topicID: topic,
          peers: g.peerExchangeList(topic),
          backoff: g.parameters.pruneBackoff.seconds.uint64))
    else:
      trace "peer grafting topic we're not interested in", topic
      # gossip 1.1, we do not send a control message prune anymore

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_mesh
        .set(g.mesh.peers(topic).int64, labelValues = [topic])
      libp2p_gossipsub_peers_per_topic_fanout
        .set(g.fanout.peers(topic).int64, labelValues = [topic])

proc handlePrune(g: GossipSub, peer: PubSubPeer, prunes: seq[ControlPrune]) =
  ## Processes PRUNE messages from `peer`: records the requested backoff
  ## and removes the peer from the mesh of each pruned topic.
  for prune in prunes:
    trace "peer pruned topic", peer, topic = prune.topicID

    # add peer backoff
    if prune.backoff > 0:
      let backoff = Moment.fromNow((prune.backoff + BackoffSlackTime).int64.seconds)
      let current = g.backingOff.getOrDefault(peer.peerId)
      if backoff > current:
        g.backingOff[peer.peerId] = backoff

    # PRUNE content is remote controlled: `pruned` asserts on peers
    # without stats and indexes the topic entry, so only account for
    # peers that really are in our mesh for this topic
    if peer in g.mesh.getOrDefault(prune.topicID):
      g.pruned(peer, prune.topicID)
      g.mesh.removePeer(prune.topicID, peer)

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_mesh
        .set(g.mesh.peers(prune.topicID).int64, labelValues = [prune.topicID])

proc handleIHave(g: GossipSub,
                 peer: PubSubPeer,
                 ihaves: seq[ControlIHave]): ControlIWant =
  ## Returns an IWANT listing the advertised message ids that we have not
  ## seen yet, for topics we have a mesh for. Peers below
  ## `gossipThreshold` or out of IHAVE budget are ignored.
  if peer.score < g.parameters.gossipThreshold:
    trace "ihave: ignoring low score peer", peer = $peer, score = peer.score
  elif peer.iHaveBudget == 0:
    trace "ihave: ignoring out of budget peer", peer = $peer, score = peer.score
  else:
    dec peer.iHaveBudget
    for ihave in ihaves:
      trace "peer sent ihave", peer, topic = ihave.topicID, msgs = ihave.messageIDs

      if ihave.topicID in g.mesh:
        for m in ihave.messageIDs:
          if m notin g.seen:
            result.messageIDs.add(m)

proc handleIWant(g: GossipSub,
                 peer: PubSubPeer,
                 iwants: seq[ControlIWant]): seq[Message] =
  ## Returns the cached messages requested by `peer`, stopping once its
  ## IWANT budget is used up. Peers below `gossipThreshold` are ignored.
  if peer.score < g.parameters.gossipThreshold:
    trace "iwant: ignoring low score peer", peer = $peer, score = peer.score
  else:
    for iwant in iwants:
      for mid in iwant.messageIDs:
        trace "peer sent iwant", peer, messageID = mid
        let msg = g.mcache.get(mid)
        if msg.isSome:
          # avoid spam
          if peer.iWantBudget > 0:
            result.add(msg.get())
            dec peer.iWantBudget
          else:
            return

proc punishPeer(g: GossipSub, peer: PubSubPeer, msg: Message) =
  ## Counts one invalid delivery against `peer` for each topic of `msg`.
  for t in msg.topicIDs:
    # ensure we init a new topic if unknown
    let _ = g.topicParams.mgetOrPut(t, TopicParams.init())
    # update stats
    var tstats = g.peerStats[peer].topicInfos.getOrDefault(t)
    tstats.invalidMessageDeliveries += 1
    g.peerStats[peer].topicInfos[t] = tstats

method rpcHandler*(g: GossipSub,
                   peer: PubSubPeer,
                   rpcMsg: RPCMsg) {.async.} =
  ## Handles an incoming RPC: scores, validates and forwards its
  ## messages, then processes its control section and replies.
  await procCall PubSub(g).rpcHandler(peer, rpcMsg)

  for msg in rpcMsg.messages: # for every message
    let msgId = g.msgIdProvider(msg)

    if g.seen.put(msgId):
      trace "Dropping already-seen message", msgId, peer

      # make sure to update score tho before continuing
      for t in msg.topicIDs: # for every topic in the message
        let topicParams = g.topicParams.mgetOrPut(t, TopicParams.init())
        # if in mesh add more delivery score
        var stats = g.peerStats[peer].topicInfos.getOrDefault(t)
        if stats.inMesh:
          stats.meshMessageDeliveries += 1
          if stats.meshMessageDeliveries > topicParams.meshMessageDeliveriesCap:
            stats.meshMessageDeliveries = topicParams.meshMessageDeliveriesCap

        # commit back to the table
        g.peerStats[peer].topicInfos[t] = stats

      continue

    g.mcache.put(msgId, msg)

    if g.verifySignature and not msg.verify(peer.peerId):
      debug "Dropping message due to failed signature verification", msgId, peer
      g.punishPeer(peer, msg)
      continue

    if not (await g.validate(msg)):
      trace "Dropping message due to failed validation", msgId, peer
      g.punishPeer(peer, msg)
      continue

    var toSendPeers = initHashSet[PubSubPeer]()
    for t in msg.topicIDs: # for every topic in the message
      let topicParams = g.topicParams.mgetOrPut(t, TopicParams.init())

      # contribute to peer score first delivery
      var stats = g.peerStats[peer].topicInfos.getOrDefault(t)
      stats.firstMessageDeliveries += 1
      if stats.firstMessageDeliveries > topicParams.firstMessageDeliveriesCap:
        stats.firstMessageDeliveries = topicParams.firstMessageDeliveriesCap

      # if in mesh add more delivery score
      if stats.inMesh:
        stats.meshMessageDeliveries += 1
        if stats.meshMessageDeliveries > topicParams.meshMessageDeliveriesCap:
          stats.meshMessageDeliveries = topicParams.meshMessageDeliveriesCap

      # commit back to the table
      g.peerStats[peer].topicInfos[t] = stats

      g.floodsub.withValue(t, peers): toSendPeers.incl(peers[])
      g.mesh.withValue(t, peers): toSendPeers.incl(peers[])

      await handleData(g, t, msg.data)

    # In theory, if topics are the same in all messages, we could batch - we'd
    # also have to be careful to only include validated messages
    g.broadcast(toSeq(toSendPeers), RPCMsg(messages: @[msg]))
    trace "forwared message to peers", peers = toSendPeers.len, msgId, peer

  if rpcMsg.control.isSome:
    let control = rpcMsg.control.get()
    g.handlePrune(peer, control.prune)

    var respControl: ControlMessage
    # only attach an IWANT that actually requests something
    let iwant = g.handleIHave(peer, control.ihave)
    if iwant.messageIDs.len > 0:
      respControl.iwant.add(iwant)
    respControl.prune.add(g.handleGraft(peer, control.graft))
    let messages = g.handleIWant(peer, control.iwant)

    # `iwant` is part of the test so that a reply to a pure IHAVE is sent
    if respControl.graft.len > 0 or respControl.prune.len > 0 or
      respControl.ihave.len > 0 or respControl.iwant.len > 0 or
      messages.len > 0:

      debug "sending control message", msg = shortLog(respControl), peer
      g.send(
        peer,
        RPCMsg(control: some(respControl), messages: messages))

method subscribe*(g: GossipSub,
                  topic: string,
                  handler: TopicHandler) {.async.} =
  ## Subscribes to `topic`, dropping any fanout kept for it and building
  ## its mesh.
  await procCall PubSub(g).subscribe(topic, handler)

  # if we have a fanout on this topic break it
  if topic in g.fanout:
    g.fanout.del(topic)

  await g.rebalanceMesh(topic)

method unsubscribe*(g: GossipSub,
                    topics: seq[TopicPair]) {.async.} =
  ## Removes the given handlers; for topics left without any handler the
  ## mesh is dropped and its peers are sent a PRUNE.
  await procCall PubSub(g).unsubscribe(topics)

  for (topic, handler) in topics:
    # delete from mesh only if no handlers are left
    if topic notin g.topics:
      if topic in g.mesh:
        let peers = g.mesh[topic]
        g.mesh.del(topic)
        for peer in peers:
          g.pruned(peer, topic)
        let prune = RPCMsg(control: some(ControlMessage(
          prune: @[ControlPrune(
            topicID: topic,
            peers: g.peerExchangeList(topic),
            backoff: g.parameters.pruneBackoff.seconds.uint64)])))
        g.broadcast(toSeq(peers), prune)

method unsubscribeAll*(g: GossipSub, topic: string) {.async.} =
  ## Removes every handler of `topic`, drops its mesh and sends a PRUNE
  ## to the former mesh peers.
  await procCall PubSub(g).unsubscribeAll(topic)

  if topic in g.mesh:
    let peers = g.mesh.getOrDefault(topic)
    g.mesh.del(topic)
    for peer in peers:
      g.pruned(peer, topic)
    let prune = RPCMsg(control: some(ControlMessage(
      prune: @[ControlPrune(
        topicID: topic,
        peers: g.peerExchangeList(topic),
        backoff: g.parameters.pruneBackoff.seconds.uint64)])))
    g.broadcast(toSeq(peers), prune)

method publish*(g: GossipSub,
                topic: string,
                data: seq[byte]): Future[int] {.async.} =
  ## Publishes `data` on `topic`. Returns the number of peers the message
  ## was broadcast to, or 0 when the topic is empty, no peer is available
  ## or the message id was already seen.
  # base returns always 0
  discard await procCall PubSub(g).publish(topic, data)

  logScope: topic
  trace "Publishing message on topic", data = data.shortLog

  if topic.len <= 0: # data could be 0/empty
    debug "Empty topic, skipping publish"
    return 0

  var peers: HashSet[PubSubPeer]

  if g.parameters.floodPublish:
    # With flood publishing enabled, the mesh is used when propagating messages from other peers,
    # but a peer's own messages will always be published to all known peers in the topic.
    for peer in g.gossipsub.getOrDefault(topic):
      if peer.score >= g.parameters.publishThreshold:
        trace "publish: including flood/high score peer", peer = $peer
        peers.incl(peer)

  # add always direct peers
  peers.incl(g.explicit.getOrDefault(topic))

  if topic in g.topics: # if we're subscribed use the mesh
    peers.incl(g.mesh.getOrDefault(topic))
  else: # not subscribed, send to fanout peers
    # try optimistically
    peers.incl(g.fanout.getOrDefault(topic))
    if peers.len == 0:
      # ok we had nothing.. let's try replenish inline
      g.replenishFanout(topic)
      peers.incl(g.fanout.getOrDefault(topic))

    # even if we couldn't publish,
    # we still attempted to publish
    # on the topic, so it makes sense
    # to update the last topic publish
    # time
    g.lastFanoutPubSub[topic] = Moment.fromNow(GossipSubFanoutTTL)

  if peers.len == 0:
    debug "No peers for topic, skipping publish"
    return 0

  inc g.msgSeqno
  let
    msg = Message.init(g.peerInfo, data, topic, g.msgSeqno, g.sign)
    msgId = g.msgIdProvider(msg)

  logScope: msgId

  trace "Created new message", msg = shortLog(msg), peers = peers.len

  if g.seen.put(msgId):
    # custom msgid providers might cause this
    trace "Dropping already-seen message"
    return 0

  g.mcache.put(msgId, msg)

  g.broadcast(toSeq(peers), RPCMsg(messages: @[msg]))
  when defined(libp2p_expensive_metrics):
    if peers.len > 0:
      libp2p_pubsub_messages_published.inc(labelValues = [topic])

  trace "Published message to peers"

  return peers.len

proc maintainDirectPeers(g: GossipSub) {.async.} =
  while g.heartbeatRunning:
    for id in g.parameters.directPeers:
      let peer = g.peers.getOrDefault(id)
      if peer == nil:
        # this creates a new
peer and assigns the current switch to it # as a result the next time we try to Send we will as well try to open a connection # see pubsubpeer.nim send and such discard g.getOrCreatePeer(id, g.codec) await sleepAsync(1.minutes) method start*(g: GossipSub) {.async.} = trace "gossipsub start" if not g.heartbeatFut.isNil: warn "Starting gossipsub twice" return g.heartbeatRunning = true g.heartbeatFut = g.heartbeat() g.directPeersLoop = g.maintainDirectPeers() method stop*(g: GossipSub) {.async.} = trace "gossipsub stop" if g.heartbeatFut.isNil: warn "Stopping gossipsub without starting it" return # stop heartbeat interval g.heartbeatRunning = false g.directPeersLoop.cancel() if not g.heartbeatFut.finished: trace "awaiting last heartbeat" await g.heartbeatFut trace "heartbeat stopped" g.heartbeatFut = nil method initPubSub*(g: GossipSub) = procCall FloodSub(g).initPubSub() if not g.parameters.explicit: g.parameters = GossipSubParams.init() g.parameters.validateParameters().tryGet() randomize() g.mcache = MCache.init(GossipSubHistoryGossip, GossipSubHistoryLength) g.mesh = initTable[string, HashSet[PubSubPeer]]() # meshes - topic to peer g.fanout = initTable[string, HashSet[PubSubPeer]]() # fanout - topic to peer g.gossipsub = initTable[string, HashSet[PubSubPeer]]()# topic to peer map of all gossipsub peers g.lastFanoutPubSub = initTable[string, Moment]() # last publish time for fanout topics g.gossip = initTable[string, seq[ControlIHave]]() # pending gossip g.control = initTable[string, ControlMessage]() # pending control messages