nim-libp2p/libp2p/protocols/pubsub/gossipsub.nim

## Nim-LibP2P
## Copyright (c) 2019 Status Research & Development GmbH
## Licensed under either of
##  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
##  * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.

import std/[tables, sets, options, sequtils, random, algorithm]
import chronos, chronicles, metrics
import ./pubsub,
       ./floodsub,
       ./pubsubpeer,
       ./peertable,
       ./mcache,
       ./timedcache,
       ./rpc/[messages, message],
       ../protocol,
       ../../stream/connection,
       ../../peerinfo,
       ../../peerid,
       ../../utility
import stew/results
export results

logScope:
  topics = "gossipsub"

const
  GossipSubCodec* = "/meshsub/1.1.0"
  GossipSubCodec_10* = "/meshsub/1.0.0"

# overlay parameters
const
  GossipSubD* = 6
  GossipSubDlo* = 4
  GossipSubDhi* = 12

# gossip parameters
const
  GossipSubHistoryLength* = 5
  GossipSubHistoryGossip* = 3
  GossipBackoffPeriod* = 1.minutes

# heartbeat interval
const
  GossipSubHeartbeatInitialDelay* = 100.millis
  GossipSubHeartbeatInterval* = 1.seconds

# fanout ttl
const
  GossipSubFanoutTTL* = 1.minutes

const
  BackoffSlackTime = 2 # seconds
  IWantPeerBudget = 25 # 25 messages per second ( reset every heartbeat )
  IHavePeerBudget = 10

type
  TopicInfo* = object
    # gossip 1.1 related
    graftTime: Moment
    meshTime: Duration
    inMesh: bool
    meshMessageDeliveriesActive: bool
    firstMessageDeliveries: float64
    meshMessageDeliveries: float64
    meshFailurePenalty: float64
    invalidMessageDeliveries: float64

  TopicParams* = object
    topicWeight*: float64

    # p1
    timeInMeshWeight*: float64
    timeInMeshQuantum*: Duration
    timeInMeshCap*: float64

    # p2
    firstMessageDeliveriesWeight*: float64
    firstMessageDeliveriesDecay*: float64
    firstMessageDeliveriesCap*: float64

    # p3
    meshMessageDeliveriesWeight*: float64
    meshMessageDeliveriesDecay*: float64
    meshMessageDeliveriesThreshold*: float64
    meshMessageDeliveriesCap*: float64
    meshMessageDeliveriesActivation*: Duration
    meshMessageDeliveriesWindow*: Duration

    # p3b
    meshFailurePenaltyWeight*: float64
    meshFailurePenaltyDecay*: float64

    # p4
    invalidMessageDeliveriesWeight*: float64
    invalidMessageDeliveriesDecay*: float64

  PeerStats* = object
    topicInfos*: Table[string, TopicInfo]
    expire*: Moment # updated on disconnect, to retain scores until expire

  GossipSubParams* = object
    explicit: bool
    pruneBackoff*: Duration
    floodPublish*: bool
    gossipFactor*: float64
    dScore*: int
    dOut*: int
    dLazy*: int

    gossipThreshold*: float64
    publishThreshold*: float64
    graylistThreshold*: float64
    acceptPXThreshold*: float64
    opportunisticGraftThreshold*: float64
    decayInterval*: Duration
    decayToZero*: float64
    retainScore*: Duration

    appSpecificWeight*: float64
    ipColocationFactorWeight*: float64
    ipColocationFactorThreshold*: float64
    behaviourPenaltyWeight*: float64
    behaviourPenaltyDecay*: float64

    directPeers*: seq[PeerId]

  GossipSub* = ref object of FloodSub
    mesh*: PeerTable                           # peers that we send messages to when we are subscribed to the topic
    fanout*: PeerTable                         # peers that we send messages to when we're not subscribed to the topic
    gossipsub*: PeerTable                      # peers that are subscribed to a topic
    explicit*: PeerTable                       # directpeers that we keep alive explicitly
    backingOff*: Table[PeerID, Moment]         # explicit (always connected/forward) peers
    lastFanoutPubSub*: Table[string, Moment]   # last publish time for fanout topics
    gossip*: Table[string, seq[ControlIHave]]  # pending gossip
    control*: Table[string, ControlMessage]    # pending control messages
    mcache*: MCache                            # messages cache
    heartbeatFut: Future[void]                 # cancellation future for heartbeat interval
    heartbeatRunning: bool

    peerStats: Table[PubSubPeer, PeerStats]
    parameters*: GossipSubParams
    topicParams*: Table[string, TopicParams]
    directPeersLoop: Future[void]
    peersInIP: Table[MultiAddress, HashSet[PubSubPeer]]

    heartbeatEvents*: seq[AsyncEvent]

    when not defined(release):
      prunedPeers: HashSet[PubSubPeer]

when defined(libp2p_expensive_metrics):
  declareGauge(libp2p_gossipsub_peers_per_topic_mesh,
    "gossipsub peers per topic in mesh",
    labels = ["topic"])

  declareGauge(libp2p_gossipsub_peers_per_topic_fanout,
    "gossipsub peers per topic in fanout",
    labels = ["topic"])

  declareGauge(libp2p_gossipsub_peers_per_topic_gossipsub,
    "gossipsub peers per topic in gossipsub",
    labels = ["topic"])

proc init*(_: type[GossipSubParams]): GossipSubParams =
  GossipSubParams(
      explicit: true,
      pruneBackoff: 1.minutes,
      floodPublish: true,
      gossipFactor: 0.25,
      dScore: 4,
      dOut: GossipSubDlo - 1,
      dLazy: GossipSubD,
      gossipThreshold: -10,
      publishThreshold: -100,
      graylistThreshold: -10000,
      opportunisticGraftThreshold: 1,
      decayInterval: 1.seconds,
      decayToZero: 0.01,
      retainScore: 10.seconds,
      appSpecificWeight: 1.0,
      ipColocationFactorWeight: 0.0,
      ipColocationFactorThreshold: 1.0,
      behaviourPenaltyWeight: -1.0,
      behaviourPenaltyDecay: 0.999,
    )

proc validateParameters*(parameters: GossipSubParams): Result[void, cstring] =
  if  (parameters.dOut >= GossipSubDlo) or
      (parameters.dOut > (GossipSubD div 2)):
    err("gossipsub: dOut parameter error, Number of outbound connections to keep in the mesh. Must be less than D_lo and at most D/2")
  elif parameters.gossipThreshold >= 0:
    err("gossipsub: gossipThreshold parameter error, Must be < 0")
  elif parameters.publishThreshold >= parameters.gossipThreshold:
    err("gossipsub: publishThreshold parameter error, Must be < gossipThreshold")
  elif parameters.graylistThreshold >= parameters.publishThreshold:
    err("gossipsub: graylistThreshold parameter error, Must be < publishThreshold")
  elif parameters.acceptPXThreshold < 0:
    err("gossipsub: acceptPXThreshold parameter error, Must be >= 0")
  elif parameters.opportunisticGraftThreshold < 0:
    err("gossipsub: opportunisticGraftThreshold parameter error, Must be >= 0")
  elif parameters.decayToZero > 0.5 or parameters.decayToZero <= 0.0:
    err("gossipsub: decayToZero parameter error, Should be close to 0.0")
  elif parameters.appSpecificWeight < 0:
    err("gossipsub: appSpecificWeight parameter error, Must be positive")
  elif parameters.ipColocationFactorWeight > 0:
    err("gossipsub: ipColocationFactorWeight parameter error, Must be negative or 0")
  elif parameters.ipColocationFactorThreshold < 1.0:
    err("gossipsub: ipColocationFactorThreshold parameter error, Must be at least 1")
  elif parameters.behaviourPenaltyWeight >= 0:
    err("gossipsub: behaviourPenaltyWeight parameter error, Must be negative")
  elif parameters.behaviourPenaltyDecay < 0 or parameters.behaviourPenaltyDecay >= 1:
    err("gossipsub: behaviourPenaltyDecay parameter error, Must be between 0 and 1")
  else:
    ok()

proc init*(_: type[TopicParams]): TopicParams =
  TopicParams(
    topicWeight: 1.0,
    timeInMeshWeight: 0.01,
    timeInMeshQuantum: 1.seconds,
    timeInMeshCap: 10.0,
    firstMessageDeliveriesWeight: 1.0,
    firstMessageDeliveriesDecay: 0.5,
    firstMessageDeliveriesCap: 10.0,
    meshMessageDeliveriesWeight: -1.0,
    meshMessageDeliveriesDecay: 0.5,
    meshMessageDeliveriesCap: 10,
    meshMessageDeliveriesThreshold: 1,
    meshMessageDeliveriesWindow: 5.milliseconds,
    meshMessageDeliveriesActivation: 10.seconds,
    meshFailurePenaltyWeight: -1.0,
    meshFailurePenaltyDecay: 0.5,
    invalidMessageDeliveriesWeight: -1.0,
    invalidMessageDeliveriesDecay: 0.5
  )

proc validateParameters*(parameters: TopicParams): Result[void, cstring] =
  if parameters.timeInMeshWeight <= 0.0 or parameters.timeInMeshWeight > 1.0:
    err("gossipsub: timeInMeshWeight parameter error, Must be a small positive value")
  elif parameters.timeInMeshCap <= 0.0:
    err("gossipsub: timeInMeshCap parameter error, Should be a positive value")
  elif parameters.firstMessageDeliveriesWeight <= 0.0:
    err("gossipsub: firstMessageDeliveriesWeight parameter error, Should be a positive value")
  elif parameters.meshMessageDeliveriesWeight >= 0.0:
    err("gossipsub: meshMessageDeliveriesWeight parameter error, Should be a negative value")
  elif parameters.meshMessageDeliveriesThreshold <= 0.0:
    err("gossipsub: meshMessageDeliveriesThreshold parameter error, Should be a positive value")
  elif parameters.meshMessageDeliveriesCap < parameters.meshMessageDeliveriesThreshold:
    err("gossipsub: meshMessageDeliveriesCap parameter error, Should be >= meshMessageDeliveriesThreshold")
  elif parameters.meshMessageDeliveriesWindow > 100.milliseconds:
    err("gossipsub: meshMessageDeliveriesWindow parameter error, Should be small, 1-5ms")
  elif parameters.meshFailurePenaltyWeight >= 0.0:
    err("gossipsub: meshFailurePenaltyWeight parameter error, Should be a negative value")
  elif parameters.invalidMessageDeliveriesWeight >= 0.0:
    err("gossipsub: invalidMessageDeliveriesWeight parameter error, Should be a negative value")
  else:
    ok()

method init*(g: GossipSub) =
  proc handler(conn: Connection, proto: string) {.async.} =
    ## main protocol handler that gets triggered on every
    ## connection for a protocol string
    ## e.g. ``/floodsub/1.0.0``, etc...
    ##
    try:
      await g.handleConn(conn, proto)
    except CancelledError:
      # This is top-level procedure which will work as separate task, so it
      # do not need to propogate CancelledError.
      trace "Unexpected cancellation in gossipsub handler", conn
    except CatchableError as exc:
      trace "GossipSub handler leaks an error", exc = exc.msg, conn

  g.handler = handler
  g.codecs &= GossipSubCodec
  g.codecs &= GossipSubCodec_10

method onNewPeer(g: GossipSub, peer: PubSubPeer) =
  if peer notin g.peerStats:
    # new peer
    g.peerStats[peer] = PeerStats()
    peer.iWantBudget = IWantPeerBudget
    peer.iHaveBudget = IHavePeerBudget
    return
  else:
    # we knew this peer
    discard

proc grafted(g: GossipSub, p: PubSubPeer, topic: string) =
  g.peerStats.withValue(p, stats) do:
    var info = stats.topicInfos.getOrDefault(topic)
    info.graftTime = Moment.now()
    info.meshTime = 0.seconds
    info.inMesh = true
    info.meshMessageDeliveriesActive = false

    # mgetOrPut does not work, so we gotta do this without referencing
    stats.topicInfos[topic] = info
    assert(g.peerStats[p].topicInfos[topic].inMesh == true)

    trace "grafted", p
  do:
    doAssert(false, "grafted: peerStats key not found for " & $p)

proc pruned(g: GossipSub, p: PubSubPeer, topic: string) =
  g.peerStats.withValue(p, stats) do:
    when not defined(release):
      g.prunedPeers.incl(p)

    var info = stats.topicInfos[topic]
    let topicParams = g.topicParams.mgetOrPut(topic, TopicParams.init())

    # penalize a peer that delivered no message
    let threshold = topicParams.meshMessageDeliveriesThreshold
    if info.inMesh and info.meshMessageDeliveriesActive and info.meshMessageDeliveries < threshold:
      let deficit = threshold - info.meshMessageDeliveries
      info.meshFailurePenalty += deficit * deficit

    info.inMesh = false

    # mgetOrPut does not work, so we gotta do this without referencing
    stats.topicInfos[topic] = info

    trace "pruned", p
  do:
    when not defined(release):
      if p in g.prunedPeers:
        doAssert(false, "pruned: Dupe prune " & $p)
      else:
        doAssert(false, "pruned: TopicInfo key not found for " & $p)
    else:
      doAssert(false, "pruned: TopicInfo key not found for " & $p)

proc peerExchangeList(g: GossipSub, topic: string): seq[PeerInfoMsg] =
  var peers = g.gossipsub.getOrDefault(topic, initHashSet[PubSubPeer]()).toSeq()
  peers.keepIf do (x: PubSubPeer) -> bool:
      x.score >= 0.0
  # by spec, larger then Dhi, but let's put some hard caps
  peers.setLen(min(peers.len, GossipSubDhi * 2))
  peers.map do (x: PubSubPeer) -> PeerInfoMsg:
    PeerInfoMsg(peerID: x.peerId.getBytes())

proc replenishFanout(g: GossipSub, topic: string) =
  ## get fanout peers for a topic
  logScope: topic
  trace "about to replenish fanout"

  if g.fanout.peers(topic) < GossipSubDLo:
    trace "replenishing fanout", peers = g.fanout.peers(topic)
    if topic in g.gossipsub:
      for peer in g.gossipsub[topic]:
        if g.fanout.addPeer(topic, peer):
          if g.fanout.peers(topic) == GossipSubD:
            break

  when defined(libp2p_expensive_metrics):
    libp2p_gossipsub_peers_per_topic_fanout
      .set(g.fanout.peers(topic).int64, labelValues = [topic])

  trace "fanout replenished with peers", peers = g.fanout.peers(topic)

proc rebalanceMesh(g: GossipSub, topic: string) {.async.} =
  logScope:
    topic
    mesh = g.mesh.peers(topic)
    gossipsub = g.gossipsub.peers(topic)

  trace "rebalancing mesh"

  # create a mesh topic that we're subscribing to

  var
    grafts, prunes, grafting: seq[PubSubPeer]

  let npeers = g.mesh.peers(topic)
  if npeers  < GossipSubDlo:
    trace "replenishing mesh", peers = g.mesh.peers(topic)
    # replenish the mesh if we're below Dlo
    grafts = toSeq(
      g.gossipsub.getOrDefault(topic, initHashSet[PubSubPeer]()) -
      g.mesh.getOrDefault(topic, initHashSet[PubSubPeer]())
    )

    grafts.keepIf do (x: PubSubPeer) -> bool:
      # avoid negative score peers
      x.score >= 0.0 and
      # don't pick explicit peers
      x.peerId notin g.parameters.directPeers and
      # and avoid peers we are backing off
      x.peerId notin g.backingOff

    # sort peers by score
    grafts.sort do (x, y: PubSubPeer) -> int:
      let
        peerx = x.score
        peery = y.score
      if peerx < peery: -1
      elif peerx == peery: 0
      else: 1

    # Graft peers so we reach a count of D
    grafts.setLen(min(grafts.len, GossipSubD - g.mesh.peers(topic)))

    trace "grafting", grafts = grafts.len
    for peer in grafts:
      if g.mesh.addPeer(topic, peer):
        g.grafted(peer, topic)
        g.fanout.removePeer(topic, peer)
        grafting &= peer

  elif npeers < g.parameters.dOut:
    trace "replenishing mesh outbound quota", peers = g.mesh.peers(topic)
    # replenish the mesh if we're below Dlo
    grafts = toSeq(
      g.gossipsub.getOrDefault(topic, initHashSet[PubSubPeer]()) -
      g.mesh.getOrDefault(topic, initHashSet[PubSubPeer]())
    )

    grafts.keepIf do (x: PubSubPeer) -> bool:
      # get only outbound ones
      x.outbound and
      # avoid negative score peers
      x.score >= 0.0 and
      # don't pick explicit peers
      x.peerId notin g.parameters.directPeers and
      # and avoid peers we are backing off
      x.peerId notin g.backingOff

    # sort peers by score
    grafts.sort do (x, y: PubSubPeer) -> int:
      let
        peerx = x.score
        peery = y.score
      if peerx < peery: -1
      elif peerx == peery: 0
      else: 1

    # Graft peers so we reach a count of D
    grafts.setLen(min(grafts.len, g.parameters.dOut - g.mesh.peers(topic)))

    trace "grafting outbound peers", topic, peers = grafts.len

    for peer in grafts:
      if g.mesh.addPeer(topic, peer):
        g.grafted(peer, topic)
        g.fanout.removePeer(topic, peer)
        grafting &= peer


  if g.mesh.peers(topic) > GossipSubDhi:
    # prune peers if we've gone over Dhi
    prunes = toSeq(g.mesh[topic])

    # sort peers by score (inverted)
    prunes.sort do (x, y: PubSubPeer) -> int:
      let
        peerx = x.score
        peery = y.score
      if peerx > peery: -1
      elif peerx == peery: 0
      else: 1

    # keep high score peers
    if prunes.len > g.parameters.dScore:
      prunes.setLen(prunes.len - g.parameters.dScore)
      # we must try to keep outbound peers
      # to keep an outbound mesh quota
      # so we try to first prune inbound peers
      # if none we add up some outbound
      var outbound: seq[PubSubPeer]
      var inbound: seq[PubSubPeer]
      for peer in prunes:
        if peer.outbound:
          outbound &= peer
        else:
          inbound &= peer

      let pruneLen = inbound.len - GossipSubD
      if pruneLen > 0:
        # Ok we got some peers to prune,
        # for this heartbeat let's prune those
        shuffle(inbound)
        inbound.setLen(pruneLen)
      else:
        # We could not find any inbound to prune
        # Yet we are on Hi, so we need to cull outbound peers
        let keepDOutLen = outbound.len - g.parameters.dOut
        if keepDOutLen > 0:
          shuffle(outbound)
          outbound.setLen(keepDOutLen)
        inbound &= outbound

      trace "pruning", prunes = inbound.len
      for peer in inbound:
        g.pruned(peer, topic)
        g.mesh.removePeer(topic, peer)

  # opportunistic grafting, by spec mesh should not be empty...
  if g.mesh.peers(topic) > 1:
    var peers = toSeq(g.mesh[topic])
    peers.sort do (x, y: PubSubPeer) -> int:
      let
        peerx = x.score
        peery = y.score
      if peerx < peery: -1
      elif peerx == peery: 0
      else: 1
    let medianIdx = peers.len div 2
    let median = peers[medianIdx]
    if median.score < g.parameters.opportunisticGraftThreshold:
      trace "median score below opportunistic threshold", score = median.score
      var avail = toSeq(
        g.gossipsub.getOrDefault(topic, initHashSet[PubSubPeer]()) -
        g.mesh.getOrDefault(topic, initHashSet[PubSubPeer]())
      )

      avail.keepIf do (x: PubSubPeer) -> bool:
        # avoid negative score peers
        x.score >= median.score and
        # don't pick explicit peers
        x.peerId notin g.parameters.directPeers and
        # and avoid peers we are backing off
        x.peerId notin g.backingOff

      # by spec, grab only 2
      if avail.len > 2:
        avail.setLen(2)

      for peer in avail:
        if g.mesh.addPeer(topic, peer):
          g.grafted(peer, topic)
          grafting &= peer
          trace "opportunistic grafting", peer = $peer

  when defined(libp2p_expensive_metrics):
    libp2p_gossipsub_peers_per_topic_gossipsub
      .set(g.gossipsub.peers(topic).int64, labelValues = [topic])

    libp2p_gossipsub_peers_per_topic_fanout
      .set(g.fanout.peers(topic).int64, labelValues = [topic])

    libp2p_gossipsub_peers_per_topic_mesh
      .set(g.mesh.peers(topic).int64, labelValues = [topic])

  trace "mesh balanced"

  # Send changes to peers after table updates to avoid stale state
  if grafts.len > 0:
    let graft = RPCMsg(control: some(ControlMessage(graft: @[ControlGraft(topicID: topic)])))
    g.broadcast(grafts, graft)
  if prunes.len > 0:
    let prune = RPCMsg(control: some(ControlMessage(
      prune: @[ControlPrune(
        topicID: topic,
        peers: g.peerExchangeList(topic),
        backoff: g.parameters.pruneBackoff.seconds.uint64)])))
    g.broadcast(prunes, prune)

proc dropFanoutPeers(g: GossipSub) =
  # drop peers that we haven't published to in
  # GossipSubFanoutTTL seconds
  let now = Moment.now()
  for topic in toSeq(g.lastFanoutPubSub.keys):
    let val = g.lastFanoutPubSub[topic]
    if now > val:
      g.fanout.del(topic)
      g.lastFanoutPubSub.del(topic)
      trace "dropping fanout topic", topic

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_fanout
        .set(g.fanout.peers(topic).int64, labelValues = [topic])

proc getGossipPeers(g: GossipSub): Table[PubSubPeer, ControlMessage] {.gcsafe.} =
  ## gossip iHave messages to peers
  ##

  trace "getting gossip peers (iHave)"
  let topics = toHashSet(toSeq(g.mesh.keys)) + toHashSet(toSeq(g.fanout.keys))
  for topic in topics:
    if topic notin g.gossipsub:
      trace "topic not in gossip array, skipping", topicID = topic
      continue

    let mids = g.mcache.window(topic)
    if not mids.len > 0:
      continue

    let ihave = ControlIHave(topicID: topic, messageIDs: toSeq(mids))

    let mesh = g.mesh.getOrDefault(topic)
    let fanout = g.fanout.getOrDefault(topic)
    let gossipPeers = mesh + fanout
    var allPeers = toSeq(g.gossipsub.getOrDefault(topic))

    allPeers.keepIf do (x: PubSubPeer) -> bool:
      x.peerId notin g.parameters.directPeers and
      x notin gossipPeers and
      x.score >= g.parameters.gossipThreshold

    var target = g.parameters.dLazy
    let factor = (g.parameters.gossipFactor.float * allPeers.len.float).int
    if factor > target:
      target = min(factor, allPeers.len)

    if target < allPeers.len:
      shuffle(allPeers)
      allPeers.setLen(target)

    for peer in allPeers:
      if peer notin result:
        result[peer] = ControlMessage()
      result[peer].ihave.add(ihave)

func `/`(a, b: Duration): float64 =
  let
    fa = float64(a.nanoseconds) / 1000000000
    fb = float64(b.nanoseconds) / 1000000000
  fa / fb

proc colocationFactor(g: GossipSub, peer: PubSubPeer): float64 =
  if peer.connections.len == 0:
    0.0
  else:
    let
      # TODO, we are just using the first connections for now
      address = peer.connections[0].observedAddr
      ipPeers = g.peersInIP.getOrDefault(address)
      len = ipPeers.len.float64
    if len > g.parameters.ipColocationFactorThreshold:
      let over = len - g.parameters.ipColocationFactorThreshold
      over * over
    else:
      # lazy update peersInIP
      if address notin g.peersInIP:
        g.peersInIP[address] = initHashSet[PubSubPeer]()
      g.peersInIP[address].incl(peer)
      0.0

proc updateScores(g: GossipSub) = # avoid async
  trace "updating scores", peers = g.peers.len

  let now = Moment.now()
  var evicting: seq[PubSubPeer]

  for peer, stats in g.peerStats.mpairs:
    trace "updating peer score", peer

    if not peer.connected:
      if now > stats.expire:
        evicting.add(peer)
        trace "evicted peer from memory", peer
        continue

    # Per topic
    for topic, topicParams in g.topicParams:
      var info = stats.topicInfos.getOrDefault(topic)

      # Scoring
      var topicScore = 0'f64

      if info.inMesh:
        info.meshTime = now - info.graftTime
        if info.meshTime > topicParams.meshMessageDeliveriesActivation:
          info.meshMessageDeliveriesActive = true

        # TODO verify this `/` accuracy/correctnes
        var p1 = info.meshTime / topicParams.timeInMeshQuantum
        if p1 > topicParams.timeInMeshCap:
          p1 = topicParams.timeInMeshCap
        trace "p1", peer, p1
        topicScore += p1 * topicParams.timeInMeshWeight
      else:
        info.meshMessageDeliveriesActive = false

      topicScore += info.firstMessageDeliveries * topicParams.firstMessageDeliveriesWeight
      trace "p2", peer, p2 = info.firstMessageDeliveries

      if info.meshMessageDeliveriesActive:
        if info.meshMessageDeliveries < topicParams.meshMessageDeliveriesThreshold:
          let deficit = topicParams.meshMessageDeliveriesThreshold - info.meshMessageDeliveries
          let p3 = deficit * deficit
          trace "p3", peer, p3
          topicScore += p3 * topicParams.meshMessageDeliveriesWeight

      topicScore += info.meshFailurePenalty * topicParams.meshFailurePenaltyWeight
      trace "p3b", peer, p3b = info.meshFailurePenalty

      topicScore += info.invalidMessageDeliveries * info.invalidMessageDeliveries * topicParams.invalidMessageDeliveriesWeight
      trace "p4", p4 = info.invalidMessageDeliveries * info.invalidMessageDeliveries

      trace "updated peer topic's scores", peer, topic, info, topicScore

      peer.score += topicScore * topicParams.topicWeight

      # Score decay
      info.firstMessageDeliveries *= topicParams.firstMessageDeliveriesDecay
      if info.firstMessageDeliveries < g.parameters.decayToZero:
        info.firstMessageDeliveries = 0

      info.meshMessageDeliveries *= topicParams.meshMessageDeliveriesDecay
      if info.meshMessageDeliveries < g.parameters.decayToZero:
        info.meshMessageDeliveries = 0

      info.meshFailurePenalty *= topicParams.meshFailurePenaltyDecay
      if info.meshFailurePenalty < g.parameters.decayToZero:
        info.meshFailurePenalty = 0

      info.invalidMessageDeliveries *= topicParams.invalidMessageDeliveriesDecay
      if info.invalidMessageDeliveries < g.parameters.decayToZero:
        info.invalidMessageDeliveries = 0

      # Wrap up
      # commit our changes, mgetOrPut does NOT work as wanted with value types (lent?)
      stats.topicInfos[topic] = info

    peer.score += peer.appScore * g.parameters.appSpecificWeight

    peer.score += peer.behaviourPenalty * peer.behaviourPenalty * g.parameters.behaviourPenaltyWeight

    peer.score += g.colocationFactor(peer) * g.parameters.ipColocationFactorWeight

    # decay behaviourPenalty
    peer.behaviourPenalty *= g.parameters.behaviourPenaltyDecay
    if peer.behaviourPenalty < g.parameters.decayToZero:
      peer.behaviourPenalty = 0

    trace "updated peer's score", peer, score = peer.score

  for peer in evicting:
    g.peerStats.del(peer)

proc heartbeat(g: GossipSub) {.async.} =
  while g.heartbeatRunning:
    try:
      trace "running heartbeat", instance = cast[int](g)

      # remove expired backoffs
      block:
        let now = Moment.now()
        var expired = toSeq(g.backingOff.pairs())
        expired.keepIf do (pair: tuple[peer: PeerID, expire: Moment]) -> bool:
          now >= pair.expire
        for (peer, _) in expired:
          g.backingOff.del(peer)

      # reset IWANT budget
      # reset IHAVE cap
      block:
        for peer in g.peers.values:
          peer.iWantBudget = IWantPeerBudget
          peer.iHaveBudget = IHavePeerBudget

      g.updateScores()

      for t in toSeq(g.topics.keys):
        # prune every negative score peer
        # do this before relance
        # in order to avoid grafted -> pruned in the same cycle
        let meshPeers = g.mesh.getOrDefault(t)
        var prunes: seq[PubSubPeer]
        for peer in meshPeers:
          if peer.score < 0.0:
            g.pruned(peer, t)
            g.mesh.removePeer(t, peer)
            prunes &= peer
        let prune = RPCMsg(control: some(ControlMessage(
          prune: @[ControlPrune(
            topicID: t,
            peers: g.peerExchangeList(t),
            backoff: g.parameters.pruneBackoff.seconds.uint64)])))
        g.broadcast(prunes, prune)

        await g.rebalanceMesh(t)

      g.dropFanoutPeers()

      # replenish known topics to the fanout
      for t in toSeq(g.fanout.keys):
        g.replenishFanout(t)

      let peers = g.getGossipPeers()
      for peer, control in peers:
        g.peers.withValue(peer.peerId, pubsubPeer) do:
          g.send(
            pubsubPeer[],
            RPCMsg(control: some(control)))

      g.mcache.shift() # shift the cache
    except CancelledError as exc:
      raise exc
    except CatchableError as exc:
      warn "exception ocurred in gossipsub heartbeat", exc = exc.msg, trace = exc.getStackTrace()
      assert(false, "exception ocurred in gossipsub heartbeat")

    for trigger in g.heartbeatEvents:
      trace "firing heartbeat event", instance = cast[int](g)
      trigger.fire()

    await sleepAsync(GossipSubHeartbeatInterval)

method unsubscribePeer*(g: GossipSub, peer: PeerID) =
  ## handle peer disconnects
  ##

  trace "unsubscribing gossipsub peer", peer
  let pubSubPeer = g.peers.getOrDefault(peer)
  if pubSubPeer.isNil:
    trace "no peer to unsubscribe", peer
    return

  # remove from peer IPs collection too
  if pubSubPeer.connections.len > 0:
    # TODO, we are just using the first connections for now
    g.peersInIP.withValue(pubSubPeer.connections[0].observedAddr, s) do:
      s[].excl(pubSubPeer)

  for t in toSeq(g.gossipsub.keys):
    g.gossipsub.removePeer(t, pubSubPeer)
    # also try to remove from explicit table here
    g.explicit.removePeer(t, pubSubPeer)

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_gossipsub
        .set(g.gossipsub.peers(t).int64, labelValues = [t])

  for t in toSeq(g.mesh.keys):
    if pubSubPeer in g.mesh[t]:
        g.pruned(pubSubPeer, t)
    g.mesh.removePeer(t, pubSubPeer)

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_mesh
        .set(g.mesh.peers(t).int64, labelValues = [t])

  for t in toSeq(g.fanout.keys):
    g.fanout.removePeer(t, pubSubPeer)

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_fanout
        .set(g.fanout.peers(t).int64, labelValues = [t])

    # don't retain bad score peers
    if pubSubPeer.score < 0.0:
      g.peerStats.del(pubSubPeer)
      return

    g.peerStats[pubSubPeer].expire = Moment.now() + g.parameters.retainScore
    for topic, info in g.peerStats[pubSubPeer].topicInfos.mpairs:
      info.firstMessageDeliveries = 0

  procCall FloodSub(g).unsubscribePeer(peer)

method subscribeTopic*(g: GossipSub,
                       topic: string,
                       subscribe: bool,
                       peer: PubSubPeer) {.gcsafe.} =
  # Skip floodsub - we don't want it to add the peer to `g.floodsub`
  procCall PubSub(g).subscribeTopic(topic, subscribe, peer)

  logScope:
    peer
    topic

  g.onNewPeer(peer)

  if subscribe:
    trace "peer subscribed to topic"
    # subscribe remote peer to the topic
    discard g.gossipsub.addPeer(topic, peer)
    if peer.peerId in g.parameters.directPeers:
      discard g.explicit.addPeer(topic, peer)
  else:
    trace "peer unsubscribed from topic"
    # unsubscribe remote peer from the topic
    g.gossipsub.removePeer(topic, peer)
    g.mesh.removePeer(topic, peer)
    g.fanout.removePeer(topic, peer)
    if peer.peerId in g.parameters.directPeers:
      g.explicit.removePeer(topic, peer)

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_mesh
        .set(g.mesh.peers(topic).int64, labelValues = [topic])
      libp2p_gossipsub_peers_per_topic_fanout
        .set(g.fanout.peers(topic).int64, labelValues = [topic])

  when defined(libp2p_expensive_metrics):
    libp2p_gossipsub_peers_per_topic_gossipsub
      .set(g.gossipsub.peers(topic).int64, labelValues = [topic])

  trace "gossip peers", peers = g.gossipsub.peers(topic), topic

proc handleGraft(g: GossipSub,
                 peer: PubSubPeer,
                 grafts: seq[ControlGraft]): seq[ControlPrune] =
  for graft in grafts:
    let topic = graft.topicID
    logScope:
      peer
      topic

    trace "peer grafted topic"

    # It is an error to GRAFT on a explicit peer
    if peer.peerId in g.parameters.directPeers:
      trace "attempt to graft an explicit peer",  peer=peer.id,
                                                  topicID=graft.topicID
      # and such an attempt should be logged and rejected with a PRUNE
      result.add(ControlPrune(
        topicID: graft.topicID,
        peers: @[], # omitting heavy computation here as the remote did something illegal
        backoff: g.parameters.pruneBackoff.seconds.uint64))
      continue

    if peer.peerId in g.backingOff:
      trace "attempt to graft an backingOff peer",  peer=peer.id,
                                                    topicID=graft.topicID,
                                                    expire=g.backingOff[peer.peerId]
      # and such an attempt should be logged and rejected with a PRUNE
      result.add(ControlPrune(
        topicID: graft.topicID,
        peers: @[], # omitting heavy computation here as the remote did something illegal
        backoff: g.parameters.pruneBackoff.seconds.uint64))
      continue

    if peer notin g.peerStats:
      g.peerStats[peer] = PeerStats()

    # If they send us a graft before they send us a subscribe, what should
    # we do? For now, we add them to mesh but don't add them to gossipsub.
    if topic in g.topics:
      if g.mesh.peers(topic) < GossipSubDHi or peer.outbound:
        # In the spec, there's no mention of DHi here, but implicitly, a
        # peer will be removed from the mesh on next rebalance, so we don't want
        # this peer to push someone else out
        if g.mesh.addPeer(topic, peer):
          g.grafted(peer, topic)
          g.fanout.removePeer(topic, peer)
        else:
          trace "peer already in mesh"
      else:
        result.add(ControlPrune(
          topicID: topic,
          peers: g.peerExchangeList(topic),
          backoff: g.parameters.pruneBackoff.seconds.uint64))
    else:
      trace "peer grafting topic we're not interested in", topic
      # gossip 1.1, we do not send a control message prune anymore

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_mesh
        .set(g.mesh.peers(topic).int64, labelValues = [topic])
      libp2p_gossipsub_peers_per_topic_fanout
        .set(g.fanout.peers(topic).int64, labelValues = [topic])

proc handlePrune(g: GossipSub, peer: PubSubPeer, prunes: seq[ControlPrune]) =
  for prune in prunes:
    trace "peer pruned topic", peer, topic = prune.topicID

    # add peer backoff
    if prune.backoff > 0:
      let backoff = Moment.fromNow((prune.backoff + BackoffSlackTime).int64.seconds)
      let current = g.backingOff.getOrDefault(peer.peerId)
      if backoff > current:
        g.backingOff[peer.peerId] = backoff

    g.pruned(peer, prune.topicID)
    g.mesh.removePeer(prune.topicID, peer)

    when defined(libp2p_expensive_metrics):
      libp2p_gossipsub_peers_per_topic_mesh
        .set(g.mesh.peers(prune.topicID).int64, labelValues = [prune.topicID])

proc handleIHave(g: GossipSub,
                 peer: PubSubPeer,
                 ihaves: seq[ControlIHave]): ControlIWant =
  if peer.score < g.parameters.gossipThreshold:
    trace "ihave: ignoring low score peer", peer = $peer, score = peer.score
  elif peer.iHaveBudget == 0:
    trace "ihave: ignoring out of budget peer", peer = $peer, score = peer.score
  else:
    dec peer.iHaveBudget
    for ihave in ihaves:
      trace "peer sent ihave",
        peer, topic = ihave.topicID, msgs = ihave.messageIDs

      if ihave.topicID in g.mesh:
        for m in ihave.messageIDs:
          if m notin g.seen:
            result.messageIDs.add(m)

proc handleIWant(g: GossipSub,
                 peer: PubSubPeer,
                 iwants: seq[ControlIWant]): seq[Message] =
  if peer.score < g.parameters.gossipThreshold:
    trace "iwant: ignoring low score peer", peer = $peer, score = peer.score
  else:
    for iwant in iwants:
      for mid in iwant.messageIDs:
        trace "peer sent iwant", peer, messageID = mid
        let msg = g.mcache.get(mid)
        if msg.isSome:
          # avoid spam
          if peer.iWantBudget > 0:
            result.add(msg.get())
            dec peer.iWantBudget
          else:
            return

proc punishPeer(g: GossipSub, peer: PubSubPeer, msg: Message) =
  for t in msg.topicIDs:
    # ensure we init a new topic if unknown
    let _ = g.topicParams.mgetOrPut(t, TopicParams.init())
    # update stats
    var tstats = g.peerStats[peer].topicInfos.getOrDefault(t)
    tstats.invalidMessageDeliveries += 1
    g.peerStats[peer].topicInfos[t] = tstats

method rpcHandler*(g: GossipSub,
                  peer: PubSubPeer,
                  rpcMsg: RPCMsg) {.async.} =
  await procCall PubSub(g).rpcHandler(peer, rpcMsg)

  for msg in rpcMsg.messages:                         # for every message
    let msgId = g.msgIdProvider(msg)

    if g.seen.put(msgId):
      trace "Dropping already-seen message", msgId, peer

      # make sure to update score tho before continuing
      for t in msg.topicIDs:                     # for every topic in the message
        let topicParams = g.topicParams.mgetOrPut(t, TopicParams.init())
                                                # if in mesh add more delivery score
        var stats = g.peerStats[peer].topicInfos.getOrDefault(t)
        if stats.inMesh:
          stats.meshMessageDeliveries += 1
          if stats.meshMessageDeliveries > topicParams.meshMessageDeliveriesCap:
            stats.meshMessageDeliveries = topicParams.meshMessageDeliveriesCap

                                                # commit back to the table
        g.peerStats[peer].topicInfos[t] = stats

      continue

    g.mcache.put(msgId, msg)

    if g.verifySignature and not msg.verify(peer.peerId):
      debug "Dropping message due to failed signature verification", msgId, peer
      g.punishPeer(peer, msg)
      continue

    if not (await g.validate(msg)):
      trace "Dropping message due to failed validation", msgId, peer
      g.punishPeer(peer, msg)
      continue

    var toSendPeers = initHashSet[PubSubPeer]()
    for t in msg.topicIDs:                      # for every topic in the message
      let topicParams = g.topicParams.mgetOrPut(t, TopicParams.init())

                                                # contribute to peer score first delivery
      var stats = g.peerStats[peer].topicInfos.getOrDefault(t)
      stats.firstMessageDeliveries += 1
      if stats.firstMessageDeliveries > topicParams.firstMessageDeliveriesCap:
        stats.firstMessageDeliveries = topicParams.firstMessageDeliveriesCap

                                                # if in mesh add more delivery score
      if stats.inMesh:
        stats.meshMessageDeliveries += 1
        if stats.meshMessageDeliveries > topicParams.meshMessageDeliveriesCap:
          stats.meshMessageDeliveries = topicParams.meshMessageDeliveriesCap

                                                # commit back to the table
      g.peerStats[peer].topicInfos[t] = stats

      g.floodsub.withValue(t, peers): toSendPeers.incl(peers[])
      g.mesh.withValue(t, peers): toSendPeers.incl(peers[])

      await handleData(g, t, msg.data)

    # In theory, if topics are the same in all messages, we could batch - we'd
    # also have to be careful to only include validated messages
    g.broadcast(toSeq(toSendPeers), RPCMsg(messages: @[msg]))
    trace "forwared message to peers", peers = toSendPeers.len, msgId, peer

  if rpcMsg.control.isSome:
    let control = rpcMsg.control.get()
    g.handlePrune(peer, control.prune)

    var respControl: ControlMessage
    respControl.iwant.add(g.handleIHave(peer, control.ihave))
    respControl.prune.add(g.handleGraft(peer, control.graft))
    let messages = g.handleIWant(peer, control.iwant)

    if respControl.graft.len > 0 or respControl.prune.len > 0 or
      respControl.ihave.len > 0 or messages.len > 0:

      debug "sending control message", msg = shortLog(respControl), peer
      g.send(
        peer,
        RPCMsg(control: some(respControl), messages: messages))

method subscribe*(g: GossipSub,
                  topic: string,
                  handler: TopicHandler) {.async.} =
  await procCall PubSub(g).subscribe(topic, handler)

  # if we have a fanout on this topic break it
  if topic in g.fanout:
    g.fanout.del(topic)

  await g.rebalanceMesh(topic)

method unsubscribe*(g: GossipSub,
                    topics: seq[TopicPair]) {.async.} =
  await procCall PubSub(g).unsubscribe(topics)

  for (topic, handler) in topics:
    # delete from mesh only if no handlers are left
    if topic notin g.topics:
      if topic in g.mesh:
        let peers = g.mesh[topic]
        g.mesh.del(topic)
        for peer in peers:
          g.pruned(peer, topic)
        let prune = RPCMsg(control: some(ControlMessage(
          prune: @[ControlPrune(
            topicID: topic,
            peers: g.peerExchangeList(topic),
            backoff: g.parameters.pruneBackoff.seconds.uint64)])))
        g.broadcast(toSeq(peers), prune)

method unsubscribeAll*(g: GossipSub, topic: string) {.async.} =
  await procCall PubSub(g).unsubscribeAll(topic)

  if topic in g.mesh:
    let peers = g.mesh.getOrDefault(topic)
    g.mesh.del(topic)
    for peer in peers:
      g.pruned(peer, topic)
    let prune = RPCMsg(control: some(ControlMessage(
      prune: @[ControlPrune(
        topicID: topic,
        peers: g.peerExchangeList(topic),
        backoff: g.parameters.pruneBackoff.seconds.uint64)])))
    g.broadcast(toSeq(peers), prune)

method publish*(g: GossipSub,
                topic: string,
                data: seq[byte]): Future[int] {.async.} =
  # base returns always 0
  discard await procCall PubSub(g).publish(topic, data)

  logScope: topic
  trace "Publishing message on topic", data = data.shortLog

  if topic.len <= 0: # data could be 0/empty
    debug "Empty topic, skipping publish"
    return 0

  var peers: HashSet[PubSubPeer]

  if g.parameters.floodPublish:
    # With flood publishing enabled, the mesh is used when propagating messages from other peers,
    # but a peer's own messages will always be published to all known peers in the topic.
    for peer in g.gossipsub.getOrDefault(topic):
      if peer.score >= g.parameters.publishThreshold:
        trace "publish: including flood/high score peer", peer = $peer
        peers.incl(peer)

  # add always direct peers
  peers.incl(g.explicit.getOrDefault(topic))

  if topic in g.topics: # if we're subscribed use the mesh
    peers.incl(g.mesh.getOrDefault(topic))
  else: # not subscribed, send to fanout peers
    # try optimistically
    peers.incl(g.fanout.getOrDefault(topic))
    if peers.len == 0:
      # ok we had nothing.. let's try replenish inline
      g.replenishFanout(topic)
      peers.incl(g.fanout.getOrDefault(topic))

    # even if we couldn't publish,
    # we still attempted to publish
    # on the topic, so it makes sense
    # to update the last topic publish
    # time
    g.lastFanoutPubSub[topic] = Moment.fromNow(GossipSubFanoutTTL)

  if peers.len == 0:
    debug "No peers for topic, skipping publish"
    return 0

  inc g.msgSeqno
  let
    msg = Message.init(g.peerInfo, data, topic, g.msgSeqno, g.sign)
    msgId = g.msgIdProvider(msg)

  logScope: msgId

  trace "Created new message", msg = shortLog(msg), peers = peers.len

  if g.seen.put(msgId):
    # custom msgid providers might cause this
    trace "Dropping already-seen message"
    return 0

  g.mcache.put(msgId, msg)

  g.broadcast(toSeq(peers), RPCMsg(messages: @[msg]))
  when defined(libp2p_expensive_metrics):
    if peers.len > 0:
      libp2p_pubsub_messages_published.inc(labelValues = [topic])

  trace "Published message to peers"

  return peers.len

proc maintainDirectPeers(g: GossipSub) {.async.} =
  while g.heartbeatRunning:
    for id in g.parameters.directPeers:
      let peer = g.peers.getOrDefault(id)
      if peer == nil:
        # this creates a new peer and assigns the current switch to it
        # as a result the next time we try to Send we will as well try to open a connection
        # see pubsubpeer.nim send and such
        discard g.getOrCreatePeer(id, g.codec)

    await sleepAsync(1.minutes)

method start*(g: GossipSub) {.async.} =
  trace "gossipsub start"

  if not g.heartbeatFut.isNil:
    warn "Starting gossipsub twice"
    return

  g.heartbeatRunning = true
  g.heartbeatFut = g.heartbeat()
  g.directPeersLoop = g.maintainDirectPeers()

method stop*(g: GossipSub) {.async.} =
  trace "gossipsub stop"
  if g.heartbeatFut.isNil:
    warn "Stopping gossipsub without starting it"
    return

  # stop heartbeat interval
  g.heartbeatRunning = false
  g.directPeersLoop.cancel()
  if not g.heartbeatFut.finished:
    trace "awaiting last heartbeat"
    await g.heartbeatFut
    trace "heartbeat stopped"
    g.heartbeatFut = nil

method initPubSub*(g: GossipSub) =
  procCall FloodSub(g).initPubSub()

  if not g.parameters.explicit:
    g.parameters = GossipSubParams.init()

  g.parameters.validateParameters().tryGet()

  randomize()
  g.mcache = MCache.init(GossipSubHistoryGossip, GossipSubHistoryLength)
  g.mesh = initTable[string, HashSet[PubSubPeer]]()     # meshes - topic to peer
  g.fanout = initTable[string, HashSet[PubSubPeer]]()   # fanout - topic to peer
  g.gossipsub = initTable[string, HashSet[PubSubPeer]]()# topic to peer map of all gossipsub peers
  g.lastFanoutPubSub = initTable[string, Moment]()  # last publish time for fanout topics
  g.gossip = initTable[string, seq[ControlIHave]]() # pending gossip
  g.control = initTable[string, ControlMessage]()   # pending control messages