## Nim-LibP2P
## Copyright (c) 2019 Status Research & Development GmbH
## Licensed under either of
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.
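##
## This module implements mplex (codec ``/mplex/6.7.0``), a stream
## multiplexer that runs multiple logical channels (`LPChannel`) over a
## single underlying `Connection`. Locally and remotely initiated channels
## are tracked in separate tables, and the number of channels a remote
## peer may open is capped at `MaxChannelCount`.
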
import tables, sequtils, oids
import chronos, chronicles, stew/byteutils, metrics
import ../muxer,
       ../../stream/connection,
       ../../stream/bufferstream,
       ../../utility,
       ../../peerinfo,
       ./coder,
       ./lpchannel

export muxer

logScope:
  topics = "mplex"

const MplexCodec* = "/mplex/6.7.0"

const
  MaxChannelCount = 200

when defined(libp2p_expensive_metrics):
  declareGauge(libp2p_mplex_channels,
    "mplex channels", labels = ["initiator", "peer"])

type
  TooManyChannels* = object of CatchableError
  InvalidChannelIdError* = object of CatchableError

  Mplex* = ref object of Muxer
    # open channels keyed by id, indexed by whether the local node
    # initiated them (true) or the remote peer did (false)
    channels: array[bool, Table[uint64, LPChannel]]
    currentId: uint64
    inChannTimeout: Duration
    outChannTimeout: Duration
    isClosed: bool
    oid*: Oid
    maxChannCount: int

func shortLog*(m: Mplex): auto = shortLog(m.connection)
chronicles.formatIt(Mplex): shortLog(it)

proc newTooManyChannels(): ref TooManyChannels =
  newException(TooManyChannels, "max allowed channel count exceeded")

proc newInvalidChannelIdError(): ref InvalidChannelIdError =
  newException(InvalidChannelIdError, "invalid channel id")

proc cleanupChann(m: Mplex, chann: LPChannel) {.async, inline.} =
  ## remove the local channel from the internal tables
  ##
  try:
    await chann.join()
    m.channels[chann.initiator].del(chann.id)
    trace "cleaned up channel", m, chann

    when defined(libp2p_expensive_metrics):
      libp2p_mplex_channels.set(
        m.channels[chann.initiator].len.int64,
        labelValues = [$chann.initiator, $m.connection.peerInfo.peerId])
  except CatchableError as exc:
    # This is a top-level procedure which runs as a separate task, so it
    # does not need to propagate CancelledError, and no other exceptions
    # should happen here
    warn "Error cleaning up mplex channel", m, chann, msg = exc.msg

proc newStreamInternal*(m: Mplex,
                        initiator: bool = true,
                        chanId: uint64 = 0,
                        name: string = "",
                        lazy: bool = false,
                        timeout: Duration):
                        LPChannel {.gcsafe.} =
  ## create new channel/stream
  ##
  let id =
    if initiator:
      # locally initiated channels get a fresh, strictly increasing id
      m.currentId.inc()
      m.currentId
    else:
      # remotely initiated channels reuse the id sent by the peer
      chanId

  if id in m.channels[initiator]:
    raise newInvalidChannelIdError()

  result = LPChannel.init(
    id,
    m.connection,
    initiator,
    name,
    lazy = lazy,
    timeout = timeout)

  result.peerInfo = m.connection.peerInfo
  result.observedAddr = m.connection.observedAddr

  trace "Creating new channel", m, channel = result, id, initiator, name

  m.channels[initiator][id] = result

  # All the errors are handled inside the `cleanupChann()` procedure.
  asyncSpawn m.cleanupChann(result)

  when defined(libp2p_expensive_metrics):
    libp2p_mplex_channels.set(
      m.channels[initiator].len.int64,
      labelValues = [$initiator, $m.connection.peerInfo.peerId])

proc handleStream(m: Mplex, chann: LPChannel) {.async.} =
  ## call the muxer stream handler for this channel
  ##
  try:
    await m.streamHandler(chann)
    trace "finished handling stream", m, chann
    doAssert(chann.closed, "connection not closed by handler!")
  except CatchableError as exc:
    # This is a top-level procedure which runs as a separate task, so it
    # does not need to propagate CancelledError.
    trace "Exception in mplex stream handler", m, chann, msg = exc.msg
    await chann.reset()

method handle*(m: Mplex) {.async, gcsafe.} =
  ## read frames from the underlying connection and dispatch them to the
  ## appropriate channel until the connection reaches EOF
  trace "Starting mplex handler", m
  try:
    while not m.connection.atEof:
      trace "waiting for data", m
      let
        (id, msgType, data) = await m.connection.readMsg()
        # the lowest bit of the message type selects the table of locally (1)
        # or remotely (0) initiated channels
        initiator = bool(ord(msgType) and 1)

      logScope:
        id = id
        initiator = initiator
        msgType = msgType
        size = data.len

      trace "read message from connection", m, data = data.shortLog

      var channel =
        if MessageType(msgType) != MessageType.New:
          let tmp = m.channels[initiator].getOrDefault(id, nil)
          if tmp == nil:
            trace "Channel not found, skipping", m
            continue

          tmp
        else:
          if m.channels[false].len > m.maxChannCount - 1:
            warn "too many channels created by remote peer",
              allowedMax = MaxChannelCount, m
            raise newTooManyChannels()

          let name = string.fromBytes(data)
          m.newStreamInternal(false, id, name, timeout = m.outChannTimeout)

      trace "Processing channel message", m, channel, data = data.shortLog

      case msgType:
        of MessageType.New:
          trace "created channel", m, channel

          if not isNil(m.streamHandler):
            # Launch handler task
            # All the errors are handled inside the `handleStream()` procedure.
            asyncSpawn m.handleStream(channel)

        of MessageType.MsgIn, MessageType.MsgOut:
          if data.len > MaxMsgSize:
            warn "received a message larger than allowed",
              allowed = MaxMsgSize, channel
            raise newLPStreamLimitError()

          trace "pushing data to channel", m, channel, len = data.len
          await channel.pushTo(data)
          trace "pushed data to channel", m, channel, len = data.len

        of MessageType.CloseIn, MessageType.CloseOut:
          await channel.closeRemote()

        of MessageType.ResetIn, MessageType.ResetOut:
          await channel.reset()
  except CancelledError:
    # This procedure is spawned as a task and is not part of the public API,
    # so there is no way for it to be cancelled implicitly.
    debug "Unexpected cancellation in mplex handler", m
  except LPStreamEOFError as exc:
    trace "Stream EOF", m, msg = exc.msg
  except CatchableError as exc:
    warn "Unexpected exception in mplex read loop", m, msg = exc.msg
  finally:
    await m.close()
  trace "Stopped mplex handler", m

proc init*(M: type Mplex,
           conn: Connection,
           inTimeout, outTimeout: Duration = DefaultChanTimeout,
           maxChannCount: int = MaxChannelCount): Mplex =
  M(connection: conn,
    inChannTimeout: inTimeout,
    outChannTimeout: outTimeout,
    oid: genOid(),
    maxChannCount: maxChannCount)

method newStream*(m: Mplex,
                  name: string = "",
                  lazy: bool = false): Future[Connection] {.async, gcsafe.} =
  let channel = m.newStreamInternal(
    lazy = lazy, timeout = m.inChannTimeout)

  if not lazy:
    await channel.open()

  return Connection(channel)

method close*(m: Mplex) {.async, gcsafe.} =
  if m.isClosed:
    trace "Already closed", m
    return

  m.isClosed = true

  trace "Closing mplex", m

  let channs = toSeq(m.channels[false].values) & toSeq(m.channels[true].values)

  for chann in channs:
    await chann.reset()

  await m.connection.close()

  # TODO while we're resetting, new channels may be created that will not be
  #      closed properly

  m.channels[false].clear()
  m.channels[true].clear()

  trace "Closed mplex", m