Connections and pubsub peers cleanup (#279)
* better peer tracking and cleanup * check if peer and conn is nil * test name * make timeout more agressive * rename method for better clarity
This commit is contained in:
parent
ba071cafa6
commit
94196fee71
|
@ -97,13 +97,9 @@ method rpcHandler*(f: FloodSub,
|
||||||
trace "exception in message handler", exc = exc.msg
|
trace "exception in message handler", exc = exc.msg
|
||||||
|
|
||||||
# forward the message to all peers interested in it
|
# forward the message to all peers interested in it
|
||||||
let (published, failed) = await f.sendHelper(toSendPeers, m.messages)
|
let published = await f.publishHelper(toSendPeers, m.messages)
|
||||||
for p in failed:
|
|
||||||
let peer = f.peers.getOrDefault(p)
|
|
||||||
if not(isNil(peer)):
|
|
||||||
f.handleDisconnect(peer) # cleanup failed peers
|
|
||||||
|
|
||||||
trace "forwared message to peers", peers = published.len
|
trace "forwared message to peers", peers = published
|
||||||
|
|
||||||
method init*(f: FloodSub) =
|
method init*(f: FloodSub) =
|
||||||
proc handler(conn: Connection, proto: string) {.async.} =
|
proc handler(conn: Connection, proto: string) {.async.} =
|
||||||
|
@ -139,18 +135,15 @@ method publish*(f: FloodSub,
|
||||||
trace "publishing on topic", name = topic
|
trace "publishing on topic", name = topic
|
||||||
inc f.msgSeqno
|
inc f.msgSeqno
|
||||||
let msg = Message.init(f.peerInfo, data, topic, f.msgSeqno, f.sign)
|
let msg = Message.init(f.peerInfo, data, topic, f.msgSeqno, f.sign)
|
||||||
|
|
||||||
# start the future but do not wait yet
|
# start the future but do not wait yet
|
||||||
let (published, failed) = await f.sendHelper(f.floodsub.getOrDefault(topic), @[msg])
|
let published = await f.publishHelper(f.floodsub.getOrDefault(topic), @[msg])
|
||||||
for p in failed:
|
|
||||||
let peer = f.peers.getOrDefault(p)
|
|
||||||
if not isNil(peer):
|
|
||||||
f.handleDisconnect(peer) # cleanup failed peers
|
|
||||||
|
|
||||||
libp2p_pubsub_messages_published.inc(labelValues = [topic])
|
libp2p_pubsub_messages_published.inc(labelValues = [topic])
|
||||||
|
|
||||||
trace "published message to peers", peers = published.len,
|
trace "published message to peers", peers = published,
|
||||||
msg = msg.shortLog()
|
msg = msg.shortLog()
|
||||||
return published.len
|
return published
|
||||||
|
|
||||||
method unsubscribe*(f: FloodSub,
|
method unsubscribe*(f: FloodSub,
|
||||||
topics: seq[TopicPair]) {.async.} =
|
topics: seq[TopicPair]) {.async.} =
|
||||||
|
|
|
@ -422,13 +422,9 @@ method rpcHandler*(g: GossipSub,
|
||||||
trace "exception in message handler", exc = exc.msg
|
trace "exception in message handler", exc = exc.msg
|
||||||
|
|
||||||
# forward the message to all peers interested in it
|
# forward the message to all peers interested in it
|
||||||
let (published, failed) = await g.sendHelper(toSendPeers, m.messages)
|
let published = await g.publishHelper(toSendPeers, m.messages)
|
||||||
for p in failed:
|
|
||||||
let peer = g.peers.getOrDefault(p)
|
|
||||||
if not isNil(peer):
|
|
||||||
g.handleDisconnect(peer) # cleanup failed peers
|
|
||||||
|
|
||||||
trace "forwared message to peers", peers = published.len
|
trace "forwared message to peers", peers = published
|
||||||
|
|
||||||
var respControl: ControlMessage
|
var respControl: ControlMessage
|
||||||
if m.control.isSome:
|
if m.control.isSome:
|
||||||
|
@ -501,18 +497,13 @@ method publish*(g: GossipSub,
|
||||||
if msgId notin g.mcache:
|
if msgId notin g.mcache:
|
||||||
g.mcache.put(msgId, msg)
|
g.mcache.put(msgId, msg)
|
||||||
|
|
||||||
let (published, failed) = await g.sendHelper(peers, @[msg])
|
let published = await g.publishHelper(peers, @[msg])
|
||||||
for p in failed:
|
if published > 0:
|
||||||
let peer = g.peers.getOrDefault(p)
|
|
||||||
if not isNil(peer):
|
|
||||||
g.handleDisconnect(peer) # cleanup failed peers
|
|
||||||
|
|
||||||
if published.len > 0:
|
|
||||||
libp2p_pubsub_messages_published.inc(labelValues = [topic])
|
libp2p_pubsub_messages_published.inc(labelValues = [topic])
|
||||||
|
|
||||||
trace "published message to peers", peers = published.len,
|
trace "published message to peers", peers = published,
|
||||||
msg = msg.shortLog()
|
msg = msg.shortLog()
|
||||||
return published.len
|
return published
|
||||||
|
|
||||||
method start*(g: GossipSub) {.async.} =
|
method start*(g: GossipSub) {.async.} =
|
||||||
trace "gossipsub start"
|
trace "gossipsub start"
|
||||||
|
|
|
@ -8,20 +8,22 @@
|
||||||
## those terms.
|
## those terms.
|
||||||
|
|
||||||
import tables, sequtils, sets
|
import tables, sequtils, sets
|
||||||
import pubsubpeer
|
import pubsubpeer, ../../peerid
|
||||||
|
|
||||||
type
|
type
|
||||||
PeerTable* = Table[string, HashSet[PubSubPeer]]
|
PeerTable* = Table[string, HashSet[PubSubPeer]] # topic string to peer map
|
||||||
|
|
||||||
proc hasPeerID*(t: PeerTable, topic, peerId: string): bool =
|
proc hasPeerID*(t: PeerTable, topic, peerId: string): bool =
|
||||||
# unefficient but used only in tests!
|
|
||||||
let peers = toSeq(t.getOrDefault(topic))
|
let peers = toSeq(t.getOrDefault(topic))
|
||||||
peers.any do (peer: PubSubPeer) -> bool:
|
peers.any do (peer: PubSubPeer) -> bool:
|
||||||
peer.id == peerId
|
peer.id == peerId
|
||||||
|
|
||||||
func addPeer*(table: var PeerTable, topic: string, peer: PubSubPeer): bool =
|
func addPeer*(table: var PeerTable, topic: string, peer: PubSubPeer): bool =
|
||||||
# returns true if the peer was added, false if it was already in the collection
|
# returns true if the peer was added,
|
||||||
not table.mgetOrPut(topic, initHashSet[PubSubPeer]()).containsOrIncl(peer)
|
# false if it was already in the collection
|
||||||
|
not table.mgetOrPut(topic,
|
||||||
|
initHashSet[PubSubPeer]())
|
||||||
|
.containsOrIncl(peer)
|
||||||
|
|
||||||
func removePeer*(table: var PeerTable, topic: string, peer: PubSubPeer) =
|
func removePeer*(table: var PeerTable, topic: string, peer: PubSubPeer) =
|
||||||
table.withValue(topic, peers):
|
table.withValue(topic, peers):
|
||||||
|
|
|
@ -51,6 +51,7 @@ type
|
||||||
peerInfo*: PeerInfo # this peer's info
|
peerInfo*: PeerInfo # this peer's info
|
||||||
topics*: Table[string, Topic] # local topics
|
topics*: Table[string, Topic] # local topics
|
||||||
peers*: Table[string, PubSubPeer] # peerid to peer map
|
peers*: Table[string, PubSubPeer] # peerid to peer map
|
||||||
|
conns*: Table[PeerInfo, HashSet[Connection]] # peers connections
|
||||||
triggerSelf*: bool # trigger own local handler on publish
|
triggerSelf*: bool # trigger own local handler on publish
|
||||||
verifySignature*: bool # enable signature verification
|
verifySignature*: bool # enable signature verification
|
||||||
sign*: bool # enable message signing
|
sign*: bool # enable message signing
|
||||||
|
@ -63,8 +64,7 @@ type
|
||||||
method handleDisconnect*(p: PubSub, peer: PubSubPeer) {.base.} =
|
method handleDisconnect*(p: PubSub, peer: PubSubPeer) {.base.} =
|
||||||
## handle peer disconnects
|
## handle peer disconnects
|
||||||
##
|
##
|
||||||
if not isNil(peer.peerInfo) and
|
if not(isNil(peer)) and peer.peerInfo notin p.conns:
|
||||||
peer.id in p.peers and not peer.inUse():
|
|
||||||
trace "deleting peer", peer = peer.id
|
trace "deleting peer", peer = peer.id
|
||||||
p.peers.del(peer.id)
|
p.peers.del(peer.id)
|
||||||
trace "peer disconnected", peer = peer.id
|
trace "peer disconnected", peer = peer.id
|
||||||
|
@ -72,6 +72,24 @@ method handleDisconnect*(p: PubSub, peer: PubSubPeer) {.base.} =
|
||||||
# metrics
|
# metrics
|
||||||
libp2p_pubsub_peers.set(p.peers.len.int64)
|
libp2p_pubsub_peers.set(p.peers.len.int64)
|
||||||
|
|
||||||
|
proc onConnClose(p: PubSub, conn: Connection) {.async.} =
|
||||||
|
try:
|
||||||
|
let peer = conn.peerInfo
|
||||||
|
await conn.closeEvent.wait()
|
||||||
|
|
||||||
|
if peer in p.conns:
|
||||||
|
p.conns[peer].excl(conn)
|
||||||
|
if p.conns[peer].len <= 0:
|
||||||
|
p.conns.del(peer)
|
||||||
|
|
||||||
|
if peer.id in p.peers:
|
||||||
|
p.handleDisconnect(p.peers[peer.id])
|
||||||
|
|
||||||
|
except CancelledError as exc:
|
||||||
|
raise exc
|
||||||
|
except CatchableError as exc:
|
||||||
|
trace "exception in onConnClose handler", exc = exc.msg
|
||||||
|
|
||||||
proc sendSubs*(p: PubSub,
|
proc sendSubs*(p: PubSub,
|
||||||
peer: PubSubPeer,
|
peer: PubSubPeer,
|
||||||
topics: seq[string],
|
topics: seq[string],
|
||||||
|
@ -87,11 +105,14 @@ proc sendSubs*(p: PubSub,
|
||||||
|
|
||||||
await peer.sendSubOpts(topics, subscribe)
|
await peer.sendSubOpts(topics, subscribe)
|
||||||
except CancelledError as exc:
|
except CancelledError as exc:
|
||||||
p.handleDisconnect(peer)
|
if not(isNil(peer)) and not(isNil(peer.conn)):
|
||||||
|
await peer.conn.close()
|
||||||
|
|
||||||
raise exc
|
raise exc
|
||||||
except CatchableError as exc:
|
except CatchableError as exc:
|
||||||
trace "unable to send subscriptions", exc = exc.msg
|
trace "unable to send subscriptions", exc = exc.msg
|
||||||
p.handleDisconnect(peer)
|
if not(isNil(peer)) and not(isNil(peer.conn)):
|
||||||
|
await peer.conn.close()
|
||||||
|
|
||||||
method subscribeTopic*(p: PubSub,
|
method subscribeTopic*(p: PubSub,
|
||||||
topic: string,
|
topic: string,
|
||||||
|
@ -150,11 +171,19 @@ method handleConn*(p: PubSub,
|
||||||
await conn.close()
|
await conn.close()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# track connection
|
||||||
|
p.conns.mgetOrPut(conn.peerInfo,
|
||||||
|
initHashSet[Connection]())
|
||||||
|
.incl(conn)
|
||||||
|
|
||||||
|
asyncCheck p.onConnClose(conn)
|
||||||
|
|
||||||
proc handler(peer: PubSubPeer, msgs: seq[RPCMsg]) {.async.} =
|
proc handler(peer: PubSubPeer, msgs: seq[RPCMsg]) {.async.} =
|
||||||
# call pubsub rpc handler
|
# call pubsub rpc handler
|
||||||
await p.rpcHandler(peer, msgs)
|
await p.rpcHandler(peer, msgs)
|
||||||
|
|
||||||
let peer = p.getOrCreatePeer(conn.peerInfo, proto)
|
let peer = p.getOrCreatePeer(conn.peerInfo, proto)
|
||||||
|
|
||||||
let topics = toSeq(p.topics.keys)
|
let topics = toSeq(p.topics.keys)
|
||||||
if topics.len > 0:
|
if topics.len > 0:
|
||||||
await p.sendSubs(peer, topics, true)
|
await p.sendSubs(peer, topics, true)
|
||||||
|
@ -168,13 +197,20 @@ method handleConn*(p: PubSub,
|
||||||
except CatchableError as exc:
|
except CatchableError as exc:
|
||||||
trace "exception ocurred in pubsub handle", exc = exc.msg
|
trace "exception ocurred in pubsub handle", exc = exc.msg
|
||||||
finally:
|
finally:
|
||||||
p.handleDisconnect(peer)
|
|
||||||
await conn.close()
|
await conn.close()
|
||||||
|
|
||||||
method subscribePeer*(p: PubSub, conn: Connection) {.base.} =
|
method subscribePeer*(p: PubSub, conn: Connection) {.base.} =
|
||||||
if not(isNil(conn)):
|
if not(isNil(conn)):
|
||||||
let peer = p.getOrCreatePeer(conn.peerInfo, p.codec)
|
|
||||||
trace "subscribing to peer", peerId = conn.peerInfo.id
|
trace "subscribing to peer", peerId = conn.peerInfo.id
|
||||||
|
|
||||||
|
# track connection
|
||||||
|
p.conns.mgetOrPut(conn.peerInfo,
|
||||||
|
initHashSet[Connection]())
|
||||||
|
.incl(conn)
|
||||||
|
|
||||||
|
asyncCheck p.onConnClose(conn)
|
||||||
|
|
||||||
|
let peer = p.getOrCreatePeer(conn.peerInfo, p.codec)
|
||||||
if not peer.connected:
|
if not peer.connected:
|
||||||
peer.conn = conn
|
peer.conn = conn
|
||||||
|
|
||||||
|
@ -183,11 +219,9 @@ method unsubscribePeer*(p: PubSub, peerInfo: PeerInfo) {.base, async.} =
|
||||||
let peer = p.peers[peerInfo.id]
|
let peer = p.peers[peerInfo.id]
|
||||||
|
|
||||||
trace "unsubscribing from peer", peerId = $peerInfo
|
trace "unsubscribing from peer", peerId = $peerInfo
|
||||||
if not(isNil(peer.conn)):
|
if not(isNil(peer)) and not(isNil(peer.conn)):
|
||||||
await peer.conn.close()
|
await peer.conn.close()
|
||||||
|
|
||||||
p.handleDisconnect(peer)
|
|
||||||
|
|
||||||
proc connected*(p: PubSub, peerInfo: PeerInfo): bool =
|
proc connected*(p: PubSub, peerInfo: PeerInfo): bool =
|
||||||
if peerInfo.id in p.peers:
|
if peerInfo.id in p.peers:
|
||||||
let peer = p.peers[peerInfo.id]
|
let peer = p.peers[peerInfo.id]
|
||||||
|
@ -269,6 +303,18 @@ proc sendHelper*(p: PubSub,
|
||||||
|
|
||||||
return (published, failed)
|
return (published, failed)
|
||||||
|
|
||||||
|
proc publishHelper*(p: PubSub,
|
||||||
|
sendPeers: HashSet[PubSubPeer],
|
||||||
|
msgs: seq[Message]): Future[int] {.async.} =
|
||||||
|
# send messages and cleanup failed peers
|
||||||
|
let (published, failed) = await p.sendHelper(sendPeers, msgs)
|
||||||
|
for f in failed:
|
||||||
|
let peer = p.peers.getOrDefault(f)
|
||||||
|
if not(isNil(peer)) and not(isNil(peer.conn)):
|
||||||
|
await peer.conn.close()
|
||||||
|
|
||||||
|
return published.len
|
||||||
|
|
||||||
method publish*(p: PubSub,
|
method publish*(p: PubSub,
|
||||||
topic: string,
|
topic: string,
|
||||||
data: seq[byte]): Future[int] {.base, async.} =
|
data: seq[byte]): Future[int] {.base, async.} =
|
||||||
|
|
|
@ -40,7 +40,6 @@ type
|
||||||
recvdRpcCache: TimedCache[string] # cache for already received messages
|
recvdRpcCache: TimedCache[string] # cache for already received messages
|
||||||
onConnect*: AsyncEvent
|
onConnect*: AsyncEvent
|
||||||
observers*: ref seq[PubSubObserver] # ref as in smart_ptr
|
observers*: ref seq[PubSubObserver] # ref as in smart_ptr
|
||||||
refs: int # how many active connections this peer has
|
|
||||||
|
|
||||||
RPCHandler* = proc(peer: PubSubPeer, msg: seq[RPCMsg]): Future[void] {.gcsafe.}
|
RPCHandler* = proc(peer: PubSubPeer, msg: seq[RPCMsg]): Future[void] {.gcsafe.}
|
||||||
|
|
||||||
|
@ -50,9 +49,6 @@ func hash*(p: PubSubPeer): Hash =
|
||||||
|
|
||||||
proc id*(p: PubSubPeer): string = p.peerInfo.id
|
proc id*(p: PubSubPeer): string = p.peerInfo.id
|
||||||
|
|
||||||
proc inUse*(p: PubSubPeer): bool =
|
|
||||||
p.refs > 0
|
|
||||||
|
|
||||||
proc connected*(p: PubSubPeer): bool =
|
proc connected*(p: PubSubPeer): bool =
|
||||||
not(isNil(p.sendConn))
|
not(isNil(p.sendConn))
|
||||||
|
|
||||||
|
@ -61,7 +57,6 @@ proc `conn=`*(p: PubSubPeer, conn: Connection) =
|
||||||
trace "attaching send connection for peer", peer = p.id
|
trace "attaching send connection for peer", peer = p.id
|
||||||
p.sendConn = conn
|
p.sendConn = conn
|
||||||
p.onConnect.fire()
|
p.onConnect.fire()
|
||||||
p.refs.inc()
|
|
||||||
|
|
||||||
proc conn*(p: PubSubPeer): Connection =
|
proc conn*(p: PubSubPeer): Connection =
|
||||||
p.sendConn
|
p.sendConn
|
||||||
|
@ -86,7 +81,6 @@ proc handle*(p: PubSubPeer, conn: Connection) {.async.} =
|
||||||
debug "starting pubsub read loop for peer", closed = conn.closed
|
debug "starting pubsub read loop for peer", closed = conn.closed
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
p.refs.inc()
|
|
||||||
while not conn.closed:
|
while not conn.closed:
|
||||||
trace "waiting for data", closed = conn.closed
|
trace "waiting for data", closed = conn.closed
|
||||||
let data = await conn.readLp(64 * 1024)
|
let data = await conn.readLp(64 * 1024)
|
||||||
|
@ -124,8 +118,6 @@ proc handle*(p: PubSubPeer, conn: Connection) {.async.} =
|
||||||
except CatchableError as exc:
|
except CatchableError as exc:
|
||||||
trace "Exception occurred in PubSubPeer.handle", exc = exc.msg
|
trace "Exception occurred in PubSubPeer.handle", exc = exc.msg
|
||||||
raise exc
|
raise exc
|
||||||
finally:
|
|
||||||
p.refs.dec()
|
|
||||||
|
|
||||||
proc send*(p: PubSubPeer, msg: RPCMsg) {.async.} =
|
proc send*(p: PubSubPeer, msg: RPCMsg) {.async.} =
|
||||||
logScope:
|
logScope:
|
||||||
|
@ -172,7 +164,6 @@ proc send*(p: PubSubPeer, msg: RPCMsg) {.async.} =
|
||||||
p.sendConn = nil
|
p.sendConn = nil
|
||||||
p.onConnect.clear()
|
p.onConnect.clear()
|
||||||
|
|
||||||
p.refs.dec()
|
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
proc sendSubOpts*(p: PubSubPeer, topics: seq[string], subscribe: bool): Future[void] =
|
proc sendSubOpts*(p: PubSubPeer, topics: seq[string], subscribe: bool): Future[void] =
|
||||||
|
|
|
@ -39,7 +39,7 @@ proc newStandardSwitch*(privKey = none(PrivateKey),
|
||||||
msgIdProvider: MsgIdProvider = defaultMsgIdProvider,
|
msgIdProvider: MsgIdProvider = defaultMsgIdProvider,
|
||||||
rng = newRng(),
|
rng = newRng(),
|
||||||
inTimeout: Duration = 1.minutes,
|
inTimeout: Duration = 1.minutes,
|
||||||
outTimeout: Duration = 5.minutes): Switch =
|
outTimeout: Duration = 1.minutes): Switch =
|
||||||
proc createMplex(conn: Connection): Muxer =
|
proc createMplex(conn: Connection): Muxer =
|
||||||
Mplex.init(
|
Mplex.init(
|
||||||
conn,
|
conn,
|
||||||
|
|
|
@ -211,7 +211,7 @@ suite "Mplex":
|
||||||
check:
|
check:
|
||||||
waitFor(testResetWrite()) == true
|
waitFor(testResetWrite()) == true
|
||||||
|
|
||||||
test "timeout, channel should reset":
|
test "reset - channel should reset on timeout":
|
||||||
proc testResetWrite(): Future[bool] {.async.} =
|
proc testResetWrite(): Future[bool] {.async.} =
|
||||||
proc writeHandler(data: seq[byte]) {.async, gcsafe.} = discard
|
proc writeHandler(data: seq[byte]) {.async, gcsafe.} = discard
|
||||||
let
|
let
|
||||||
|
|
Loading…
Reference in New Issue