Revert "Channel leaks (#413)" (#417)

This reverts commit 1de1d49223.
This commit is contained in:
Jacek Sieka 2020-11-01 21:49:25 +01:00 committed by GitHub
parent 9c1633bf87
commit 03639f1446
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 104 additions and 131 deletions

View File

@ -66,6 +66,12 @@ proc open*(s: LPChannel) {.async, gcsafe.} =
method closed*(s: LPChannel): bool =
s.closedLocal
proc closeUnderlying(s: LPChannel): Future[void] {.async.} =
## Channels may be closed for reading and writing in any order - we'll close
## the underlying bufferstream when both directions are closed
if s.closedLocal and s.isEof:
await procCall BufferStream(s).close()
proc reset*(s: LPChannel) {.async, gcsafe.} =
if s.isClosed:
trace "Already closed", s
@ -109,44 +115,30 @@ proc reset*(s: LPChannel) {.async, gcsafe.} =
trace "Channel reset", s
proc closeUnderlying*(s: LPChannel): Future[void] {.async.} =
## Channels may be closed for reading and writing in any order - we'll close
## the underlying bufferstream when both directions are closed
if s.closedLocal and s.isEof:
trace "Closing underlying", s, conn = s.conn
await procCall BufferStream(s).close()
method close*(s: LPChannel) {.async, gcsafe.} =
## Close channel for writing - a message will be sent to the other peer
## informing them that the channel is closed and that we're waiting for
## their acknowledgement.
##
if s.closedLocal:
trace "Already closed", s
return
s.closedLocal = true
try:
if s.closedLocal:
trace "Already closed", s
return
s.closedLocal = true
trace "Closing channel", s, conn = s.conn, len = s.len
trace "Closing channel", s, conn = s.conn, len = s.len
if s.isOpen:
try:
await s.conn.writeMsg(s.id, s.closeCode) # write close
except CancelledError as exc:
raise exc
except CatchableError as exc:
# It's harmless that close message cannot be sent - the connection is
# likely down already
trace "Cannot send close message", s, id = s.id
if s.isOpen:
trace "Sending close msg", s, conn = s.conn
await s.conn.writeMsg(s.id, s.closeCode).wait(10.seconds) # write close
trace "Closed channel", s, len = s.len
except CancelledError as exc:
trace "Cancelling close", s, conn = s.conn
# need to reset here otherwise the close sequence doesn't complete and
# the channel leaks since none of it's `onClose` events trigger
await s.reset()
raise exc
except CatchableError as exc:
# need to reset here otherwise the close sequence doesn't complete and
# the channel leaks since none of it's `onClose` events trigger
trace "Cannot send close message", exc = exc.msg, s, conn = s.conn
await s.reset()
finally:
await s.closeUnderlying() # maybe already eofed
await s.closeUnderlying() # maybe already eofed
trace "Closed channel", s, len = s.len
method initStream*(s: LPChannel) =
if s.objName.len == 0:

View File

@ -60,7 +60,7 @@ proc cleanupChann(m: Mplex, chann: LPChannel) {.async, inline.} =
try:
await chann.join()
m.channels[chann.initiator].del(chann.id)
trace "cleaned up channel", m, s = chann
trace "cleaned up channel", m, chann
when defined(libp2p_expensive_metrics):
libp2p_mplex_channels.set(
@ -68,7 +68,7 @@ proc cleanupChann(m: Mplex, chann: LPChannel) {.async, inline.} =
labelValues = [$chann.initiator, $m.connection.peerInfo.peerId])
except CatchableError as exc:
# This is top-level procedure which will work as separate task, so it
# do not need to propagate CancelledError, and no other exceptions should
# do not need to propogate CancelledError, and no other exceptions should
# happen here
warn "Error cleaning up mplex channel", m, chann, msg = exc.msg
@ -118,7 +118,7 @@ proc handleStream(m: Mplex, chann: LPChannel) {.async.} =
doAssert(chann.closed, "connection not closed by handler!")
except CatchableError as exc:
# This is top-level procedure which will work as separate task, so it
# do not need to propagate CancelledError.
# do not need to propogate CancelledError.
trace "Exception in mplex stream handler", m, chann, msg = exc.msg
await chann.reset()
@ -184,13 +184,11 @@ method handle*(m: Mplex) {.async, gcsafe.} =
except CancelledError:
# This procedure is spawned as task and it is not part of public API, so
# there no way for this procedure to be cancelled implicitly.
debug "Cancellation in mplex handler", m
except LPStreamClosedError as exc:
trace "Stream Closed", m, msg = exc.msg
debug "Unexpected cancellation in mplex handler", m
except LPStreamEOFError as exc:
trace "Stream EOF", m, msg = exc.msg
except CatchableError as exc:
warn "Exception in mplex read loop", m, msg = exc.msg
warn "Unexpected exception in mplex read loop", m, msg = exc.msg
finally:
await m.close()
trace "Stopped mplex handler", m

View File

@ -73,12 +73,12 @@ type
msgSeqno*: uint64
anonymize*: bool # if we omit fromPeer and seqno from RPC messages we send
method unsubscribePeer*(p: PubSub, peer: PeerID) {.base.} =
method unsubscribePeer*(p: PubSub, peerId: PeerID) {.base.} =
## handle peer disconnects
##
trace "Unsubscribing pubsub peer", peer
p.peers.del(peer)
trace "unsubscribing pubsub peer", peerId
p.peers.del(peerId)
libp2p_pubsub_peers.set(p.peers.len.int64)
@ -217,7 +217,6 @@ method subscribePeer*(p: PubSub, peer: PeerID) {.base.} =
## messages
##
trace "Subscribing peer", peer
let peer = p.getOrCreatePeer(peer, p.codecs)
peer.outbound = true # flag as outbound
@ -339,9 +338,9 @@ method validate*(p: PubSub, message: Message): Future[ValidationResult] {.async,
if res != ValidationResult.Accept:
result = res
break
case result
of ValidationResult.Accept:
of ValidationResult.Accept:
libp2p_pubsub_validation_success.inc()
of ValidationResult.Reject:
libp2p_pubsub_validation_failure.inc()
@ -390,7 +389,7 @@ proc init*[PubParams: object | bool](
switch.addPeerEventHandler(peerEventHandler, PeerEvent.Left)
pubsub.initPubSub()
return pubsub

View File

@ -42,8 +42,7 @@ proc init*(T: type SecureConn,
peerInfo: peerInfo,
observedAddr: observedAddr,
closeEvent: conn.closeEvent,
timeout: timeout,
dir: conn.dir)
timeout: timeout)
result.initStream()
method initStream*(s: SecureConn) =
@ -53,7 +52,6 @@ method initStream*(s: SecureConn) =
procCall Connection(s).initStream()
method close*(s: SecureConn) {.async.} =
trace "closing secure conn", s, dir = s.dir
if not(isNil(s.stream)):
await s.stream.close()
@ -76,6 +74,10 @@ proc handleConn*(s: Secure,
try:
await conn.join()
await sconn.close()
except CancelledError:
# This is top-level procedure which will work as separate task, so it
# do not need to propogate CancelledError.
discard
except CatchableError as exc:
trace "error cleaning up secure connection", err = exc.msg, sconn
@ -125,18 +127,18 @@ method readOnce*(s: SecureConn,
if not isNil(err):
if not (err of LPStreamEOFError):
warn "error while reading message from secure connection, closing.",
error=err.name,
message=err.msg,
warn "error while reading message from secure connection, closing.",
error=err.name,
message=err.msg,
connection=s
await s.close()
raise err
s.activity = true
if buf.len == 0:
raise newLPStreamIncompleteError()
s.buf.add(buf)
var p = cast[ptr UncheckedArray[byte]](pbytes)

View File

@ -7,6 +7,29 @@
## This file may not be copied, modified, or distributed except according to
## those terms.
## This module implements an asynchronous buffer stream
## which emulates physical async IO.
##
## The stream is based on the standard library's `Deque`,
## which is itself based on a ring buffer.
##
## It works by exposing a regular LPStream interface and
## a method ``pushTo`` to push data to the internal read
## buffer; as well as a handler that can be registered
## that gets triggered on every write to the stream. This
## allows using the buffered stream as a sort of proxy,
## which can be consumed as a regular LPStream but allows
## injecting data for reads and intercepting writes.
##
## Another notable feature is that the stream is fully
## ordered and asynchronous. Reads are queued up in order
## and are suspended when not enough data available. This
## allows preserving backpressure while maintaining full
## asynchrony. Both writing to the internal buffer with
## ``pushTo`` as well as reading with ``read*` methods,
## will suspend until either the amount of elements in the
## buffer goes below ``maxSize`` or more data becomes available.
import std/strformat
import stew/byteutils
import chronos, chronicles, metrics

View File

@ -32,23 +32,23 @@ method initStream*(s: ChronosStream) =
s.objName = "ChronosStream"
s.timeoutHandler = proc() {.async, gcsafe.} =
trace "Idle timeout expired, closing ChronosStream", s
trace "idle timeout expired, closing ChronosStream"
await s.close()
procCall Connection(s).initStream()
proc init*(C: type ChronosStream,
client: StreamTransport,
dir: Direction,
timeout = DefaultChronosStreamTimeout): ChronosStream =
result = C(client: client,
timeout: timeout,
dir: dir)
timeout: timeout)
result.initStream()
template withExceptions(body: untyped) =
try:
body
except CancelledError as exc:
raise exc
except TransportIncompleteError:
# for all intents and purposes this is an EOF
raise newLPStreamIncompleteError()

View File

@ -7,7 +7,7 @@
## This file may not be copied, modified, or distributed except according to
## those terms.
import std/[hashes, oids, strformat, sugar]
import std/[hashes, oids, strformat]
import chronicles, chronos, metrics
import lpstream,
../multiaddress,
@ -25,6 +25,9 @@ const
type
TimeoutHandler* = proc(): Future[void] {.gcsafe.}
Direction* {.pure.} = enum
None, In, Out
Connection* = ref object of LPStream
activity*: bool # reset every time data is sent or received
timeout*: Duration # channel timeout if no activity
@ -32,6 +35,7 @@ type
timeoutHandler*: TimeoutHandler # timeout handler
peerInfo*: PeerInfo
observedAddr*: Multiaddress
dir*: Direction
ConnectionTracker* = ref object of TrackerBase
opened*: uint64
@ -81,9 +85,7 @@ method initStream*(s: Connection) =
s.timerTaskFut = s.timeoutMonitor()
if isNil(s.timeoutHandler):
s.timeoutHandler = proc(): Future[void] =
trace "Idle timeout expired, closing connection", s
s.close()
s.timeoutHandler = proc(): Future[void] = s.close()
inc getConnectionTracker().opened
@ -102,7 +104,7 @@ func hash*(p: Connection): Hash =
cast[pointer](p).hash
proc timeoutMonitor(s: Connection) {.async, gcsafe.} =
## monitor the channel for inactivity
## monitor the channel for innactivity
##
## if the timeout was hit, it means that
## neither incoming nor outgoing activity
@ -123,10 +125,9 @@ proc timeoutMonitor(s: Connection) {.async, gcsafe.} =
break
# reset channel on inactivity timeout
# reset channel on innactivity timeout
trace "Connection timed out", s
if not(isNil(s.timeoutHandler)):
trace "Calling timeout handler", s
await s.timeoutHandler()
except CancelledError as exc:

View File

@ -15,8 +15,7 @@ import ../varint,
../peerinfo,
../multiaddress
declareGauge(libp2p_open_streams,
"open stream instances", labels = ["type", "dir"])
declareGauge(libp2p_open_streams, "open stream instances", labels = ["type"])
export oids
@ -24,16 +23,12 @@ logScope:
topics = "lpstream"
type
Direction* {.pure.} = enum
In, Out
LPStream* = ref object of RootObj
closeEvent*: AsyncEvent
isClosed*: bool
isEof*: bool
objName*: string
oid*: Oid
dir*: Direction
LPStreamError* = object of CatchableError
LPStreamIncompleteError* = object of LPStreamError
@ -91,8 +86,8 @@ method initStream*(s: LPStream) {.base.} =
s.closeEvent = newAsyncEvent()
s.oid = genOid()
libp2p_open_streams.inc(labelValues = [s.objName, $s.dir])
trace "Stream created", s, objName = s.objName, dir = $s.dir
libp2p_open_streams.inc(labelValues = [s.objName])
trace "Stream created", s, objName = s.objName
proc join*(s: LPStream): Future[void] =
s.closeEvent.wait()
@ -219,8 +214,8 @@ method closeImpl*(s: LPStream): Future[void] {.async, base.} =
## Implementation of close - called only once
trace "Closing stream", s, objName = s.objName
s.closeEvent.fire()
libp2p_open_streams.dec(labelValues = [s.objName, $s.dir])
trace "Closed stream", s, objName = s.objName, dir = $s.dir
libp2p_open_streams.dec(labelValues = [s.objName])
trace "Closed stream", s, objName = s.objName
method close*(s: LPStream): Future[void] {.base, async.} = # {.raises [Defect].}
## close the stream - this may block, but will not raise exceptions
@ -228,7 +223,6 @@ method close*(s: LPStream): Future[void] {.base, async.} = # {.raises [Defect].}
if s.isClosed:
trace "Already closed", s
return
s.isClosed = true # Set flag before performing virtual close
# An separate implementation method is used so that even when derived types

View File

@ -292,10 +292,10 @@ proc internalConnect(s: Switch,
let dialed = try:
await t.dial(a)
except CancelledError as exc:
trace "Dialing canceled", msg = exc.msg, peerId, address = $a
trace "Dialing canceled", msg = exc.msg, peerId
raise exc
except CatchableError as exc:
trace "Dialing failed", msg = exc.msg, peerId, address = $a
trace "Dialing failed", msg = exc.msg, peerId
libp2p_failed_dials.inc()
continue # Try the next address

View File

@ -62,57 +62,31 @@ proc setupTcpTransportTracker(): TcpTransportTracker =
proc connHandler*(t: TcpTransport,
client: StreamTransport,
initiator: bool): Connection =
debug "Handling tcp connection", address = $client.remoteAddress,
initiator = initiator,
clients = t.clients.len
let conn = Connection(
ChronosStream.init(
client,
dir = if initiator:
Direction.Out
else:
Direction.In))
trace "handling connection", address = $client.remoteAddress
let conn: Connection = Connection(ChronosStream.init(client))
conn.observedAddr = MultiAddress.init(client.remoteAddress).tryGet()
if not initiator:
if not isNil(t.handler):
t.handlers &= t.handler(conn)
proc cleanup() {.async.} =
try:
await client.join() or conn.join()
trace "Cleaning up client", addrs = $client.remoteAddress,
conn = $conn.oid
t.clients.keepItIf( it != client )
if not(isNil(conn) and not conn.closed()):
await client.join()
trace "cleaning up client", addrs = $client.remoteAddress, connoid = $conn.oid
if not(isNil(conn)):
await conn.close()
if not(isNil(client) and client.closed()):
await client.closeWait()
trace "Cleaned up client", addrs = $client.remoteAddress,
conn = $conn.oid
t.clients.keepItIf(it != client)
except CancelledError:
# This is top-level procedure which will work as separate task, so it
# do not need to propogate CancelledError.
trace "Unexpected cancellation in transport's cleanup"
except CatchableError as exc:
let useExc {.used.} = exc
debug "Error cleaning up client", errMsg = exc.msg, s = conn
trace "error cleaning up client", exc = exc.msg
t.clients.add(client)
# All the errors are handled inside `cleanup()` procedure.
asyncSpawn cleanup()
try:
conn.observedAddr = MultiAddress.init(client.remoteAddress).tryGet()
except CatchableError as exc:
trace "Unable to get remote address", exc = exc.msg
if not isNil(client):
client.close()
raise exc
return conn
result = conn
proc connCb(server: StreamServer,
client: StreamTransport) {.async, gcsafe.} =
@ -123,12 +97,10 @@ proc connCb(server: StreamServer,
# as it's added inside connHandler
discard t.connHandler(client, false)
except CancelledError as exc:
debug "Connection setup cancelled", exc = exc.msg
await client.closeWait()
raise exc
except CatchableError as exc:
debug "Connection setup failed", exc = exc.msg
await client.closeWait()
except CatchableError as err:
debug "Connection setup failed", err = err.msg
client.close()
proc init*(T: type TcpTransport, flags: set[ServerFlags] = {}): T =
result = T(flags: flags)
@ -197,16 +169,8 @@ method dial*(t: TcpTransport,
Future[Connection] {.async, gcsafe.} =
trace "dialing remote peer", address = $address
## dial a peer
var client: StreamTransport
try:
client = await connect(address)
except CatchableError as exc:
trace "Exception dialing peer", exc = exc.msg
if not(isNil(client)):
await client.closeWait()
raise exc
return t.connHandler(client, true)
let client: StreamTransport = await connect(address)
result = t.connHandler(client, true)
method handles*(t: TcpTransport, address: MultiAddress): bool {.gcsafe.} =
if procCall Transport(t).handles(address):