From af0955c58b06998b73540195363e6833b1382b42 Mon Sep 17 00:00:00 2001
From: Zahary Karadjov <zahary@gmail.com>
Date: Tue, 18 Aug 2020 13:51:27 +0300
Subject: [PATCH] Add comments explaning a possible deadlock

---
 libp2p/protocols/pubsub/pubsubpeer.nim | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/libp2p/protocols/pubsub/pubsubpeer.nim b/libp2p/protocols/pubsub/pubsubpeer.nim
index 7c084bc..15004d7 100644
--- a/libp2p/protocols/pubsub/pubsubpeer.nim
+++ b/libp2p/protocols/pubsub/pubsubpeer.nim
@@ -147,7 +147,19 @@ proc getSendConn(p: PubSubPeer): Future[Connection] {.async.} =
     # and later close one of them, other implementations such as rust-libp2p
     # become deaf to our messages (potentially due to the clean-up associated
     # with closing connections). To prevent this, we use a lock that ensures
-    # that only a single dial will be performed for each peer:
+    # that only a single dial will be performed for each peer.
+    #
+    # Nevertheless, this approach is still quite problematic because the gossip
+    # sends and their respective dials may be started from the mplex read loop.
+    # This may cause the read loop to get stuck which ultimately results in a
+    # deadlock when the other side tries to send us any other message that must
+    # be routed through mplex (it will be stuck on `pushTo`). Such messages
+    # naturally arise in the process of dialing itself.
+    #
+    # See https://github.com/status-im/nim-libp2p/issues/337
+    #
+    # One possible long-term solution is to avoid "blocking" the mplex read
+    # loop by making the gossip send non-blocking through the use of a queue.
     await p.dialLock.acquire()
 
     # Another concurrent dial may have populated p.sendConn