[NET-6459] Fix issue with wanfed lan ip conflicts. (#19503)

Fix issue with wanfed lan ip conflicts.

Prior to this commit, the connection pools were unaware which datacenter the
connection was associated with. This meant that any time servers with
overlapping LAN IP addresses and node shortnames existed, they would be
incorrectly co-located in the same pool. Whenever this occurred, the servers
would get stuck in an infinite loop of forwarding RPCs to themselves (rather
than the intended remote DC) until they eventually ran out of memory.

Most notably, this issue can occur whenever wan federation through mesh
gateways is enabled.

This fix adds extra metadata to specify which DC the connection is associated
with in the pool.
This commit is contained in:
Derek Menteer 2023-11-06 08:47:12 -06:00 committed by GitHub
parent 395d32e5ad
commit 6baf695cd9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 2 deletions

3
.changelog/19503.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
wan-federation: Fix a bug where servers wan-federated through mesh-gateways could crash due to overlapping LAN IP addresses.
```

View File

@ -49,6 +49,7 @@ type Conn struct {
refCount int32 refCount int32
shouldClose int32 shouldClose int32
dc string
nodeName string nodeName string
addr net.Addr addr net.Addr
session muxSession session muxSession
@ -234,7 +235,7 @@ func (p *ConnPool) acquire(dc string, nodeName string, addr net.Addr) (*Conn, er
addrStr := addr.String() addrStr := addr.String()
poolKey := nodeName + ":" + addrStr poolKey := makePoolKey(dc, nodeName, addrStr)
// Check to see if there's a pooled connection available. This is up // Check to see if there's a pooled connection available. This is up
// here since it should the vastly more common case than the rest // here since it should the vastly more common case than the rest
@ -493,6 +494,7 @@ func (p *ConnPool) getNewConn(dc string, nodeName string, addr net.Addr) (*Conn,
// Wrap the connection // Wrap the connection
c := &Conn{ c := &Conn{
refCount: 1, refCount: 1,
dc: dc,
nodeName: nodeName, nodeName: nodeName,
addr: addr, addr: addr,
session: session, session: session,
@ -514,7 +516,7 @@ func (p *ConnPool) clearConn(conn *Conn) {
// Clear from the cache // Clear from the cache
addrStr := conn.addr.String() addrStr := conn.addr.String()
poolKey := conn.nodeName + ":" + addrStr poolKey := makePoolKey(conn.dc, conn.nodeName, addrStr)
p.Lock() p.Lock()
if c, ok := p.pool[poolKey]; ok && c == conn { if c, ok := p.pool[poolKey]; ok && c == conn {
delete(p.pool, poolKey) delete(p.pool, poolKey)
@ -716,3 +718,8 @@ func (p *ConnPool) reap() {
p.Unlock() p.Unlock()
} }
} }
// makePoolKey builds the map key under which a pooled connection is stored.
// The key has the form "<dc>:<nodeName>:<addrStr>", so two connections that
// share a node name and address string but differ in datacenter get
// different keys.
func makePoolKey(dc, nodeName, addrStr string) string {
	// Single separator used between every component of the key.
	const sep = ":"
	return dc + sep + nodeName + sep + addrStr
}