560 lines
15 KiB
Go
Raw Normal View History

package discover
import (
"bytes"
"crypto/ecdsa"
"errors"
"fmt"
"net"
"time"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/logger"
2015-04-07 14:57:04 +02:00
"github.com/ethereum/go-ethereum/logger/glog"
"github.com/ethereum/go-ethereum/p2p/nat"
"github.com/ethereum/go-ethereum/rlp"
)
2015-04-27 13:42:30 +02:00
const Version = 4
// Errors
var (
errPacketTooSmall = errors.New("too small")
errBadHash = errors.New("bad hash")
errExpired = errors.New("expired")
errBadVersion = errors.New("version mismatch")
errUnsolicitedReply = errors.New("unsolicited reply")
errUnknownNode = errors.New("unknown node")
errTimeout = errors.New("RPC timeout")
errClosed = errors.New("socket closed")
)
// Timeouts
const (
respTimeout = 500 * time.Millisecond
sendTimeout = 500 * time.Millisecond
2015-02-12 11:59:52 +01:00
expiration = 20 * time.Second
refreshInterval = 1 * time.Hour
)
// RPC packet types
const (
pingPacket = iota + 1 // zero is 'reserved'
pongPacket
findnodePacket
neighborsPacket
)
// RPC request structures
type (
ping struct {
Version uint
From, To rpcEndpoint
Expiration uint64
}
// pong is the reply to ping.
pong struct {
// This field should mirror the UDP envelope address
// of the ping packet, which provides a way to discover the
// the external address (after NAT).
To rpcEndpoint
ReplyTok []byte // This contains the hash of the ping packet.
Expiration uint64 // Absolute timestamp at which the packet becomes invalid.
}
// findnode is a query for nodes close to the given target.
findnode struct {
Target NodeID // doesn't need to be an actual public key
Expiration uint64
}
// reply to findnode
neighbors struct {
2015-04-23 12:11:21 +02:00
Nodes []rpcNode
Expiration uint64
}
2015-04-23 12:11:21 +02:00
rpcNode struct {
IP net.IP // len 4 for IPv4 or 16 for IPv6
UDP uint16 // for discovery protocol
TCP uint16 // for RLPx protocol
ID NodeID
}
rpcEndpoint struct {
IP net.IP // len 4 for IPv4 or 16 for IPv6
UDP uint16 // for discovery protocol
TCP uint16 // for RLPx protocol
}
)
func makeEndpoint(addr *net.UDPAddr, tcpPort uint16) rpcEndpoint {
ip := addr.IP.To4()
if ip == nil {
ip = addr.IP.To16()
}
return rpcEndpoint{IP: ip, UDP: uint16(addr.Port), TCP: tcpPort}
}
2015-04-23 12:11:21 +02:00
func nodeFromRPC(rn rpcNode) (n *Node, valid bool) {
// TODO: don't accept localhost, LAN addresses from internet hosts
2015-04-23 12:11:21 +02:00
// TODO: check public key is on secp256k1 curve
if rn.IP.IsMulticast() || rn.IP.IsUnspecified() || rn.UDP == 0 {
return nil, false
}
return newNode(rn.ID, rn.IP, rn.UDP, rn.TCP), true
}
func nodeToRPC(n *Node) rpcNode {
return rpcNode{ID: n.ID, IP: n.IP, UDP: n.UDP, TCP: n.TCP}
}
type packet interface {
handle(t *udp, from *net.UDPAddr, fromID NodeID, mac []byte) error
}
type conn interface {
ReadFromUDP(b []byte) (n int, addr *net.UDPAddr, err error)
WriteToUDP(b []byte, addr *net.UDPAddr) (n int, err error)
Close() error
LocalAddr() net.Addr
}
// udp implements the RPC protocol.
type udp struct {
conn conn
priv *ecdsa.PrivateKey
ourEndpoint rpcEndpoint
addpending chan *pending
gotreply chan reply
closing chan struct{}
nat nat.Interface
*Table
}
// pending represents a pending reply.
//
// some implementations of the protocol wish to send more than one
// reply packet to findnode. in general, any neighbors packet cannot
// be matched up with a specific findnode packet.
//
// our implementation handles this by storing a callback function for
// each pending reply. incoming packets from a node are dispatched
// to all the callback functions for that node.
type pending struct {
// these fields must match in the reply.
from NodeID
ptype byte
// time when the request must complete
deadline time.Time
// callback is called when a matching reply arrives. if it returns
// true, the callback is removed from the pending reply queue.
// if it returns false, the reply is considered incomplete and
// the callback will be invoked again for the next matching reply.
callback func(resp interface{}) (done bool)
// errc receives nil when the callback indicates completion or an
// error if no further reply is received within the timeout.
errc chan<- error
}
type reply struct {
from NodeID
ptype byte
data interface{}
// loop indicates whether there was
// a matching request by sending on this channel.
matched chan<- bool
}
// ListenUDP returns a new table that listens for UDP packets on laddr.
func ListenUDP(priv *ecdsa.PrivateKey, laddr string, natm nat.Interface, nodeDBPath string) (*Table, error) {
addr, err := net.ResolveUDPAddr("udp", laddr)
if err != nil {
return nil, err
}
conn, err := net.ListenUDP("udp", addr)
if err != nil {
return nil, err
}
tab, _ := newUDP(priv, conn, natm, nodeDBPath)
2015-04-07 14:57:04 +02:00
glog.V(logger.Info).Infoln("Listening,", tab.self)
return tab, nil
}
func newUDP(priv *ecdsa.PrivateKey, c conn, natm nat.Interface, nodeDBPath string) (*Table, *udp) {
udp := &udp{
conn: c,
priv: priv,
closing: make(chan struct{}),
gotreply: make(chan reply),
addpending: make(chan *pending),
}
realaddr := c.LocalAddr().(*net.UDPAddr)
if natm != nil {
if !realaddr.IP.IsLoopback() {
go nat.Map(natm, udp.closing, "udp", realaddr.Port, realaddr.Port, "ethereum discovery")
}
// TODO: react to external IP changes over time.
if ext, err := natm.ExternalIP(); err == nil {
realaddr = &net.UDPAddr{IP: ext, Port: realaddr.Port}
}
}
// TODO: separate TCP port
udp.ourEndpoint = makeEndpoint(realaddr, uint16(realaddr.Port))
udp.Table = newTable(udp, PubkeyID(&priv.PublicKey), realaddr, nodeDBPath)
go udp.loop()
go udp.readLoop()
return udp.Table, udp
}
func (t *udp) close() {
close(t.closing)
t.conn.Close()
// TODO: wait for the loops to end.
}
// ping sends a ping message to the given node and waits for a reply.
func (t *udp) ping(toid NodeID, toaddr *net.UDPAddr) error {
// TODO: maybe check for ReplyTo field in callback to measure RTT
errc := t.pending(toid, pongPacket, func(interface{}) bool { return true })
t.send(toaddr, pingPacket, ping{
Version: Version,
From: t.ourEndpoint,
To: makeEndpoint(toaddr, 0), // TODO: maybe use known TCP port from DB
Expiration: uint64(time.Now().Add(expiration).Unix()),
})
return <-errc
}
func (t *udp) waitping(from NodeID) error {
return <-t.pending(from, pingPacket, func(interface{}) bool { return true })
}
// findnode sends a findnode request to the given node and waits until
// the node has sent up to k neighbors.
func (t *udp) findnode(toid NodeID, toaddr *net.UDPAddr, target NodeID) ([]*Node, error) {
nodes := make([]*Node, 0, bucketSize)
nreceived := 0
errc := t.pending(toid, neighborsPacket, func(r interface{}) bool {
reply := r.(*neighbors)
2015-04-23 12:11:21 +02:00
for _, rn := range reply.Nodes {
nreceived++
2015-04-23 12:11:21 +02:00
if n, valid := nodeFromRPC(rn); valid {
nodes = append(nodes, n)
}
}
2015-02-12 11:59:52 +01:00
return nreceived >= bucketSize
})
t.send(toaddr, findnodePacket, findnode{
Target: target,
Expiration: uint64(time.Now().Add(expiration).Unix()),
})
err := <-errc
return nodes, err
}
// pending adds a reply callback to the pending reply queue.
// see the documentation of type pending for a detailed explanation.
func (t *udp) pending(id NodeID, ptype byte, callback func(interface{}) bool) <-chan error {
ch := make(chan error, 1)
p := &pending{from: id, ptype: ptype, callback: callback, errc: ch}
select {
case t.addpending <- p:
// loop will handle it
case <-t.closing:
ch <- errClosed
}
return ch
}
func (t *udp) handleReply(from NodeID, ptype byte, req packet) bool {
matched := make(chan bool)
select {
case t.gotreply <- reply{from, ptype, req, matched}:
// loop will handle it
return <-matched
case <-t.closing:
return false
}
}
// loop runs in its own goroutin. it keeps track of
// the refresh timer and the pending reply queue.
func (t *udp) loop() {
var (
pending []*pending
nextDeadline time.Time
timeout = time.NewTimer(0)
refresh = time.NewTicker(refreshInterval)
)
<-timeout.C // ignore first timeout
defer refresh.Stop()
defer timeout.Stop()
rearmTimeout := func() {
now := time.Now()
if len(pending) == 0 || now.Before(nextDeadline) {
return
}
nextDeadline = pending[0].deadline
timeout.Reset(nextDeadline.Sub(now))
}
for {
select {
case <-refresh.C:
go t.refresh()
case <-t.closing:
for _, p := range pending {
p.errc <- errClosed
}
pending = nil
return
case p := <-t.addpending:
p.deadline = time.Now().Add(respTimeout)
pending = append(pending, p)
rearmTimeout()
case r := <-t.gotreply:
var matched bool
for i := 0; i < len(pending); i++ {
if p := pending[i]; p.from == r.from && p.ptype == r.ptype {
matched = true
if p.callback(r.data) {
// callback indicates the request is done, remove it.
p.errc <- nil
copy(pending[i:], pending[i+1:])
pending = pending[:len(pending)-1]
i--
}
}
}
r.matched <- matched
case now := <-timeout.C:
// notify and remove callbacks whose deadline is in the past.
i := 0
for ; i < len(pending) && now.After(pending[i].deadline); i++ {
pending[i].errc <- errTimeout
}
if i > 0 {
copy(pending, pending[i:])
pending = pending[:len(pending)-i]
}
rearmTimeout()
}
}
}
const (
macSize = 256 / 8
sigSize = 520 / 8
headSize = macSize + sigSize // space of packet frame data
)
var (
headSpace = make([]byte, headSize)
// Neighbors responses are sent across multiple packets to
// stay below the 1280 byte limit. We compute the maximum number
// of entries by stuffing a packet until it grows too large.
maxNeighbors int
)
func init() {
p := neighbors{Expiration: ^uint64(0)}
maxSizeNode := rpcNode{IP: make(net.IP, 16), UDP: ^uint16(0), TCP: ^uint16(0)}
for n := 0; ; n++ {
p.Nodes = append(p.Nodes, maxSizeNode)
size, _, err := rlp.EncodeToReader(p)
if err != nil {
// If this ever happens, it will be caught by the unit tests.
panic("cannot encode: " + err.Error())
}
if headSize+size+1 >= 1280 {
maxNeighbors = n
break
}
}
}
func (t *udp) send(toaddr *net.UDPAddr, ptype byte, req interface{}) error {
packet, err := encodePacket(t.priv, ptype, req)
if err != nil {
return err
}
2015-04-03 02:20:44 +02:00
glog.V(logger.Detail).Infof(">>> %v %T\n", toaddr, req)
if _, err = t.conn.WriteToUDP(packet, toaddr); err != nil {
2015-04-07 14:57:04 +02:00
glog.V(logger.Detail).Infoln("UDP send failed:", err)
}
return err
}
func encodePacket(priv *ecdsa.PrivateKey, ptype byte, req interface{}) ([]byte, error) {
b := new(bytes.Buffer)
b.Write(headSpace)
b.WriteByte(ptype)
if err := rlp.Encode(b, req); err != nil {
2015-04-07 14:57:04 +02:00
glog.V(logger.Error).Infoln("error encoding packet:", err)
return nil, err
}
packet := b.Bytes()
sig, err := crypto.Sign(crypto.Sha3(packet[headSize:]), priv)
if err != nil {
2015-04-07 14:57:04 +02:00
glog.V(logger.Error).Infoln("could not sign packet:", err)
return nil, err
}
copy(packet[macSize:], sig)
// add the hash to the front. Note: this doesn't protect the
// packet in any way. Our public key will be part of this hash in
// The future.
copy(packet, crypto.Sha3(packet[macSize:]))
return packet, nil
}
// readLoop runs in its own goroutine. it handles incoming UDP packets.
func (t *udp) readLoop() {
defer t.conn.Close()
// Discovery packets are defined to be no larger than 1280 bytes.
// Packets larger than this size will be cut at the end and treated
// as invalid because their hash won't match.
UDP Interop. Limit datagrams to 1280bytes. We don't have a UDP which specifies any messages that will be 4KB. Aside from being implemented for months and a necessity for encryption and piggy-backing packets, 1280bytes is ideal, and, means this TODO can be completed! Why 1280 bytes? * It's less than the default MTU for most WAN/LAN networks. That means fewer fragmented datagrams (esp on well-connected networks). * Fragmented datagrams and dropped packets suck and add latency while OS waits for a dropped fragment to never arrive (blocking readLoop()) * Most of our packets are < 1280 bytes. * 1280 bytes is minimum datagram size and MTU for IPv6 -- on IPv6, a datagram < 1280bytes will *never* be fragmented. UDP datagrams are dropped. A lot! And fragmented datagrams are worse. If a datagram has a 30% chance of being dropped, then a fragmented datagram has a 60% chance of being dropped. More importantly, we have signed packets and can't do anything with a packet unless we receive the entire datagram because the signature can't be verified. The same is true when we have encrypted packets. So the solution here to picking an ideal buffer size for receiving datagrams is a number under 1400bytes. And the lower-bound value for IPv6 of 1280 bytes make's it a non-decision. On IPv4 most ISPs and 3g/4g/let networks have an MTU just over 1400 -- and *never* over 1500. Never -- that means packets over 1500 (in reality: ~1450) bytes are fragmented. And probably dropped a lot. Just to prove the point, here are pings sending non-fragmented packets over wifi/ISP, and a second set of pings via cell-phone tethering. It's important to note that, if *any* router between my system and the EC2 node has a lower MTU, the message would not go through: On wifi w/normal ISP: localhost:Debug $ ping -D -s 1450 52.6.250.242 PING 52.6.250.242 (52.6.250.242): 1450 data bytes 1458 bytes from 52.6.250.242: icmp_seq=0 ttl=42 time=104.831 ms 1458 bytes from 52.6.250.242: icmp_seq=1 ttl=42 time=119.004 ms ^C --- 52.6.250.242 ping statistics --- 2 packets transmitted, 2 packets received, 0.0% packet loss round-trip min/avg/max/stddev = 104.831/111.918/119.004/7.087 ms localhost:Debug $ ping -D -s 1480 52.6.250.242 PING 52.6.250.242 (52.6.250.242): 1480 data bytes ping: sendto: Message too long ping: sendto: Message too long Request timeout for icmp_seq 0 ping: sendto: Message too long Request timeout for icmp_seq 1 Tethering to O2: localhost:Debug $ ping -D -s 1480 52.6.250.242 PING 52.6.250.242 (52.6.250.242): 1480 data bytes ping: sendto: Message too long ping: sendto: Message too long Request timeout for icmp_seq 0 ^C --- 52.6.250.242 ping statistics --- 2 packets transmitted, 0 packets received, 100.0% packet loss localhost:Debug $ ping -D -s 1450 52.6.250.242 PING 52.6.250.242 (52.6.250.242): 1450 data bytes 1458 bytes from 52.6.250.242: icmp_seq=0 ttl=42 time=107.844 ms 1458 bytes from 52.6.250.242: icmp_seq=1 ttl=42 time=105.127 ms 1458 bytes from 52.6.250.242: icmp_seq=2 ttl=42 time=120.483 ms 1458 bytes from 52.6.250.242: icmp_seq=3 ttl=42 time=102.136 ms
2015-05-13 19:03:00 +02:00
buf := make([]byte, 1280)
for {
nbytes, from, err := t.conn.ReadFromUDP(buf)
if err != nil {
return
}
t.handlePacket(from, buf[:nbytes])
}
}
func (t *udp) handlePacket(from *net.UDPAddr, buf []byte) error {
packet, fromID, hash, err := decodePacket(buf)
if err != nil {
glog.V(logger.Debug).Infof("Bad packet from %v: %v\n", from, err)
return err
}
status := "ok"
if err = packet.handle(t, from, fromID, hash); err != nil {
status = err.Error()
}
glog.V(logger.Detail).Infof("<<< %v %T: %s\n", from, packet, status)
return err
}
func decodePacket(buf []byte) (packet, NodeID, []byte, error) {
if len(buf) < headSize+1 {
return nil, NodeID{}, nil, errPacketTooSmall
}
hash, sig, sigdata := buf[:macSize], buf[macSize:headSize], buf[headSize:]
shouldhash := crypto.Sha3(buf[macSize:])
if !bytes.Equal(hash, shouldhash) {
return nil, NodeID{}, nil, errBadHash
}
fromID, err := recoverNodeID(crypto.Sha3(buf[headSize:]), sig)
if err != nil {
return nil, NodeID{}, hash, err
}
var req packet
switch ptype := sigdata[0]; ptype {
case pingPacket:
req = new(ping)
case pongPacket:
req = new(pong)
case findnodePacket:
req = new(findnode)
case neighborsPacket:
req = new(neighbors)
default:
return nil, fromID, hash, fmt.Errorf("unknown type: %d", ptype)
}
2015-04-14 12:02:23 +02:00
err = rlp.DecodeBytes(sigdata[1:], req)
return req, fromID, hash, err
}
func (req *ping) handle(t *udp, from *net.UDPAddr, fromID NodeID, mac []byte) error {
if expired(req.Expiration) {
return errExpired
}
if req.Version != Version {
return errBadVersion
}
t.send(from, pongPacket, pong{
To: makeEndpoint(from, req.From.TCP),
ReplyTok: mac,
Expiration: uint64(time.Now().Add(expiration).Unix()),
})
if !t.handleReply(fromID, pingPacket, req) {
// Note: we're ignoring the provided IP address right now
go t.bond(true, fromID, from, req.From.TCP)
}
return nil
}
func (req *pong) handle(t *udp, from *net.UDPAddr, fromID NodeID, mac []byte) error {
if expired(req.Expiration) {
return errExpired
}
if !t.handleReply(fromID, pongPacket, req) {
return errUnsolicitedReply
}
return nil
}
func (req *findnode) handle(t *udp, from *net.UDPAddr, fromID NodeID, mac []byte) error {
if expired(req.Expiration) {
return errExpired
}
if t.db.node(fromID) == nil {
// No bond exists, we don't process the packet. This prevents
// an attack vector where the discovery protocol could be used
// to amplify traffic in a DDOS attack. A malicious actor
// would send a findnode request with the IP address and UDP
// port of the target as the source address. The recipient of
// the findnode packet would then send a neighbors packet
// (which is a much bigger packet than findnode) to the victim.
return errUnknownNode
}
target := crypto.Sha3Hash(req.Target[:])
t.mutex.Lock()
closest := t.closest(target, bucketSize).entries
t.mutex.Unlock()
p := neighbors{Expiration: uint64(time.Now().Add(expiration).Unix())}
// Send neighbors in chunks with at most maxNeighbors per packet
// to stay below the 1280 byte limit.
2015-04-23 12:11:21 +02:00
for i, n := range closest {
p.Nodes = append(p.Nodes, nodeToRPC(n))
if len(p.Nodes) == maxNeighbors || i == len(closest)-1 {
t.send(from, neighborsPacket, p)
p.Nodes = p.Nodes[:0]
}
}
return nil
}
func (req *neighbors) handle(t *udp, from *net.UDPAddr, fromID NodeID, mac []byte) error {
if expired(req.Expiration) {
return errExpired
}
if !t.handleReply(fromID, neighborsPacket, req) {
return errUnsolicitedReply
}
return nil
}
func expired(ts uint64) bool {
return time.Unix(int64(ts), 0).Before(time.Now())
}