Support scraping from HTTP trackers

Matt Joiner 2023-09-26 22:19:51 +10:00
parent cff4595d06
commit 11833b45cf
5 changed files with 71 additions and 21 deletions
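The scrape subcommand previously parsed the tracker URL itself and always dialed a UDP connection client; it now defers to tracker.NewClient, which returns a client suited to the URL's scheme: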


@@ -3,12 +3,11 @@ package main
 import (
 	"context"
 	"fmt"
-	"net/url"
 
 	"github.com/davecgh/go-spew/spew"
 
 	"github.com/anacrolix/torrent"
-	"github.com/anacrolix/torrent/tracker/udp"
+	"github.com/anacrolix/torrent/tracker"
 )
 
 type scrapeCfg struct {
@@ -17,25 +16,13 @@ type scrapeCfg struct {
 }
 
 func scrape(flags scrapeCfg) error {
-	trackerUrl, err := url.Parse(flags.Tracker)
+	cc, err := tracker.NewClient(flags.Tracker, tracker.NewClientOpts{})
 	if err != nil {
-		return fmt.Errorf("parsing tracker url: %w", err)
-	}
-	cc, err := udp.NewConnClient(udp.NewConnClientOpts{
-		Network: trackerUrl.Scheme,
-		Host:    trackerUrl.Host,
-		//Ipv6:    nil,
-		//Logger:  log.Logger{},
-	})
-	if err != nil {
-		return fmt.Errorf("creating new udp tracker conn client: %w", err)
+		err = fmt.Errorf("creating new tracker client: %w", err)
+		return err
 	}
 	defer cc.Close()
-	var ihs []udp.InfoHash
-	for _, ih := range flags.InfoHashes {
-		ihs = append(ihs, ih)
-	}
-	scrapeOut, err := cc.Client.Scrape(context.TODO(), ihs)
+	scrapeOut, err := cc.Scrape(context.TODO(), flags.InfoHashes)
 	if err != nil {
 		return fmt.Errorf("scraping: %w", err)
 	}
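With scheme selection pushed down into tracker.NewClient, the command needs no UDP-specific types at all. A minimal sketch of the new call sequence, using a placeholder tracker URL and a zero-valued info hash:

package main

import (
	"context"
	"fmt"

	"github.com/anacrolix/torrent/tracker"
	"github.com/anacrolix/torrent/types/infohash"
)

func main() {
	// Placeholder URL: https:// and udp:// trackers now take the same path.
	cc, err := tracker.NewClient("https://example.com/announce", tracker.NewClientOpts{})
	if err != nil {
		panic(err)
	}
	defer cc.Close()
	var ih infohash.T // zero-valued placeholder; real callers pass actual hashes
	out, err := cc.Scrape(context.TODO(), []infohash.T{ih})
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", out)
}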

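The tracker package's Client interface then grows a matching Scrape method alongside Announce and Close: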

@@ -9,10 +9,12 @@ import (
 	trHttp "github.com/anacrolix/torrent/tracker/http"
 	"github.com/anacrolix/torrent/tracker/udp"
+	"github.com/anacrolix/torrent/types/infohash"
 )
 
 type Client interface {
 	Announce(context.Context, AnnounceRequest, AnnounceOpt) (AnnounceResponse, error)
+	Scrape(ctx context.Context, ihs []infohash.T) (out udp.ScrapeResponse, err error)
 	Close() error
 }
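Both wrappers now have to satisfy the widened interface. Not part of this commit, but the usual compile-time assertion, placed inside the tracker package, would catch a missing method at build time rather than at the first call site:

// Hypothetical guard, not in this commit: fails the build if the
// unexported UDP wrapper (shown further down) stops implementing Client.
var _ Client = (*udpClient)(nil)

The new HTTP-side implementation of Scrape is the bulk of the change: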

tracker/http/scrape.go (new file, +47 lines)

@@ -0,0 +1,47 @@
+package httpTracker
+
+import (
+	"context"
+	"log"
+	"net/http"
+	"net/url"
+
+	"github.com/anacrolix/torrent/bencode"
+	"github.com/anacrolix/torrent/tracker/udp"
+	"github.com/anacrolix/torrent/types/infohash"
+)
+
+type scrapeResponse struct {
+	Files files `bencode:"files"`
+}
+
+// Bencode should support bencode.Unmarshalers from a string in the dict key position.
+type files = map[string]udp.ScrapeInfohashResult
+
+func (cl Client) Scrape(ctx context.Context, ihs []infohash.T) (out udp.ScrapeResponse, err error) {
+	_url := cl.url_.JoinPath("..", "scrape")
+	query, err := url.ParseQuery(_url.RawQuery)
+	if err != nil {
+		return
+	}
+	for _, ih := range ihs {
+		query.Add("info_hash", ih.AsString())
+	}
+	_url.RawQuery = query.Encode()
+	log.Printf("%q", _url.String())
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, _url.String(), nil)
+	if err != nil {
+		return
+	}
+	resp, err := cl.hc.Do(req)
+	if err != nil {
+		return
+	}
+	defer resp.Body.Close()
+	var decodedResp scrapeResponse
+	err = bencode.NewDecoder(resp.Body).Decode(&decodedResp)
+	for _, ih := range ihs {
+		out = append(out, decodedResp.Files[ih.AsString()])
+	}
+	return
+}
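The scrape URL is derived from the announce URL by swapping the last path segment, the convention BEP 48 describes (a tracker only supports scraping if its announce URL ends in "announce"). url.URL.JoinPath cleans the ".." out of the path and carries everything else, query included, across unchanged. A small sketch with a made-up announce URL:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Hypothetical announce URL standing in for the client's stored url_.
	announce, err := url.Parse("https://example.com/a/announce?key=abc")
	if err != nil {
		panic(err)
	}
	// "/a/announce" joined with "../scrape" cleans to "/a/scrape".
	scrape := announce.JoinPath("..", "scrape")
	fmt.Println(scrape) // https://example.com/a/scrape?key=abc
}

The method then re-parses the preserved query and appends one info_hash parameter per requested hash before encoding it back onto the URL.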

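On the UDP side, the existing wrapper around the connection client picks up a forwarding Scrape method: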

@@ -4,8 +4,11 @@ import (
 	"context"
 	"encoding/binary"
 
+	"github.com/anacrolix/generics"
+
 	trHttp "github.com/anacrolix/torrent/tracker/http"
 	"github.com/anacrolix/torrent/tracker/udp"
+	"github.com/anacrolix/torrent/types/infohash"
 )
 
 type udpClient struct {
@@ -13,6 +16,15 @@ type udpClient struct {
 	requestUri string
 }
 
+func (c *udpClient) Scrape(ctx context.Context, ihs []infohash.T) (out udp.ScrapeResponse, err error) {
+	return c.cl.Client.Scrape(
+		ctx,
+		generics.SliceMap(ihs, func(from infohash.T) udp.InfoHash {
+			return from
+		}),
+	)
+}
+
 func (c *udpClient) Close() error {
 	return c.cl.Close()
 }
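generics.SliceMap bridges the two info-hash types element-wise: Go converts individual values between compatible types readily enough, but never whole slices, so a mapping helper is the idiomatic fix. Its shape follows from the call above, a []From and a func(From) To in, a []To out. A stand-alone illustration with stand-in types:

package main

import (
	"fmt"

	"github.com/anacrolix/generics"
)

func main() {
	type From [20]byte // stand-in for infohash.T
	type To [20]byte   // stand-in for udp.InfoHash
	froms := []From{{1}, {2}}
	// []From does not convert to []To directly; map each element instead.
	tos := generics.SliceMap(froms, func(f From) To {
		return To(f)
	})
	fmt.Println(len(tos)) // 2
}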

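Finally, the UDP scrape result struct gains bencode tags so the same type can decode HTTP scrape responses, whose field names come from BEP 48: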

@@ -5,7 +5,9 @@ type ScrapeRequest []InfoHash
 type ScrapeResponse []ScrapeInfohashResult
 
 type ScrapeInfohashResult struct {
-	Seeders   int32
-	Completed int32
-	Leechers  int32
+	// I'm not sure why the fields are named differently for HTTP scrapes.
+	// https://www.bittorrent.org/beps/bep_0048.html
+	Seeders   int32 `bencode:"complete"`
+	Completed int32 `bencode:"downloaded"`
+	Leechers  int32 `bencode:"incomplete"`
 }
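For reference, BEP 48 shapes the HTTP scrape response as a bencoded dict whose "files" key maps each 20-byte info hash, as a raw binary string key, to its stats dict; that keying is why the HTTP implementation's files type is a map[string]udp.ScrapeInfohashResult looked up with ih.AsString(). Laid out with indentation for readability (not literal bencode) and with invented counts, a single-hash response looks like:

d
  5:files d
    20:<20 raw info-hash bytes> d
      8:complete i10e
      10:downloaded i25e
      10:incomplete i3e
    e
  e
e

The complete, downloaded, and incomplete keys land in Seeders, Completed, and Leechers respectively via the new tags.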