torrent/webseed/client.go

207 lines
5.3 KiB
Go

package webseed
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"log"
"net/http"
"strings"
"github.com/RoaringBitmap/roaring"
"github.com/anacrolix/torrent/common"
"github.com/anacrolix/torrent/metainfo"
"github.com/anacrolix/torrent/segments"
)
// RequestSpec describes the span of data to fetch with Client.NewRequest. It is an alias for
// segments.Extent, so it carries the Start and Length fields of that type.
type RequestSpec = segments.Extent
// requestPartResult is the outcome of the HTTP request made for a single requestPart: the two
// values returned by the HTTP client's Do call.
type requestPartResult struct {
// The response returned by the HTTP client.
resp *http.Response
// The error returned by the HTTP client.
err error
}
// requestPart is one HTTP request contributing to a Request. NewRequest creates one part for each
// extent reported by the file index for the requested span.
type requestPart struct {
// The HTTP request for this part, bound to the overall Request's context.
req *http.Request
// The extent this part covers, as reported by the file index. Its Start and Length are used to
// build the request and to validate the response.
e segments.Extent
// Receives the single outcome of the HTTP request. NewRequest creates it with capacity 1 and
// recvPartResult closes it after receiving.
result chan requestPartResult
// Launches the HTTP request in a new goroutine, which sends its outcome on result.
start func()
// Wrap http response bodies for such things as download rate limiting.
responseBodyWrapper ResponseBodyWrapper
}
// Request is a handle on an in-flight webseed request created by Client.NewRequest.
type Request struct {
	// Cancels the context shared by all of the request's HTTP requests.
	cancel func()
	// Receives the single RequestResult for the request. NewRequest creates it with capacity 1.
	Result chan RequestResult
}

// Cancel cancels the context that the request's HTTP requests were created with.
func (req Request) Cancel() {
	req.cancel()
}
// Client issues HTTP requests against a single webseed URL. SetInfo must supply the torrent info
// before NewRequest can locate data: NewRequest panics if the file index cannot locate the
// requested span.
type Client struct {
// The HTTP client used to perform the requests.
HttpClient *http.Client
// The webseed URL. SetInfo treats a URL without a trailing "/" as non-conforming when the
// torrent is a directory.
Url string
// Maps a requested span onto per-file extents. Built by SetInfo from the info's file lengths.
fileIndex segments.Index
// The torrent info supplied to SetInfo; passed along when building each HTTP request.
info *metainfo.Info
// The pieces we can request with the Url. We're more likely to ban/block at the file-level
// given that's how requests are mapped to webseeds, but the torrent.Client works at the piece
// level. We can map our file-level adjustments to the pieces here. This probably needs to be
// private in the future, if Client ever starts removing pieces.
Pieces roaring.Bitmap
// Optional wrapper applied to each response body before it is read; nil means no wrapping.
ResponseBodyWrapper ResponseBodyWrapper
// Passed to newRequest when building the URL for each part.
PathEscaper PathEscaper
}
// ResponseBodyWrapper wraps an HTTP response body in another reader, for such things as download
// rate limiting. The returned reader is what the response data is read from.
type ResponseBodyWrapper func(io.Reader) io.Reader
// SetInfo records the torrent info on the client, builds the file index from the info's file
// lengths, and marks every piece of the torrent as requestable from this webseed.
//
// It is a no-op when the torrent is a directory but the webseed URL has no trailing "/".
func (ws *Client) SetInfo(info *metainfo.Info) {
	// In my experience, a multi-file torrent with a URL lacking the trailing slash is a
	// non-conforming webseed. For example the http://ia600500.us.archive.org/1/items URLs in
	// archive.org torrents.
	nonConforming := !strings.HasSuffix(ws.Url, "/") && info.IsDir()
	if nonConforming {
		return
	}
	fileLengths := common.LengthIterFromUpvertedFiles(info.UpvertedFiles())
	ws.fileIndex = segments.NewIndex(fileLengths)
	ws.info = info
	pieceCount := uint64(info.NumPieces())
	ws.Pieces.AddRange(0, pieceCount)
}
// RequestResult is the final outcome of a Request, delivered on Request.Result.
type RequestResult struct {
// The data read from the request's parts. It may be non-empty even when Err is set, holding
// whatever was read before the failure.
Bytes []byte
// The first error encountered while reading the parts, or nil.
Err error
}
// NewRequest starts fetching the span described by r from the webseed and returns a handle whose
// Result channel receives exactly one RequestResult.
//
// The span is split by the file index into one HTTP request per extent. The requests are issued
// one after another by a background goroutine, all sharing a context that Request.Cancel cancels.
//
// It panics if an HTTP request cannot be constructed, or if the file index cannot locate r
// ("request out of file bounds").
func (ws *Client) NewRequest(r RequestSpec) Request {
	ctx, cancel := context.WithCancel(context.Background())
	var parts []requestPart

	// Invoked by the file index for every extent that r maps onto.
	addPart := func(fileIndex int, extent segments.Extent) bool {
		httpReq, err := newRequest(
			ws.Url, fileIndex, ws.info, extent.Start, extent.Length,
			ws.PathEscaper,
		)
		if err != nil {
			panic(err)
		}
		httpReq = httpReq.WithContext(ctx)
		// Buffered so the requesting goroutine never blocks on delivery.
		results := make(chan requestPartResult, 1)
		parts = append(parts, requestPart{
			req:                 httpReq,
			e:                   extent,
			result:              results,
			responseBodyWrapper: ws.ResponseBodyWrapper,
			// Deferred until the reader reaches this part.
			start: func() {
				go func() {
					resp, err := ws.HttpClient.Do(httpReq)
					results <- requestPartResult{resp: resp, err: err}
				}()
			},
		})
		return true
	}
	if !ws.fileIndex.Locate(r, addPart) {
		panic("request out of file bounds")
	}

	out := Request{
		cancel: cancel,
		Result: make(chan RequestResult, 1),
	}
	go func() {
		data, err := readRequestPartResponses(ctx, parts)
		out.Result <- RequestResult{Bytes: data, Err: err}
	}()
	return out
}
// ErrBadResponse is returned when the webseed answers with a response that cannot be used, such
// as an unhandled status code or a 200 OK for a ranged request that cannot be salvaged.
type ErrBadResponse struct {
	// Human-readable description of what was wrong with the response.
	Msg string
	// The offending response. Its Body has been set to nil by the time the error is created.
	Response *http.Response
}

// Error implements the error interface by returning Msg verbatim.
func (e ErrBadResponse) Error() string {
	return e.Msg
}
// recvPartResult waits for the outcome of an already-started request part and writes the part's
// data to buf.
//
// Handling by response status:
//   - 206 Partial Content: the whole body is copied and must be exactly part.e.Length bytes.
//   - 200 OK: tolerated only when the part starts within the first 48 KiB of the file; the bytes
//     before part.e.Start are discarded and the next part.e.Length bytes are copied.
//   - 503 Service Unavailable: ErrTooFast.
//   - anything else: ErrBadResponse.
//
// It returns the HTTP client's error if the request itself failed, ctx.Err() if the context is
// already done once the response arrives, or any error hit while reading the body. The response
// body is closed before returning, and the response's Body field is set to nil.
func recvPartResult(ctx context.Context, buf io.Writer, part requestPart) error {
	result := <-part.result
	// Make sure there's no further results coming, it should be a one-shot channel.
	close(part.result)
	if result.err != nil {
		return result.err
	}
	// The receiver of the deferred call is evaluated now, so clearing the Body field below does
	// not stop the body being closed.
	defer result.resp.Body.Close()
	var body io.Reader = result.resp.Body
	if part.responseBodyWrapper != nil {
		body = part.responseBodyWrapper(body)
	}
	// Prevent further accidental use
	result.resp.Body = nil
	if err := ctx.Err(); err != nil {
		return err
	}
	switch result.resp.StatusCode {
	case http.StatusPartialContent:
		copied, err := io.Copy(buf, body)
		if err != nil {
			return err
		}
		if copied != part.e.Length {
			return fmt.Errorf("got %v bytes, expected %v", copied, part.e.Length)
		}
		return nil
	case http.StatusOK:
		// This number is based on
		// https://archive.org/download/BloodyPitOfHorror/BloodyPitOfHorror.asr.srt. It seems that
		// archive.org might be using a webserver implementation that refuses to do partial
		// responses to small files.
		if part.e.Start >= 48<<10 {
			return ErrBadResponse{"resp status ok but requested range", result.resp}
		}
		if part.e.Start != 0 {
			log.Printf("resp status ok but requested range [url=%q, range=%q]",
				part.req.URL,
				part.req.Header.Get("Range"))
		}
		// Instead of discarding, we could try receiving all the chunks present in the response
		// body. I don't know how one would handle multiple chunk requests resulting in an OK
		// response for the same file. The request algorithm might need to be smarter for
		// that.
		discarded, err := io.CopyN(io.Discard, body, part.e.Start)
		if discarded != 0 {
			log.Printf("discarded %v bytes in webseed request response part", discarded)
		}
		if err != nil {
			// The body ended (or failed) before reaching the requested range. Report that
			// explicitly rather than letting the copy below fail with a bare EOF, or succeed
			// vacuously for a zero-length extent.
			return fmt.Errorf("discarding %v bytes before requested range: %w", part.e.Start, err)
		}
		_, err = io.CopyN(buf, body, part.e.Length)
		return err
	case http.StatusServiceUnavailable:
		return ErrTooFast
	default:
		return ErrBadResponse{
			fmt.Sprintf("unhandled response status code (%v)", result.resp.StatusCode),
			result.resp,
		}
	}
}
// ErrTooFast is returned when the webseed responds with 503 Service Unavailable.
var ErrTooFast = errors.New("making requests too fast")
// readRequestPartResponses runs the given parts strictly in order: each part is started and its
// response fully consumed before the next one begins. The data from all parts is concatenated.
//
// On the first failure it stops, leaving later parts unstarted, and returns the bytes gathered so
// far together with the error wrapped with the failing part's URL and Range header.
func readRequestPartResponses(ctx context.Context, parts []requestPart) (_ []byte, err error) {
	var collected bytes.Buffer
	for i := range parts {
		p := parts[i]
		p.start()
		if partErr := recvPartResult(ctx, &collected, p); partErr != nil {
			err = fmt.Errorf("reading %q at %q: %w", p.req.URL, p.req.Header.Get("Range"), partErr)
			break
		}
	}
	return collected.Bytes(), err
}