torrent/metainfo/builder.go

603 lines
14 KiB
Go

package metainfo
import (
"crypto/sha1"
"errors"
"hash"
"io"
"os"
"path/filepath"
"sort"
"time"
"github.com/anacrolix/torrent/bencode"
)
//----------------------------------------------------------------------------
// Build
//----------------------------------------------------------------------------
// Builder is responsible for .torrent file construction. Instantiate it (the
// zero value is ready to use), queue files with AddFile, adjust any optional
// settings and then call Submit to obtain a Batch. Batch.Start performs the
// hashing and writing; while waiting for completion you can use its status
// channel to get progress reports.
type Builder struct {
batch_state
// filesmap is the set of queued files, keyed by absolute path.
filesmap map[string]bool
}
// AddFile queues a file for inclusion in the torrent. It may be called any
// number of times. Files are keyed by their absolute path, so queueing the
// same absolute path again has no further effect.
//
// AddFile panics if the absolute path of filename cannot be determined.
func (b *Builder) AddFile(filename string) {
	// The set is created lazily so that a zero Builder is usable.
	if b.filesmap == nil {
		b.filesmap = map[string]bool{}
	}
	abs, err := filepath.Abs(filename)
	if err != nil {
		panic(err)
	}
	b.filesmap[abs] = true
}
// SetName defines the name of the future torrent. For single file torrents it
// is the recommended name of the contained file. For multiple files torrents
// it is the recommended name of the directory in which all of them will be
// stored. Calling this method is not required. If no name was specified, the
// name determined automatically by Submit is used instead: the name of the
// file if there is only one file in the queue, otherwise the name of the
// rightmost directory common to all the queued files. If the queued files
// share no common directory, Submit returns an error.
func (b *Builder) SetName(name string) {
b.name = name
}
// SetPieceLength sets the length of a piece in the torrent file in bytes. A
// value of zero (the default) is replaced with 256 KiB (256 * 1024 bytes) when
// Submit is called.
func (b *Builder) SetPieceLength(length int64) {
b.piece_length = length
}
// SetPrivate sets the "private" flag. The default is false.
func (b *Builder) SetPrivate(v bool) {
b.private = v
}
// AddAnnounceGroup adds a group of announce (tracker) URLs. Groups are kept in
// the order they were added. When the torrent is written, the first URL of the
// first group becomes the "announce" value, and the complete list of groups is
// emitted as well unless there is exactly one group holding exactly one URL.
// Empty strings, and groups that contain nothing else, are discarded when
// Submit validates the builder. Announce groups and DHT nodes are mutually
// exclusive, and Submit fails if neither was provided.
func (b *Builder) AddAnnounceGroup(group []string) {
b.announce_list = append(b.announce_list, group)
}
// AddDhtNodes adds a group of DHT node URLs for trackerless mode. Empty
// strings, and groups that contain nothing else, are discarded when Submit
// validates the builder. DHT nodes and announce groups are mutually exclusive,
// and Submit fails if neither was provided.
func (b *Builder) AddDhtNodes(group []string) {
b.node_list = append(b.node_list, group)
}
// SetCreationDate sets the creation date. If it is left at the zero time, it
// defaults to time.Now() at the moment Submit is called.
func (b *Builder) SetCreationDate(date time.Time) {
b.creation_date = date
}
// SetComment sets the comment. The default is no comment.
func (b *Builder) SetComment(comment string) {
b.comment = comment
}
// SetCreatedBy sets the "created by" parameter. If it is left empty, it
// defaults to "libtorgo" when Submit is called.
func (b *Builder) SetCreatedBy(createdby string) {
b.created_by = createdby
}
// SetEncoding sets the "encoding" parameter. If it is left empty, it defaults
// to "UTF-8" when Submit is called.
func (b *Builder) SetEncoding(encoding string) {
b.encoding = encoding
}
// AddWebSeedURL adds a WebSeed URL to the list. Empty strings are dropped when
// Submit validates the builder. When the torrent is written, a single URL is
// stored as a plain value and several URLs are stored as a list.
func (b *Builder) AddWebSeedURL(url string) {
b.urls = append(b.urls, url)
}
// Submit finalizes the Builder state and makes a Batch out of it. The
// parameters are validated, defaults are filled in, and every queued file is
// examined with os.Stat: each one must exist and be a regular file. On success
// the Builder becomes empty and can be used to create another Batch. On
// failure an error is returned and the queued files stay in the Builder.
//
// For a single queued file the default name is the file's own name. For
// several files the default name is the rightmost directory shared by all of
// them, the stored file paths are made relative to that directory, and the
// files are sorted. An error is returned when no such directory exists.
func (b *Builder) Submit() (*Batch, error) {
	if err := b.check_parameters(); err != nil {
		return nil, err
	}
	b.set_defaults()

	// Mode bits that disqualify an entry from being hashed.
	const non_regular = os.ModeDir | os.ModeSymlink |
		os.ModeDevice | os.ModeNamedPipe | os.ModeSocket

	batch := &Batch{batch_state: b.batch_state}

	// Turn the set of paths into a slice, recording sizes and split paths.
	batch.total_size = 0
	batch.files = make([]file, 0, 10)
	for abspath := range b.filesmap {
		info, err := os.Stat(abspath)
		if err != nil {
			return nil, err
		}
		if info.Mode()&non_regular != 0 {
			return nil, errors.New(abspath + " is not a regular file")
		}
		entry := file{
			abspath:   abspath,
			splitpath: split_path(abspath),
			size:      info.Size(),
		}
		batch.files = append(batch.files, entry)
		batch.total_size += entry.size
	}

	if len(batch.files) == 1 {
		// A lone file lends its own name to the torrent.
		parts := batch.files[0].splitpath
		batch.default_name = parts[len(parts)-1]
	} else {
		// Shrink the candidate prefix until it is shared by every file.
		prefix := batch.files[0].splitpath
		for _, f := range batch.files {
			if len(f.splitpath) < len(prefix) {
				prefix = prefix[:len(f.splitpath)]
			}
			for i := range prefix {
				if prefix[i] != f.splitpath[i] {
					prefix = prefix[:i]
					break
				}
			}
			if len(prefix) == 0 {
				break
			}
		}
		if len(prefix) == 0 {
			return nil, errors.New("no common rightmost folder was found for a set of queued files")
		}

		// The last shared component names the torrent; everything up to and
		// including it is removed from the per-file paths.
		batch.default_name = prefix[len(prefix)-1]
		depth := len(prefix)
		for i := range batch.files {
			batch.files[i].splitpath = batch.files[i].splitpath[depth:]
		}

		sort.Sort(file_slice(batch.files))
	}

	// Hand the state over to the batch and start from scratch.
	b.batch_state = batch_state{}
	b.filesmap = nil
	return batch, nil
}
// set_defaults fills in every optional setting that is still at its zero
// value: "UTF-8" encoding, "libtorgo" as creator, the current time as creation
// date and 256 KiB pieces.
func (b *Builder) set_defaults() {
	state := &b.batch_state

	if state.encoding == "" {
		state.encoding = "UTF-8"
	}
	if state.created_by == "" {
		state.created_by = "libtorgo"
	}
	if state.creation_date.IsZero() {
		state.creation_date = time.Now()
	}
	if state.piece_length == 0 {
		state.piece_length = 256 * 1024
	}
}
// check_parameters validates the builder state before a Batch is made out of
// it, and normalizes the URL lists along the way (empty strings and empty
// groups are dropped). It returns an error when
//   - no files were queued,
//   - the piece length is negative,
//   - neither announce groups nor DHT nodes were given, or
//   - both announce groups and DHT nodes were given.
func (b *Builder) check_parameters() error {
	// should be at least one file
	if len(b.filesmap) == 0 {
		return errors.New("no files were queued")
	}

	// Zero means "use the default" and is replaced by set_defaults, but a
	// negative length can never be valid and would make the hashing stage
	// panic on a negative allocation size.
	if b.piece_length < 0 {
		return errors.New("piece length must not be negative")
	}

	// let's clean up the announce_list and node_list
	b.announce_list = cleanUpLists(b.announce_list)
	b.node_list = cleanUpLists(b.node_list)

	switch {
	case len(b.announce_list) == 0 && len(b.node_list) == 0:
		return errors.New("no announce group or DHT nodes specified")
	case len(b.announce_list) > 0 && len(b.node_list) > 0:
		// Either the node_list or the announce_list can be present, never both.
		return errors.New("announce group and nodes are mutually exclusive")
	}

	// and clean up the urls
	b.urls = remove_empty_strings(b.urls)
	return nil
}
// cleanUpLists strips empty strings from every group in list and returns a
// new list holding only the groups that still have at least one entry, in
// their original order.
func cleanUpLists(list [][]string) [][]string {
	cleaned := make([][]string, 0, len(list))
	for _, group := range list {
		// Keep a group only if something is left after cleaning it.
		if group = remove_empty_strings(group); len(group) > 0 {
			cleaned = append(cleaned, group)
		}
	}
	return cleaned
}
//----------------------------------------------------------------------------
// Batch
//----------------------------------------------------------------------------
// Batch represents a snapshot of a builder state, ready for transforming it
// into a torrent file. Note that Batch contains two accessor methods you might
// be interested in. TotalSize is the total size of all the files queued for
// hashing, you will use it for status reporting. DefaultName is an
// automatically determined name of the torrent metainfo, you might want to use
// it for naming the .torrent file itself.
type Batch struct {
batch_state
// files lists the files to hash, with their sizes and path components.
files []file
// total_size is the sum of the sizes of all files, in bytes.
total_size int64
// default_name is the torrent name used when no explicit name was set.
default_name string
}
// TotalSize returns the total size in bytes of all the files queued for
// hashing. Useful in conjunction with status reports.
func (b *Batch) TotalSize() int64 {
return b.total_size
}
// DefaultName returns the automatically determined name of the future torrent
// metainfo. You can use it for a .torrent file in case the user hasn't
// provided a name specifically.
func (b *Batch) DefaultName() string {
return b.default_name
}
// Start begins building the torrent file. All the work is done in a separate
// goroutine which reads the queued files piece by piece and passes the pieces
// to up to 'nworkers' goroutines for SHA1 hashing (a value of zero or less is
// treated as one), so Start itself returns almost immediately. Once all the
// pieces are hashed, the metainfo is written to w.
//
// It returns two channels. The first one is for awaiting completion: it
// receives exactly one value, nil on success or the error that aborted the
// build. It is buffered, so the goroutine can finish even if nobody receives
// from it. The second one is for status reports: each report is the number of
// bytes hashed so far, to be compared with Batch.TotalSize. Reports are sent
// without blocking, so a report is dropped if nobody is receiving at that
// moment, and the status channel is never closed.
func (b *Batch) Start(w io.Writer, nworkers int) (<-chan error, <-chan int64) {
	if nworkers <= 0 {
		nworkers = 1
	}

	completion := make(chan error, 1)
	status := make(chan int64)

	go func() {
		// A non-positive piece length would make the arithmetic and the
		// allocations below panic; report it as an error instead.
		if b.piece_length <= 0 {
			completion <- errors.New("piece length must be positive")
			return
		}

		// prepare workers
		workers := make([]*worker, nworkers)
		free_workers := make(chan *worker, nworkers)
		for i := range workers {
			workers[i] = new_worker(free_workers)
		}

		// prepare files for reading
		fr := files_reader{files: b.files}

		// shutdown waits for the workers to finish their current piece and
		// exit, then releases the file the reader may still hold open. It runs
		// before the result is signalled so that no handle is left behind.
		shutdown := func() {
			for _, wk := range workers {
				wk.stop()
			}
			for _, wk := range workers {
				wk.wait_for_stop()
			}
			if fr.curfile != nil {
				// The handle is read-only; a failed close cannot lose data.
				_ = fr.curfile.Close()
				fr.curfile = nil
			}
		}

		// Round up: a trailing partial piece still needs a hash, but a total
		// size that is an exact multiple of the piece length must not get an
		// extra hash of empty data.
		npieces := (b.total_size + b.piece_length - 1) / b.piece_length
		b.pieces = make([]byte, 20*npieces)
		hashed := int64(0)

		// read all the pieces passing them to workers for hashing
		var data []byte
		for i := int64(0); i < npieces; i++ {
			if data == nil {
				data = make([]byte, b.piece_length)
			}

			nr, err := fr.Read(data)
			// EOF is not an error if it was the last piece
			if err != nil && (err != io.EOF || i != npieces-1) {
				shutdown()
				completion <- err
				return
			}

			// Every piece is full except possibly the last one. Fewer bytes
			// than that means a file is shorter than its recorded size.
			want := b.piece_length
			if rest := b.total_size - i*b.piece_length; rest < want {
				want = rest
			}
			if int64(nr) != want {
				shutdown()
				completion <- io.ErrUnexpectedEOF
				return
			}

			// cut the data slice to the amount of actual data read
			data = data[:nr]
			wk := <-free_workers
			// queue hands back the buffer this worker hashed previously (nil
			// the first time), which is then reused for the next read
			data = wk.queue(data, b.pieces[20*i:20*i+20])

			// update and try to send the status report
			if data != nil {
				hashed += int64(len(data))
				data = data[:cap(data)]
				select {
				case status <- hashed:
				default:
				}
			}
		}
		shutdown()

		// at this point the hash was calculated and we're ready to
		// write the torrent file
		completion <- b.write_torrent(w)
	}()
	return completion, status
}
// write_torrent assembles the metainfo from the batch state and the computed
// piece hashes, and bencodes it to w. It returns the encoder's error, if any.
func (b *Batch) write_torrent(w io.Writer) error {
	var meta MetaInfo

	// Either announce or node lists are allowed - not both
	if groups := b.announce_list; len(groups) > 0 {
		meta.Announce = groups[0][0]
		// The full list is only needed when it says more than "announce".
		lone_tracker := len(groups) == 1 && len(groups[0]) == 1
		if !lone_tracker {
			meta.AnnounceList = groups
		}
	}
	if len(b.node_list) > 0 {
		meta.Nodes = b.node_list
	}

	meta.CreationDate = b.creation_date.Unix()
	meta.Comment = b.comment
	meta.CreatedBy = b.created_by
	meta.Encoding = b.encoding

	// One web seed is stored as a plain value, several as a list.
	if n := len(b.urls); n == 1 {
		meta.URLList = b.urls[0]
	} else if n > 1 {
		meta.URLList = b.urls
	}

	meta.Info.PieceLength = b.piece_length
	meta.Info.Pieces = b.pieces
	meta.Info.Private = b.private

	// An explicitly set name wins over the automatically determined one.
	name := b.name
	if name == "" {
		name = b.default_name
	}
	meta.Info.Name = name

	if len(b.files) == 1 {
		// single file mode: only the length is recorded
		meta.Info.Length = b.files[0].size
	} else {
		// multiple files mode: one entry per file
		entries := make([]FileInfo, 0, len(b.files))
		for _, f := range b.files {
			entries = append(entries, FileInfo{
				Path:   f.splitpath,
				Length: f.size,
			})
		}
		meta.Info.Files = entries
	}

	return bencode.NewEncoder(w).Encode(&meta)
}
//----------------------------------------------------------------------------
// misc stuff
//----------------------------------------------------------------------------
// split_path splits a path into its components (directories and the final
// element), outermost first. The path is cleaned first, so repeated
// separators and "." / ".." elements are resolved lexically. The root itself
// is not part of the result, so the root directory yields an empty slice.
//
// It is meant for absolute paths, but it also terminates on relative ones: a
// relative path yields its components, and "" or "." yields an empty slice.
func split_path(path string) []string {
	parts := make([]string, 0, 5)

	dir := filepath.Clean(path)
	for {
		parent, name := filepath.Split(dir)
		// An empty name means only the root is left. "." is what Clean turns
		// an empty path into; it splits into ("", ".") forever, so it must
		// end the walk as well.
		if name == "" || name == "." {
			break
		}
		parts = append(parts, name)
		// No parent left: this was the first element of a relative path.
		if parent == "" {
			break
		}
		dir = filepath.Clean(parent)
	}

	// components were collected innermost first, reverse the slice
	for i, j := 0, len(parts)-1; i < j; i, j = i+1, j-1 {
		parts[i], parts[j] = parts[j], parts[i]
	}
	return parts
}
// batch_state is the data shared between the Builder and the Batch. The
// Builder fills it in through its setters and Submit copies it into the Batch.
type batch_state struct {
// name is the explicitly requested torrent name, empty if none was set.
name string
// piece_length is the size of one piece in bytes.
piece_length int64
// pieces holds the concatenated 20-byte SHA1 digests of all pieces; it is
// allocated and filled in by Batch.Start.
pieces []byte
// private is the value of the "private" flag.
private bool
// announce_list holds the groups of announce URLs.
announce_list [][]string
// node_list holds the groups of DHT node URLs.
node_list [][]string
creation_date time.Time
comment string
created_by string
encoding string
// urls holds the WebSeed URLs.
urls []string
}
// file describes one queued file.
type file struct {
	abspath   string   // absolute path, used for opening the file and for ordering
	splitpath []string // path components as stored in the torrent
	size      int64    // size in bytes
}

// file_slice implements sort.Interface, ordering files by absolute path.
type file_slice []file

// Len reports the number of files.
func (s file_slice) Len() int {
	return len(s)
}

// Less reports whether the file at index i sorts before the file at index j.
func (s file_slice) Less(i, j int) bool {
	left, right := s[i].abspath, s[j].abspath
	return left < right
}

// Swap exchanges the files at indices i and j.
func (s file_slice) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}
// remove_empty_strings returns the non-empty strings of slice, in their
// original order. The result is always a newly allocated slice: the input is
// left untouched, because it may be owned by the caller of the Builder (for
// example a group passed to AddAnnounceGroup) and must not be rewritten
// behind its back.
func remove_empty_strings(slice []string) []string {
	kept := make([]string, 0, len(slice))
	for _, s := range slice {
		if s != "" {
			kept = append(kept, s)
		}
	}
	return kept
}
//----------------------------------------------------------------------------
// worker
//----------------------------------------------------------------------------
// worker computes the SHA1 digest of one piece at a time in its own
// goroutine. It is driven through msgbox: false asks it to hash the request
// currently stored in data/sha1, true asks it to stop, and the worker answers
// a stop request with true once it is about to exit.
type worker struct {
	msgbox chan bool
	hash   hash.Hash

	// request
	sha1 []byte // destination for the digest
	data []byte // bytes to hash
}

// queue stores a new request (hash 'data', put the digest into 'sha1'), wakes
// the worker up and returns the data buffer of the previous request, which is
// nil if there was none. It must only be called while the worker is idle.
func (w *worker) queue(data, sha1 []byte) []byte {
	previous := w.data
	w.data, w.sha1 = data, sha1
	w.msgbox <- false
	return previous
}

// stop asks the worker to exit. It blocks until the worker picks the request
// up, that is, until the worker is done with its current piece.
func (w *worker) stop() {
	w.msgbox <- true
}

// wait_for_stop blocks until the worker acknowledges a stop request.
func (w *worker) wait_for_stop() {
	<-w.msgbox
}

// new_worker creates a worker and starts its goroutine. The worker announces
// itself on 'out' right away, and again every time it finishes a request.
func new_worker(out chan<- *worker) *worker {
	w := &worker{msgbox: make(chan bool), hash: sha1.New()}

	serve := func() {
		var digest [sha1.Size]byte
		for {
			if quit := <-w.msgbox; quit {
				// acknowledge the stop request and exit
				w.msgbox <- true
				return
			}

			w.hash.Reset()
			w.hash.Write(w.data)
			w.hash.Sum(digest[:0])
			copy(w.sha1, digest[:])

			// done, ready for the next request
			out <- w
		}
	}
	go serve()

	out <- w
	return w
}
//----------------------------------------------------------------------------
// files_reader
//----------------------------------------------------------------------------
// files_reader reads a list of files one after another, as if they were one
// continuous stream of bytes.
type files_reader struct {
// files is the list of files to read, in reading order.
files []file
// cur is the index in files of the file being read.
cur int
// curfile is the open handle of the current file, nil if it is not open.
curfile *os.File
// off is the number of bytes already read from the current file.
off int64
}
// Read fills data with bytes from the listed files, moving on to the next
// file whenever the current one has delivered as many bytes as its recorded
// size. It keeps reading until data is full, the files run out or an error
// occurs, and returns the number of bytes stored in data.
//
// The error is nil when data was filled completely. It is io.EOF when the
// last file was exhausted before data was full (the byte count may be
// non-zero in that case) and on any call made after that. Errors from opening,
// reading or closing a file are returned as they are, and the file in which a
// read error occurred is left open.
func (f *files_reader) Read(data []byte) (int, error) {
	switch {
	case f.cur >= len(f.files):
		return 0, io.EOF
	case len(data) == 0:
		return 0, nil
	}

	total := 0
	for len(data) > 0 {
		current := &f.files[f.cur]

		// open the current file lazily
		if f.curfile == nil {
			var err error
			if f.curfile, err = os.Open(current.abspath); err != nil {
				return total, err
			}
		}

		remaining := current.size - f.off
		if remaining == 0 {
			// nothing left in this file, move on to the next one
			if err := f.curfile.Close(); err != nil {
				return total, err
			}
			f.curfile, f.off = nil, 0
			f.cur++
			if f.cur >= len(f.files) {
				return total, io.EOF
			}
			continue
		}

		// ask for as much as still fits into data, but never for more than
		// the file is supposed to contain
		want := int64(len(data))
		if remaining < want {
			want = remaining
		}

		got, err := f.curfile.Read(data[:want])
		total += got
		f.off += int64(got)
		if err != nil {
			return total, err
		}

		// continue right behind the bytes just stored
		data = data[got:]
	}
	return total, nil
}