[Core] Support creating v2 torrents

Add support for v2 torrents in create_torrent, but keep the old
default of only adding the v1 metadata.

Unify the single-file and directory cases to avoid code
duplication.

V2 torrents require files to be piece-aligned. The same for
hybrid v1/v2 ones. To handle both cases of piece-aligned and
non-aligned files, always read the files in piece-aligned
chunks. Re-slice the buffer if needed (for v1-only multi-file
torrents).

Also, had to adapt to progress event. It now depends on the
number of bytes hashed rather than the number of pieces. To
avoid sending and excessive amount of event when handling a
directory with many small files, add a mechanism to limit
event period at 1 per piece_length.

Closes: https://github.com/deluge-torrent/deluge/pull/430
This commit is contained in:
Radu Carpa 2023-08-03 09:15:23 +02:00 committed by Calum Lind
parent 4088e13905
commit 1751d62df9
No known key found for this signature in database
GPG Key ID: 90597A687B836BA3
3 changed files with 328 additions and 87 deletions

View File

@ -21,7 +21,7 @@ from twisted.web.client import Agent, readBody
import deluge.common
import deluge.component as component
from deluge import path_chooser_common
from deluge import metafile, path_chooser_common
from deluge._libtorrent import LT_VERSION, lt
from deluge.configmanager import ConfigManager, get_config_dir
from deluge.core.alertmanager import AlertManager
@ -998,7 +998,11 @@ class Core(component.Component):
created_by=None,
trackers=None,
add_to_session=False,
torrent_format=metafile.TorrentFormat.V1,
):
if isinstance(torrent_format, str):
torrent_format = metafile.TorrentFormat(torrent_format)
log.debug('creating torrent..')
return threads.deferToThread(
self._create_torrent_thread,
@ -1012,6 +1016,7 @@ class Core(component.Component):
created_by=created_by,
trackers=trackers,
add_to_session=add_to_session,
torrent_format=torrent_format,
)
def _create_torrent_thread(
@ -1026,6 +1031,7 @@ class Core(component.Component):
created_by,
trackers,
add_to_session,
torrent_format,
):
from deluge import metafile
@ -1038,6 +1044,7 @@ class Core(component.Component):
private=private,
created_by=created_by,
trackers=trackers,
torrent_format=torrent_format,
)
write_file = False

View File

@ -10,10 +10,13 @@
# See LICENSE for more details.
#
import copy
import logging
import os.path
import time
from enum import Enum
from hashlib import sha1 as sha
from hashlib import sha256
import deluge.component as component
from deluge.bencode import bencode
@ -41,6 +44,35 @@ def dummy(*v):
pass
class TorrentFormat(str, Enum):
V1 = 'v1'
V2 = 'v2'
HYBRID = 'hybrid'
@classmethod
def _missing_(cls, value):
if not value:
return None
value = value.lower()
for member in cls:
if member.value == value:
return member
def to_lt_flag(self):
if self.value == 'v1':
return 64
if self.value == 'v2':
return 32
return 0
def includes_v1(self):
return self == self.__class__.V1 or self == self.__class__.HYBRID
def includes_v2(self):
return self == self.__class__.V2 or self == self.__class__.HYBRID
class RemoteFileProgress:
def __init__(self, session_id):
self.session_id = session_id
@ -65,6 +97,7 @@ def make_meta_file_content(
private=False,
created_by=None,
trackers=None,
torrent_format=TorrentFormat.V1,
):
data = {'creation date': int(gmtime())}
if url:
@ -80,10 +113,20 @@ def make_meta_file_content(
if session_id:
progress = RemoteFileProgress(session_id)
info = makeinfo(path, piece_length, progress, name, content_type, private)
info, piece_layers = makeinfo(
path,
piece_length,
progress,
name,
content_type,
private,
torrent_format,
)
# check_info(info)
data['info'] = info
if piece_layers is not None:
data['piece layers'] = piece_layers
if title:
data['title'] = title.encode('utf8')
if comment:
@ -170,101 +213,237 @@ def calcsize(path):
return total
def makeinfo(path, piece_length, progress, name=None, content_type=None, private=False):
# HEREDAVE. If path is directory, how do we assign content type?
path = os.path.abspath(path)
piece_count = 0
if os.path.isdir(path):
subs = sorted(subfiles(path))
pieces = []
sh = sha()
done = 0
fs = []
totalsize = 0.0
totalhashed = 0
for p, f in subs:
totalsize += os.path.getsize(f)
if totalsize >= piece_length:
def _next_pow2(num):
import math
num_pieces = math.ceil(totalsize / piece_length)
else:
num_pieces = 1
if not num:
return 1
return 2 ** math.ceil(math.log2(num))
for p, f in subs:
pos = 0
size = os.path.getsize(f)
p2 = [n.encode('utf8') for n in p]
if content_type:
fs.append(
{'length': size, 'path': p2, 'content_type': content_type}
) # HEREDAVE. bad for batch!
else:
fs.append({'length': size, 'path': p2})
with open(f, 'rb') as file_:
while pos < size:
a = min(size - pos, piece_length - done)
sh.update(file_.read(a))
done += a
pos += a
totalhashed += a
if done == piece_length:
pieces.append(sh.digest())
piece_count += 1
done = 0
sh = sha()
progress(piece_count, num_pieces)
if done > 0:
pieces.append(sh.digest())
piece_count += 1
progress(piece_count, num_pieces)
def _sha256_merkle_root(leafs, nb_leafs, padding, in_place=True) -> bytes:
"""
Build the root of the merkle hash tree from the (possibly incomplete) leafs layer.
If len(leafs) < nb_leafs, it will be padded with the padding repeated as many times
as needed to have nb_leafs in total.
"""
if not in_place:
leafs = copy.copy(leafs)
while nb_leafs > 1:
nb_leafs = nb_leafs // 2
for i in range(nb_leafs):
node1 = leafs[2 * i] if 2 * i < len(leafs) else padding
node2 = leafs[2 * i + 1] if 2 * i + 1 < len(leafs) else padding
h = sha256(node1)
h.update(node2)
if i < len(leafs):
leafs[i] = h.digest()
else:
leafs.append(h.digest())
return leafs[0] if leafs else padding
def _sha256_buffer_blocks(buffer, block_len):
import math
nb_blocks = math.ceil(len(buffer) / block_len)
blocks = [
sha256(buffer[i * block_len : (i + 1) * block_len]).digest()
for i in range(nb_blocks)
]
return blocks
def makeinfo_lt(
path, piece_length, name=None, private=False, torrent_format=TorrentFormat.V1
):
"""
Make info using via the libtorrent library.
"""
from deluge._libtorrent import lt
if not name:
name = os.path.split(path)[1]
return {
'pieces': b''.join(pieces),
'piece length': piece_length,
'files': fs,
'name': name.encode('utf8'),
'private': private,
}
fs = lt.file_storage()
if os.path.isfile(path):
lt.add_files(fs, path)
else:
size = os.path.getsize(path)
if size >= piece_length:
num_pieces = size // piece_length
else:
num_pieces = 1
for p, f in subfiles(path):
fs.add_file(os.path.join(name, *p), os.path.getsize(f))
torrent = lt.create_torrent(
fs, piece_size=piece_length, flags=torrent_format.to_lt_flag()
)
lt.set_piece_hashes(torrent, os.path.dirname(path))
torrent.set_priv(private)
t = torrent.generate()
info = t[b'info']
pieces_layers = t.get(b'piece layers', None)
return info, pieces_layers
def makeinfo(
path,
piece_length,
progress,
name=None,
content_type=None,
private=False,
torrent_format=TorrentFormat.V1,
):
# HEREDAVE. If path is directory, how do we assign content type?
v2_block_len = 2**14 # 16 KiB
v2_blocks_per_piece = 1
v2_block_padding = b''
v2_piece_padding = b''
if torrent_format.includes_v2():
if _next_pow2(piece_length) != piece_length or piece_length < v2_block_len:
raise ValueError(
'Bittorrent v2 piece size must be a power of 2; and bigger than 16 KiB'
)
v2_blocks_per_piece = piece_length // v2_block_len
v2_block_padding = bytes(32) # 32 = size of sha256 in bytes
v2_piece_padding = _sha256_merkle_root(
[], nb_leafs=v2_blocks_per_piece, padding=v2_block_padding
)
path = os.path.abspath(path)
files = []
pieces = []
p = 0
with open(path, 'rb') as _file:
while p < size:
x = _file.read(min(piece_length, size - p))
pieces.append(sha(x).digest())
piece_count += 1
p += piece_length
if p > size:
p = size
progress(piece_count, num_pieces)
name = os.path.split(path)[1].encode('utf8')
if content_type is not None:
return {
'pieces': b''.join(pieces),
'piece length': piece_length,
'length': size,
'name': name,
'content_type': content_type,
'private': private,
file_tree = {}
piece_layers = {}
if os.path.isdir(path):
if not name:
name = os.path.split(path)[1]
subs = subfiles(path)
if torrent_format.includes_v2():
subs = sorted(subs)
length = None
totalsize = 0.0
for p, f in subs:
totalsize += os.path.getsize(f)
else:
name = os.path.split(path)[1]
subs = [([name], path)]
length = os.path.getsize(path)
totalsize = length
is_multi_file = len(subs) > 1
sh = sha()
done = 0
totalhashed = 0
next_progress_event = piece_length
for p, f in subs:
file_pieces_v2 = []
pos = 0
size = os.path.getsize(f)
p2 = [n.encode('utf8') for n in p]
if content_type:
files.append(
{b'length': size, b'path': p2, b'content_type': content_type}
) # HEREDAVE. bad for batch!
else:
files.append({b'length': size, b'path': p2})
with open(f, 'rb') as file_:
while pos < size:
to_read = min(size - pos, piece_length)
buffer = memoryview(file_.read(to_read))
pos += to_read
if torrent_format.includes_v1():
a = piece_length - done
for sub_buffer in (buffer[:a], buffer[a:]):
if sub_buffer:
sh.update(sub_buffer)
done += len(sub_buffer)
if done == piece_length:
pieces.append(sh.digest())
done = 0
sh = sha()
if torrent_format.includes_v2():
block_hashes = _sha256_buffer_blocks(buffer, v2_block_len)
num_leafs = v2_blocks_per_piece
if size <= piece_length:
# The special case when the file is smaller than a piece: only pad till the next power of 2
num_leafs = _next_pow2(len(block_hashes))
root = _sha256_merkle_root(
block_hashes, num_leafs, v2_block_padding, in_place=True
)
file_pieces_v2.append(root)
totalhashed += to_read
if totalhashed >= next_progress_event:
next_progress_event = totalhashed + piece_length
progress(totalhashed, totalsize)
if torrent_format == TorrentFormat.HYBRID and is_multi_file and done > 0:
# Add padding file to force piece-alignment
padding = piece_length - done
sh.update(bytes(padding))
files.append(
{
b'length': padding,
b'attr': b'p',
b'path': [b'.pad', str(padding).encode()],
}
return {
'pieces': b''.join(pieces),
'piece length': piece_length,
'length': size,
'name': name,
'private': private,
)
pieces.append(sh.digest())
done = 0
sh = sha()
if torrent_format.includes_v2():
# add file to the `file tree` and, if needed, to the `piece layers` structures
pieces_root = _sha256_merkle_root(
file_pieces_v2,
_next_pow2(len(file_pieces_v2)),
v2_piece_padding,
in_place=False,
)
dst_directory = file_tree
for directory in p2[:-1]:
dst_directory = dst_directory.setdefault(directory, {})
dst_directory[p2[-1]] = {
b'': {
b'length': size,
b'pieces root': pieces_root,
}
}
if len(file_pieces_v2) > 1:
piece_layers[pieces_root] = b''.join(file_pieces_v2)
if done > 0:
pieces.append(sh.digest())
progress(totalsize, totalsize)
info = {
b'piece length': piece_length,
b'name': name.encode('utf8'),
}
if private:
info[b'private'] = 1
if content_type:
info[b'content_type'] = content_type
if torrent_format.includes_v1():
info[b'pieces'] = b''.join(pieces)
if is_multi_file:
info[b'files'] = files
else:
info[b'length'] = length
if torrent_format.includes_v2():
info.update(
{
b'meta version': 2,
b'file tree': file_tree,
}
)
return info, piece_layers if torrent_format.includes_v2() else None
def subfiles(d):

View File

@ -7,7 +7,13 @@
import os
import tempfile
import pytest
from deluge import metafile
from deluge._libtorrent import LT_VERSION
from deluge.common import VersionSplit
from . import common
def check_torrent(filename):
@ -55,3 +61,52 @@ class TestMetafile:
metafile.make_meta_file(tmp_data, '', 32768, target=tmp_torrent)
check_torrent(tmp_torrent)
@pytest.mark.parametrize(
'path',
[
common.get_test_data_file('deluge.png'),
common.get_test_data_file('unicode_filenames.torrent'),
os.path.dirname(common.get_test_data_file('deluge.png')),
],
)
@pytest.mark.parametrize(
'torrent_format',
[
metafile.TorrentFormat.V1,
metafile.TorrentFormat.V2,
metafile.TorrentFormat.HYBRID,
],
)
@pytest.mark.parametrize('piece_length', [2**14, 2**15, 2**16])
@pytest.mark.parametrize('private', [True, False])
def test_create_info(self, path, torrent_format, piece_length, private):
our_info, our_piece_layers = metafile.makeinfo(
path,
piece_length,
metafile.dummy,
private=private,
torrent_format=torrent_format,
)
lt_info, lt_piece_layers = metafile.makeinfo_lt(
path,
piece_length,
private=private,
torrent_format=torrent_format,
)
if (
torrent_format == metafile.TorrentFormat.HYBRID
and os.path.isdir(path)
and VersionSplit(LT_VERSION) <= VersionSplit('2.0.7.0')
):
# Libtorrent didn't correctly follow the standard until version 2.0.7 included
# https://github.com/arvidn/libtorrent/commit/74d82a0cd7c2e9e3c4294901d7eb65e247050df4
# If last file is a padding, ignore that file and the last piece.
if our_info[b'files'][-1][b'path'][0] == b'.pad':
our_info[b'files'] = our_info[b'files'][:-1]
our_info[b'pieces'] = our_info[b'pieces'][:-32]
lt_info[b'pieces'] = lt_info[b'pieces'][:-32]
assert our_info == lt_info
assert our_piece_layers == lt_piece_layers