From 1751d62df9dc41adcd3caff2bb6a0c0d824a4fc5 Mon Sep 17 00:00:00 2001 From: Radu Carpa Date: Thu, 3 Aug 2023 09:15:23 +0200 Subject: [PATCH] [Core] Support creating v2 torrents Add support for v2 torrents in create_torrent, but keep the old default of only adding the v1 metadata. Unify the single-file and directory cases to avoid code duplication. V2 torrents require files to be piece-aligned. The same for hybrid v1/v2 ones. To handle both cases of piece-aligned and non-aligned files, always read the files in piece-aligned chunks. Re-slice the buffer if needed (for v1-only multi-file torrents). Also, had to adapt to progress event. It now depends on the number of bytes hashed rather than the number of pieces. To avoid sending and excessive amount of event when handling a directory with many small files, add a mechanism to limit event period at 1 per piece_length. Closes: https://github.com/deluge-torrent/deluge/pull/430 --- deluge/core/core.py | 9 +- deluge/metafile.py | 351 +++++++++++++++++++++++++--------- deluge/tests/test_metafile.py | 55 ++++++ 3 files changed, 328 insertions(+), 87 deletions(-) diff --git a/deluge/core/core.py b/deluge/core/core.py index fc12f7bc5..e2130f595 100644 --- a/deluge/core/core.py +++ b/deluge/core/core.py @@ -21,7 +21,7 @@ from twisted.web.client import Agent, readBody import deluge.common import deluge.component as component -from deluge import path_chooser_common +from deluge import metafile, path_chooser_common from deluge._libtorrent import LT_VERSION, lt from deluge.configmanager import ConfigManager, get_config_dir from deluge.core.alertmanager import AlertManager @@ -998,7 +998,11 @@ class Core(component.Component): created_by=None, trackers=None, add_to_session=False, + torrent_format=metafile.TorrentFormat.V1, ): + if isinstance(torrent_format, str): + torrent_format = metafile.TorrentFormat(torrent_format) + log.debug('creating torrent..') return threads.deferToThread( self._create_torrent_thread, @@ -1012,6 +1016,7 @@ class Core(component.Component): created_by=created_by, trackers=trackers, add_to_session=add_to_session, + torrent_format=torrent_format, ) def _create_torrent_thread( @@ -1026,6 +1031,7 @@ class Core(component.Component): created_by, trackers, add_to_session, + torrent_format, ): from deluge import metafile @@ -1038,6 +1044,7 @@ class Core(component.Component): private=private, created_by=created_by, trackers=trackers, + torrent_format=torrent_format, ) write_file = False diff --git a/deluge/metafile.py b/deluge/metafile.py index 906cc81e3..81a371ff3 100644 --- a/deluge/metafile.py +++ b/deluge/metafile.py @@ -10,10 +10,13 @@ # See LICENSE for more details. # +import copy import logging import os.path import time +from enum import Enum from hashlib import sha1 as sha +from hashlib import sha256 import deluge.component as component from deluge.bencode import bencode @@ -41,6 +44,35 @@ def dummy(*v): pass +class TorrentFormat(str, Enum): + V1 = 'v1' + V2 = 'v2' + HYBRID = 'hybrid' + + @classmethod + def _missing_(cls, value): + if not value: + return None + + value = value.lower() + for member in cls: + if member.value == value: + return member + + def to_lt_flag(self): + if self.value == 'v1': + return 64 + if self.value == 'v2': + return 32 + return 0 + + def includes_v1(self): + return self == self.__class__.V1 or self == self.__class__.HYBRID + + def includes_v2(self): + return self == self.__class__.V2 or self == self.__class__.HYBRID + + class RemoteFileProgress: def __init__(self, session_id): self.session_id = session_id @@ -65,6 +97,7 @@ def make_meta_file_content( private=False, created_by=None, trackers=None, + torrent_format=TorrentFormat.V1, ): data = {'creation date': int(gmtime())} if url: @@ -80,10 +113,20 @@ def make_meta_file_content( if session_id: progress = RemoteFileProgress(session_id) - info = makeinfo(path, piece_length, progress, name, content_type, private) + info, piece_layers = makeinfo( + path, + piece_length, + progress, + name, + content_type, + private, + torrent_format, + ) # check_info(info) data['info'] = info + if piece_layers is not None: + data['piece layers'] = piece_layers if title: data['title'] = title.encode('utf8') if comment: @@ -170,101 +213,237 @@ def calcsize(path): return total -def makeinfo(path, piece_length, progress, name=None, content_type=None, private=False): - # HEREDAVE. If path is directory, how do we assign content type? - path = os.path.abspath(path) - piece_count = 0 - if os.path.isdir(path): - subs = sorted(subfiles(path)) - pieces = [] - sh = sha() - done = 0 - fs = [] - totalsize = 0.0 - totalhashed = 0 - for p, f in subs: - totalsize += os.path.getsize(f) - if totalsize >= piece_length: - import math +def _next_pow2(num): + import math - num_pieces = math.ceil(totalsize / piece_length) - else: - num_pieces = 1 + if not num: + return 1 + return 2 ** math.ceil(math.log2(num)) - for p, f in subs: - pos = 0 - size = os.path.getsize(f) - p2 = [n.encode('utf8') for n in p] - if content_type: - fs.append( - {'length': size, 'path': p2, 'content_type': content_type} - ) # HEREDAVE. bad for batch! + +def _sha256_merkle_root(leafs, nb_leafs, padding, in_place=True) -> bytes: + """ + Build the root of the merkle hash tree from the (possibly incomplete) leafs layer. + If len(leafs) < nb_leafs, it will be padded with the padding repeated as many times + as needed to have nb_leafs in total. + """ + if not in_place: + leafs = copy.copy(leafs) + + while nb_leafs > 1: + nb_leafs = nb_leafs // 2 + for i in range(nb_leafs): + node1 = leafs[2 * i] if 2 * i < len(leafs) else padding + node2 = leafs[2 * i + 1] if 2 * i + 1 < len(leafs) else padding + h = sha256(node1) + h.update(node2) + if i < len(leafs): + leafs[i] = h.digest() else: - fs.append({'length': size, 'path': p2}) - with open(f, 'rb') as file_: - while pos < size: - a = min(size - pos, piece_length - done) - sh.update(file_.read(a)) - done += a - pos += a - totalhashed += a + leafs.append(h.digest()) + return leafs[0] if leafs else padding - if done == piece_length: - pieces.append(sh.digest()) - piece_count += 1 - done = 0 - sh = sha() - progress(piece_count, num_pieces) - if done > 0: - pieces.append(sh.digest()) - piece_count += 1 - progress(piece_count, num_pieces) +def _sha256_buffer_blocks(buffer, block_len): + import math + + nb_blocks = math.ceil(len(buffer) / block_len) + blocks = [ + sha256(buffer[i * block_len : (i + 1) * block_len]).digest() + for i in range(nb_blocks) + ] + return blocks + + +def makeinfo_lt( + path, piece_length, name=None, private=False, torrent_format=TorrentFormat.V1 +): + """ + Make info using via the libtorrent library. + """ + from deluge._libtorrent import lt + + if not name: + name = os.path.split(path)[1] + + fs = lt.file_storage() + if os.path.isfile(path): + lt.add_files(fs, path) + else: + for p, f in subfiles(path): + fs.add_file(os.path.join(name, *p), os.path.getsize(f)) + torrent = lt.create_torrent( + fs, piece_size=piece_length, flags=torrent_format.to_lt_flag() + ) + + lt.set_piece_hashes(torrent, os.path.dirname(path)) + torrent.set_priv(private) + + t = torrent.generate() + info = t[b'info'] + pieces_layers = t.get(b'piece layers', None) + + return info, pieces_layers + + +def makeinfo( + path, + piece_length, + progress, + name=None, + content_type=None, + private=False, + torrent_format=TorrentFormat.V1, +): + # HEREDAVE. If path is directory, how do we assign content type? + + v2_block_len = 2**14 # 16 KiB + v2_blocks_per_piece = 1 + v2_block_padding = b'' + v2_piece_padding = b'' + if torrent_format.includes_v2(): + if _next_pow2(piece_length) != piece_length or piece_length < v2_block_len: + raise ValueError( + 'Bittorrent v2 piece size must be a power of 2; and bigger than 16 KiB' + ) + + v2_blocks_per_piece = piece_length // v2_block_len + v2_block_padding = bytes(32) # 32 = size of sha256 in bytes + v2_piece_padding = _sha256_merkle_root( + [], nb_leafs=v2_blocks_per_piece, padding=v2_block_padding + ) + + path = os.path.abspath(path) + files = [] + pieces = [] + file_tree = {} + piece_layers = {} + if os.path.isdir(path): if not name: name = os.path.split(path)[1] - - return { - 'pieces': b''.join(pieces), - 'piece length': piece_length, - 'files': fs, - 'name': name.encode('utf8'), - 'private': private, - } + subs = subfiles(path) + if torrent_format.includes_v2(): + subs = sorted(subs) + length = None + totalsize = 0.0 + for p, f in subs: + totalsize += os.path.getsize(f) else: - size = os.path.getsize(path) - if size >= piece_length: - num_pieces = size // piece_length - else: - num_pieces = 1 + name = os.path.split(path)[1] + subs = [([name], path)] + length = os.path.getsize(path) + totalsize = length + is_multi_file = len(subs) > 1 + sh = sha() + done = 0 + totalhashed = 0 - pieces = [] - p = 0 - with open(path, 'rb') as _file: - while p < size: - x = _file.read(min(piece_length, size - p)) - pieces.append(sha(x).digest()) - piece_count += 1 - p += piece_length - if p > size: - p = size - progress(piece_count, num_pieces) - name = os.path.split(path)[1].encode('utf8') - if content_type is not None: - return { - 'pieces': b''.join(pieces), - 'piece length': piece_length, - 'length': size, - 'name': name, - 'content_type': content_type, - 'private': private, + next_progress_event = piece_length + for p, f in subs: + file_pieces_v2 = [] + pos = 0 + size = os.path.getsize(f) + p2 = [n.encode('utf8') for n in p] + if content_type: + files.append( + {b'length': size, b'path': p2, b'content_type': content_type} + ) # HEREDAVE. bad for batch! + else: + files.append({b'length': size, b'path': p2}) + with open(f, 'rb') as file_: + while pos < size: + to_read = min(size - pos, piece_length) + buffer = memoryview(file_.read(to_read)) + pos += to_read + + if torrent_format.includes_v1(): + a = piece_length - done + for sub_buffer in (buffer[:a], buffer[a:]): + if sub_buffer: + sh.update(sub_buffer) + done += len(sub_buffer) + + if done == piece_length: + pieces.append(sh.digest()) + done = 0 + sh = sha() + if torrent_format.includes_v2(): + block_hashes = _sha256_buffer_blocks(buffer, v2_block_len) + num_leafs = v2_blocks_per_piece + if size <= piece_length: + # The special case when the file is smaller than a piece: only pad till the next power of 2 + num_leafs = _next_pow2(len(block_hashes)) + root = _sha256_merkle_root( + block_hashes, num_leafs, v2_block_padding, in_place=True + ) + file_pieces_v2.append(root) + + totalhashed += to_read + if totalhashed >= next_progress_event: + next_progress_event = totalhashed + piece_length + progress(totalhashed, totalsize) + + if torrent_format == TorrentFormat.HYBRID and is_multi_file and done > 0: + # Add padding file to force piece-alignment + padding = piece_length - done + sh.update(bytes(padding)) + files.append( + { + b'length': padding, + b'attr': b'p', + b'path': [b'.pad', str(padding).encode()], + } + ) + pieces.append(sh.digest()) + done = 0 + sh = sha() + + if torrent_format.includes_v2(): + # add file to the `file tree` and, if needed, to the `piece layers` structures + pieces_root = _sha256_merkle_root( + file_pieces_v2, + _next_pow2(len(file_pieces_v2)), + v2_piece_padding, + in_place=False, + ) + dst_directory = file_tree + for directory in p2[:-1]: + dst_directory = dst_directory.setdefault(directory, {}) + dst_directory[p2[-1]] = { + b'': { + b'length': size, + b'pieces root': pieces_root, + } } - return { - 'pieces': b''.join(pieces), - 'piece length': piece_length, - 'length': size, - 'name': name, - 'private': private, - } + if len(file_pieces_v2) > 1: + piece_layers[pieces_root] = b''.join(file_pieces_v2) + + if done > 0: + pieces.append(sh.digest()) + progress(totalsize, totalsize) + + info = { + b'piece length': piece_length, + b'name': name.encode('utf8'), + } + if private: + info[b'private'] = 1 + if content_type: + info[b'content_type'] = content_type + if torrent_format.includes_v1(): + info[b'pieces'] = b''.join(pieces) + if is_multi_file: + info[b'files'] = files + else: + info[b'length'] = length + if torrent_format.includes_v2(): + info.update( + { + b'meta version': 2, + b'file tree': file_tree, + } + ) + return info, piece_layers if torrent_format.includes_v2() else None def subfiles(d): diff --git a/deluge/tests/test_metafile.py b/deluge/tests/test_metafile.py index fda1cb73e..1b1675052 100644 --- a/deluge/tests/test_metafile.py +++ b/deluge/tests/test_metafile.py @@ -7,7 +7,13 @@ import os import tempfile +import pytest + from deluge import metafile +from deluge._libtorrent import LT_VERSION +from deluge.common import VersionSplit + +from . import common def check_torrent(filename): @@ -55,3 +61,52 @@ class TestMetafile: metafile.make_meta_file(tmp_data, '', 32768, target=tmp_torrent) check_torrent(tmp_torrent) + + @pytest.mark.parametrize( + 'path', + [ + common.get_test_data_file('deluge.png'), + common.get_test_data_file('unicode_filenames.torrent'), + os.path.dirname(common.get_test_data_file('deluge.png')), + ], + ) + @pytest.mark.parametrize( + 'torrent_format', + [ + metafile.TorrentFormat.V1, + metafile.TorrentFormat.V2, + metafile.TorrentFormat.HYBRID, + ], + ) + @pytest.mark.parametrize('piece_length', [2**14, 2**15, 2**16]) + @pytest.mark.parametrize('private', [True, False]) + def test_create_info(self, path, torrent_format, piece_length, private): + our_info, our_piece_layers = metafile.makeinfo( + path, + piece_length, + metafile.dummy, + private=private, + torrent_format=torrent_format, + ) + lt_info, lt_piece_layers = metafile.makeinfo_lt( + path, + piece_length, + private=private, + torrent_format=torrent_format, + ) + + if ( + torrent_format == metafile.TorrentFormat.HYBRID + and os.path.isdir(path) + and VersionSplit(LT_VERSION) <= VersionSplit('2.0.7.0') + ): + # Libtorrent didn't correctly follow the standard until version 2.0.7 included + # https://github.com/arvidn/libtorrent/commit/74d82a0cd7c2e9e3c4294901d7eb65e247050df4 + # If last file is a padding, ignore that file and the last piece. + if our_info[b'files'][-1][b'path'][0] == b'.pad': + our_info[b'files'] = our_info[b'files'][:-1] + our_info[b'pieces'] = our_info[b'pieces'][:-32] + lt_info[b'pieces'] = lt_info[b'pieces'][:-32] + + assert our_info == lt_info + assert our_piece_layers == lt_piece_layers