diff --git a/deluge/core/core.py b/deluge/core/core.py index fc12f7bc5..e2130f595 100644 --- a/deluge/core/core.py +++ b/deluge/core/core.py @@ -21,7 +21,7 @@ from twisted.web.client import Agent, readBody import deluge.common import deluge.component as component -from deluge import path_chooser_common +from deluge import metafile, path_chooser_common from deluge._libtorrent import LT_VERSION, lt from deluge.configmanager import ConfigManager, get_config_dir from deluge.core.alertmanager import AlertManager @@ -998,7 +998,11 @@ class Core(component.Component): created_by=None, trackers=None, add_to_session=False, + torrent_format=metafile.TorrentFormat.V1, ): + if isinstance(torrent_format, str): + torrent_format = metafile.TorrentFormat(torrent_format) + log.debug('creating torrent..') return threads.deferToThread( self._create_torrent_thread, @@ -1012,6 +1016,7 @@ class Core(component.Component): created_by=created_by, trackers=trackers, add_to_session=add_to_session, + torrent_format=torrent_format, ) def _create_torrent_thread( @@ -1026,6 +1031,7 @@ class Core(component.Component): created_by, trackers, add_to_session, + torrent_format, ): from deluge import metafile @@ -1038,6 +1044,7 @@ class Core(component.Component): private=private, created_by=created_by, trackers=trackers, + torrent_format=torrent_format, ) write_file = False diff --git a/deluge/metafile.py b/deluge/metafile.py index 906cc81e3..81a371ff3 100644 --- a/deluge/metafile.py +++ b/deluge/metafile.py @@ -10,10 +10,13 @@ # See LICENSE for more details. # +import copy import logging import os.path import time +from enum import Enum from hashlib import sha1 as sha +from hashlib import sha256 import deluge.component as component from deluge.bencode import bencode @@ -41,6 +44,35 @@ def dummy(*v): pass +class TorrentFormat(str, Enum): + V1 = 'v1' + V2 = 'v2' + HYBRID = 'hybrid' + + @classmethod + def _missing_(cls, value): + if not value: + return None + + value = value.lower() + for member in cls: + if member.value == value: + return member + + def to_lt_flag(self): + if self.value == 'v1': + return 64 + if self.value == 'v2': + return 32 + return 0 + + def includes_v1(self): + return self == self.__class__.V1 or self == self.__class__.HYBRID + + def includes_v2(self): + return self == self.__class__.V2 or self == self.__class__.HYBRID + + class RemoteFileProgress: def __init__(self, session_id): self.session_id = session_id @@ -65,6 +97,7 @@ def make_meta_file_content( private=False, created_by=None, trackers=None, + torrent_format=TorrentFormat.V1, ): data = {'creation date': int(gmtime())} if url: @@ -80,10 +113,20 @@ def make_meta_file_content( if session_id: progress = RemoteFileProgress(session_id) - info = makeinfo(path, piece_length, progress, name, content_type, private) + info, piece_layers = makeinfo( + path, + piece_length, + progress, + name, + content_type, + private, + torrent_format, + ) # check_info(info) data['info'] = info + if piece_layers is not None: + data['piece layers'] = piece_layers if title: data['title'] = title.encode('utf8') if comment: @@ -170,101 +213,237 @@ def calcsize(path): return total -def makeinfo(path, piece_length, progress, name=None, content_type=None, private=False): - # HEREDAVE. If path is directory, how do we assign content type? - path = os.path.abspath(path) - piece_count = 0 - if os.path.isdir(path): - subs = sorted(subfiles(path)) - pieces = [] - sh = sha() - done = 0 - fs = [] - totalsize = 0.0 - totalhashed = 0 - for p, f in subs: - totalsize += os.path.getsize(f) - if totalsize >= piece_length: - import math +def _next_pow2(num): + import math - num_pieces = math.ceil(totalsize / piece_length) - else: - num_pieces = 1 + if not num: + return 1 + return 2 ** math.ceil(math.log2(num)) - for p, f in subs: - pos = 0 - size = os.path.getsize(f) - p2 = [n.encode('utf8') for n in p] - if content_type: - fs.append( - {'length': size, 'path': p2, 'content_type': content_type} - ) # HEREDAVE. bad for batch! + +def _sha256_merkle_root(leafs, nb_leafs, padding, in_place=True) -> bytes: + """ + Build the root of the merkle hash tree from the (possibly incomplete) leafs layer. + If len(leafs) < nb_leafs, it will be padded with the padding repeated as many times + as needed to have nb_leafs in total. + """ + if not in_place: + leafs = copy.copy(leafs) + + while nb_leafs > 1: + nb_leafs = nb_leafs // 2 + for i in range(nb_leafs): + node1 = leafs[2 * i] if 2 * i < len(leafs) else padding + node2 = leafs[2 * i + 1] if 2 * i + 1 < len(leafs) else padding + h = sha256(node1) + h.update(node2) + if i < len(leafs): + leafs[i] = h.digest() else: - fs.append({'length': size, 'path': p2}) - with open(f, 'rb') as file_: - while pos < size: - a = min(size - pos, piece_length - done) - sh.update(file_.read(a)) - done += a - pos += a - totalhashed += a + leafs.append(h.digest()) + return leafs[0] if leafs else padding - if done == piece_length: - pieces.append(sh.digest()) - piece_count += 1 - done = 0 - sh = sha() - progress(piece_count, num_pieces) - if done > 0: - pieces.append(sh.digest()) - piece_count += 1 - progress(piece_count, num_pieces) +def _sha256_buffer_blocks(buffer, block_len): + import math + + nb_blocks = math.ceil(len(buffer) / block_len) + blocks = [ + sha256(buffer[i * block_len : (i + 1) * block_len]).digest() + for i in range(nb_blocks) + ] + return blocks + + +def makeinfo_lt( + path, piece_length, name=None, private=False, torrent_format=TorrentFormat.V1 +): + """ + Make info using via the libtorrent library. + """ + from deluge._libtorrent import lt + + if not name: + name = os.path.split(path)[1] + + fs = lt.file_storage() + if os.path.isfile(path): + lt.add_files(fs, path) + else: + for p, f in subfiles(path): + fs.add_file(os.path.join(name, *p), os.path.getsize(f)) + torrent = lt.create_torrent( + fs, piece_size=piece_length, flags=torrent_format.to_lt_flag() + ) + + lt.set_piece_hashes(torrent, os.path.dirname(path)) + torrent.set_priv(private) + + t = torrent.generate() + info = t[b'info'] + pieces_layers = t.get(b'piece layers', None) + + return info, pieces_layers + + +def makeinfo( + path, + piece_length, + progress, + name=None, + content_type=None, + private=False, + torrent_format=TorrentFormat.V1, +): + # HEREDAVE. If path is directory, how do we assign content type? + + v2_block_len = 2**14 # 16 KiB + v2_blocks_per_piece = 1 + v2_block_padding = b'' + v2_piece_padding = b'' + if torrent_format.includes_v2(): + if _next_pow2(piece_length) != piece_length or piece_length < v2_block_len: + raise ValueError( + 'Bittorrent v2 piece size must be a power of 2; and bigger than 16 KiB' + ) + + v2_blocks_per_piece = piece_length // v2_block_len + v2_block_padding = bytes(32) # 32 = size of sha256 in bytes + v2_piece_padding = _sha256_merkle_root( + [], nb_leafs=v2_blocks_per_piece, padding=v2_block_padding + ) + + path = os.path.abspath(path) + files = [] + pieces = [] + file_tree = {} + piece_layers = {} + if os.path.isdir(path): if not name: name = os.path.split(path)[1] - - return { - 'pieces': b''.join(pieces), - 'piece length': piece_length, - 'files': fs, - 'name': name.encode('utf8'), - 'private': private, - } + subs = subfiles(path) + if torrent_format.includes_v2(): + subs = sorted(subs) + length = None + totalsize = 0.0 + for p, f in subs: + totalsize += os.path.getsize(f) else: - size = os.path.getsize(path) - if size >= piece_length: - num_pieces = size // piece_length - else: - num_pieces = 1 + name = os.path.split(path)[1] + subs = [([name], path)] + length = os.path.getsize(path) + totalsize = length + is_multi_file = len(subs) > 1 + sh = sha() + done = 0 + totalhashed = 0 - pieces = [] - p = 0 - with open(path, 'rb') as _file: - while p < size: - x = _file.read(min(piece_length, size - p)) - pieces.append(sha(x).digest()) - piece_count += 1 - p += piece_length - if p > size: - p = size - progress(piece_count, num_pieces) - name = os.path.split(path)[1].encode('utf8') - if content_type is not None: - return { - 'pieces': b''.join(pieces), - 'piece length': piece_length, - 'length': size, - 'name': name, - 'content_type': content_type, - 'private': private, + next_progress_event = piece_length + for p, f in subs: + file_pieces_v2 = [] + pos = 0 + size = os.path.getsize(f) + p2 = [n.encode('utf8') for n in p] + if content_type: + files.append( + {b'length': size, b'path': p2, b'content_type': content_type} + ) # HEREDAVE. bad for batch! + else: + files.append({b'length': size, b'path': p2}) + with open(f, 'rb') as file_: + while pos < size: + to_read = min(size - pos, piece_length) + buffer = memoryview(file_.read(to_read)) + pos += to_read + + if torrent_format.includes_v1(): + a = piece_length - done + for sub_buffer in (buffer[:a], buffer[a:]): + if sub_buffer: + sh.update(sub_buffer) + done += len(sub_buffer) + + if done == piece_length: + pieces.append(sh.digest()) + done = 0 + sh = sha() + if torrent_format.includes_v2(): + block_hashes = _sha256_buffer_blocks(buffer, v2_block_len) + num_leafs = v2_blocks_per_piece + if size <= piece_length: + # The special case when the file is smaller than a piece: only pad till the next power of 2 + num_leafs = _next_pow2(len(block_hashes)) + root = _sha256_merkle_root( + block_hashes, num_leafs, v2_block_padding, in_place=True + ) + file_pieces_v2.append(root) + + totalhashed += to_read + if totalhashed >= next_progress_event: + next_progress_event = totalhashed + piece_length + progress(totalhashed, totalsize) + + if torrent_format == TorrentFormat.HYBRID and is_multi_file and done > 0: + # Add padding file to force piece-alignment + padding = piece_length - done + sh.update(bytes(padding)) + files.append( + { + b'length': padding, + b'attr': b'p', + b'path': [b'.pad', str(padding).encode()], + } + ) + pieces.append(sh.digest()) + done = 0 + sh = sha() + + if torrent_format.includes_v2(): + # add file to the `file tree` and, if needed, to the `piece layers` structures + pieces_root = _sha256_merkle_root( + file_pieces_v2, + _next_pow2(len(file_pieces_v2)), + v2_piece_padding, + in_place=False, + ) + dst_directory = file_tree + for directory in p2[:-1]: + dst_directory = dst_directory.setdefault(directory, {}) + dst_directory[p2[-1]] = { + b'': { + b'length': size, + b'pieces root': pieces_root, + } } - return { - 'pieces': b''.join(pieces), - 'piece length': piece_length, - 'length': size, - 'name': name, - 'private': private, - } + if len(file_pieces_v2) > 1: + piece_layers[pieces_root] = b''.join(file_pieces_v2) + + if done > 0: + pieces.append(sh.digest()) + progress(totalsize, totalsize) + + info = { + b'piece length': piece_length, + b'name': name.encode('utf8'), + } + if private: + info[b'private'] = 1 + if content_type: + info[b'content_type'] = content_type + if torrent_format.includes_v1(): + info[b'pieces'] = b''.join(pieces) + if is_multi_file: + info[b'files'] = files + else: + info[b'length'] = length + if torrent_format.includes_v2(): + info.update( + { + b'meta version': 2, + b'file tree': file_tree, + } + ) + return info, piece_layers if torrent_format.includes_v2() else None def subfiles(d): diff --git a/deluge/tests/test_metafile.py b/deluge/tests/test_metafile.py index fda1cb73e..1b1675052 100644 --- a/deluge/tests/test_metafile.py +++ b/deluge/tests/test_metafile.py @@ -7,7 +7,13 @@ import os import tempfile +import pytest + from deluge import metafile +from deluge._libtorrent import LT_VERSION +from deluge.common import VersionSplit + +from . import common def check_torrent(filename): @@ -55,3 +61,52 @@ class TestMetafile: metafile.make_meta_file(tmp_data, '', 32768, target=tmp_torrent) check_torrent(tmp_torrent) + + @pytest.mark.parametrize( + 'path', + [ + common.get_test_data_file('deluge.png'), + common.get_test_data_file('unicode_filenames.torrent'), + os.path.dirname(common.get_test_data_file('deluge.png')), + ], + ) + @pytest.mark.parametrize( + 'torrent_format', + [ + metafile.TorrentFormat.V1, + metafile.TorrentFormat.V2, + metafile.TorrentFormat.HYBRID, + ], + ) + @pytest.mark.parametrize('piece_length', [2**14, 2**15, 2**16]) + @pytest.mark.parametrize('private', [True, False]) + def test_create_info(self, path, torrent_format, piece_length, private): + our_info, our_piece_layers = metafile.makeinfo( + path, + piece_length, + metafile.dummy, + private=private, + torrent_format=torrent_format, + ) + lt_info, lt_piece_layers = metafile.makeinfo_lt( + path, + piece_length, + private=private, + torrent_format=torrent_format, + ) + + if ( + torrent_format == metafile.TorrentFormat.HYBRID + and os.path.isdir(path) + and VersionSplit(LT_VERSION) <= VersionSplit('2.0.7.0') + ): + # Libtorrent didn't correctly follow the standard until version 2.0.7 included + # https://github.com/arvidn/libtorrent/commit/74d82a0cd7c2e9e3c4294901d7eb65e247050df4 + # If last file is a padding, ignore that file and the last piece. + if our_info[b'files'][-1][b'path'][0] == b'.pad': + our_info[b'files'] = our_info[b'files'][:-1] + our_info[b'pieces'] = our_info[b'pieces'][:-32] + lt_info[b'pieces'] = lt_info[b'pieces'][:-32] + + assert our_info == lt_info + assert our_piece_layers == lt_piece_layers