[#3440] Fix httpdownloader reencoding torrent file downloads

Torrent downloads from rutracker responds with the header:

    Content-Type: application/x-bittorrent; charset=Windows-1251

The problem is that httpdownloader was using the charset to re-encode
the downloaded file, corrupting the binary torrent file download.

Fixed by only re-encoding text content types, since it is very rare
that non-text content types would actually have a non-utf8 codeset and
if there is a requirement we would need to determine it on a type by
type basis.
This commit is contained in:
Calum Lind 2021-02-20 19:39:07 +00:00
parent f331b6c754
commit 4d970754a4
2 changed files with 55 additions and 9 deletions

View File

@ -151,9 +151,12 @@ class HTTPDownloaderAgent(object):
self.filename = new_file_name
cont_type = headers.getRawHeaders(b'content-type')[0].decode()
params = cgi.parse_header(cont_type)[1]
encoding = params.get('charset', None)
cont_type_header = headers.getRawHeaders(b'content-type')[0].decode()
cont_type, params = cgi.parse_header(cont_type_header)
# Only re-ecode text content types.
encoding = None
if cont_type.startswith('text/'):
encoding = params.get('charset', None)
response.deliverBody(
BodyHandler(response.request, finished, body_length, self, encoding)
)

View File

@ -9,6 +9,7 @@ from __future__ import unicode_literals
import tempfile
from email.utils import formatdate
from io import open
from twisted.internet import reactor
from twisted.internet.error import CannotListenError
@ -47,9 +48,30 @@ class RenameResource(Resource):
class AttachmentResource(Resource):
def render(self, request):
request.setHeader(b'Content-Type', b'text/plain')
content_type = b'text/plain'
charset = request.getHeader(b'content-charset')
if charset:
content_type += b'; charset=' + charset
request.setHeader(b'Content-Type', content_type)
request.setHeader(b'Content-Disposition', b'attachment')
return b'Attachement with no filename set'
append = request.getHeader(b'content-append') or b''
content = 'Attachment with no filename set{}'.format(append.decode('utf8'))
return (
content.encode(charset.decode('utf8'))
if charset
else content.encode('utf8')
)
class TorrentResource(Resource):
def render(self, request):
content_type = b'application/x-bittorrent'
charset = request.getHeader(b'content-charset')
if charset:
content_type += b'; charset=' + charset
request.setHeader(b'Content-Type', content_type)
request.setHeader(b'Content-Disposition', b'attachment; filename=test.torrent')
return 'Binary attachment ignore charset 世丕且\n'.encode('utf8')
class CookieResource(Resource):
@ -101,6 +123,7 @@ class TopLevelResource(Resource):
self.putChild(b'redirect', self.redirect_rsrc)
self.putChild(b'rename', RenameResource())
self.putChild(b'attachment', AttachmentResource())
self.putChild(b'torrent', TorrentResource())
self.putChild(b'partial', PartialDownloadResource())
def getChild(self, path, request): # NOQA: N802
@ -110,7 +133,7 @@ class TopLevelResource(Resource):
return Resource.getChild(self, path, request)
def render(self, request):
if request.getHeader('If-Modified-Since'):
if request.getHeader(b'If-Modified-Since'):
request.setResponseCode(NOT_MODIFIED)
return b'<h1>Deluge HTTP Downloader tests webserver here</h1>'
@ -139,7 +162,7 @@ class DownloadFileTestCase(unittest.TestCase):
return self.webserver.stopListening()
def assertContains(self, filename, contents): # NOQA
with open(filename) as _file:
with open(filename, 'r', encoding='utf8') as _file:
try:
self.assertEqual(_file.read(), contents)
except Exception as ex:
@ -147,7 +170,7 @@ class DownloadFileTestCase(unittest.TestCase):
return filename
def assertNotContains(self, filename, contents, file_mode=''): # NOQA
with open(filename, file_mode) as _file:
with open(filename, 'r', encoding='utf8') as _file:
try:
self.assertNotEqual(_file.read(), contents)
except Exception as ex:
@ -212,7 +235,7 @@ class DownloadFileTestCase(unittest.TestCase):
url = self.get_url('attachment')
d = download_file(url, fname('original'))
d.addCallback(self.assertEqual, fname('original'))
d.addCallback(self.assertContains, 'Attachement with no filename set')
d.addCallback(self.assertContains, 'Attachment with no filename set')
return d
def test_download_with_rename_prevented(self):
@ -264,3 +287,23 @@ class DownloadFileTestCase(unittest.TestCase):
d.addCallback(self.fail)
d.addErrback(self.assertIsInstance, Failure)
return d
def test_download_text_reencode_charset(self):
"""Re-encode as UTF-8 specified charset for text content-type header"""
url = self.get_url('attachment')
filepath = fname('test.txt')
headers = {'content-charset': 'Windows-1251', 'content-append': 'бвгде'}
d = download_file(url, filepath, headers=headers)
d.addCallback(self.assertEqual, filepath)
d.addCallback(self.assertContains, 'Attachment with no filename setбвгде')
return d
def test_download_binary_ignore_charset(self):
"""Ignore charset for binary content-type header e.g. torrent files"""
url = self.get_url('torrent')
headers = {'content-charset': 'Windows-1251'}
filepath = fname('test.torrent')
d = download_file(url, fname('test.torrent'), headers=headers)
d.addCallback(self.assertEqual, filepath)
d.addCallback(self.assertContains, 'Binary attachment ignore charset 世丕且\n')
return d