[#3440] Fix httpdownloader reencoding torrent file downloads
Torrent downloads from rutracker responds with the header: Content-Type: application/x-bittorrent; charset=Windows-1251 The problem is that httpdownloader was using the charset to re-encode the downloaded file, corrupting the binary torrent file download. Fixed by only re-encoding text content types, since it is very rare that non-text content types would actually have a non-utf8 codeset and if there is a requirement we would need to determine it on a type by type basis.
This commit is contained in:
parent
f331b6c754
commit
4d970754a4
|
@ -151,9 +151,12 @@ class HTTPDownloaderAgent(object):
|
||||||
|
|
||||||
self.filename = new_file_name
|
self.filename = new_file_name
|
||||||
|
|
||||||
cont_type = headers.getRawHeaders(b'content-type')[0].decode()
|
cont_type_header = headers.getRawHeaders(b'content-type')[0].decode()
|
||||||
params = cgi.parse_header(cont_type)[1]
|
cont_type, params = cgi.parse_header(cont_type_header)
|
||||||
encoding = params.get('charset', None)
|
# Only re-ecode text content types.
|
||||||
|
encoding = None
|
||||||
|
if cont_type.startswith('text/'):
|
||||||
|
encoding = params.get('charset', None)
|
||||||
response.deliverBody(
|
response.deliverBody(
|
||||||
BodyHandler(response.request, finished, body_length, self, encoding)
|
BodyHandler(response.request, finished, body_length, self, encoding)
|
||||||
)
|
)
|
||||||
|
|
|
@ -9,6 +9,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import tempfile
|
import tempfile
|
||||||
from email.utils import formatdate
|
from email.utils import formatdate
|
||||||
|
from io import open
|
||||||
|
|
||||||
from twisted.internet import reactor
|
from twisted.internet import reactor
|
||||||
from twisted.internet.error import CannotListenError
|
from twisted.internet.error import CannotListenError
|
||||||
|
@ -47,9 +48,30 @@ class RenameResource(Resource):
|
||||||
|
|
||||||
class AttachmentResource(Resource):
|
class AttachmentResource(Resource):
|
||||||
def render(self, request):
|
def render(self, request):
|
||||||
request.setHeader(b'Content-Type', b'text/plain')
|
content_type = b'text/plain'
|
||||||
|
charset = request.getHeader(b'content-charset')
|
||||||
|
if charset:
|
||||||
|
content_type += b'; charset=' + charset
|
||||||
|
request.setHeader(b'Content-Type', content_type)
|
||||||
request.setHeader(b'Content-Disposition', b'attachment')
|
request.setHeader(b'Content-Disposition', b'attachment')
|
||||||
return b'Attachement with no filename set'
|
append = request.getHeader(b'content-append') or b''
|
||||||
|
content = 'Attachment with no filename set{}'.format(append.decode('utf8'))
|
||||||
|
return (
|
||||||
|
content.encode(charset.decode('utf8'))
|
||||||
|
if charset
|
||||||
|
else content.encode('utf8')
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TorrentResource(Resource):
|
||||||
|
def render(self, request):
|
||||||
|
content_type = b'application/x-bittorrent'
|
||||||
|
charset = request.getHeader(b'content-charset')
|
||||||
|
if charset:
|
||||||
|
content_type += b'; charset=' + charset
|
||||||
|
request.setHeader(b'Content-Type', content_type)
|
||||||
|
request.setHeader(b'Content-Disposition', b'attachment; filename=test.torrent')
|
||||||
|
return 'Binary attachment ignore charset 世丕且\n'.encode('utf8')
|
||||||
|
|
||||||
|
|
||||||
class CookieResource(Resource):
|
class CookieResource(Resource):
|
||||||
|
@ -101,6 +123,7 @@ class TopLevelResource(Resource):
|
||||||
self.putChild(b'redirect', self.redirect_rsrc)
|
self.putChild(b'redirect', self.redirect_rsrc)
|
||||||
self.putChild(b'rename', RenameResource())
|
self.putChild(b'rename', RenameResource())
|
||||||
self.putChild(b'attachment', AttachmentResource())
|
self.putChild(b'attachment', AttachmentResource())
|
||||||
|
self.putChild(b'torrent', TorrentResource())
|
||||||
self.putChild(b'partial', PartialDownloadResource())
|
self.putChild(b'partial', PartialDownloadResource())
|
||||||
|
|
||||||
def getChild(self, path, request): # NOQA: N802
|
def getChild(self, path, request): # NOQA: N802
|
||||||
|
@ -110,7 +133,7 @@ class TopLevelResource(Resource):
|
||||||
return Resource.getChild(self, path, request)
|
return Resource.getChild(self, path, request)
|
||||||
|
|
||||||
def render(self, request):
|
def render(self, request):
|
||||||
if request.getHeader('If-Modified-Since'):
|
if request.getHeader(b'If-Modified-Since'):
|
||||||
request.setResponseCode(NOT_MODIFIED)
|
request.setResponseCode(NOT_MODIFIED)
|
||||||
return b'<h1>Deluge HTTP Downloader tests webserver here</h1>'
|
return b'<h1>Deluge HTTP Downloader tests webserver here</h1>'
|
||||||
|
|
||||||
|
@ -139,7 +162,7 @@ class DownloadFileTestCase(unittest.TestCase):
|
||||||
return self.webserver.stopListening()
|
return self.webserver.stopListening()
|
||||||
|
|
||||||
def assertContains(self, filename, contents): # NOQA
|
def assertContains(self, filename, contents): # NOQA
|
||||||
with open(filename) as _file:
|
with open(filename, 'r', encoding='utf8') as _file:
|
||||||
try:
|
try:
|
||||||
self.assertEqual(_file.read(), contents)
|
self.assertEqual(_file.read(), contents)
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
|
@ -147,7 +170,7 @@ class DownloadFileTestCase(unittest.TestCase):
|
||||||
return filename
|
return filename
|
||||||
|
|
||||||
def assertNotContains(self, filename, contents, file_mode=''): # NOQA
|
def assertNotContains(self, filename, contents, file_mode=''): # NOQA
|
||||||
with open(filename, file_mode) as _file:
|
with open(filename, 'r', encoding='utf8') as _file:
|
||||||
try:
|
try:
|
||||||
self.assertNotEqual(_file.read(), contents)
|
self.assertNotEqual(_file.read(), contents)
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
|
@ -212,7 +235,7 @@ class DownloadFileTestCase(unittest.TestCase):
|
||||||
url = self.get_url('attachment')
|
url = self.get_url('attachment')
|
||||||
d = download_file(url, fname('original'))
|
d = download_file(url, fname('original'))
|
||||||
d.addCallback(self.assertEqual, fname('original'))
|
d.addCallback(self.assertEqual, fname('original'))
|
||||||
d.addCallback(self.assertContains, 'Attachement with no filename set')
|
d.addCallback(self.assertContains, 'Attachment with no filename set')
|
||||||
return d
|
return d
|
||||||
|
|
||||||
def test_download_with_rename_prevented(self):
|
def test_download_with_rename_prevented(self):
|
||||||
|
@ -264,3 +287,23 @@ class DownloadFileTestCase(unittest.TestCase):
|
||||||
d.addCallback(self.fail)
|
d.addCallback(self.fail)
|
||||||
d.addErrback(self.assertIsInstance, Failure)
|
d.addErrback(self.assertIsInstance, Failure)
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
def test_download_text_reencode_charset(self):
|
||||||
|
"""Re-encode as UTF-8 specified charset for text content-type header"""
|
||||||
|
url = self.get_url('attachment')
|
||||||
|
filepath = fname('test.txt')
|
||||||
|
headers = {'content-charset': 'Windows-1251', 'content-append': 'бвгде'}
|
||||||
|
d = download_file(url, filepath, headers=headers)
|
||||||
|
d.addCallback(self.assertEqual, filepath)
|
||||||
|
d.addCallback(self.assertContains, 'Attachment with no filename setбвгде')
|
||||||
|
return d
|
||||||
|
|
||||||
|
def test_download_binary_ignore_charset(self):
|
||||||
|
"""Ignore charset for binary content-type header e.g. torrent files"""
|
||||||
|
url = self.get_url('torrent')
|
||||||
|
headers = {'content-charset': 'Windows-1251'}
|
||||||
|
filepath = fname('test.torrent')
|
||||||
|
d = download_file(url, fname('test.torrent'), headers=headers)
|
||||||
|
d.addCallback(self.assertEqual, filepath)
|
||||||
|
d.addCallback(self.assertContains, 'Binary attachment ignore charset 世丕且\n')
|
||||||
|
return d
|
||||||
|
|
Loading…
Reference in New Issue