From 18d448d4a54b2fd51e0eec084ea42f09f073a514 Mon Sep 17 00:00:00 2001
From: Calum Lind
Date: Sun, 29 Jul 2018 07:17:14 +0100
Subject: [PATCH] [Py2to3] Ensure httpdownloader saves data as UTF-8

Python 3 raised a decoding error with the google page which appears to
be encoded with 'latin-1', so extract the content charset to decode and
re-encode in 'utf-8'.
---
 deluge/httpdownloader.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/deluge/httpdownloader.py b/deluge/httpdownloader.py
index f8d1e2a0e..ad0d2f80a 100644
--- a/deluge/httpdownloader.py
+++ b/deluge/httpdownloader.py
@@ -45,7 +45,7 @@ class CompressionDecoderProtocol(client._GzipProtocol):
 class BodyHandler(HTTPClientParser, object):
     """An HTTP parser that saves the response to a file."""
 
-    def __init__(self, request, finished, length, agent):
+    def __init__(self, request, finished, length, agent, encoding=None):
         """BodyHandler init.
 
         Args:
@@ -60,6 +60,7 @@ class BodyHandler(HTTPClientParser, object):
         self.total_length = length
         self.current_length = 0
         self.data = b''
+        self.encoding = encoding
 
     def dataReceived(self, data):  # NOQA: N802
         self.current_length += len(data)
@@ -69,6 +70,8 @@ class BodyHandler(HTTPClientParser, object):
                 data, self.current_length, self.total_length)
 
     def connectionLost(self, reason):  # NOQA: N802
+        if self.encoding:
+            self.data = self.data.decode(self.encoding).encode('utf8')
         with open(self.agent.filename, 'wb') as _file:
             _file.write(self.data)
         self.finished.callback(self.agent.filename)
@@ -148,8 +151,17 @@ class HTTPDownloaderAgent(object):
 
                     self.filename = new_file_name
 
+            cont_type = headers.getRawHeaders(b'content-type')[0].decode()
+            params = cgi.parse_header(cont_type)[1]
+            encoding = params.get('charset', None)
             response.deliverBody(
-                BodyHandler(response.request, finished, body_length, self))
+                BodyHandler(
+                    response.request,
+                    finished,
+                    body_length,
+                    self,
+                    encoding,
+                ))
 
         return finished
 