Cleanup docstrings in httpdownloader

Use the new google docstring style.
Keep line length to 80 chars and new lines for multi-line func params.
This commit is contained in:
Calum Lind 2018-07-14 11:13:02 +01:00
parent c7e61f8c34
commit 8bfa2cacbb
1 changed files with 113 additions and 83 deletions

View File

@ -30,31 +30,29 @@ log = logging.getLogger(__name__)
class CompressionDecoder(client.GzipDecoder): class CompressionDecoder(client.GzipDecoder):
"""A compression decoder for gzip, x-gzip and deflate""" """A compression decoder for gzip, x-gzip and deflate."""
def deliverBody(self, protocol): # NOQA: N802 def deliverBody(self, protocol): # NOQA: N802
self.original.deliverBody(CompressionDecoderProtocol(protocol, self.original)) self.original.deliverBody(
CompressionDecoderProtocol(protocol, self.original))
class CompressionDecoderProtocol(client._GzipProtocol): class CompressionDecoderProtocol(client._GzipProtocol):
"""A compression decoder protocol for CompressionDecoder""" """A compression decoder protocol for CompressionDecoder."""
def __init__(self, protocol, response): def __init__(self, protocol, response):
super(CompressionDecoderProtocol, self).__init__(protocol, response) super(CompressionDecoderProtocol, self).__init__(protocol, response)
self._zlibDecompress = zlib.decompressobj(32 + zlib.MAX_WBITS) self._zlibDecompress = zlib.decompressobj(32 + zlib.MAX_WBITS)
class BodyHandler(HTTPClientParser, object): class BodyHandler(HTTPClientParser, object):
"""An HTTP parser that saves the response on a file""" """An HTTP parser that saves the response to a file."""
def __init__(self, request, finished, length, agent): def __init__(self, request, finished, length, agent):
""" """BodyHandler init.
:param request: the request to which this parser is for Args:
:type request: twisted.web.iweb.IClientRequest request (t.w.i.IClientRequest): The parser request.
:param finished: a Deferred to handle the the finished response finished (Deferred): A Deferred to handle the finished response.
:type finished: twisted.internet.defer.Deferred length (int): The length of the response.
:param length: the length of the response agent (t.w.i.IAgent): The agent from which the request was sent.
:type length: int
:param agent: the agent from which the request was sent
:type agent: twisted.web.iweb.IAgent
""" """
super(BodyHandler, self).__init__(request, finished) super(BodyHandler, self).__init__(request, finished)
self.agent = agent self.agent = agent
@ -67,7 +65,8 @@ class BodyHandler(HTTPClientParser, object):
self.current_length += len(data) self.current_length += len(data)
self.data += data self.data += data
if self.agent.part_callback: if self.agent.part_callback:
self.agent.part_callback(data, self.current_length, self.total_length) self.agent.part_callback(
data, self.current_length, self.total_length)
def connectionLost(self, reason): # NOQA: N802 def connectionLost(self, reason): # NOQA: N802
with open(self.agent.filename, 'wb') as _file: with open(self.agent.filename, 'wb') as _file:
@ -79,23 +78,26 @@ class BodyHandler(HTTPClientParser, object):
@implementer(IAgent) @implementer(IAgent)
class HTTPDownloaderAgent(object): class HTTPDownloaderAgent(object):
""" """A File Downloader Agent."""
A File Downloader Agent
"""
def __init__( def __init__(
self, agent, filename, part_callback=None, self,
force_filename=False, allow_compression=True, handle_redirect=True, agent,
filename,
part_callback=None,
force_filename=False,
allow_compression=True,
handle_redirect=True,
): ):
""" """HTTPDownloaderAgent init.
:param agent: the agent which will send the requests
:type agent: twisted.web.client.Agent Args:
:param filename: the filename to save the file as agent (t.w.c.Agent): The agent which will send the requests.
:type filename: string filename (str): The filename to save the file as.
:param force_filename: forces use of the supplied filename, regardless of header content force_filename (bool): Forces use of the supplied filename,
:type force_filename: bool regardless of header content.
:param part_callback: a function to be called when a part of data part_callback (func): A function to be called when a part of data
is received, its signature should be: func(data, current_length, total_length) is received, its signature should be:
:type part_callback: function func(data, current_length, total_length)
""" """
self.handle_redirect = handle_redirect self.handle_redirect = handle_redirect
@ -120,15 +122,21 @@ class HTTPDownloaderAgent(object):
finished.errback(Failure(error)) finished.errback(Failure(error))
else: else:
headers = response.headers headers = response.headers
body_length = int(headers.getRawHeaders(b'content-length', default=[0])[0]) body_length = int(
headers.getRawHeaders(b'content-length', default=[0])[0])
if headers.hasHeader(b'content-disposition') and not self.force_filename: if (
content_disp = headers.getRawHeaders(b'content-disposition')[0].decode('utf-8') headers.hasHeader(b'content-disposition')
and not self.force_filename
):
content_disp = headers.getRawHeaders(
b'content-disposition')[0].decode('utf-8')
content_disp_params = cgi.parse_header(content_disp)[1] content_disp_params = cgi.parse_header(content_disp)[1]
if 'filename' in content_disp_params: if 'filename' in content_disp_params:
new_file_name = content_disp_params['filename'] new_file_name = content_disp_params['filename']
new_file_name = sanitise_filename(new_file_name) new_file_name = sanitise_filename(new_file_name)
new_file_name = os.path.join(os.path.split(self.filename)[0], new_file_name) new_file_name = os.path.join(
os.path.split(self.filename)[0], new_file_name)
count = 1 count = 1
fileroot = os.path.splitext(new_file_name)[0] fileroot = os.path.splitext(new_file_name)[0]
@ -140,20 +148,22 @@ class HTTPDownloaderAgent(object):
self.filename = new_file_name self.filename = new_file_name
response.deliverBody(BodyHandler(response.request, finished, body_length, self)) response.deliverBody(
BodyHandler(response.request, finished, body_length, self))
return finished return finished
def request(self, method, uri, headers=None, body_producer=None): def request(self, method, uri, headers=None, body_producer=None):
""" """Issue a new request to the wrapped agent.
:param method: the HTTP method to use Args:
:param uri: the url to download from method (bytes): The HTTP method to use.
:type uri: string uri (bytes): The url to download from.
:param headers: any optional headers to send headers (t.w.h.Headers, optional): Any extra headers to send.
:type headers: twisted.web.http_headers.Headers body_producer (t.w.i.IBodyProducer, optional): Request body data.
:param body_producer:
:return: Returns:
Deferred: The filename of the downloaded file.
""" """
if headers is None: if headers is None:
headers = Headers() headers = Headers()
@ -174,14 +184,14 @@ class HTTPDownloaderAgent(object):
def sanitise_filename(filename): def sanitise_filename(filename):
""" """Sanitises a filename to use as a download destination file.
Sanitises a filename to use as a download destination file.
Logs any filenames that could be considered malicious. Logs any filenames that could be considered malicious.
:param filename: the filename to sanitise filename (str): The filename to sanitise.
:type filename: string
:returns: the sanitised filename Returns:
:rtype: string str: The sanitised filename.
""" """
# Remove any quotes # Remove any quotes
@ -189,43 +199,52 @@ def sanitise_filename(filename):
if os.path.basename(filename) != filename: if os.path.basename(filename) != filename:
# Dodgy server, log it # Dodgy server, log it
log.warning('Potentially malicious server: trying to write to file: %s', filename) log.warning(
'Potentially malicious server: trying to write to file: %s',
filename,
)
# Only use the basename # Only use the basename
filename = os.path.basename(filename) filename = os.path.basename(filename)
filename = filename.strip() filename = filename.strip()
if filename.startswith('.') or ';' in filename or '|' in filename: if filename.startswith('.') or ';' in filename or '|' in filename:
# Dodgy server, log it # Dodgy server, log it
log.warning('Potentially malicious server: trying to write to file: %s', filename) log.warning(
'Potentially malicious server: trying to write to file: %s',
filename,
)
return filename return filename
def _download_file( def _download_file(
url, filename, callback=None, headers=None, url, filename,
force_filename=False, allow_compression=True, handle_redirects=True, callback=None,
headers=None,
force_filename=False,
allow_compression=True,
handle_redirects=True,
): ):
""" """Downloads a file from a specific URL and returns a Deferred.
Downloads a file from a specific URL and returns a Deferred. A callback
function can be specified to be called as parts are received. A callback function can be specified to be called as parts are received.
Args: Args:
url (str): The url to download from url (str): The url to download from.
filename (str): The filename to save the file as filename (str): The filename to save the file as.
callback (func): A function to be called when a part of data is received, callback (func): A function to be called when partial data is received,
its signature should be: func(data, current_length, total_length) its signature should be: func(data, current_length, total_length)
headers (dict): Any optional headers to send headers (dict): Any optional headers to send.
force_filename (bool): force us to use the filename specified rather than force_filename (bool): Force using the filename specified rather than
one the server may suggest one the server may suggest.
allow_compression (bool): Allows gzip & deflate decoding allow_compression (bool): Allows gzip & deflate decoding.
Returns: Returns:
Deferred: the filename of the downloaded file Deferred: The filename of the downloaded file.
Raises: Raises:
t.w.e.PageRedirect t.w.e.PageRedirect
t.w.e.Error: for all other HTTP response errors t.w.e.Error: for all other HTTP response errors
""" """
agent = client.Agent(reactor) agent = client.Agent(reactor)
@ -237,7 +256,14 @@ def _download_file(
if handle_redirects: if handle_redirects:
agent = client.RedirectAgent(agent) agent = client.RedirectAgent(agent)
agent = HTTPDownloaderAgent(agent, filename, callback, force_filename, allow_compression, handle_redirects) agent = HTTPDownloaderAgent(
agent,
filename,
callback,
force_filename,
allow_compression,
handle_redirects,
)
# The Headers init expects dict values to be a list. # The Headers init expects dict values to be a list.
if headers: if headers:
@ -249,31 +275,35 @@ def _download_file(
def download_file( def download_file(
url, filename, callback=None, headers=None, force_filename=False, url,
allow_compression=True, handle_redirects=True, filename,
callback=None,
headers=None,
force_filename=False,
allow_compression=True,
handle_redirects=True,
): ):
""" """Downloads a file from a specific URL and returns a Deferred.
Downloads a file from a specific URL and returns a Deferred. A callback
function can be specified to be called as parts are received. A callback function can be specified to be called as parts are received.
Args: Args:
url (str): The url to download from url (str): The url to download from.
filename (str): The filename to save the file as filename (str): The filename to save the file as.
callback (func): A function to be called when a part of data is received, callback (func): A function to be called when partial data is received,
its signature should be: func(data, current_length, total_length) its signature should be: func(data, current_length, total_length).
headers (dict): Any optional headers to send headers (dict): Any optional headers to send.
force_filename (bool): force us to use the filename specified rather than force_filename (bool): Force the filename specified rather than one the
one the server may suggest server may suggest.
allow_compression (bool): Allows gzip & deflate decoding allow_compression (bool): Allows gzip & deflate decoding.
handle_redirects (bool): If HTTP redirects should be handled automatically handle_redirects (bool): HTTP redirects handled automatically or not.
Returns: Returns:
Deferred: the filename of the downloaded file Deferred: The filename of the downloaded file.
Raises: Raises:
t.w.e.PageRedirect: Unless handle_redirects=True t.w.e.PageRedirect: If handle_redirects is False.
t.w.e.Error: for all other HTTP response errors t.w.e.Error: For all other HTTP response errors.
""" """
def on_download_success(result): def on_download_success(result):
log.debug('Download success!') log.debug('Download success!')