nim-chronos/chronos/apps/http/multipart.nim

414 lines
14 KiB
Nim

#
# Chronos HTTP/S multipart/form
# encoding and decoding helper procedures
# (c) Copyright 2019-Present
# Status Research & Development GmbH
#
# Licensed under either of
# Apache License, version 2.0, (LICENSE-APACHEv2)
# MIT license (LICENSE-MIT)
import std/[monotimes, strutils]
import stew/results
import ../../asyncloop
import ../../streams/[asyncstream, boundstream, chunkstream]
import httptable, httpcommon
export httptable, httpcommon, asyncstream
type
MultiPartSource* {.pure.} = enum
Stream, Buffer
MultiPartReader* = object
case kind: MultiPartSource
of MultiPartSource.Stream:
stream*: AsyncStreamReader
of MultiPartSource.Buffer:
discard
firstTime: bool
buffer: seq[byte]
offset: int
boundary: seq[byte]
counter: int
MultiPartReaderRef* = ref MultiPartReader
MultiPart* = object
case kind: MultiPartSource
of MultiPartSource.Stream:
stream: BoundedStreamReader
of MultiPartSource.Buffer:
discard
buffer: seq[byte]
headers: HttpTable
counter: int
name*: string
filename*: string
MultipartError* = object of HttpCriticalError
MultipartEoM* = object of MultipartError
MultipartIncorrectError* = object of MultipartError
MultipartIncompleteError* = object of MultipartError
MultipartReadError* = object of MultipartError
BChar* = byte | char
proc newMultipartReadError(msg: string): ref MultipartReadError =
newException(MultipartReadError, msg)
proc startsWith*(s, prefix: openarray[byte]): bool =
var i = 0
while true:
if i >= len(prefix): return true
if i >= len(s) or s[i] != prefix[i]: return false
inc(i)
proc parseUntil*(s, until: openarray[byte]): int =
var i = 0
while i < len(s):
if len(until) > 0 and s[i] == until[0]:
var u = 1
while i + u < len(s) and u < len(until) and s[i + u] == until[u]:
inc u
if u >= len(until): return i
inc(i)
-1
proc init*[A: BChar, B: BChar](mpt: typedesc[MultiPartReader],
buffer: openarray[A],
boundary: openarray[B]): MultiPartReader =
## Create new MultiPartReader instance with `buffer` interface.
##
## ``buffer`` - is buffer which will be used to read data.
## ``boundary`` - is multipart boundary, this value must not be empty.
doAssert(len(boundary) > 0)
# Our internal boundary has format `<CR><LF><-><-><boundary>`, so we can
# reuse different parts of this sequence for processing.
var fboundary = newSeq[byte](len(boundary) + 4)
fboundary[0] = 0x0D'u8
fboundary[1] = 0x0A'u8
fboundary[2] = byte('-')
fboundary[3] = byte('-')
copyMem(addr fboundary[4], unsafeAddr boundary[0], len(boundary))
# Make copy of buffer, because all the returned parts depending on it.
var buf = newSeq[byte](len(buffer))
if len(buf) > 0:
copyMem(addr buf[0], unsafeAddr buffer[0], len(buffer))
MultiPartReader(kind: MultiPartSource.Buffer,
buffer: buf, offset: 0, boundary: fboundary)
proc new*[B: BChar](mpt: typedesc[MultiPartReaderRef],
stream: AsyncStreamReader,
boundary: openarray[B],
partHeadersMaxSize = 4096): MultiPartReaderRef =
## Create new MultiPartReader instance with `stream` interface.
##
## ``stream`` is stream used to read data.
## ``boundary`` is multipart boundary, this value must not be empty.
## ``partHeadersMaxSize`` is maximum size of multipart's headers.
doAssert(len(boundary) > 0)
# Our internal boundary has format `<CR><LF><-><-><boundary>`, so we can
# reuse different parts of this sequence for processing.
var fboundary = newSeq[byte](len(boundary) + 4)
fboundary[0] = 0x0D'u8
fboundary[1] = 0x0A'u8
fboundary[2] = byte('-')
fboundary[3] = byte('-')
copyMem(addr fboundary[4], unsafeAddr boundary[0], len(boundary))
MultiPartReaderRef(kind: MultiPartSource.Stream, firstTime: true,
stream: stream, offset: 0, boundary: fboundary,
buffer: newSeq[byte](partHeadersMaxSize))
func setPartNames(part: var MultiPart): HttpResult[void] =
if part.headers.count("content-disposition") != 1:
return err("Content-Disposition header is incorrect")
var header = part.headers.getString("content-disposition")
let disp = parseDisposition(header, false)
if disp.failed():
return err("Content-Disposition header value is incorrect")
let dtype = disp.dispositionType(header.toOpenArrayByte(0, len(header) - 1))
if dtype.toLowerAscii() != "form-data":
return err("Content-Disposition type is incorrect")
for k, v in disp.fields(header.toOpenArrayByte(0, len(header) - 1)):
case k.toLowerAscii()
of "name":
part.name = v
of "filename":
part.filename = v
else:
discard
if len(part.name) == 0:
part.name = $part.counter
ok()
proc readPart*(mpr: MultiPartReaderRef): Future[MultiPart] {.async.} =
doAssert(mpr.kind == MultiPartSource.Stream)
if mpr.firstTime:
try:
# Read and verify initial <-><-><boundary><CR><LF>
await mpr.stream.readExactly(addr mpr.buffer[0], len(mpr.boundary) - 2)
mpr.firstTime = false
if not(startsWith(mpr.buffer.toOpenArray(0, len(mpr.boundary) - 3),
mpr.boundary.toOpenArray(2, len(mpr.boundary) - 1))):
raise newException(MultiPartIncorrectError,
"Unexpected boundary encountered")
except CancelledError as exc:
raise exc
except AsyncStreamIncompleteError:
raise newMultipartReadError("Error reading multipart message")
except AsyncStreamReadError:
raise newMultipartReadError("Error reading multipart message")
# Reading part's headers
try:
await mpr.stream.readExactly(addr mpr.buffer[0], 2)
if mpr.buffer[0] == byte('-') and mpr.buffer[1] == byte('-'):
raise newException(MultiPartEoM,
"End of multipart message")
if mpr.buffer[0] != 0x0D'u8 or mpr.buffer[1] != 0x0A'u8:
raise newException(MultiPartIncorrectError,
"Unexpected boundary suffix")
let res = await mpr.stream.readUntil(addr mpr.buffer[0], len(mpr.buffer),
HeadersMark)
var headersList = parseHeaders(mpr.buffer.toOpenArray(0, res - 1), false)
if headersList.failed():
raise newException(MultiPartIncorrectError,
"Incorrect part headers found")
var part = MultiPart(
kind: MultiPartSource.Stream,
headers: HttpTable.init(),
stream: newBoundedStreamReader(mpr.stream, -1, mpr.boundary),
counter: mpr.counter
)
inc(mpr.counter)
for k, v in headersList.headers(mpr.buffer.toOpenArray(0, res - 1)):
part.headers.add(k, v)
let sres = part.setPartNames()
if sres.isErr():
raise newException(MultiPartIncorrectError, sres.error)
return part
except CancelledError as exc:
raise exc
except AsyncStreamIncompleteError:
raise newMultipartReadError("Error reading multipart message")
except AsyncStreamLimitError:
raise newMultipartReadError("Multipart message headers size too big")
except AsyncStreamReadError:
raise newMultipartReadError("Error reading multipart message")
proc getBody*(mp: MultiPart): Future[seq[byte]] {.async.} =
## Get multipart's ``mp`` value as sequence of bytes.
case mp.kind
of MultiPartSource.Stream:
try:
let res = await mp.stream.read()
return res
except AsyncStreamError:
raise newException(MultipartReadError, "Could not read multipart body")
of MultiPartSource.Buffer:
return mp.buffer
proc consumeBody*(mp: MultiPart) {.async.} =
## Discard multipart's ``mp`` value.
case mp.kind
of MultiPartSource.Stream:
try:
await mp.stream.consume()
except AsyncStreamError:
raise newException(MultipartReadError, "Could not consume multipart body")
of MultiPartSource.Buffer:
discard
proc getBodyStream*(mp: MultiPart): HttpResult[AsyncStreamReader] =
## Get multipart's ``mp`` stream, which can be used to obtain value of the
## part.
case mp.kind
of MultiPartSource.Stream:
ok(mp.stream)
else:
err("Could not obtain stream from buffer-like part")
proc close*(mp: MultiPart) {.async.} =
## Close and release MultiPart's ``mp`` stream and resources.
case mp.kind
of MultiPartSource.Stream:
await closeWait(mp.stream)
else:
discard
proc close*(mpr: MultiPartReaderRef) {.async.} =
## Close and release MultiPartReader's ``mpr`` stream and resources.
case mpr.kind
of MultiPartSource.Stream:
await mpr.stream.closeWait()
else:
discard
proc getBytes*(mp: MultiPart): seq[byte] =
## Returns value for MultiPart ``mp`` as sequence of bytes.
case mp.kind
of MultiPartSource.Buffer:
mp.buffer
of MultiPartSource.Stream:
doAssert(not(mp.stream.atEof()), "Value is not obtained yet")
mp.buffer
proc getString*(mp: MultiPart): string =
## Returns value for MultiPart ``mp`` as string.
case mp.kind
of MultiPartSource.Buffer:
if len(mp.buffer) > 0:
var res = newString(len(mp.buffer))
copyMem(addr res[0], unsafeAddr mp.buffer[0], len(mp.buffer))
res
else:
""
of MultiPartSource.Stream:
doAssert(not(mp.stream.atEof()), "Value is not obtained yet")
if len(mp.buffer) > 0:
var res = newString(len(mp.buffer))
copyMem(addr res[0], unsafeAddr mp.buffer[0], len(mp.buffer))
res
else:
""
proc atEoM*(mpr: var MultiPartReader): bool =
## Procedure returns ``true`` if MultiPartReader has reached the end of
## multipart message.
case mpr.kind
of MultiPartSource.Buffer:
mpr.offset >= len(mpr.buffer)
of MultiPartSource.Stream:
mpr.stream.atEof()
proc atEoM*(mpr: MultiPartReaderRef): bool =
## Procedure returns ``true`` if MultiPartReader has reached the end of
## multipart message.
case mpr.kind
of MultiPartSource.Buffer:
mpr.offset >= len(mpr.buffer)
of MultiPartSource.Stream:
mpr.stream.atEof()
proc getPart*(mpr: var MultiPartReader): Result[MultiPart, string] =
## Get multipart part from MultiPartReader instance.
##
## This procedure will work only for MultiPartReader with buffer source.
doAssert(mpr.kind == MultiPartSource.Buffer)
if mpr.offset >= len(mpr.buffer):
return err("End of multipart form encountered")
if startsWith(mpr.buffer.toOpenArray(mpr.offset, len(mpr.buffer) - 1),
mpr.boundary.toOpenArray(2, len(mpr.boundary) - 1)):
# Buffer must start at <-><-><boundary>
mpr.offset += (len(mpr.boundary) - 2)
# After boundary there should be at least 2 symbols <-><-> or <CR><LF>.
if len(mpr.buffer) <= mpr.offset + 1:
return err("Incomplete multipart form")
if mpr.buffer[mpr.offset] == byte('-') and
mpr.buffer[mpr.offset + 1] == byte('-'):
# If we have <-><-><boundary><-><-> it means we have found last boundary
# of multipart message.
mpr.offset += 2
return err("End of multipart form encountered")
if mpr.buffer[mpr.offset] == 0x0D'u8 and
mpr.buffer[mpr.offset + 1] == 0x0A'u8:
# If we have <-><-><boundary><CR><LF> it means that we have found another
# part of multipart message.
mpr.offset += 2
# Multipart form must always have at least single Content-Disposition
# header, so we searching position where all the headers should be
# finished <CR><LF><CR><LF>.
let pos1 = parseUntil(
mpr.buffer.toOpenArray(mpr.offset, len(mpr.buffer) - 1),
[0x0D'u8, 0x0A'u8, 0x0D'u8, 0x0A'u8]
)
if pos1 < 0:
return err("Incomplete multipart form")
# parseUntil returns 0-based position without `until` sequence.
let start = mpr.offset + pos1 + 4
# Multipart headers position
let hstart = mpr.offset
let hfinish = mpr.offset + pos1 + 4 - 1
let headersList = parseHeaders(mpr.buffer.toOpenArray(hstart, hfinish),
false)
if headersList.failed():
return err("Incorrect or incomplete multipart headers received")
# Searching for value's boundary <CR><LF><-><-><boundary>.
let pos2 = parseUntil(
mpr.buffer.toOpenArray(start, len(mpr.buffer) - 1),
mpr.boundary.toOpenArray(0, len(mpr.boundary) - 1)
)
if pos2 < 0:
return err("Incomplete multipart form")
# We set reader's offset to the place right after <CR><LF>
mpr.offset = start + pos2 + 2
var part = MultiPart(
kind: MultiPartSource.Buffer,
headers: HttpTable.init(),
buffer: @(mpr.buffer.toOpenArray(start, start + pos2 - 1)),
counter: mpr.counter
)
inc(mpr.counter)
for k, v in headersList.headers(mpr.buffer.toOpenArray(hstart, hfinish)):
part.headers.add(k, v)
? part.setPartNames()
ok(part)
else:
err("Incorrect multipart form")
else:
err("Incorrect multipart form")
func getMultipartBoundary*(ch: openarray[string]): HttpResult[string] =
## Returns ``multipart/form-data`` boundary value from ``Content-Type``
## header.
##
## The procedure carries out all the necessary checks:
## 1) There should be single `Content-Type` header value in headers.
## 2) `Content-Type` must be ``multipart/form-data``.
## 3) `boundary` value must be present
## 4) `boundary` value must be less then 70 characters length and
## all characters should be part of alphabet.
if len(ch) > 1:
err("Multiple Content-Type headers found")
else:
if len(ch) == 0:
err("Content-Type header is missing")
else:
let mparts = ch[0].split(";")
if strip(mparts[0]).toLowerAscii() != "multipart/form-data":
return err("Content-Type is not multipart")
if len(mparts) < 2:
return err("Content-Type missing boundary value")
let stripped = strip(mparts[1])
if not(stripped.toLowerAscii().startsWith("boundary")):
return err("Incorrect Content-Type boundary format")
let bparts = stripped.split("=")
if len(bparts) < 2:
err("Missing Content-Type boundary")
else:
let candidate = strip(bparts[1])
if len(candidate) > 70:
err("Content-Type boundary must be less then 70 characters")
else:
for ch in candidate:
if ch notin {'a'..'z', 'A' .. 'Z', '0' .. '9',
'\'' .. ')', '+' .. '/', ':', '=', '?', '_'}:
return err("Content-Type boundary alphabat incorrect")
ok(candidate)