implement UTF8 handling (#35)
* implement UTF8 handling. To be precise, we add a UTF8 validator whose main duty is to detect malformed UTF8 sequences using a fast DFA UTF8 decoder. Also enable the autobahn UTF8 category tests, much more green :) fixes #13 * fixes case 7.5.1 Send a close frame with invalid UTF8 payload * add tests for validateUTF8 - tests for validateUTF8 in raw mode - tests for validateUTF8 in websocket client/server
This commit is contained in:
parent
93f0aba685
commit
90c664545d
|
@ -7,6 +7,6 @@
|
|||
}
|
||||
],
|
||||
"cases": ["*"],
|
||||
"exclude-cases": ["6.*", "9.*", "12.*", "13.*"],
|
||||
"exclude-cases": ["9.*", "12.*", "13.*"],
|
||||
"exclude-agent-cases": {}
|
||||
}
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
{. warning[UnusedImport]:off .}
|
||||
|
||||
import ./testframes
|
||||
import ./testwebsockets
|
||||
import ./testtlswebsockets
|
||||
import ./testutf8
|
||||
|
|
|
@ -0,0 +1,229 @@
|
|||
## nim-ws
|
||||
## Copyright (c) 2021 Status Research & Development GmbH
|
||||
## Licensed under either of
|
||||
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
|
||||
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
|
||||
## at your option.
|
||||
## This file may not be copied, modified, or distributed except according to
|
||||
## those terms.
|
||||
|
||||
import
|
||||
std/[strutils],
|
||||
pkg/[
|
||||
stew/byteutils,
|
||||
asynctest,
|
||||
chronos,
|
||||
chronos/apps/http/httpserver,
|
||||
chronicles
|
||||
],
|
||||
../ws/[ws, utf8_dfa]
|
||||
|
||||
suite "UTF-8 DFA validator":
  # Every case feeds a raw byte string to `validateUTF8`. A bare call asserts
  # that the input is accepted; `== false` asserts that it is rejected.

  test "single octet":
    check:
      validateUTF8("\x01")
      validateUTF8("\x32")
      validateUTF8("\x7f")
      # a continuation byte on its own is not a complete sequence
      validateUTF8("\x80") == false

  test "two octets":
    check:
      validateUTF8("\xc2\x80")
      validateUTF8("\xc4\x80")
      validateUTF8("\xdf\xbf")
      # lead byte followed by ASCII 'u' instead of a continuation byte
      validateUTF8("\xdfu\xc0") == false
      # lead byte followed by a non-continuation byte, mirroring the
      # `...\xc0` cases of the three and four octet tests
      validateUTF8("\xdf\xc0") == false
      # truncated sequence
      validateUTF8("\xdf") == false

  test "three octets":
    check:
      validateUTF8("\xe0\xa0\x80")
      validateUTF8("\xe1\x80\x80")
      validateUTF8("\xef\xbf\xbf")
      validateUTF8("\xef\xbf\xc0") == false
      validateUTF8("\xef\xbf") == false

  test "four octets":
    check:
      validateUTF8("\xf0\x90\x80\x80")
      validateUTF8("\xf0\x92\x80\x80")
      validateUTF8("\xf0\x9f\xbf\xbf")
      validateUTF8("\xf0\x9f\xbf\xc0") == false
      validateUTF8("\xf0\x9f\xbf") == false

  test "overlong sequence":
    # '/' (0x2f) encoded with more octets than necessary
    check:
      validateUTF8("\xc0\xaf") == false
      validateUTF8("\xe0\x80\xaf") == false
      validateUTF8("\xf0\x80\x80\xaf") == false
      validateUTF8("\xf8\x80\x80\x80\xaf") == false
      validateUTF8("\xfc\x80\x80\x80\x80\xaf") == false

  test "max overlong sequence":
    # the highest value that still fits the shorter encoding, written in the
    # longer (overlong) form
    check:
      validateUTF8("\xc1\xbf") == false
      validateUTF8("\xe0\x9f\xbf") == false
      validateUTF8("\xf0\x8f\xbf\xbf") == false
      validateUTF8("\xf8\x87\xbf\xbf\xbf") == false
      validateUTF8("\xfc\x83\xbf\xbf\xbf\xbf") == false

  test "distinct codepoint":
    # ASCII text mixed with a two octet and a four octet sequence
    check:
      validateUTF8("foobar")
      validateUTF8("foob\xc3\xa6r")
      validateUTF8("foob\xf0\x9f\x99\x88r")
|
||||
|
||||
proc waitForClose(ws: WSSession) {.async.} =
  ## Keeps receiving from `ws`, discarding whatever arrives, until its ready
  ## state is `ReadyState.Closed`. A `CatchableError` raised while receiving
  ## ends the wait and is only logged at debug level.
  try:
    while true:
      if ws.readyState == ReadyState.Closed:
        break
      # the payload is irrelevant here; receiving just drives the session
      discard await ws.recv()
  except CatchableError:
    debug "Closing websocket"
|
||||
|
||||
# TODO: use new test framework from dryajov
# if it is ready.

# HTTP server shared by the tests below: every test assigns it, and the
# suite's `teardown` stops and closes it again.
var server: HttpServerRef
let address = initTAddress("127.0.0.1:8888")

suite "UTF-8 validator in action":
  teardown:
    await server.stop()
    await server.closeWait()

  test "valid UTF-8 sequence":
    let testData = "hello world"

    proc process(r: RequestFence): Future[HttpResponseRef] {.async.} =
      ## Upgrades the request to a websocket session and expects exactly
      ## `testData` as a text message.
      if r.isErr():
        return dumbResponse()

      let request = r.get()
      check request.uri.path == "/ws"

      let wsServer = WSServer.new(protos = ["proto"])
      let ws = await wsServer.handleRequest(request)

      let received = await ws.recv()
      check:
        string.fromBytes(received) == testData
        ws.binary == false

      await waitForClose(ws)

    server = HttpServerRef.new(address, process).get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"],
    )

    await wsClient.send(testData)
    await wsClient.close()

  test "valid UTF-8 sequence in close reason":
    let testData = "hello world"
    let closeReason = "i want to close"

    proc process(r: RequestFence): Future[HttpResponseRef] {.async.} =
      ## Same as the plain text test, plus an `onClose` callback that checks
      ## the status and reason delivered with the close frame.
      if r.isErr():
        return dumbResponse()

      let request = r.get()
      check request.uri.path == "/ws"

      proc onClose(status: Status, reason: string): CloseResult {.gcsafe,
          raises: [Defect].} =
        try:
          check status == Status.Fulfilled
          check reason == closeReason
          return (status, reason)
        except Exception as exc:
          # the callback may only raise Defect, so wrap whatever was raised
          raise newException(Defect, exc.msg)

      let wsServer = WSServer.new(protos = ["proto"], onClose = onClose)
      let ws = await wsServer.handleRequest(request)

      let received = await ws.recv()
      check:
        string.fromBytes(received) == testData
        ws.binary == false

      await waitForClose(ws)

    server = HttpServerRef.new(address, process).get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"],
    )

    await wsClient.send(testData)
    await wsClient.close(reason = closeReason)

  test "invalid UTF-8 sequence":
    # TODO: how to check for Invalid UTF8 exception?
    # payload ends with an overlong encoding of '/'
    let testData = "hello world\xc0\xaf"

    proc process(r: RequestFence): Future[HttpResponseRef] {.async.} =
      if r.isErr():
        return dumbResponse()

      let request = r.get()
      check request.uri.path == "/ws"

      let wsServer = WSServer.new(protos = ["proto"])
      # NOTE(review): this handler never calls `ws.recv()`, so it does not
      # itself read the malformed message; the test only checks that the
      # client ends up closed. Confirm this is the intended coverage.
      let ws = await wsServer.handleRequest(request)

    server = HttpServerRef.new(address, process).get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"]
    )

    await wsClient.send(testData)
    await waitForClose(wsClient)
    check wsClient.readyState == ReadyState.Closed

  test "invalid UTF-8 sequence close code":
    # TODO: how to check for Invalid UTF8 exception?
    let testData = "hello world"
    # close reason ends with an overlong encoding of '/'
    let closeReason = "i want to close\xc0\xaf"

    proc process(r: RequestFence): Future[HttpResponseRef] {.async.} =
      if r.isErr():
        return dumbResponse()

      let request = r.get()
      check request.uri.path == "/ws"

      let wsServer = WSServer.new(protos = ["proto"])
      let ws = await wsServer.handleRequest(request)

      # NOTE(review): the handler returns right after the first message and
      # does not wait for the close frame carrying the malformed reason;
      # confirm the close reason is actually validated in this test.
      let received = await ws.recv()
      check:
        string.fromBytes(received) == testData
        ws.binary == false

    server = HttpServerRef.new(address, process).get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"]
    )

    await wsClient.send(testData)
    await wsClient.close(reason = closeReason)
    await waitForClose(wsClient)
    check wsClient.readyState == ReadyState.Closed
|
|
@ -10,7 +10,7 @@
|
|||
{.push raises: [Defect].}
|
||||
|
||||
import pkg/[chronos, chronicles, stew/byteutils, stew/endians2]
|
||||
import ./types, ./frame, ./utils, ./stream
|
||||
import ./types, ./frame, ./utils, ./stream, ./utf8_dfa
|
||||
|
||||
import pkg/chronos/[
|
||||
streams/asyncstream,
|
||||
|
@ -132,6 +132,9 @@ proc handleClose*(
|
|||
# remaining payload bytes are the reason for closing
|
||||
reason = string.fromBytes(payLoad[2..payLoad.high])
|
||||
|
||||
if not ws.binary and validateUTF8(reason) == false:
|
||||
raise newException(WSInvalidUTF8, "Invalid UTF8 sequence detected in close reason")
|
||||
|
||||
var rcode: Status
|
||||
if code in {Status.Fulfilled}:
|
||||
rcode = Status.Fulfilled
|
||||
|
@ -296,6 +299,9 @@ proc recv*(
|
|||
consumed += read
|
||||
ws.frame.consumed += read.uint64
|
||||
|
||||
if not ws.binary and validateUTF8(pbuffer.toOpenArray(0, consumed - 1)) == false:
|
||||
raise newException(WSInvalidUTF8, "Invalid UTF8 sequence detected")
|
||||
|
||||
return consumed.int
|
||||
|
||||
except WebSocketError as exc:
|
||||
|
|
|
@ -125,6 +125,7 @@ type
|
|||
WSInvalidCloseCodeError* = object of WebSocketError
|
||||
WSPayloadLengthError* = object of WebSocketError
|
||||
WSInvalidOpcodeError* = object of WebSocketError
|
||||
WSInvalidUTF8* = object of WebSocketError
|
||||
|
||||
proc `name=`*(self: Extension, name: string) =
|
||||
raiseAssert "Can't change extensions name!"
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
## nim-ws
|
||||
## Copyright (c) 2021 Status Research & Development GmbH
|
||||
## Licensed under either of
|
||||
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
|
||||
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
|
||||
## at your option.
|
||||
## This file may not be copied, modified, or distributed except according to
|
||||
## those terms.
|
||||
|
||||
# DFA based UTF8 decoder/validator
|
||||
# See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
|
||||
|
||||
const
  UTF8_ACCEPT* = 0 ## DFA state: all input so far forms complete sequences
  UTF8_REJECT* = 1 ## DFA state: malformed input (its table row is all 1s)

const
  # Layout of `utf8Table`: the first 256 entries map a byte to its character
  # class; after that follows one 16-entry transition row per DFA state.
  transitionsOffset = 256
  classesPerState = 16

const utf8Table = [
  0'u8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
  0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
  0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
  0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
  1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
  1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
]

func validateUTF8*[T: byte | char](text: openArray[T]): bool =
  ## Returns `true` when `text` consists only of complete, well-formed UTF-8
  ## sequences, i.e. the DFA ends in `UTF8_ACCEPT`. Empty input is valid.
  ## Returns `false` for malformed input and for input that stops in the
  ## middle of a multi-byte sequence.
  var state = UTF8_ACCEPT
  for c in text:
    let class = utf8Table[c.int].int
    state = utf8Table[transitionsOffset + state*classesPerState + class].int
    # Every transition out of UTF8_REJECT leads back to UTF8_REJECT, so the
    # verdict cannot change any more; stop instead of scanning the rest.
    if state == UTF8_REJECT:
      return false
  state == UTF8_ACCEPT
|
Loading…
Reference in New Issue