implement UTF8 handling (#35)
* implement UTF8 handling; or, to be precise, add a UTF8 validator whose main duty is to detect malformed UTF8 sequences using a fast DFA UTF8 decoder. Also enable the autobahn UTF8 category tests, much more green :) fixes #13 * fixes case 7.5.1 Send a close frame with invalid UTF8 payload * add tests for validateUTF8 - tests for validateUTF8 in raw mode - tests for validateUTF8 in websocket client/server
This commit is contained in:
parent
93f0aba685
commit
90c664545d
|
@ -7,6 +7,6 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"cases": ["*"],
|
"cases": ["*"],
|
||||||
"exclude-cases": ["6.*", "9.*", "12.*", "13.*"],
|
"exclude-cases": ["9.*", "12.*", "13.*"],
|
||||||
"exclude-agent-cases": {}
|
"exclude-agent-cases": {}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
{. warning[UnusedImport]:off .}
|
||||||
|
|
||||||
import ./testframes
|
import ./testframes
|
||||||
import ./testwebsockets
|
import ./testwebsockets
|
||||||
import ./testtlswebsockets
|
import ./testtlswebsockets
|
||||||
|
import ./testutf8
|
||||||
|
|
|
@ -0,0 +1,229 @@
|
||||||
|
## nim-ws
|
||||||
|
## Copyright (c) 2021 Status Research & Development GmbH
|
||||||
|
## Licensed under either of
|
||||||
|
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
|
||||||
|
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
|
||||||
|
## at your option.
|
||||||
|
## This file may not be copied, modified, or distributed except according to
|
||||||
|
## those terms.
|
||||||
|
|
||||||
|
import
|
||||||
|
std/[strutils],
|
||||||
|
pkg/[
|
||||||
|
stew/byteutils,
|
||||||
|
asynctest,
|
||||||
|
chronos,
|
||||||
|
chronos/apps/http/httpserver,
|
||||||
|
chronicles
|
||||||
|
],
|
||||||
|
../ws/[ws, utf8_dfa]
|
||||||
|
|
||||||
|
suite "UTF-8 DFA validator":
  # Raw-mode tests: `validateUTF8` is called directly on string literals.
  # Each length group checks the lowest, a middle and the highest encodable
  # sequence, then a sequence with an invalid continuation byte, then a
  # truncated sequence.

  test "single octet":
    check:
      validateUTF8("\x01")
      validateUTF8("\x32")
      validateUTF8("\x7f")
      # 0x80 is a continuation byte and cannot start a sequence
      validateUTF8("\x80") == false

  test "two octets":
    check:
      validateUTF8("\xc2\x80")
      validateUTF8("\xc4\x80")
      validateUTF8("\xdf\xbf")
      # 0xc0 is not a continuation byte (mirrors the three/four octet cases)
      validateUTF8("\xdf\xc0") == false
      # truncated: lead byte without its continuation
      validateUTF8("\xdf") == false

  test "three octets":
    check:
      validateUTF8("\xe0\xa0\x80")
      validateUTF8("\xe1\x80\x80")
      validateUTF8("\xef\xbf\xbf")
      validateUTF8("\xef\xbf\xc0") == false
      validateUTF8("\xef\xbf") == false

  test "four octets":
    check:
      validateUTF8("\xf0\x90\x80\x80")
      validateUTF8("\xf0\x92\x80\x80")
      validateUTF8("\xf0\x9f\xbf\xbf")
      validateUTF8("\xf0\x9f\xbf\xc0") == false
      validateUTF8("\xf0\x9f\xbf") == false

  test "overlong sequence":
    # '/' (0x2f) encoded with more octets than necessary
    check:
      validateUTF8("\xc0\xaf") == false
      validateUTF8("\xe0\x80\xaf") == false
      validateUTF8("\xf0\x80\x80\xaf") == false
      validateUTF8("\xf8\x80\x80\x80\xaf") == false
      validateUTF8("\xfc\x80\x80\x80\x80\xaf") == false

  test "max overlong sequence":
    # highest code point that still fits the next shorter encoding
    check:
      validateUTF8("\xc1\xbf") == false
      validateUTF8("\xe0\x9f\xbf") == false
      validateUTF8("\xf0\x8f\xbf\xbf") == false
      validateUTF8("\xf8\x87\xbf\xbf\xbf") == false
      validateUTF8("\xfc\x83\xbf\xbf\xbf\xbf") == false

  test "distinct codepoint":
    # ASCII text mixed with a two octet and a four octet sequence
    check:
      validateUTF8("foobar")
      validateUTF8("foob\xc3\xa6r")
      validateUTF8("foob\xf0\x9f\x99\x88r")
|
||||||
|
|
||||||
|
proc waitForClose(ws: WSSession) {.async.} =
  ## Drain `ws` until its ready state becomes `ReadyState.Closed`.
  ##
  ## Everything received is discarded. A `CatchableError` raised while
  ## receiving ends the wait and is only logged at debug level.
  try:
    while true:
      if ws.readystate == ReadyState.Closed:
        break
      discard await ws.recv()
  except CatchableError:
    debug "Closing websocket"
|
||||||
|
|
||||||
|
# TODO: use new test framework from dryajov
# if it is ready.

# HTTP server shared by all tests of the suite below: every test assigns it
# and the suite's `teardown` stops and closes it.
var server: HttpServerRef
let address = initTAddress("127.0.0.1:8888")

suite "UTF-8 validator in action":
  teardown:
    await server.stop()
    await server.closeWait()

  test "valid UTF-8 sequence":
    let testData = "hello world"

    proc handle(fence: RequestFence): Future[HttpResponseRef] {.async.} =
      # Server side: upgrade the request, read one message, wait for close.
      if fence.isErr():
        return dumbResponse()

      let request = fence.get()
      check request.uri.path == "/ws"

      let wsServer = WSServer.new(protos = ["proto"])
      let session = await wsServer.handleRequest(request)

      let received = await session.recv()
      check string.fromBytes(received) == testData
      check session.binary == false

      await waitForClose(session)

    let created = HttpServerRef.new(address, handle)
    server = created.get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"],
    )

    await wsClient.send(testData)
    await wsClient.close()

  test "valid UTF-8 sequence in close reason":
    let testData = "hello world"
    let closeReason = "i want to close"

    proc handle(fence: RequestFence): Future[HttpResponseRef] {.async.} =
      if fence.isErr():
        return dumbResponse()

      let request = fence.get()
      check request.uri.path == "/ws"

      proc onClose(status: Status, reason: string): CloseResult {.gcsafe,
          raises: [Defect].} =
        # Close callback: checks the status and reason the server received
        # and hands both back unchanged.
        try:
          check status == Status.Fulfilled
          check reason == closeReason
          return (status, reason)
        except Exception as exc:
          raise newException(Defect, exc.msg)

      let wsServer = WSServer.new(protos = ["proto"], onClose = onClose)
      let session = await wsServer.handleRequest(request)

      let received = await session.recv()
      check string.fromBytes(received) == testData
      check session.binary == false

      await waitForClose(session)

    let created = HttpServerRef.new(address, handle)
    server = created.get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"],
    )

    await wsClient.send(testData)
    await wsClient.close(reason = closeReason)

  test "invalid UTF-8 sequence":
    # TODO: how to check for Invalid UTF8 exception?
    # The payload ends with 0xc0 0xaf, an overlong encoding.
    let testData = "hello world\xc0\xaf"

    proc handle(fence: RequestFence): Future[HttpResponseRef] {.async.} =
      if fence.isErr():
        return dumbResponse()

      let request = fence.get()
      check request.uri.path == "/ws"

      # NOTE(review): the handler only upgrades the connection and never
      # reads from the session.
      let wsServer = WSServer.new(protos = ["proto"])
      let ws = await wsServer.handleRequest(request)

    let created = HttpServerRef.new(address, handle)
    server = created.get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"]
    )

    await wsClient.send(testData)
    await waitForClose(wsClient)
    check wsClient.readyState == ReadyState.Closed

  test "invalid UTF-8 sequence close code":
    # TODO: how to check for Invalid UTF8 exception?
    let testData = "hello world"
    # The close reason ends with 0xc0 0xaf, an overlong encoding.
    let closeReason = "i want to close\xc0\xaf"

    proc handle(fence: RequestFence): Future[HttpResponseRef] {.async.} =
      if fence.isErr():
        return dumbResponse()

      let request = fence.get()
      check request.uri.path == "/ws"

      let wsServer = WSServer.new(protos = ["proto"])
      let session = await wsServer.handleRequest(request)

      # Only the (valid) text message is read and checked here.
      let received = await session.recv()
      check string.fromBytes(received) == testData
      check session.binary == false

    let created = HttpServerRef.new(address, handle)
    server = created.get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"]
    )

    await wsClient.send(testData)
    await wsClient.close(reason = closeReason)
    await waitForClose(wsClient)
    check wsClient.readyState == ReadyState.Closed
|
|
@ -10,7 +10,7 @@
|
||||||
{.push raises: [Defect].}
|
{.push raises: [Defect].}
|
||||||
|
|
||||||
import pkg/[chronos, chronicles, stew/byteutils, stew/endians2]
|
import pkg/[chronos, chronicles, stew/byteutils, stew/endians2]
|
||||||
import ./types, ./frame, ./utils, ./stream
|
import ./types, ./frame, ./utils, ./stream, ./utf8_dfa
|
||||||
|
|
||||||
import pkg/chronos/[
|
import pkg/chronos/[
|
||||||
streams/asyncstream,
|
streams/asyncstream,
|
||||||
|
@ -132,6 +132,9 @@ proc handleClose*(
|
||||||
# remining payload bytes are reason for closing
|
# remining payload bytes are reason for closing
|
||||||
reason = string.fromBytes(payLoad[2..payLoad.high])
|
reason = string.fromBytes(payLoad[2..payLoad.high])
|
||||||
|
|
||||||
|
if not ws.binary and validateUTF8(reason) == false:
|
||||||
|
raise newException(WSInvalidUTF8, "Invalid UTF8 sequence detected in close reason")
|
||||||
|
|
||||||
var rcode: Status
|
var rcode: Status
|
||||||
if code in {Status.Fulfilled}:
|
if code in {Status.Fulfilled}:
|
||||||
rcode = Status.Fulfilled
|
rcode = Status.Fulfilled
|
||||||
|
@ -296,6 +299,9 @@ proc recv*(
|
||||||
consumed += read
|
consumed += read
|
||||||
ws.frame.consumed += read.uint64
|
ws.frame.consumed += read.uint64
|
||||||
|
|
||||||
|
if not ws.binary and validateUTF8(pbuffer.toOpenArray(0, consumed - 1)) == false:
|
||||||
|
raise newException(WSInvalidUTF8, "Invalid UTF8 sequence detected")
|
||||||
|
|
||||||
return consumed.int
|
return consumed.int
|
||||||
|
|
||||||
except WebSocketError as exc:
|
except WebSocketError as exc:
|
||||||
|
|
|
@ -125,6 +125,7 @@ type
|
||||||
WSInvalidCloseCodeError* = object of WebSocketError
|
WSInvalidCloseCodeError* = object of WebSocketError
|
||||||
WSPayloadLengthError* = object of WebSocketError
|
WSPayloadLengthError* = object of WebSocketError
|
||||||
WSInvalidOpcodeError* = object of WebSocketError
|
WSInvalidOpcodeError* = object of WebSocketError
|
||||||
|
WSInvalidUTF8* = object of WebSocketError
|
||||||
|
|
||||||
proc `name=`*(self: Extension, name: string) =
|
proc `name=`*(self: Extension, name: string) =
|
||||||
raiseAssert "Can't change extensions name!"
|
raiseAssert "Can't change extensions name!"
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
## nim-ws
|
||||||
|
## Copyright (c) 2021 Status Research & Development GmbH
|
||||||
|
## Licensed under either of
|
||||||
|
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
|
||||||
|
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
|
||||||
|
## at your option.
|
||||||
|
## This file may not be copied, modified, or distributed except according to
|
||||||
|
## those terms.
|
||||||
|
|
||||||
|
# DFA based UTF8 decoder/validator
|
||||||
|
# See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
|
||||||
|
|
||||||
|
const
  # DFA start state; also the state reached after every complete,
  # well-formed sequence.
  UTF8_ACCEPT* = 0
  # DFA error state. Its transition row (s1) contains only 1s, so once it
  # is entered the automaton never leaves it.
  UTF8_REJECT* = 1

# Combined lookup table of the DFA:
# * entries 0..255 map an input byte to its character class (0..11);
# * entries 256.. hold the transition rows, 16 entries per state, indexed
#   as `256 + state*16 + class`.
const utf8Table = [
  0'u8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
  0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
  0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
  0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
  1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
  1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
]

proc validateUTF8*[T: byte | char](text: openArray[T]): bool =
  ## Returns `true` when `text` is a sequence of complete, well-formed
  ## UTF-8 encoded code points, `false` otherwise.
  ##
  ## Empty input is valid. Input that ends in the middle of a multi-octet
  ## sequence is invalid because the automaton is then not in
  ## `UTF8_ACCEPT`.
  var state = UTF8_ACCEPT
  for c in text:
    let charClass = utf8Table[c.int].int
    state = utf8Table[256 + state*16 + charClass].int
    # UTF8_REJECT is a sink state: no later byte can make the input valid
    # again, so stop scanning at the first malformed byte.
    if state == UTF8_REJECT:
      return false
  state == UTF8_ACCEPT
|
Loading…
Reference in New Issue