implement UTF8 handling (#35)

* implement UTF8 handling

or, to be precise, we add a UTF8 validator whose main duty
is to detect malformed UTF8 sequences using a fast DFA UTF8
decoder.

also enable autobahn UTF8 category tests, much more green :)

fixes #13

* fixes case 7.5.1 Send a close frame with invalid UTF8 payload

* add tests for validateUTF8

- tests for validateUTF8 in raw mode
- tests for validateUTF8 in websocket client/server
This commit is contained in:
andri lim 2021-05-28 23:47:24 +07:00 committed by GitHub
parent 93f0aba685
commit 90c664545d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 280 additions and 2 deletions

View File

@ -7,6 +7,6 @@
} }
], ],
"cases": ["*"], "cases": ["*"],
"exclude-cases": ["6.*", "9.*", "12.*", "13.*"], "exclude-cases": ["9.*", "12.*", "13.*"],
"exclude-agent-cases": {} "exclude-agent-cases": {}
} }

View File

@ -1,3 +1,6 @@
{. warning[UnusedImport]:off .}
import ./testframes import ./testframes
import ./testwebsockets import ./testwebsockets
import ./testtlswebsockets import ./testtlswebsockets
import ./testutf8

229
tests/testutf8.nim Normal file
View File

@ -0,0 +1,229 @@
## nim-ws
## Copyright (c) 2021 Status Research & Development GmbH
## Licensed under either of
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.
import
std/[strutils],
pkg/[
stew/byteutils,
asynctest,
chronos,
chronos/apps/http/httpserver,
chronicles
],
../ws/[ws, utf8_dfa]
suite "UTF-8 DFA validator":
  # The "N octets" tests below share one layout: well-formed sequences
  # first, then a sequence whose last byte is not a continuation byte,
  # then a truncated sequence.
  test "single octet":
    check:
      validateUTF8("\x01")
      validateUTF8("\x32")
      validateUTF8("\x7f")
      # a lone continuation byte is not a valid sequence
      validateUTF8("\x80") == false

  test "two octets":
    check:
      validateUTF8("\xc2\x80")
      validateUTF8("\xc4\x80")
      validateUTF8("\xdf\xbf")
      # lead byte followed directly by a non-continuation byte; the
      # original literal carried a stray `u` between the two bytes
      validateUTF8("\xdf\xc0") == false
      validateUTF8("\xdf") == false

  test "three octets":
    check:
      validateUTF8("\xe0\xa0\x80")
      validateUTF8("\xe1\x80\x80")
      validateUTF8("\xef\xbf\xbf")
      validateUTF8("\xef\xbf\xc0") == false
      validateUTF8("\xef\xbf") == false

  test "four octets":
    check:
      validateUTF8("\xf0\x90\x80\x80")
      validateUTF8("\xf0\x92\x80\x80")
      validateUTF8("\xf0\x9f\xbf\xbf")
      validateUTF8("\xf0\x9f\xbf\xc0") == false
      validateUTF8("\xf0\x9f\xbf") == false

  test "overlong sequence":
    # '/' (0x2f) encoded with more octets than necessary
    check:
      validateUTF8("\xc0\xaf") == false
      validateUTF8("\xe0\x80\xaf") == false
      validateUTF8("\xf0\x80\x80\xaf") == false
      validateUTF8("\xf8\x80\x80\x80\xaf") == false
      validateUTF8("\xfc\x80\x80\x80\x80\xaf") == false

  test "max overlong sequence":
    # largest value that is still overlong for each sequence length
    check:
      validateUTF8("\xc1\xbf") == false
      validateUTF8("\xe0\x9f\xbf") == false
      validateUTF8("\xf0\x8f\xbf\xbf") == false
      validateUTF8("\xf8\x87\xbf\xbf\xbf") == false
      validateUTF8("\xfc\x83\xbf\xbf\xbf\xbf") == false

  test "distinct codepoint":
    # ASCII text mixed with a two-octet and a four-octet sequence
    check:
      validateUTF8("foobar")
      validateUTF8("foob\xc3\xa6r")
      validateUTF8("foob\xf0\x9f\x99\x88r")
proc waitForClose(ws: WSSession) {.async.} =
  ## Keeps receiving from `ws`, discarding whatever arrives, until its
  ## ready state becomes `ReadyState.Closed`.
  ##
  ## Any `CatchableError` raised while receiving ends the wait; it is not
  ## propagated, only a debug message is logged.
  try:
    # `readyState` is spelled the same way as in the tests that call this
    # helper.
    while ws.readyState != ReadyState.Closed:
      discard await ws.recv()
  except CatchableError:
    debug "Closing websocket"
# TODO: use new test framework from dryajov
# if it is ready.
# Shared HTTP server handle: each test in the suite below assigns it and
# the suite's teardown stops and closes it.
var server: HttpServerRef
# Loopback address the test servers are created with; the test clients
# connect to the same host and port.
let address = initTAddress("127.0.0.1:8888")
suite "UTF-8 validator in action":
  # Every test stores its HTTP server in the module-level `server`, so a
  # single teardown can shut it down after each test.
  teardown:
    await server.stop()
    await server.closeWait()

  test "valid UTF-8 sequence":
    let testData = "hello world"

    proc process(r: RequestFence): Future[HttpResponseRef] {.async.} =
      # Server side: accept the websocket, read one message and compare it
      # with what the client sent.
      if r.isErr():
        return dumbResponse()

      let request = r.get()
      check request.uri.path == "/ws"

      let wsServer = WSServer.new(protos = ["proto"])
      let ws = await wsServer.handleRequest(request)

      let received = await ws.recv()
      check:
        string.fromBytes(received) == testData
        not ws.binary

      await waitForClose(ws)

    let created = HttpServerRef.new(address, process)
    server = created.get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"],
    )

    await wsClient.send(testData)
    await wsClient.close()

  test "valid UTF-8 sequence in close reason":
    let testData = "hello world"
    let closeReason = "i want to close"

    proc process(r: RequestFence): Future[HttpResponseRef] {.async.} =
      if r.isErr():
        return dumbResponse()

      let request = r.get()
      check request.uri.path == "/ws"

      # Close callback: checks the status and reason it is called with and
      # hands them back unchanged.
      proc onClose(status: Status, reason: string): CloseResult {.gcsafe,
          raises: [Defect].} =
        try:
          check status == Status.Fulfilled
          check reason == closeReason
          return (status, reason)
        except Exception as exc:
          raise newException(Defect, exc.msg)

      let wsServer = WSServer.new(protos = ["proto"], onClose = onClose)
      let ws = await wsServer.handleRequest(request)

      let received = await ws.recv()
      check:
        string.fromBytes(received) == testData
        not ws.binary

      await waitForClose(ws)

    let created = HttpServerRef.new(address, process)
    server = created.get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"],
    )

    await wsClient.send(testData)
    await wsClient.close(reason = closeReason)

  test "invalid UTF-8 sequence":
    # TODO: how to check for Invalid UTF8 exception?
    # payload ends with an overlong two-octet sequence
    let testData = "hello world\xc0\xaf"

    proc process(r: RequestFence): Future[HttpResponseRef] {.async.} =
      # Server side only accepts the websocket; it does not read from it.
      if r.isErr():
        return dumbResponse()

      let request = r.get()
      check request.uri.path == "/ws"

      let wsServer = WSServer.new(protos = ["proto"])
      let ws = await wsServer.handleRequest(request)

    let created = HttpServerRef.new(address, process)
    server = created.get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"]
    )

    await wsClient.send(testData)
    await waitForClose(wsClient)
    check wsClient.readyState == ReadyState.Closed

  test "invalid UTF-8 sequence close code":
    # TODO: how to check for Invalid UTF8 exception?
    let testData = "hello world"
    # close reason ends with an overlong two-octet sequence
    let closeReason = "i want to close\xc0\xaf"

    proc process(r: RequestFence): Future[HttpResponseRef] {.async.} =
      if r.isErr():
        return dumbResponse()

      let request = r.get()
      check request.uri.path == "/ws"

      let wsServer = WSServer.new(protos = ["proto"])
      let ws = await wsServer.handleRequest(request)

      let received = await ws.recv()
      check:
        string.fromBytes(received) == testData
        not ws.binary

    let created = HttpServerRef.new(address, process)
    server = created.get()
    server.start()

    let wsClient = await WebSocket.connect(
      "127.0.0.1",
      Port(8888),
      path = "/ws",
      protocols = @["proto"]
    )

    await wsClient.send(testData)
    await wsClient.close(reason = closeReason)
    await waitForClose(wsClient)
    check wsClient.readyState == ReadyState.Closed

View File

@ -10,7 +10,7 @@
{.push raises: [Defect].} {.push raises: [Defect].}
import pkg/[chronos, chronicles, stew/byteutils, stew/endians2] import pkg/[chronos, chronicles, stew/byteutils, stew/endians2]
import ./types, ./frame, ./utils, ./stream import ./types, ./frame, ./utils, ./stream, ./utf8_dfa
import pkg/chronos/[ import pkg/chronos/[
streams/asyncstream, streams/asyncstream,
@ -132,6 +132,9 @@ proc handleClose*(
# remining payload bytes are reason for closing # remining payload bytes are reason for closing
reason = string.fromBytes(payLoad[2..payLoad.high]) reason = string.fromBytes(payLoad[2..payLoad.high])
if not ws.binary and validateUTF8(reason) == false:
raise newException(WSInvalidUTF8, "Invalid UTF8 sequence detected in close reason")
var rcode: Status var rcode: Status
if code in {Status.Fulfilled}: if code in {Status.Fulfilled}:
rcode = Status.Fulfilled rcode = Status.Fulfilled
@ -296,6 +299,9 @@ proc recv*(
consumed += read consumed += read
ws.frame.consumed += read.uint64 ws.frame.consumed += read.uint64
if not ws.binary and validateUTF8(pbuffer.toOpenArray(0, consumed - 1)) == false:
raise newException(WSInvalidUTF8, "Invalid UTF8 sequence detected")
return consumed.int return consumed.int
except WebSocketError as exc: except WebSocketError as exc:

View File

@ -125,6 +125,7 @@ type
WSInvalidCloseCodeError* = object of WebSocketError WSInvalidCloseCodeError* = object of WebSocketError
WSPayloadLengthError* = object of WebSocketError WSPayloadLengthError* = object of WebSocketError
WSInvalidOpcodeError* = object of WebSocketError WSInvalidOpcodeError* = object of WebSocketError
WSInvalidUTF8* = object of WebSocketError
proc `name=`*(self: Extension, name: string) = proc `name=`*(self: Extension, name: string) =
raiseAssert "Can't change extensions name!" raiseAssert "Can't change extensions name!"

39
ws/utf8_dfa.nim Normal file
View File

@ -0,0 +1,39 @@
## nim-ws
## Copyright (c) 2021 Status Research & Development GmbH
## Licensed under either of
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.
# DFA based UTF8 decoder/validator
# See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
const
  UTF8_ACCEPT* = 0
    ## DFA state in which every byte consumed so far belongs to a complete,
    ## well-formed sequence. It is also the start state.
  UTF8_REJECT* = 1
    ## DFA error state. Every transition out of it leads back to it, so
    ## once it is reached the input can never become valid again.

# Layout of `utf8Table`:
#   [0 ..< 256]  maps each byte value to its character class (0 .. 0xb)
#   [256 .. ]    transition table, 16 entries per state; the next state is
#                found at index `256 + state * 16 + class`
const utf8Table = [
  0'u8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
  0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
  0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
  0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
  1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
  1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
]

func validateUTF8*[T: byte | char](text: openArray[T]): bool =
  ## Returns `true` when `text` consists only of complete, well-formed
  ## UTF-8 sequences, and `false` otherwise (including when the input ends
  ## in the middle of a sequence). Empty input is valid.
  var state = UTF8_ACCEPT
  for c in text:
    let charClass = utf8Table[c.int].int
    state = utf8Table[256 + state * 16 + charClass].int
    # The reject state is absorbing, so the remaining bytes cannot change
    # the outcome; stop instead of scanning the rest of the payload.
    if state == UTF8_REJECT:
      return false
  state == UTF8_ACCEPT