diff --git a/autobahn/fuzzingclient.json b/autobahn/fuzzingclient.json index ea9122c..456a2bf 100644 --- a/autobahn/fuzzingclient.json +++ b/autobahn/fuzzingclient.json @@ -7,6 +7,6 @@ } ], "cases": ["*"], - "exclude-cases": ["6.*", "9.*", "12.*", "13.*"], + "exclude-cases": ["9.*", "12.*", "13.*"], "exclude-agent-cases": {} } diff --git a/tests/testall.nim b/tests/testall.nim index 80f6b76..16323f5 100644 --- a/tests/testall.nim +++ b/tests/testall.nim @@ -1,3 +1,6 @@ +{. warning[UnusedImport]:off .} + import ./testframes import ./testwebsockets import ./testtlswebsockets +import ./testutf8 diff --git a/tests/testutf8.nim b/tests/testutf8.nim new file mode 100644 index 0000000..b2dd2fc --- /dev/null +++ b/tests/testutf8.nim @@ -0,0 +1,229 @@ +## nim-ws +## Copyright (c) 2021 Status Research & Development GmbH +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. 

import
  std/[strutils],
  pkg/[
    stew/byteutils,
    asynctest,
    chronos,
    chronos/apps/http/httpserver,
    chronicles
  ],
  ../ws/[ws, utf8_dfa]

# Table-driven vectors for `validateUTF8`. Positive vectors are bare calls,
# negative vectors are compared against `false`, so a failing `check` prints
# the offending literal.
suite "UTF-8 DFA validator":
  test "empty input":
    # No bytes means the automaton never leaves its accepting start state.
    check:
      validateUTF8("")

  test "single octet":
    check:
      validateUTF8("\x01")
      validateUTF8("\x32")
      validateUTF8("\x7f")
      # A lone continuation byte is not a valid start of a sequence.
      validateUTF8("\x80") == false

  test "two octets":
    check:
      validateUTF8("\xc2\x80")
      validateUTF8("\xc4\x80")
      validateUTF8("\xdf\xbf")
      # Lead byte followed by a byte that is not a continuation byte
      # (mirrors the `...\xc0` vectors of the three/four octet tests).
      validateUTF8("\xdf\xc0") == false
      # Lead byte followed by plain ASCII instead of a continuation byte.
      validateUTF8("\xdfu") == false
      # Truncated sequence.
      validateUTF8("\xdf") == false

  test "three octets":
    check:
      validateUTF8("\xe0\xa0\x80")
      validateUTF8("\xe1\x80\x80")
      validateUTF8("\xef\xbf\xbf")
      validateUTF8("\xef\xbf\xc0") == false
      validateUTF8("\xef\xbf") == false

  test "four octets":
    check:
      validateUTF8("\xf0\x90\x80\x80")
      validateUTF8("\xf0\x92\x80\x80")
      validateUTF8("\xf0\x9f\xbf\xbf")
      validateUTF8("\xf0\x9f\xbf\xc0") == false
      validateUTF8("\xf0\x9f\xbf") == false

  test "overlong sequence":
    check:
      validateUTF8("\xc0\xaf") == false
      validateUTF8("\xe0\x80\xaf") == false
      validateUTF8("\xf0\x80\x80\xaf") == false
      validateUTF8("\xf8\x80\x80\x80\xaf") == false
      validateUTF8("\xfc\x80\x80\x80\x80\xaf") == false

  test "max overlong sequence":
    check:
      validateUTF8("\xc1\xbf") == false
      validateUTF8("\xe0\x9f\xbf") == false
      validateUTF8("\xf0\x8f\xbf\xbf") == false
      validateUTF8("\xf8\x87\xbf\xbf\xbf") == false
      validateUTF8("\xfc\x83\xbf\xbf\xbf\xbf") == false

  test "surrogates and out of range":
    check:
      # U+D7FF is the last code point before the surrogate block.
      validateUTF8("\xed\x9f\xbf")
      # U+D800, the first surrogate, must be rejected.
      validateUTF8("\xed\xa0\x80") == false
      # U+10FFFF is the largest valid code point.
      validateUTF8("\xf4\x8f\xbf\xbf")
      # Anything above U+10FFFF must be rejected.
      validateUTF8("\xf4\x90\x80\x80") == false
      validateUTF8("\xf5\x80\x80\x80") == false

  test "distinct codepoint":
    check:
      validateUTF8("foobar")
      validateUTF8("foob\xc3\xa6r")
      validateUTF8("foob\xf0\x9f\x99\x88r")

proc waitForClose(ws: WSSession) {.async.} =
  ## Keeps receiving (and discarding) messages from `ws` until the session
  ## reports `ReadyState.Closed`. Any `CatchableError` raised by `recv` ends
  ## the wait and is only logged at debug level, so callers can use this as
  ## a best-effort "drain until closed" helper.
  try:
    while ws.readyState != ReadyState.Closed:
      discard await ws.recv()
  except CatchableError:
    debug "Closing websocket"

# TODO: use new test framework from dryajov
# if it is ready.
+var server: HttpServerRef +let address = initTAddress("127.0.0.1:8888") + +suite "UTF-8 validator in action": + teardown: + await server.stop() + await server.closeWait() + + test "valid UTF-8 sequence": + let testData = "hello world" + proc process(r: RequestFence): Future[HttpResponseRef] {.async.} = + if r.isErr(): + return dumbResponse() + + let request = r.get() + check request.uri.path == "/ws" + + let server = WSServer.new(protos = ["proto"]) + let ws = await server.handleRequest(request) + + let res = await ws.recv() + check: + string.fromBytes(res) == testData + ws.binary == false + + await waitForClose(ws) + + let res = HttpServerRef.new(address, process) + server = res.get() + server.start() + + let wsClient = await WebSocket.connect( + "127.0.0.1", + Port(8888), + path = "/ws", + protocols = @["proto"], + ) + + await wsClient.send(testData) + await wsClient.close() + + test "valid UTF-8 sequence in close reason": + let testData = "hello world" + let closeReason = "i want to close" + proc process(r: RequestFence): Future[HttpResponseRef] {.async.} = + if r.isErr(): + return dumbResponse() + + let request = r.get() + check request.uri.path == "/ws" + + proc onClose(status: Status, reason: string): CloseResult{.gcsafe, + raises: [Defect].} = + try: + check status == Status.Fulfilled + check reason == closeReason + return (status, reason) + except Exception as exc: + raise newException(Defect, exc.msg) + + let server = WSServer.new(protos = ["proto"], onClose = onClose) + let ws = await server.handleRequest(request) + + let res = await ws.recv() + check: + string.fromBytes(res) == testData + ws.binary == false + + await waitForClose(ws) + + let res = HttpServerRef.new(address, process) + server = res.get() + server.start() + + let wsClient = await WebSocket.connect( + "127.0.0.1", + Port(8888), + path = "/ws", + protocols = @["proto"], + ) + + await wsClient.send(testData) + await wsClient.close(reason = closeReason) + + test "invalid UTF-8 sequence": + # 
TODO: how to check for Invalid UTF8 exception? + let testData = "hello world\xc0\xaf" + proc process(r: RequestFence): Future[HttpResponseRef] {.async.} = + if r.isErr(): + return dumbResponse() + + let request = r.get() + check request.uri.path == "/ws" + + let server = WSServer.new(protos = ["proto"]) + let ws = await server.handleRequest(request) + + let res = HttpServerRef.new(address, process) + server = res.get() + server.start() + + let wsClient = await WebSocket.connect( + "127.0.0.1", + Port(8888), + path = "/ws", + protocols = @["proto"] + ) + + await wsClient.send(testData) + await waitForClose(wsClient) + check wsClient.readyState == ReadyState.Closed + + test "invalid UTF-8 sequence close code": + # TODO: how to check for Invalid UTF8 exception? + let testData = "hello world" + let closeReason = "i want to close\xc0\xaf" + proc process(r: RequestFence): Future[HttpResponseRef] {.async.} = + if r.isErr(): + return dumbResponse() + + let request = r.get() + check request.uri.path == "/ws" + + let server = WSServer.new(protos = ["proto"]) + let ws = await server.handleRequest(request) + + let res = await ws.recv() + check: + string.fromBytes(res) == testData + ws.binary == false + + let res = HttpServerRef.new(address, process) + server = res.get() + server.start() + + let wsClient = await WebSocket.connect( + "127.0.0.1", + Port(8888), + path = "/ws", + protocols = @["proto"] + ) + + await wsClient.send(testData) + await wsClient.close(reason = closeReason) + await waitForClose(wsClient) + check wsClient.readyState == ReadyState.Closed diff --git a/ws/session.nim b/ws/session.nim index 91933c7..d65b53a 100644 --- a/ws/session.nim +++ b/ws/session.nim @@ -10,7 +10,7 @@ {.push raises: [Defect].} import pkg/[chronos, chronicles, stew/byteutils, stew/endians2] -import ./types, ./frame, ./utils, ./stream +import ./types, ./frame, ./utils, ./stream, ./utf8_dfa import pkg/chronos/[ streams/asyncstream, @@ -132,6 +132,9 @@ proc handleClose*( # remining payload 
bytes are reason for closing reason = string.fromBytes(payLoad[2..payLoad.high]) + if not ws.binary and validateUTF8(reason) == false: + raise newException(WSInvalidUTF8, "Invalid UTF8 sequence detected in close reason") + var rcode: Status if code in {Status.Fulfilled}: rcode = Status.Fulfilled @@ -296,6 +299,9 @@ proc recv*( consumed += read ws.frame.consumed += read.uint64 + if not ws.binary and validateUTF8(pbuffer.toOpenArray(0, consumed - 1)) == false: + raise newException(WSInvalidUTF8, "Invalid UTF8 sequence detected") + return consumed.int except WebSocketError as exc: diff --git a/ws/types.nim b/ws/types.nim index f4ee492..323098f 100644 --- a/ws/types.nim +++ b/ws/types.nim @@ -125,6 +125,7 @@ type WSInvalidCloseCodeError* = object of WebSocketError WSPayloadLengthError* = object of WebSocketError WSInvalidOpcodeError* = object of WebSocketError + WSInvalidUTF8* = object of WebSocketError proc `name=`*(self: Extension, name: string) = raiseAssert "Can't change extensions name!" diff --git a/ws/utf8_dfa.nim b/ws/utf8_dfa.nim new file mode 100644 index 0000000..4d069bc --- /dev/null +++ b/ws/utf8_dfa.nim @@ -0,0 +1,39 @@ +## nim-ws +## Copyright (c) 2021 Status Research & Development GmbH +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. + +# DFA based UTF8 decoder/validator +# See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. 

# States of the validation automaton that are meaningful to callers.
# Validation starts in `UTF8_ACCEPT` and the input is well formed only if the
# automaton is back in `UTF8_ACCEPT` after the last byte. `UTF8_REJECT` is a
# trap state: every transition out of it (row s1 below) leads back to it.
const
  UTF8_ACCEPT* = 0
  UTF8_REJECT* = 1

# Layout of `utf8Table`:
#   * entries 0 ..< 256 map a byte value to its character class (0 .. 0xb);
#   * the entries after that form the transition table, 16 slots per state,
#     addressed as `utf8TransitionBase + state * utf8ClassesPerState + class`.
const
  utf8TransitionBase = 256
  utf8ClassesPerState = 16

const utf8Table = [
  0'u8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
  0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
  0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
  0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
  1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
  1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
]

func validateUTF8*[T: byte | char](text: openArray[T]): bool =
  ## Returns `true` when `text` is a complete, well-formed UTF-8 byte
  ## sequence, `false` otherwise.
  ##
  ## Every byte is classified through `utf8Table` and fed to the transition
  ## table. The input is valid only if the automaton finishes in
  ## `UTF8_ACCEPT`, so an empty input is valid and a sequence truncated in
  ## the middle of a code point is not.
  ##
  ## The input is validated as a whole: callers that receive text in pieces
  ## must not split a multi-byte code point between two calls.
  var state = UTF8_ACCEPT
  for c in text:
    let charClass = utf8Table[int(c)].int
    state = utf8Table[
      utf8TransitionBase + state * utf8ClassesPerState + charClass].int
    # `UTF8_REJECT` can never be left again, so the remaining bytes cannot
    # change the verdict; stop scanning right away.
    if state == UTF8_REJECT:
      return false
  state == UTF8_ACCEPT