From eda85be0b9aa7eea32e237a777e603220f2beb29 Mon Sep 17 00:00:00 2001
From: Felix Krause
Date: Thu, 17 Mar 2016 18:44:35 +0100
Subject: [PATCH] Support % escape sequences in tag URIs

---
 private/fastparse.nim  | 44 +++++++++++++++++++++++++++++++++++---------
 private/tagLibrary.nim |  2 +-
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/private/fastparse.nim b/private/fastparse.nim
index d8c8b79..4bae745 100644
--- a/private/fastparse.nim
+++ b/private/fastparse.nim
@@ -569,7 +569,7 @@ proc unicodeSequence(lexer: var BaseLexer, length: int):
       digitPosition = length - i - 1
       c = lexer.buf[lexer.bufpos]
     case c
-    of EndOFFile:
+    of EndOFFile, '\l', '\r':
       lexerError(lexer, "Unfinished unicode escape sequence")
     of '0' .. '9':
       unicodeChar = unicodechar or
@@ -580,10 +580,34 @@ proc unicodeSequence(lexer: var BaseLexer, length: int):
     of 'a' .. 'f':
       unicodeChar = unicodechar or
           (cast[int](c) - 0x57) shl (digitPosition * 4)
-    else:
-      lexerError(lexer, "Invalid character in unicode escape sequence")
+    else: lexerError(lexer, "Invalid character in unicode escape sequence")
   return toUTF8(cast[Rune](unicodeChar))
 
+proc byteSequence(lexer: var BaseLexer): char {.raises: [YamlParserError].} =
+  ## Reads the two hex digits following a '%' in a tag URI and returns the
+  ## octet they encode. Reports a lexer error if the line or the input ends,
+  ## or a non-hex character is found, before both digits were read.
+  debug("lex: byteSequence")
+  # Accumulate in a plain int: shifting a nibble >= 8 left by four bits
+  # does not fit into int8, so octets >= 0x80 could not be represented.
+  var charCode = 0
+  for i in 0 .. 1:
+    lexer.bufpos.inc()
+    let
+      digitPosition = 1 - i
+      c = lexer.buf[lexer.bufpos]
+    case c
+    of EndOfFile, '\l', '\r':
+      lexerError(lexer, "Unfinished octet escape sequence")
+    of '0' .. '9':
+      charCode = charCode or (ord(c) - 0x30) shl (digitPosition * 4)
+    of 'A' .. 'F':
+      charCode = charCode or (ord(c) - 0x37) shl (digitPosition * 4)
+    of 'a' .. 'f':
+      charCode = charCode or (ord(c) - 0x57) shl (digitPosition * 4)
+    else: lexerError(lexer, "Invalid character in octet escape sequence")
+  return char(charCode)
+
 template processDoubleQuotedWhitespace(newlines: var int) {.dirty.} =
   var
     after = ""
@@ -802,6 +826,7 @@ template tagHandle(lexer: var BaseLexer, content: var string,
     of '!':
       if shorthandEnd == -1 and i == 2:
         content.add(c)
+        continue
       elif shorthandEnd != 0:
         lexerError(lexer, "Illegal character in tag suffix")
       shorthandEnd = i
@@ -813,18 +838,19 @@ template tagHandle(lexer: var BaseLexer, content: var string,
       if i == 1:
         shorthandEnd = -1
         content = ""
-      else:
-        lexerError(lexer, "Illegal character in tag handle")
+      else: lexerError(lexer, "Illegal character in tag handle")
     of '>':
       if shorthandEnd == -1:
         lexer.bufpos.inc()
         if lexer.buf[lexer.bufpos] notin spaceOrLineEnd:
           lexerError(lexer, "Missing space after verbatim tag handle")
         break
-      else:
-        lexerError(lexer, "Illegal character in tag handle")
-    else:
-      lexerError(lexer, "Illegal character in tag handle")
+      else: lexerError(lexer, "Illegal character in tag handle")
+    of '%':
+      if shorthandEnd != 0:
+        content.add(lexer.byteSequence())
+      else: lexerError(lexer, "Illegal character in tag handle")
+    else: lexerError(lexer, "Illegal character in tag handle")
 
 template anchorName(lexer: BaseLexer, content: var string) =
   debug("lex: anchorName")
diff --git a/private/tagLibrary.nim b/private/tagLibrary.nim
index 5803b17..63e53ae 100644
--- a/private/tagLibrary.nim
+++ b/private/tagLibrary.nim
@@ -32,7 +32,7 @@ proc initTagLibrary*(): TagLibrary =
   result.secondaryPrefix = yamlTagRepositoryPrefix
   result.nextCustomTagId = yFirstCustomTagId
 
-proc registerUri*(tagLib: TagLibrary, uri: string): TagId = 
+proc registerUri*(tagLib: TagLibrary, uri: string): TagId =
   tagLib.tags[uri] = tagLib.nextCustomTagId
   result = tagLib.nextCustomTagId
   tagLib.nextCustomTagId = cast[TagId](cast[int](tagLib.nextCustomTagId) + 1)