Support % escape sequences in tag URIs

2016-03-17 18:44:35 +01:00 · 2016-03-17 18:44:35 +01:00 · eda85be0b9
parent d1c8c171a1
commit eda85be0b9
2 changed files with 31 additions and 10 deletions
--- a/private/fastparse.nim
+++ b/private/fastparse.nim
@ -569,7 +569,7 @@ proc unicodeSequence(lexer: var BaseLexer, length: int):
      digitPosition = length - i - 1
      c = lexer.buf[lexer.bufpos]
    case c
-    of EndOFFile:
+    of EndOFFile, '\l', '\r':
        lexerError(lexer, "Unfinished unicode escape sequence")
    of '0' .. '9':
        unicodeChar = unicodechar or
@ -580,10 +580,29 @@ proc unicodeSequence(lexer: var BaseLexer, length: int):
    of 'a' .. 'f':
        unicodeChar = unicodechar or
                (cast[int](c) - 0x57) shl (digitPosition * 4)
-    else:
+    else: lexerError(lexer, "Invalid character in unicode escape sequence")
      lexerError(lexer, "Invalid character in unicode escape sequence")
  return toUTF8(cast[Rune](unicodeChar))
 proc byteSequence(lexer: var BaseLexer): char {.raises: [YamlParserError].} =
  ## Reads the two hexadecimal digits of an octet escape sequence and returns
  ## the byte they encode.
  ##
  ## The buffer position is advanced *before* each digit is read, so on entry
  ## `lexer.bufpos` is expected to sit on the character introducing the
  ## escape (the `%` when called while lexing a tag); on return it sits on
  ## the second digit.
  ##
  ## Both upper- and lowercase hex digits are accepted. A lexer error is
  ## reported if the input or the line ends inside the sequence, or if a
  ## non-hexadecimal character is encountered.
  debug("lex: byteSequence")
  # Accumulate in a plain int rather than a signed 8-bit value so that octets
  # with the high bit set (0x80 .. 0xFF) stay within 0 .. 255 and convert
  # cleanly to `char` at the end.
  var charCode = 0
  for i in 0 .. 1:
    lexer.bufpos.inc()
    let
      # First digit is the high nibble (position 1), second the low nibble.
      digitPosition = 1 - i
      c = lexer.buf[lexer.bufpos]
    case c
    of EndOfFile, '\l', '\r':
      # End of input or a line break (LF / CR) in the middle of the escape.
      lexerError(lexer, "Unfinished octet escape sequence")
    of '0' .. '9':
      charCode = charCode or ((ord(c) - 0x30) shl (digitPosition * 4))
    of 'A' .. 'F':
      # 0x37 == ord('A') - 10
      charCode = charCode or ((ord(c) - 0x37) shl (digitPosition * 4))
    of 'a' .. 'f':
      # 0x57 == ord('a') - 10
      charCode = charCode or ((ord(c) - 0x57) shl (digitPosition * 4))
    else: lexerError(lexer, "Invalid character in octet escape sequence")
  return char(charCode)
 template processDoubleQuotedWhitespace(newlines: var int) {.dirty.} =
  var
    after = ""
@ -802,6 +821,7 @@ template tagHandle(lexer: var BaseLexer, content: var string,
    of '!':
      if shorthandEnd == -1 and i == 2:
        content.add(c)
        continue
      elif shorthandEnd != 0:
        lexerError(lexer, "Illegal character in tag suffix")
      shorthandEnd = i
@ -813,18 +833,19 @@ template tagHandle(lexer: var BaseLexer, content: var string,
      if i == 1:
        shorthandEnd = -1
        content = ""
-      else:
+      else: lexerError(lexer, "Illegal character in tag handle")
        lexerError(lexer, "Illegal character in tag handle")
    of '>':
      if shorthandEnd == -1:
        lexer.bufpos.inc()
        if lexer.buf[lexer.bufpos] notin spaceOrLineEnd:
          lexerError(lexer, "Missing space after verbatim tag handle")
        break
-      else:
+      else: lexerError(lexer, "Illegal character in tag handle")
-        lexerError(lexer, "Illegal character in tag handle")
+    of '%':
-    else:
+      if shorthandEnd != 0:
-      lexerError(lexer, "Illegal character in tag handle")
+        content.add(lexer.byteSequence())
      else: lexerError(lexer, "Illegal character in tag handle")
    else: lexerError(lexer, "Illegal character in tag handle")
 template anchorName(lexer: BaseLexer, content: var string) =
  debug("lex: anchorName")
--- a/private/tagLibrary.nim
+++ b/private/tagLibrary.nim
@ -32,7 +32,7 @@ proc initTagLibrary*(): TagLibrary =
    result.secondaryPrefix = yamlTagRepositoryPrefix
    result.nextCustomTagId = yFirstCustomTagId
-proc registerUri*(tagLib: TagLibrary, uri: string): TagId =  
+proc registerUri*(tagLib: TagLibrary, uri: string): TagId =
    tagLib.tags[uri] = tagLib.nextCustomTagId
    result = tagLib.nextCustomTagId
    tagLib.nextCustomTagId = cast[TagId](cast[int](tagLib.nextCustomTagId) + 1)