Performance optimizations in parser

2016-03-20 12:09:04 +01:00 · 2016-03-20 12:09:04 +01:00 · b6d363107a
parent 167c25af72
commit b6d363107a
1 changed files with 71 additions and 50 deletions
--- a/private/fastparse.nim
+++ b/private/fastparse.nim
@ -28,11 +28,11 @@ type
    cBlock, cFlow
 const
-  space          = [' ', '\t']
+  space          = {' ', '\t'}
-  lineEnd        = ['\l', '\c', EndOfFile]
+  lineEnd        = {'\l', '\c', EndOfFile}
-  spaceOrLineEnd = [' ', '\t', '\l', '\c', EndOfFile]
+  spaceOrLineEnd = {' ', '\t', '\l', '\c', EndOfFile}
-  digits         = '0'..'9'
+  digits         = {'0'..'9'}
-  flowIndicators = ['[', ']', '{', '}', ',']
+  flowIndicators = {'[', ']', '{', '}', ','}
  UTF8NextLine           = toUTF8(0x85.Rune)
  UTF8NonBreakingSpace   = toUTF8(0xA0.Rune)
@ -88,6 +88,13 @@ template yieldEmptyScalar() {.dirty.} =
  tag = yTagQuestionMark
  anchor = yAnchorNone
 template yieldShallowScalar(content: string) {.dirty.} =
  ## Yields a `yamlScalar` stream event whose content is taken from `content`.
  ##
  ## This is a dirty template: `tag` and `anchor` are not parameters, they are
  ## resolved at the expansion site, and the `yield` means it can only be
  ## expanded inside an iterator body.
  var e = YamlStreamEvent(kind: yamlScalar, scalarTag: tag,
                          scalarAnchor: anchor)
  # Presumably shallowCopy/shallow are used here so the scalar text is handed
  # to the event without duplicating the string - TODO confirm against the
  # Nim system module docs.
  # NOTE(review): if the event ends up sharing storage with `content`, any
  # later in-place reuse of `content` (e.g. `content.setLen(0)` followed by
  # `add`) could alter an event the consumer still holds - verify.
  shallowCopy(e.scalarContent, content)
  shallow(e.scalarContent)
  yield e
 template yieldLevelEnd() {.dirty.} =
  case level.kind
  of fplSequence: yield endSeqEvent()
@ -96,7 +103,7 @@ template yieldLevelEnd() {.dirty.} =
    yieldEmptyScalar()
    yield endMapEvent()
  of fplScalar:
-    yield scalarEvent(content, tag, anchor)
+    yieldShallowScalar(content)
    tag = yTagQuestionMark
    anchor = yAnchorNone
  of fplUnknown:
@ -265,27 +272,29 @@ template handleTagHandle() {.dirty.} =
  if level.kind != fplUnknown: parserError("Unexpected tag handle")
  if tag != yTagQuestionMark:
    parserError("Only one tag handle is allowed per node")
-  content = ""
+  content.setLen(0)
  var
    shorthandEnd: int
    tagUri: string
  p.lexer.tagHandle(content, shorthandEnd)
  if shorthandEnd != -1:
    try:
-      let prefix = shorthands[content[0..shorthandEnd]]
+      tagUri.setLen(0)
-      tagUri = prefix & content[shorthandEnd + 1 .. ^1]
+      tagUri.add(shorthands[content[0..shorthandEnd]])
      tagUri.add(content[shorthandEnd + 1 .. ^1])
    except KeyError:
      parserError("Undefined tag shorthand: " & content[0..shorthandEnd])
  else: shallowCopy(tagUri, content)
    try: tag = p.tagLib.tags[tagUri]
    except KeyError: tag = p.tagLib.registerUri(tagUri)
  else:
    try: tag = p.tagLib.tags[content]
    except KeyError: tag = p.tagLib.registerUri(content)
 template handleAnchor() {.dirty.} =
  startToken()
  if level.kind != fplUnknown: parserError("Unexpected token")
  if anchor != yAnchorNone:
    parserError("Only one anchor is allowed per node")
-  content = ""
+  content.setLen(0)
  p.lexer.anchorName(content)
  anchor = nextAnchorId
  anchors[content] = anchor
@ -296,7 +305,7 @@ template handleAlias() {.dirty.} =
  if level.kind != fplUnknown: parserError("Unexpected token")
  if anchor != yAnchorNone or tag != yTagQuestionMark:
    parserError("Alias may not have anchor or tag")
-  content = ""
+  content.setLen(0)
  p.lexer.anchorName(content)
  var id: AnchorId
  try: id = anchors[content]
@ -491,8 +500,8 @@ template tagShorthand(lexer: BaseLexer, shorthand: var string) =
  if lexer.buf[lexer.bufpos] notin spaceOrLineEnd:
    lexerError(lexer, "Missing space after tag shorthand")
-template tagUri(lexer: BaseLexer, uri: var string) =
+template tagUriMapping(lexer: BaseLexer, uri: var string) =
-  debug("lex: tagUri")
+  debug("lex: tagUriMapping")
  while lexer.buf[lexer.bufpos] in space:
    lexer.bufpos.inc()
  var c = lexer.buf[lexer.bufpos]
@ -580,7 +589,7 @@ proc byteSequence(lexer: var BaseLexer): char {.raises: [YamlParserError].} =
  return char(charCode)
 template processQuotedWhitespace(newlines: var int) {.dirty.} =
-  var after = ""
+  after.setLen(0)
  block outer:
    while true:
      case p.lexer.buf[p.lexer.bufpos]
@ -685,6 +694,17 @@ proc isPlainSafe(lexer: BaseLexer, index: int, context: YamlContext): bool =
  of flowIndicators: result = context == cBlock
  else: result = true
 # Tried this as a performance optimization, but it did not improve
 # performance. Keeping it around for future reference.
 #const
 #  plainCharOut   = {'!', '\"', '$'..'9',  ';'..'\xFF'}
 #  plainCharIn    = {'!', '\"', '$'..'+', '-'..'9', ';'..'Z', '\\', '^'..'z',
 #                    '|', '~'..'\xFF'}
 #template isPlainChar(c: char, context: YamlContext): bool =
 #  when context == cBlock: c in plainCharOut
 #  else: c in plainCharIn
 template plainScalar(lexer: BaseLexer, content: var string,
                     context: YamlContext) =
  debug("lex: plainScalar")
@ -694,9 +714,9 @@ template plainScalar(lexer: BaseLexer, content: var string,
      lexer.bufpos.inc()
      let c = lexer.buf[lexer.bufpos]
      case c
      of lineEnd: break
      of ' ', '\t':
-        var after = "" & c
+        after.setLen(1)
        after[0] = c
        while true:
          lexer.bufpos.inc()
          let c2 = lexer.buf[lexer.bufpos]
@ -719,9 +739,7 @@ template plainScalar(lexer: BaseLexer, content: var string,
            content.add(after)
            content.add(c2)
            break
-      of flowIndicators:
+      of lineEnd, flowIndicators: break
        if context == cBlock: content.add(c)
        else: break
      of ':':
        if lexer.isPlainSafe(lexer.bufpos + 1, context): content.add(':')
        else: break outer
@ -735,7 +753,7 @@ template continueMultilineScalar() {.dirty.} =
  state = fpBlockAfterPlainScalar
 template handleFlowPlainScalar() {.dirty.} =
-  content = ""
+  content.setLen(0)
  startToken()
  p.lexer.plainScalar(content, cFlow)
  if p.lexer.buf[p.lexer.bufpos] in {'{', '}', '[', ']', ',', ':', '#'}:
@ -771,7 +789,7 @@ template handleFlowPlainScalar() {.dirty.} =
          content.add(repeat(' ', newlines - 1))
          newlines = 0
        p.lexer.plainScalar(content, cFlow)
-  yield scalarEvent(content, tag, anchor)
+  yieldShallowScalar(content)
  handleObjectEnd(fpFlowAfterObject)
 template ensureCorrectIndentation() {.dirty.} =
@ -811,7 +829,7 @@ template tagHandle(lexer: var BaseLexer, content: var string,
    of '<':
      if i == 1:
        shorthandEnd = -1
-        content = ""
+        content.setLen(0)
      else: lexerError(lexer, "Illegal character in tag handle")
    of '>':
      if shorthandEnd == -1:
@ -880,7 +898,7 @@ template blockScalar(lexer: BaseLexer, content: var string,
        # TODO: is this correct?
  else: debugFail()
  var newlines = 0
-  content = ""
+  content.setLen(0)
  block outer:
    while true:
      block inner:
@ -1042,7 +1060,9 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
      shorthands: Table[string, string]
      anchors: Table[string, AnchorId]
      nextAnchorId: AnchorId
-      content: string
+      content: string = ""
      after: string = ""
      tagUri: string = ""
      tag: TagId
      anchor: AnchorId
      ancestry = newSeq[FastParseLevel]()
@ -1078,11 +1098,12 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
            p.lexer.lineEnding()
            handleLineEnd(false)
          of ldTag:
-            var shorthand, uri = ""
+            var shorthand = ""
            tagUri.setLen(0)
            startToken()
            p.lexer.tagShorthand(shorthand)
-            p.lexer.tagUri(uri)
+            p.lexer.tagUriMapping(tagUri)
-            shorthands[shorthand] = uri
+            shorthands[shorthand] = tagUri
            p.lexer.lineEnding()
            handleLineEnd(false)
          of ldUnknown:
@ -1117,7 +1138,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
            handleBlockSequenceIndicator()
            state = fpBlockObjectStart
          of lpdeScalarContent:
-            content = ""
+            content.setLen(0)
            p.lexer.plainScalar(content, cBlock)
            state = fpBlockAfterPlainScalar
        else:
@ -1151,7 +1172,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
              ensureCorrectIndentation()
              ancestry.add(level)
              level = initLevel(fplUnknown)
-              content = ""
+              content.setLen(0)
              p.lexer.plainScalar(content, cBlock)
              state = fpBlockAfterPlainScalar
        of '.':
@ -1174,7 +1195,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
              ensureCorrectIndentation()
              ancestry.add(level)
              level = initLevel(fplUnknown)
-              content = ""
+              content.setLen(0)
              p.lexer.plainScalar(content, cBlock)
              state = fpBlockAfterPlainScalar
        of ' ':
@ -1252,7 +1273,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
            startToken()
            parserError("Unexpected token")
        of '#':
-          yield scalarEvent(content, tag, anchor)
+          yieldShallowScalar(content)
          p.lexer.lineEnding()
          handleLineEnd(true)
          handleObjectEnd(fpBlockLineStart)
@ -1279,7 +1300,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
          p.lexer.bufpos = p.lexer.handleCR(p.lexer.bufpos)
          state = fpBlockLineStart
        else:
-          yield scalarEvent(content, tag, anchor)
+          yieldShallowScalar(content)
          handleObjectEnd(fpBlockAfterObject)
      of fpBlockAfterObject:
        debug("state: blockAfterObject")
@ -1348,29 +1369,29 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
          level.indentation = UnknownIndentation
        of '\'':
          handleBlockItemStart()
-          content = ""
+          content.setLen(0)
          startToken()
          p.lexer.singleQuotedScalar(content)
          if tag == yTagQuestionMark: tag = yTagExclamationMark
-          yield scalarEvent(content, tag, anchor)
+          yieldShallowScalar(content)
          handleObjectEnd(fpBlockAfterObject)
        of '"':
          handleBlockItemStart()
-          content = ""
+          content.setLen(0)
          startToken()
          p.lexer.doubleQuotedScalar(content)
          if tag == yTagQuestionMark: tag = yTagExclamationMark
-          yield scalarEvent(content, tag, anchor)
+          yieldShallowScalar(content)
          handleObjectEnd(fpBlockAfterObject)
        of '|', '>':
          # TODO: this will scan for possible map start, which is not
          # necessary in this case
          handleBlockItemStart()
          var stateAfter: FastParseState
-          content = ""
+          content.setLen(0)
          p.lexer.blockScalar(content, stateAfter)
          if tag == yTagQuestionMark: tag = yTagExclamationMark
-          yield scalarEvent(content, tag, anchor)
+          yieldShallowScalar(content)
          handleObjectEnd(stateAfter)
          if stateAfter == fpBlockObjectStart and
              p.lexer.buf[p.lexer.bufpos] != '#':
@ -1379,7 +1400,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
        of '-':
          if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cBlock):
            handleBlockItemStart()
-            content = ""
+            content.setLen(0)
            startToken()
            p.lexer.plainScalar(content, cBlock)
            state = fpBlockAfterPlainScalar
@ -1401,7 +1422,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
        of '?':
          if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cBlock):
            handleBlockItemStart()
-            content = ""
+            content.setLen(0)
            startToken()
            p.lexer.plainScalar(content, cBlock)
            state = fpBlockAfterPlainScalar
@ -1411,7 +1432,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
        of ':':
          if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cBlock):
            handleBlockItemStart()
-            content = ""
+            content.setLen(0)
            startToken()
            p.lexer.plainScalar(content, cBlock)
            state = fpBlockAfterPlainScalar
@ -1422,7 +1443,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
          lexerError(p.lexer, "Reserved characters cannot start a plain scalar")
        else:
          handleBlockItemStart()
-          content = ""
+          content.setLen(0)
          startToken()
          p.lexer.plainScalar(content, cBlock)
          state = fpBlockAfterPlainScalar
@ -1602,19 +1623,19 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
            p.lexer.bufpos.inc()
        of '\'':
          handleFlowItemStart()
-          content = ""
+          content.setLen(0)
          startToken()
          p.lexer.singleQuotedScalar(content)
          if tag == yTagQuestionMark: tag = yTagExclamationMark
-          yield scalarEvent(content, tag, anchor)
+          yieldShallowScalar(content)
          handleObjectEnd(fpFlowAfterObject)
        of '"':
          handleFlowItemStart()
-          content = ""
+          content.setLen(0)
          startToken()
          p.lexer.doubleQuotedScalar(content)
          if tag == yTagQuestionMark: tag = yTagExclamationMark
-          yield scalarEvent(content, tag, anchor)
+          yieldShallowScalar(content)
          handleObjectEnd(fpFlowAfterObject)
        of '!':
          handleFlowItemStart()