Added benchmark. Fixed processing empty flow sets.

This commit is contained in:
Felix Krause 2015-12-29 00:14:47 +01:00
parent 7b6d9d18c5
commit d828f4ddc5
6 changed files with 202 additions and 13 deletions

2
.gitignore vendored
View File

@ -7,3 +7,5 @@ test/*.pdb
test/*.ilk test/*.ilk
yaml.html yaml.html
libyaml.* libyaml.*
bench/json

180
bench/json.nim Normal file
View File

@ -0,0 +1,180 @@
import "../yaml"
import math, strutils, stopwatch
type
  # Kind of JSON container the generator can have open:
  # otMap is written as `{ ... }`, otSequence as `[ ... ]`.
  ObjectKind = enum
    otMap, otSequence

  # One currently open container: its kind and the number of entries that
  # have been written into it so far.
  Level = tuple[kind: ObjectKind, len: int]
proc genString(maxLen: int): string =
  ## Produces a random string literal wrapped in double quotes.
  ##
  ## A target length is drawn with `random(maxLen)`. Characters with codes
  ## starting at 32 are appended while the running count stays below
  ## `target - 1`. A `"` or `\` is written with a preceding backslash and
  ## counts as two characters towards the target.
  let target = random(maxLen)
  var produced = 0
  result = "\""
  while produced < target - 1:
    # assumes `random(max)` yields 0 ..< max, i.e. codes 32..126 — TODO confirm
    let ch = chr(random(127 - 32) + 32)
    if ch in {'"', '\\'}:
      # Escape characters that would otherwise end or corrupt the literal.
      result.add('\\')
      result.add(ch)
      produced += 2
    else:
      result.add(ch)
      produced += 1
  result.add('"')
proc genJsonString(size: int, maxStringLen: int): string =
  ## Builds a random, pretty-printed JSON document whose root is an object.
  ##
  ## `size` is the target size in KiB: once the tracked size exceeds it, every
  ## container that is still open gets closed and generation ends.
  ## `maxStringLen` (in characters) is handed to `genString` for every key and
  ## string value. The random number generator is re-seeded with
  ## `size * maxStringLen` at the start of each call.
  randomize(size * maxStringLen)
  result = "{"

  let byteBudget = size * 1024
  var
    depthIndent = 2
    stack: seq[Level] = @[(kind: otMap, len: 0)]
    written = 1 # accounts for the opening '{' of the root object
    freshlyOpened = true

  while stack.len > 0:
    let top = stack.high
    # The chance of closing grows with the entry count of the innermost
    # container and with the nesting depth. The draw happens on every
    # iteration, whether or not its outcome ends up being used.
    let wantsClose = random(1.0) <= float(stack[top].len + top) * 0.025

    if (wantsClose and stack.len > 1) or written > byteBudget:
      # Close the innermost container. The root is only closed once the
      # size budget has been exceeded.
      depthIndent -= 2
      if not freshlyOpened:
        result.add("\x0A")
        result.add(repeat(' ', depthIndent))
        written += depthIndent + 1
      freshlyOpened = false
      let closer =
        case stack[top].kind
        of otMap: '}'
        of otSequence: ']'
      result.add(closer)
      written += 1
      discard stack.pop()
      continue

    # Start a new entry in the innermost container; every entry but the
    # first is preceded by a comma.
    stack[top].len += 1
    let separator = if freshlyOpened: "\x0A" else: ",\x0A"
    freshlyOpened = false
    result.add(separator)
    result.add(repeat(' ', depthIndent))
    written += depthIndent + separator.len

    # Entries of a map need a key; entries of a sequence do not.
    if stack[top].kind == otMap:
      let key = genString(maxStringLen)
      result.add(key)
      result.add(": ")
      written += key.len + 2

    # Nested containers become less likely the deeper the nesting is.
    let nestChance = 0.8 / float(stack.len * stack.len)
    if random(1.0) <= nestChance:
      let nested = if random(2) == 0: otMap else: otSequence
      result.add(if nested == otMap: '{' else: '[')
      written += 1
      stack.add((kind: nested, len: 0))
      freshlyOpened = true
      depthIndent += 2
    else:
      # Scalar value: string, integer, float, or one of true/false/null.
      var scalar: string
      case random(11)
      of 0..5:
        scalar = genString(maxStringLen)
      of 6..7:
        scalar = $random(32000)
      of 8..9:
        scalar = $(random(424242.4242) - 212121.21)
      of 10:
        case random(3)
        of 0: scalar = "true"
        of 1: scalar = "false"
        of 2: scalar = "null"
        else: discard
      else:
        discard
      result.add(scalar)
      written += scalar.len
# Benchmark driver: parses the same generated JSON inputs with the YAML
# parser (parseToJson) and with parseJson, then prints one timing per run.

# One clock per (parser, input size) combination.
# NOTE(review): `clock` and `bench` presumably come from the imported
# `stopwatch` module — confirm.
var cYaml1k, cYaml10k, cYaml100k, cJson1k, cJson10k, cJson100k: clock
# NOTE(review): genJsonString calls randomize() itself before drawing any
# random number, so this seed looks redundant for the inputs below — confirm.
randomize(42)
# Inputs: first argument is the target size in KiB, second the maximum
# string length passed on to genString.
let
json1k = genJsonString(1, 32)
json10k = genJsonString(10, 32)
json100k = genJsonString(100, 32)
# YAML runs: each input is wrapped in a string stream that is created outside
# the measured body.
var s = newStringStream(json1k)
block:
bench(cYaml1k):
let res = parseToJson(s)
# The result is indexed; element 0 is expected to be a JSON object.
assert res[0].kind == JObject
s = newStringStream(json10k)
block:
bench(cYaml10k):
let res = parseToJson(s)
assert res[0].kind == JObject
s = newStringStream(json100k)
block:
bench(cYaml100k):
let res = parseToJson(s)
assert res[0].kind == JObject
# JSON runs: parseJson is given the input strings directly.
block:
bench(cJson1k):
let res = parseJson(json1k)
assert res.kind == JObject
block:
bench(cJson10k):
let res = parseJson(json10k)
assert res.kind == JObject
block:
bench(cJson100k):
let res = parseJson(json100k)
assert res.kind == JObject
# Report: the nanosecond readings are integer-divided by 1000 and labelled
# as microseconds.
echo "Benchmark: Processing JSON input with YAML versus Nim's JSON implementation"
echo "==========================================================================="
echo "YAML (1k input): ", cYaml1k.nanoseconds div 1000, "μs"
echo "JSON (1k input): ", cJson1k.nanoseconds div 1000, "μs"
echo "YAML (10k input): ", cYaml10k.nanoseconds div 1000, "μs"
echo "JSON (10k input): ", cJson10k.nanoseconds div 1000, "μs"
echo "YAML (100k input): ", cYaml100k.nanoseconds div 1000, "μs"
echo "JSON (100k input): ", cJson100k.nanoseconds div 1000, "μs"

View File

@ -21,6 +21,11 @@ task parserTests, "Run parser tests":
task doc, "Generate documentation": task doc, "Generate documentation":
setCommand "doc2", "yaml" setCommand "doc2", "yaml"
# Benchmark task: sets the `release` define and selects the "c" command for
# bench/json.
# NOTE(review): `--r` is presumably the short form of `--run`, i.e. the
# compiled benchmark is executed right after the build — confirm.
task bench, "Benchmarking":
--d:release
--r
setCommand "c", "bench/json"
task clean, "Remove all generated files": task clean, "Remove all generated files":
exec "rm -f yaml.html libyaml.* test/tests test/parsing test/lexing" exec "rm -f yaml.html libyaml.* test/tests test/parsing test/lexing"
setCommand "nop" setCommand "nop"

View File

@ -164,8 +164,8 @@ proc open(my: var YamlLexer, input: Stream) =
my.indentations = newSeq[int]() my.indentations = newSeq[int]()
my.detect_encoding() my.detect_encoding()
my.content = "" my.content = ""
my.line = 0 my.line = 1
my.column = 0 my.column = 1
template yieldToken(kind: YamlLexerToken) {.dirty.} = template yieldToken(kind: YamlLexerToken) {.dirty.} =
when defined(yamlDebug): when defined(yamlDebug):
@ -205,6 +205,7 @@ template yieldLexerError(message: string) {.dirty.} =
when defined(yamlDebug): when defined(yamlDebug):
echo "Lexer error: " & message echo "Lexer error: " & message
my.content = message my.content = message
my.column = curPos
yield tError yield tError
my.content = "" my.content = ""
@ -212,13 +213,13 @@ template handleCR() {.dirty.} =
my.bufpos = lexbase.handleCR(my, my.bufpos + my.charoffset) + my.charlen - my.bufpos = lexbase.handleCR(my, my.bufpos + my.charoffset) + my.charlen -
my.charoffset - 1 my.charoffset - 1
my.line.inc() my.line.inc()
curPos = 0 curPos = 1
template handleLF() {.dirty.} = template handleLF() {.dirty.} =
my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) +
my.charlen - my.charoffset - 1 my.charlen - my.charoffset - 1
my.line.inc() my.line.inc()
curPos = 0 curPos = 1
template `or`(r: Rune, i: int): Rune = template `or`(r: Rune, i: int): Rune =
cast[Rune](cast[int](r) or i) cast[Rune](cast[int](r) or i)
@ -376,7 +377,7 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
blockScalarIndentation = -1 blockScalarIndentation = -1
# when parsing a block scalar, this will be set to the indentation # when parsing a block scalar, this will be set to the indentation
# of the line that starts the flow scalar. # of the line that starts the flow scalar.
curPos = 0 curPos = 1
while true: while true:
let c = my.buf[my.bufpos + my.charoffset] let c = my.buf[my.bufpos + my.charoffset]
@ -410,12 +411,12 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylInitialContent: of ylInitialContent:
case c case c
of '-': of '-':
my.column = 0 my.column = curPos
state = ylDashes state = ylDashes
continue continue
of '.': of '.':
yieldToken(tLineStart) yieldToken(tLineStart)
my.column = 0 my.column = curPos
state = ylDots state = ylDots
continue continue
else: else:

View File

@ -766,12 +766,12 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
else: else:
discard discard
if lex.content.len > level.indentationColumn: if lex.content.len > level.indentationColumn - 1:
if blockScalar == bsFolded: if blockScalar == bsFolded:
if blockScalarTrailing == " ": if blockScalarTrailing == " ":
blockScalarTrailing = "\x0A" blockScalarTrailing = "\x0A"
scalarCache &= blockScalarTrailing & scalarCache &= blockScalarTrailing &
lex.content[level.indentationColumn..^1] lex.content[level.indentationColumn - 1..^1]
blockScalarTrailing = "" blockScalarTrailing = ""
of tScalarPart: of tScalarPart:
@ -859,8 +859,8 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
if level.mode == mUnknown: if level.mode == mUnknown:
yieldScalar("", yTypeUnknown) yieldScalar("", yTypeUnknown)
level = ancestry.pop() level = ancestry.pop()
if level.mode != mFlowMapValue: if level.mode != mFlowMapKey:
yieldUnexpectedToken() yieldUnexpectedToken($level.mode)
yield YamlStreamEvent(kind: yamlEndMap) yield YamlStreamEvent(kind: yamlEndMap)
if ancestry.len > 0: if ancestry.len > 0:
level = ancestry.pop() level = ancestry.pop()
@ -1002,3 +1002,5 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
else: else:
yieldUnexpectedToken("document end") yieldUnexpectedToken("document end")
token = nextToken(lex) token = nextToken(lex)
if token == tError:
yieldError("Lexer error: " & lex.content)

View File

@ -1,5 +1,4 @@
import "../yaml" import "../yaml"
import streams, tables
import unittest import unittest