Added benchmark for processing YAML input

This commit is contained in:
Felix Krause 2016-03-20 10:32:14 +01:00
parent eef247b2e8
commit 167c25af72
7 changed files with 220 additions and 26 deletions

2
.gitignore vendored
View File

@ -10,6 +10,8 @@ test/*.pdb
test/*.ilk
server/server
bench/jsonBench
bench/yamlBench
bench/bench
yaml.html
libyaml.dylib
libyaml.so

1
bench/bench.nim Normal file
View File

@ -0,0 +1 @@
import jsonBench, yamlBench

18
bench/common.nim Normal file
View File

@ -0,0 +1,18 @@
import stopwatch
template multiBench*(nanosecs: int64, times: int, body: stmt): stmt =
assert(times mod 2 == 0)
var arr: array[0..times - 1, int64]
for i in countup(0, times - 1):
var c: clock
bench(c):
body
arr[i] = c.nanoseconds()
sort(arr, cmp)
# ignore lowest and highest 10%
let tenth: int = times div 10
let lowest = arr[tenth]
var totaldiff = 0.int64
for i in countup(tenth + 1, times - tenth - 1):
totaldiff += arr[i] - lowest
nanosecs = lowest + totaldiff div (times - 2 * tenth)

View File

@ -1,4 +1,4 @@
import "../yaml"
import "../yaml", common
from nimlets_yaml import objKind
@ -6,23 +6,6 @@ import math, strutils, stopwatch, terminal, algorithm
proc cmp(left, right: clock): int = cmp(left.nanoseconds(), right.nanoseconds())
template multiBench(nanosecs: int64, times: int, body: stmt): stmt =
assert(times mod 2 == 0)
var arr: array[0..times - 1, int64]
for i in countup(0, times - 1):
var c: clock
bench(c):
body
arr[i] = c.nanoseconds()
sort(arr, cmp)
# ignore lowest and highest 10%
let tenth: int = times div 10
let lowest = arr[tenth]
var totaldiff = 0.int64
for i in countup(tenth + 1, times - tenth - 1):
totaldiff += arr[i] - lowest
nanosecs = lowest + totaldiff div (times - 2 * tenth)
type
ObjectKind = enum
otMap, otSequence

View File

@ -117,7 +117,7 @@ proc events(self: LoadContext): iterator(): yaml_event_t =
var event: yaml_event_t
while true:
if yaml_parser_parse(addr self.parser, addr event) != 1:
raise newException(Exception, "Malformed input: " & $self.parser.error)
raise newException(Exception, $self.parser.error & ": " & $self.parser.problem)
if event.typ == YAML_NO_EVENT:
break
@ -148,12 +148,11 @@ recognize[YAML_SCALAR_EVENT] = proc(self: LoadContext, event: yaml_event_t): Yam
of YAML_NULL_TAG: return YamlObj(kind : YamlObjKind.Null)
of YAML_BOOL_TAG:
result = YamlObj(kind : YamlObjKind.Bool)
if event.data.scalar.value == "true":
result.boolVal = true
elif event.data.scalar.value == "false":
result.boolVal = false
else:
assert(false, "Unknown boolean value " & $event.data.scalar.value)
case $event.data.scalar.value
of "true": result.boolVal = true
of "false": result.boolVal = false
else: assert(false,
"Unknown boolean value \"" & $event.data.scalar.value & '\"')
of YAML_INT_TAG:
return YamlObj(kind : YamlObjKind.Int, intVal : parseInt($event.data.scalar.value))
of YAML_FLOAT_TAG:

191
bench/yamlBench.nim Normal file
View File

@ -0,0 +1,191 @@
import "../yaml", common
import math, strutils, stopwatch, terminal, algorithm
from nimlets_yaml import objKind
type
Level = tuple
kind: YamlNodeKind
len: int
proc genString(maxLen: int): string =
let len = random(maxLen)
result = ""
for i in 1 .. len: result.add(cast[char](random(127 - 32) + 32))
proc genBlockString(): string =
let lines = 5 + random(10)
let flow = random(2) == 0
result = ""
for i in 1 .. lines:
let lineLen = 32 + random(12)
for i in i .. lineLen: result.add(cast[char](random(127 - 33) + 33))
result.add(if flow: ' ' else: '\l')
result.add('\l')
proc genKey(): string =
let genPossiblePlainKey = random(1.0) < 0.75
if genPossiblePlainKey:
result = ""
let len = random(24) + 1
for i in 1 .. len:
let c = random(26 + 26 + 10)
if c < 26: result.add(char(c + 65))
elif c < 52: result.add(char(c + 97 - 26))
else: result.add(char(c + 48 - 52))
else: result = genString(31) & char(random(26) + 65)
proc genYamlString(size: int, maxStringLen: int,
style: PresentationStyle): string =
## Generates a random YAML string.
## size is in KiB, mayStringLen in characters.
randomize(size * maxStringLen * ord(style))
result = "{"
let targetSize = size * 1024
var
target = newStringStream()
input = iterator(): YamlStreamEvent =
var
levels = newSeq[Level]()
curSize = 1
levels.add((kind: yMapping, len: 0))
yield startDocEvent()
yield startMapEvent()
while levels.len > 0:
let
objectCloseProbability =
float(levels[levels.high].len + levels.high) * 0.025
closeObject = random(1.0) <= objectCloseProbability
if (closeObject and levels.len > 1) or curSize > targetSize:
case levels[levels.high].kind
of yMapping: yield endMapEvent()
of ySequence: yield endSeqEvent()
else: assert(false)
curSize += 1
discard levels.pop()
continue
levels[levels.high].len += 1
if levels[levels.high].kind == yMapping:
let key = genKey()
yield scalarEvent(key)
let
objectValueProbability =
0.8 / float(levels.len * levels.len)
generateObjectValue = random(1.0) <= objectValueProbability
hasTag = random(2) == 0
var tag = yTagQuestionMark
if generateObjectValue:
let objectKind = if random(3) == 0: ySequence else: yMapping
case objectKind
of yMapping:
if hasTag: tag = yTagMapping
yield startMapEvent(tag)
of ySequence:
if hasTag: tag = yTagSequence
yield startSeqEvent(tag)
else: assert(false)
curSize += 1
levels.add((kind: objectKind, len: 0))
else:
var s: string
case random(11)
of 0..4:
s = genString(maxStringLen)
if hasTag: tag = yTagString
of 5:
s = genBlockString()
of 6..7:
s = $random(32000)
if hasTag: tag = yTagInteger
of 8..9:
s = $(random(424242.4242) - 212121.21)
if hasTag: tag = yTagFloat
of 10:
case random(3)
of 0:
s = "true"
if hasTag: tag = yTagBoolean
of 1:
s = "false"
if hasTag: tag = yTagBoolean
of 2:
s = "null"
if hasTag: tag = yTagNull
else: discard
else: discard
yield scalarEvent(s, tag)
curSize += s.len
yield endDocEvent()
var yStream = initYamlStream(input)
present(yStream, target, initExtendedTagLibrary(),
defineOptions(style=style, outputVersion=ov1_1))
result = target.data
var
cYaml1k, cYaml10k, cYaml100k, cLibYaml1k, cLibYaml10k, cLibYaml100k: int64
yaml1k = genYamlString(1, 32, psDefault)
yaml10k = genYamlString(10, 32, psDefault)
yaml100k = genYamlString(100, 32, psDefault)
tagLib = initExtendedTagLibrary()
parser = newYamlParser(tagLib)
block:
multibench(cYaml1k, 100):
var s = newStringStream(yaml1k)
let res = loadDOM(s)
assert res.root.kind == yMapping
block:
multibench(cYaml10k, 100):
var
s = newStringStream(yaml10k)
let res = loadDOM(s)
assert res.root.kind == yMapping
block:
multibench(cYaml100k, 100):
var s = newStringStream(yaml100k)
let res = loadDOM(s)
assert res.root.kind == yMapping
block:
multibench(cLibYaml1k, 100):
let res = nimlets_yaml.load(yaml1k)
assert res[0].objKind == nimlets_yaml.YamlObjKind.Map
block:
multibench(cLibYaml10k, 100):
let res = nimlets_yaml.load(yaml10k)
assert res[0].objKind == nimlets_yaml.YamlObjKind.Map
block:
multibench(cLibYaml100k, 100):
let res = nimlets_yaml.load(yaml100k)
assert res[0].objKind == nimlets_yaml.YamlObjKind.Map
proc writeResult(caption: string, num: int64) =
styledWriteLine(stdout, resetStyle, caption, fgGreen, $num, resetStyle, "μs")
setForegroundColor(fgWhite)
writeStyled "Benchmark: Processing YAML input\n"
writeStyled "================================\n"
writeStyled "1k input\n--------\n"
writeResult "NimYAML: ", cYaml1k div 1000
writeResult "LibYAML: ", cLibYaml1k div 1000
setForegroundColor(fgWhite)
writeStyled "10k input\n---------\n"
writeResult "NimYAML: ", cYaml10k div 1000
writeResult "LibYAML: ", cLibYaml10k div 1000
setForegroundColor(fgWhite)
writeStyled "100k input\n----------\n"
writeResult "NimYAML: ", cYaml100k div 1000
writeResult "LibYAML: ", cLibYaml100k div 1000

View File

@ -32,7 +32,7 @@ task bench, "Benchmarking":
--r
--w:off
--hints:off
setCommand "c", "bench/jsonBench"
setCommand "c", "bench/bench"
task clean, "Remove all generated files":
exec "rm -rf libyaml.* test/tests test/parsing test/lexing bench/json docout"