From 167c25af72eea69ee88708a2c82bf95801ffaa44 Mon Sep 17 00:00:00 2001
From: Felix Krause
Date: Sun, 20 Mar 2016 10:32:14 +0100
Subject: [PATCH] Added benchmark for processing YAML input

---
 .gitignore             |   2 +
 bench/bench.nim        |   1 +
 bench/common.nim       |  24 +++++
 bench/jsonBench.nim    |  19 +---
 bench/nimlets_yaml.nim |  13 ++-
 bench/yamlBench.nim    | 206 +++++++++++++++++++++++++++++++++++++++++
 config.nims            |   2 +-
 7 files changed, 241 insertions(+), 26 deletions(-)
 create mode 100644 bench/bench.nim
 create mode 100644 bench/common.nim
 create mode 100644 bench/yamlBench.nim

diff --git a/.gitignore b/.gitignore
index d7182fc..d8eb3ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,8 @@ test/*.pdb
 test/*.ilk
 server/server
 bench/jsonBench
+bench/yamlBench
+bench/bench
 yaml.html
 libyaml.dylib
 libyaml.so
diff --git a/bench/bench.nim b/bench/bench.nim
new file mode 100644
index 0000000..a02a5a5
--- /dev/null
+++ b/bench/bench.nim
@@ -0,0 +1 @@
+import jsonBench, yamlBench
\ No newline at end of file
diff --git a/bench/common.nim b/bench/common.nim
new file mode 100644
index 0000000..4c01a4d
--- /dev/null
+++ b/bench/common.nim
@@ -0,0 +1,24 @@
+import stopwatch
+
+template multiBench*(nanosecs: int64, times: int, body: stmt): stmt =
+  ## Executes `body` `times` times, timing each run, and assigns a trimmed
+  ## mean of the measured durations (in nanoseconds) to `nanosecs`.
+  ## Only `stopwatch` is imported here; `sort` and `cmp` are expected to be
+  ## in scope where the template is used (the benchmark modules import
+  ## `algorithm`).
+  assert(times mod 2 == 0)
+  var arr: array[0..times - 1, int64]
+  for i in countup(0, times - 1):
+    var c: clock
+    bench(c):
+      body
+    arr[i] = c.nanoseconds()
+  sort(arr, cmp)
+  # ignore lowest and highest 10%
+  let tenth: int = times div 10
+  let lowest = arr[tenth]
+  # accumulate offsets from `lowest`; arr[tenth] itself contributes 0
+  var totaldiff = 0.int64
+  for i in countup(tenth + 1, times - tenth - 1):
+    totaldiff += arr[i] - lowest
+  nanosecs = lowest + totaldiff div (times - 2 * tenth)
\ No newline at end of file
diff --git a/bench/jsonBench.nim b/bench/jsonBench.nim
index d846ed9..06b2867 100644
--- a/bench/jsonBench.nim
+++ b/bench/jsonBench.nim
@@ -1,4 +1,4 @@
-import "../yaml"
+import "../yaml", common
 
 from nimlets_yaml import objKind
 
@@ -6,23 +6,6 @@ import math, strutils, stopwatch, terminal, algorithm
 
 proc cmp(left, right: clock): int = cmp(left.nanoseconds(), right.nanoseconds())
 
-template multiBench(nanosecs: int64, times: int, body: stmt): stmt =
-  assert(times mod 2 == 0)
-  var arr: array[0..times - 1, int64]
-  for i in countup(0, times - 1):
-    var c: clock
-    bench(c):
-      body
-    arr[i] = c.nanoseconds()
-  sort(arr, cmp)
-  # ignore lowest and highest 10%
-  let tenth: int = times div 10
-  let lowest = arr[tenth]
-  var totaldiff = 0.int64
-  for i in countup(tenth + 1, times - tenth - 1):
-    totaldiff += arr[i] - lowest
-  nanosecs = lowest + totaldiff div (times - 2 * tenth)
-
 type
   ObjectKind = enum
     otMap, otSequence
diff --git a/bench/nimlets_yaml.nim b/bench/nimlets_yaml.nim
index d264d8c..afd465a 100644
--- a/bench/nimlets_yaml.nim
+++ b/bench/nimlets_yaml.nim
@@ -117,7 +117,7 @@ proc events(self: LoadContext): iterator(): yaml_event_t =
     var event: yaml_event_t
     while true:
       if yaml_parser_parse(addr self.parser, addr event) != 1:
-        raise newException(Exception, "Malformed input: " & $self.parser.error)
+        raise newException(Exception, $self.parser.error & ": " & $self.parser.problem)
 
       if event.typ == YAML_NO_EVENT:
         break
@@ -148,12 +148,11 @@ recognize[YAML_SCALAR_EVENT] = proc(self: LoadContext, event: yaml_event_t): Yam
     of YAML_NULL_TAG: return YamlObj(kind : YamlObjKind.Null)
     of YAML_BOOL_TAG:
       result = YamlObj(kind : YamlObjKind.Bool)
-      if event.data.scalar.value == "true":
-        result.boolVal = true
-      elif event.data.scalar.value == "false":
-        result.boolVal = false
-      else:
-        assert(false, "Unknown boolean value " & $event.data.scalar.value)
+      case $event.data.scalar.value
+      of "true": result.boolVal = true
+      of "false": result.boolVal = false
+      else: assert(false,
+          "Unknown boolean value \"" & $event.data.scalar.value & '\"')
     of YAML_INT_TAG:
       return YamlObj(kind : YamlObjKind.Int, intVal : parseInt($event.data.scalar.value))
     of YAML_FLOAT_TAG:
diff --git a/bench/yamlBench.nim b/bench/yamlBench.nim
new file mode 100644
index 0000000..88fbe3a
--- /dev/null
+++ b/bench/yamlBench.nim
@@ -0,0 +1,206 @@
+import "../yaml", common
+import math, strutils, stopwatch, terminal, algorithm
+
+from nimlets_yaml import objKind
+
+type
+  # Describes a collection that is currently open during generation: its
+  # kind and the number of items generated in it so far.
+  Level = tuple
+    kind: YamlNodeKind
+    len: int
+
+proc genString(maxLen: int): string =
+  ## Returns a random string of fewer than `maxLen` characters with
+  ## character codes in 32 .. 126.
+  let len = random(maxLen)
+  result = ""
+  for i in 1 .. len: result.add(cast[char](random(127 - 32) + 32))
+
+proc genBlockString(): string =
+  ## Returns 5 .. 14 random lines of 32 .. 43 characters each (character
+  ## codes 33 .. 126). Every line is terminated by a space or by a
+  ## linefeed, chosen once per call; a final linefeed is appended.
+  let lines = 5 + random(10)
+  let flow = random(2) == 0
+  result = ""
+  for i in 1 .. lines:
+    let lineLen = 32 + random(12)
+    # start at 1 rather than at the outer index so that each line really
+    # gets lineLen characters
+    for j in 1 .. lineLen: result.add(cast[char](random(127 - 33) + 33))
+    result.add(if flow: ' ' else: '\l')
+  result.add('\l')
+
+proc genKey(): string =
+  ## Returns a random mapping key. With a probability of about 0.75 it
+  ## consists of 1 .. 24 ASCII letters and digits; otherwise it is the
+  ## result of `genString(31)` followed by one uppercase ASCII letter.
+  let genPossiblePlainKey = random(1.0) < 0.75
+  if genPossiblePlainKey:
+    result = ""
+    let len = random(24) + 1
+    for i in 1 .. len:
+      let c = random(26 + 26 + 10)
+      if c < 26: result.add(char(c + 65))
+      elif c < 52: result.add(char(c + 97 - 26))
+      else: result.add(char(c + 48 - 52))
+  else: result = genString(31) & char(random(26) + 65)
+
+proc genYamlString(size: int, maxStringLen: int,
+                   style: PresentationStyle): string =
+  ## Generates a random YAML string.
+  ## size is in KiB, maxStringLen in characters.
+  ## The random number generator is seeded from the three arguments.
+
+  randomize(size * maxStringLen * ord(style))
+
+  let targetSize = size * 1024
+  # `input` yields a random tree of mappings, sequences and scalars; once
+  # curSize exceeds targetSize it only closes the collections still open.
+  var
+    target = newStringStream()
+    input = iterator(): YamlStreamEvent =
+      var
+        levels = newSeq[Level]()
+        curSize = 1
+      levels.add((kind: yMapping, len: 0))
+      yield startDocEvent()
+      yield startMapEvent()
+
+      while levels.len > 0:
+        let
+          objectCloseProbability =
+            float(levels[levels.high].len + levels.high) * 0.025
+          closeObject = random(1.0) <= objectCloseProbability
+
+        if (closeObject and levels.len > 1) or curSize > targetSize:
+          case levels[levels.high].kind
+          of yMapping: yield endMapEvent()
+          of ySequence: yield endSeqEvent()
+          else: assert(false)
+          curSize += 1
+          discard levels.pop()
+          continue
+
+        levels[levels.high].len += 1
+        if levels[levels.high].kind == yMapping:
+          let key = genKey()
+          yield scalarEvent(key)
+
+        let
+          objectValueProbability =
+            0.8 / float(levels.len * levels.len)
+          generateObjectValue = random(1.0) <= objectValueProbability
+          hasTag = random(2) == 0
+        var tag = yTagQuestionMark
+
+        if generateObjectValue:
+          let objectKind = if random(3) == 0: ySequence else: yMapping
+          case objectKind
+          of yMapping:
+            if hasTag: tag = yTagMapping
+            yield startMapEvent(tag)
+          of ySequence:
+            if hasTag: tag = yTagSequence
+            yield startSeqEvent(tag)
+          else: assert(false)
+          curSize += 1
+          levels.add((kind: objectKind, len: 0))
+        else:
+          var s: string
+          case random(11)
+          of 0..4:
+            s = genString(maxStringLen)
+            if hasTag: tag = yTagString
+          of 5:
+            s = genBlockString()
+          of 6..7:
+            s = $random(32000)
+            if hasTag: tag = yTagInteger
+          of 8..9:
+            s = $(random(424242.4242) - 212121.21)
+            if hasTag: tag = yTagFloat
+          of 10:
+            case random(3)
+            of 0:
+              s = "true"
+              if hasTag: tag = yTagBoolean
+            of 1:
+              s = "false"
+              if hasTag: tag = yTagBoolean
+            of 2:
+              s = "null"
+              if hasTag: tag = yTagNull
+            else: discard
+          else: discard
+
+          yield scalarEvent(s, tag)
+          curSize += s.len
+      yield endDocEvent()
+  var yStream = initYamlStream(input)
+  present(yStream, target, initExtendedTagLibrary(),
+          defineOptions(style=style, outputVersion=ov1_1))
+  result = target.data
+
+var
+  cYaml1k, cYaml10k, cYaml100k, cLibYaml1k, cLibYaml10k, cLibYaml100k: int64
+  yaml1k = genYamlString(1, 32, psDefault)
+  yaml10k = genYamlString(10, 32, psDefault)
+  yaml100k = genYamlString(100, 32, psDefault)
+  tagLib = initExtendedTagLibrary()
+  parser = newYamlParser(tagLib)
+
+block:
+  multiBench(cYaml1k, 100):
+    var s = newStringStream(yaml1k)
+    let res = loadDOM(s)
+    assert res.root.kind == yMapping
+
+block:
+  multiBench(cYaml10k, 100):
+    var
+      s = newStringStream(yaml10k)
+    let res = loadDOM(s)
+    assert res.root.kind == yMapping
+
+block:
+  multiBench(cYaml100k, 100):
+    var s = newStringStream(yaml100k)
+    let res = loadDOM(s)
+    assert res.root.kind == yMapping
+
+block:
+  multiBench(cLibYaml1k, 100):
+    let res = nimlets_yaml.load(yaml1k)
+    assert res[0].objKind == nimlets_yaml.YamlObjKind.Map
+
+block:
+  multiBench(cLibYaml10k, 100):
+    let res = nimlets_yaml.load(yaml10k)
+    assert res[0].objKind == nimlets_yaml.YamlObjKind.Map
+
+block:
+  multiBench(cLibYaml100k, 100):
+    let res = nimlets_yaml.load(yaml100k)
+    assert res[0].objKind == nimlets_yaml.YamlObjKind.Map
+
+proc writeResult(caption: string, num: int64) =
+  ## Writes `caption`, then `num` in green, then the unit "μs" to stdout.
+  styledWriteLine(stdout, resetStyle, caption, fgGreen, $num, resetStyle, "μs")
+
+setForegroundColor(fgWhite)
+
+writeStyled "Benchmark: Processing YAML input\n"
+writeStyled "================================\n"
+writeStyled "1k input\n--------\n"
+writeResult "NimYAML: ", cYaml1k div 1000
+writeResult "LibYAML: ", cLibYaml1k div 1000
+setForegroundColor(fgWhite)
+writeStyled "10k input\n---------\n"
+writeResult "NimYAML: ", cYaml10k div 1000
+writeResult "LibYAML: ", cLibYaml10k div 1000
+setForegroundColor(fgWhite)
+writeStyled "100k input\n----------\n"
+writeResult "NimYAML: ", cYaml100k div 1000
+writeResult "LibYAML: ", cLibYaml100k div 1000
\ No newline at end of file
diff --git a/config.nims b/config.nims
index 872da61..fe7eabb 100644
--- a/config.nims
+++ b/config.nims
@@ -32,7 +32,7 @@ task bench, "Benchmarking":
   --r
   --w:off
   --hints:off
-  setCommand "c", "bench/jsonBench"
+  setCommand "c", "bench/bench"
 
 task clean, "Remove all generated files":
   exec "rm -rf libyaml.* test/tests test/parsing test/lexing bench/json docout"