mirror of
https://github.com/status-im/NimYAML.git
synced 2025-01-12 04:24:16 +00:00
Added benchmark. Fixed processing empty flow sets.
This commit is contained in:
parent
7b6d9d18c5
commit
d828f4ddc5
2
.gitignore
vendored
2
.gitignore
vendored
@ -7,3 +7,5 @@ test/*.pdb
|
|||||||
test/*.ilk
|
test/*.ilk
|
||||||
yaml.html
|
yaml.html
|
||||||
libyaml.*
|
libyaml.*
|
||||||
|
bench/json
|
||||||
|
|
||||||
|
180
bench/json.nim
Normal file
180
bench/json.nim
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
import "../yaml"
|
||||||
|
|
||||||
|
import math, strutils, stopwatch
|
||||||
|
|
||||||
|
type
  ObjectKind = enum
    otMap       ## container written with `{` and `}`; items carry a key
    otSequence  ## container written with `[` and `]`; items have no key

  # One container that is currently open while a document is generated:
  # `kind` selects the bracket style, `len` counts the items added so far.
  Level = tuple[kind: ObjectKind, len: int]
|
||||||
|
|
||||||
|
proc genString(maxLen: int): string =
  ## Returns a random string literal wrapped in double quotes.
  ##
  ## A length is drawn with `random(maxLen)`; characters are then appended
  ## until one less than that length has been consumed. Every character is
  ## drawn as `random(95) + 32`. A double quote or backslash is emitted with
  ## a preceding backslash and consumes two units of the length budget
  ## instead of one.
  const
    firstCode = 32
    codeCount = 127 - 32
  let budget = random(maxLen) - 1
  var used = 0
  result = "\""
  while used < budget:
    let ch = chr(random(codeCount) + firstCode)
    if ch in {'"', '\\'}:
      # escaped characters take two output characters
      result.add('\\')
      used += 2
    else:
      used += 1
    result.add(ch)
  result.add('"')
|
||||||
|
|
||||||
|
proc genJsonString(size: int, maxStringLen: int): string =
  ## Generates a random, indented JSON document whose root is a map.
  ##
  ## `size` is the target size in KiB: as soon as the running size count
  ## exceeds `size * 1024`, every container that is still open gets closed
  ## and generation ends. `maxStringLen` is passed to `genString` for every
  ## key and every string value.
  ##
  ## The random number generator is re-seeded with `size * maxStringLen`
  ## on every call.
  randomize(size * maxStringLen)

  let byteBudget = size * 1024
  var
    stack: seq[Level] = @[(kind: otMap, len: 0)]
    indent = 2
    written = 1     # accounts for the opening "{" below
    fresh = true    # true while the innermost container has no items yet
  result = "{"

  while stack.len > 0:
    let
      top = stack.high
      # closing gets more likely with every item added and with depth
      closeChance = float(stack[top].len + top) * 0.025
      wantClose = random(1.0) <= closeChance

    # The root is only closed once the size budget is exhausted.
    if written > byteBudget or (wantClose and stack.len > 1):
      indent -= 2
      if fresh:
        # empty container: the closing bracket directly follows the opener
        fresh = false
      else:
        result.add("\x0A")
        result.add(repeat(' ', indent))
        written += indent + 1
      result.add(if stack[top].kind == otMap: '}' else: ']')
      written += 1
      discard stack.pop()
      continue

    stack[top].len += 1

    # The first item only needs a line break, later ones a comma as well.
    let separator = if fresh: "\x0A" else: ",\x0A"
    fresh = false
    result.add(separator)
    result.add(repeat(' ', indent))
    written += indent + separator.len

    if stack[top].kind == otMap:
      let key = genString(maxStringLen)
      result.add(key)
      result.add(": ")
      written += key.len + 2

    # Nested containers become rarer the deeper the current position is.
    let nestChance = 0.8 / float(stack.len * stack.len)
    if random(1.0) <= nestChance:
      let nestedKind = if random(2) == 0: otMap else: otSequence
      result.add(if nestedKind == otMap: '{' else: '[')
      written += 1
      stack.add((kind: nestedKind, len: 0))
      fresh = true
      indent += 2
    else:
      var value: string
      case random(11)
      of 0..5:
        value = genString(maxStringLen)
      of 6..7:
        value = $random(32000)
      of 8..9:
        value = $(random(424242.4242) - 212121.21)
      of 10:
        case random(3)
        of 0: value = "true"
        of 1: value = "false"
        of 2: value = "null"
        else: discard
      else:
        discard
      result.add(value)
      written += value.len
|
||||||
|
|
||||||
|
# Benchmark driver: parses three generated JSON documents with the YAML
# parser and with `parseJson`, then prints the elapsed time of each run.

# One clock per parser / input size combination; each is handed to `bench`
# below (presumably provided by the imported `stopwatch` module -- confirm).
var cYaml1k, cYaml10k, cYaml100k, cJson1k, cJson10k, cJson100k: clock

# NOTE(review): genJsonString calls randomize() itself with a seed derived
# from its arguments, so this seed does not determine the generated
# documents.
randomize(42)
let
  # first argument is the target size in KiB, second the maximum string
  # length passed on to genString
  json1k = genJsonString(1, 32)
  json10k = genJsonString(10, 32)
  json100k = genJsonString(100, 32)

# The stream is created outside of the timed section.
var s = newStringStream(json1k)

block:
  bench(cYaml1k):
    let res = parseToJson(s)
    # The result is indexed, so presumably one node per document -- confirm.
    # NOTE(review): `assert` may be compiled out in release builds depending
    # on compiler version/configuration -- confirm this check still runs.
    assert res[0].kind == JObject

s = newStringStream(json10k)

block:
  bench(cYaml10k):
    let res = parseToJson(s)
    assert res[0].kind == JObject

s = newStringStream(json100k)

block:
  bench(cYaml100k):
    let res = parseToJson(s)
    assert res[0].kind == JObject

# Same three inputs, parsed directly from the string with parseJson.
block:
  bench(cJson1k):
    let res = parseJson(json1k)
    assert res.kind == JObject

block:
  bench(cJson10k):
    let res = parseJson(json10k)
    assert res.kind == JObject

block:
  bench(cJson100k):
    let res = parseJson(json100k)
    assert res.kind == JObject

# Report: nanoseconds are divided by 1000 to print microseconds.
echo "Benchmark: Processing JSON input with YAML versus Nim's JSON implementation"
echo "==========================================================================="
echo "YAML (1k input): ", cYaml1k.nanoseconds div 1000, "μs"
echo "JSON (1k input): ", cJson1k.nanoseconds div 1000, "μs"
echo "YAML (10k input): ", cYaml10k.nanoseconds div 1000, "μs"
echo "JSON (10k input): ", cJson10k.nanoseconds div 1000, "μs"
echo "YAML (100k input): ", cYaml100k.nanoseconds div 1000, "μs"
echo "JSON (100k input): ", cJson100k.nanoseconds div 1000, "μs"
|
@ -21,6 +21,11 @@ task parserTests, "Run parser tests":
|
|||||||
task doc, "Generate documentation":
|
task doc, "Generate documentation":
|
||||||
setCommand "doc2", "yaml"
|
setCommand "doc2", "yaml"
|
||||||
|
|
||||||
|
task bench, "Benchmarking":
  # Compile bench/json with the `release` symbol defined and run the
  # resulting binary right after compilation (`--r`).
  --d:release
  --r
  setCommand "c", "bench/json"
|
||||||
|
|
||||||
task clean, "Remove all generated files":
|
task clean, "Remove all generated files":
|
||||||
exec "rm -f yaml.html libyaml.* test/tests test/parsing test/lexing"
|
exec "rm -f yaml.html libyaml.* test/tests test/parsing test/lexing"
|
||||||
setCommand "nop"
|
setCommand "nop"
|
@ -164,8 +164,8 @@ proc open(my: var YamlLexer, input: Stream) =
|
|||||||
my.indentations = newSeq[int]()
|
my.indentations = newSeq[int]()
|
||||||
my.detect_encoding()
|
my.detect_encoding()
|
||||||
my.content = ""
|
my.content = ""
|
||||||
my.line = 0
|
my.line = 1
|
||||||
my.column = 0
|
my.column = 1
|
||||||
|
|
||||||
template yieldToken(kind: YamlLexerToken) {.dirty.} =
|
template yieldToken(kind: YamlLexerToken) {.dirty.} =
|
||||||
when defined(yamlDebug):
|
when defined(yamlDebug):
|
||||||
@ -205,6 +205,7 @@ template yieldLexerError(message: string) {.dirty.} =
|
|||||||
when defined(yamlDebug):
|
when defined(yamlDebug):
|
||||||
echo "Lexer error: " & message
|
echo "Lexer error: " & message
|
||||||
my.content = message
|
my.content = message
|
||||||
|
my.column = curPos
|
||||||
yield tError
|
yield tError
|
||||||
my.content = ""
|
my.content = ""
|
||||||
|
|
||||||
@ -212,13 +213,13 @@ template handleCR() {.dirty.} =
|
|||||||
my.bufpos = lexbase.handleCR(my, my.bufpos + my.charoffset) + my.charlen -
|
my.bufpos = lexbase.handleCR(my, my.bufpos + my.charoffset) + my.charlen -
|
||||||
my.charoffset - 1
|
my.charoffset - 1
|
||||||
my.line.inc()
|
my.line.inc()
|
||||||
curPos = 0
|
curPos = 1
|
||||||
|
|
||||||
template handleLF() {.dirty.} =
|
template handleLF() {.dirty.} =
|
||||||
my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) +
|
my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) +
|
||||||
my.charlen - my.charoffset - 1
|
my.charlen - my.charoffset - 1
|
||||||
my.line.inc()
|
my.line.inc()
|
||||||
curPos = 0
|
curPos = 1
|
||||||
|
|
||||||
template `or`(r: Rune, i: int): Rune =
|
template `or`(r: Rune, i: int): Rune =
|
||||||
cast[Rune](cast[int](r) or i)
|
cast[Rune](cast[int](r) or i)
|
||||||
@ -376,7 +377,7 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
|
|||||||
blockScalarIndentation = -1
|
blockScalarIndentation = -1
|
||||||
# when parsing a block scalar, this will be set to the indentation
|
# when parsing a block scalar, this will be set to the indentation
|
||||||
# of the line that starts the flow scalar.
|
# of the line that starts the flow scalar.
|
||||||
curPos = 0
|
curPos = 1
|
||||||
|
|
||||||
while true:
|
while true:
|
||||||
let c = my.buf[my.bufpos + my.charoffset]
|
let c = my.buf[my.bufpos + my.charoffset]
|
||||||
@ -410,12 +411,12 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
|
|||||||
of ylInitialContent:
|
of ylInitialContent:
|
||||||
case c
|
case c
|
||||||
of '-':
|
of '-':
|
||||||
my.column = 0
|
my.column = curPos
|
||||||
state = ylDashes
|
state = ylDashes
|
||||||
continue
|
continue
|
||||||
of '.':
|
of '.':
|
||||||
yieldToken(tLineStart)
|
yieldToken(tLineStart)
|
||||||
my.column = 0
|
my.column = curPos
|
||||||
state = ylDots
|
state = ylDots
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
|
@ -766,12 +766,12 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
|
|||||||
else:
|
else:
|
||||||
discard
|
discard
|
||||||
|
|
||||||
if lex.content.len > level.indentationColumn:
|
if lex.content.len > level.indentationColumn - 1:
|
||||||
if blockScalar == bsFolded:
|
if blockScalar == bsFolded:
|
||||||
if blockScalarTrailing == " ":
|
if blockScalarTrailing == " ":
|
||||||
blockScalarTrailing = "\x0A"
|
blockScalarTrailing = "\x0A"
|
||||||
scalarCache &= blockScalarTrailing &
|
scalarCache &= blockScalarTrailing &
|
||||||
lex.content[level.indentationColumn..^1]
|
lex.content[level.indentationColumn - 1..^1]
|
||||||
blockScalarTrailing = ""
|
blockScalarTrailing = ""
|
||||||
|
|
||||||
of tScalarPart:
|
of tScalarPart:
|
||||||
@ -859,8 +859,8 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
|
|||||||
if level.mode == mUnknown:
|
if level.mode == mUnknown:
|
||||||
yieldScalar("", yTypeUnknown)
|
yieldScalar("", yTypeUnknown)
|
||||||
level = ancestry.pop()
|
level = ancestry.pop()
|
||||||
if level.mode != mFlowMapValue:
|
if level.mode != mFlowMapKey:
|
||||||
yieldUnexpectedToken()
|
yieldUnexpectedToken($level.mode)
|
||||||
yield YamlStreamEvent(kind: yamlEndMap)
|
yield YamlStreamEvent(kind: yamlEndMap)
|
||||||
if ancestry.len > 0:
|
if ancestry.len > 0:
|
||||||
level = ancestry.pop()
|
level = ancestry.pop()
|
||||||
@ -1002,3 +1002,5 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
|
|||||||
else:
|
else:
|
||||||
yieldUnexpectedToken("document end")
|
yieldUnexpectedToken("document end")
|
||||||
token = nextToken(lex)
|
token = nextToken(lex)
|
||||||
|
if token == tError:
|
||||||
|
yieldError("Lexer error: " & lex.content)
|
@ -1,5 +1,4 @@
|
|||||||
import "../yaml"
|
import "../yaml"
|
||||||
import streams, tables
|
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user