mirror of
https://github.com/waku-org/nwaku.git
synced 2025-02-16 00:47:24 +00:00
64 lines
1.5 KiB
Nim
64 lines
1.5 KiB
Nim
import strutils
|
|
|
|
import unicode_data
|
|
import derived_data
|
|
import min_perfect_hash
|
|
import utils
|
|
|
|
proc parseComps(
    decompsRaw: seq[string],
    exclude: seq[int] # todo: make it a set
  ): seq[Record[seq[int]]] =
  ## Builds the composition records from raw decomposition mappings.
  ##
  ## `decompsRaw` is iterated with `pairs`, so the index of each entry is
  ## used as the composed code point `cp`. An entry is skipped when it is
  ## empty, when `cp` is listed in `exclude`, or when the mapping contains
  ## `<` (a compatibility decomposition tag).
  ##
  ## Every remaining mapping must consist of exactly two space-separated
  ## hexadecimal code points `A B`. Each one produces a record with key
  ## `@[A, B]` and value `@[A, B, cp]`.
  ##
  ## Fails a `doAssert` when a mapping does not have exactly two parts, and
  ## raises `ValueError` (from `parseHexInt`) when a part is not valid hex.

  # Upper bound for the number of records: every non-empty entry is a
  # candidate, even though some of them are filtered out below.
  var maxCompSize = 0
  for dcp in decompsRaw:
    if dcp.len > 0:
      inc maxCompSize

  result = newSeqOfCap[Record[seq[int]]](maxCompSize)
  for cp, dcp in pairs(decompsRaw):
    if dcp.len == 0:
      continue
    if cp in exclude:
      continue
    if "<" in dcp: # Compatibility decomp
      continue
    let dcpParts = dcp.split(" ")
    # `doAssert` instead of `assert`: the check must also run in builds that
    # disable assertions, otherwise the indexing below is unchecked.
    doAssert dcpParts.len == 2,
      "expected exactly two code points in the decomposition of " &
      $cp & ", got: " & dcp
    let
      # `parseHexInt` accepts plain hex digits; no "0x" prefix is required.
      cpA = parseHexInt(dcpParts[0])
      cpB = parseHexInt(dcpParts[1])
    result.add((
      key: @[cpA, cpB],
      value: @[cpA, cpB, cp]))
|
|
|
|
# Source text of the generated module. The first `$#` receives the hash
# table rows and the second `$#` the value rows (see the `%` call in the
# main section). The entries live in a `const` section, so they have to be
# indented, and the placeholders sit four spaces deep so that the rows
# substituted for them form valid, indented array bodies.
const compsTemplate = """## This is auto-generated. Do not modify it

const
  compsHashes* = [
    $#
  ]
  compsValues* = [
    $#
  ]
"""

when isMainModule:
  # Parse the Unicode data files and turn the resulting records into the
  # `h` / `v` tables returned by `mph`.
  # NOTE(review): `mph` is imported from min_perfect_hash; presumably it
  # builds a minimal perfect hash - confirm against that module.
  var decomps = parseComps(
    parseUDDecomps("./gen/UCD/UnicodeData.txt"),
    parseDNPExclusion("./gen/UCD/DerivedNormalizationProps.txt"))
  var mphTables = mph(decomps)
  # Print the lookup result for the pair [65, 768] (U+0041, U+0300).
  echo mphLookup(mphTables.h, mphTables.v, [65, 768])

  # Render every value triple as one array literal row. Each element except
  # the last gets an `'i32` suffix from the join separator.
  var compValues = newSeq[string](len(mphTables.v))
  for i, v in mphTables.v:
    # Must hold in every build mode, hence `doAssert` rather than `assert`.
    doAssert len(v) == 3, "composition value must hold three code points"
    compValues[i] = "[$#]" % join(v, "'i32, ")

  # Rows are separated by a newline plus four spaces so that continuation
  # rows line up with the placeholder indentation used in `compsTemplate`.
  # NOTE(review): the row layout produced by `prettyTable` (from utils) is
  # not visible here - confirm it uses a compatible indentation.
  let generated = compsTemplate % [
    prettyTable(mphTables.h, 15, "'i16"),
    join(compValues, ",\L    ")]

  # `writeFile` opens, writes and closes the file, raising `IOError` when
  # the file cannot be opened.
  writeFile("./src/unicodedb/compositions_data.nim", generated)
|