mirror of
https://github.com/waku-org/nwaku.git
synced 2025-02-22 20:08:40 +00:00
91 lines
1.7 KiB
Nim
91 lines
1.7 KiB
Nim
## East Asian Width - tr11 (http://www.unicode.org/reports/tr11/)
|
|
|
|
import algorithm
|
|
import strutils
|
|
|
|
import derived_data
|
|
import unicode_data
|
|
import two_stage_table
|
|
import utils
|
|
|
|
# Highest code point handled by this generator; `parseWidth` sizes its
# result as `maxCP + 1` entries.
const
  maxCP = 0x10_FFFF
|
|
|
|
type
  UnicodeWidth = enum  ## East Asian Width classes, one distinct bit each.
    # The trailing comment on each member is the abbreviation that
    # `widthMap` translates into that member's ordinal.
    uwdtAmbiguous = 0x01  # "A"
    uwdtFull      = 0x02  # "F"
    uwdtHalf      = 0x04  # "H"
    uwdtNarrow    = 0x08  # "Na"
    uwdtWide      = 0x10  # "W"
    uwdtNeutral   = 0x20  # "N"
|
|
|
|
proc widthMap(uwdt: string): int =
  ## Translates an East Asian Width abbreviation (`A`, `F`, `H`, `Na`,
  ## `W` or `N`) into the ordinal of the matching `UnicodeWidth` member.
  ##
  ## Raises `ValueError` for any other string.
  let width =
    case uwdt
    of "A": uwdtAmbiguous
    of "F": uwdtFull
    of "H": uwdtHalf
    of "Na": uwdtNarrow
    of "W": uwdtWide
    of "N": uwdtNeutral
    else:
      raise newException(ValueError, "Bad value: " & uwdt)
  ord(width)
|
|
|
|
proc parseWidth(data: seq[seq[string]]): seq[int] =
  ## Builds a sequence of `maxCP + 1` width ordinals from `data`.
  ##
  ## Every slot starts out as the ordinal for `"N"`; for each index of
  ## `data` whose row is non-empty, the slot at that index is replaced by
  ## `widthMap` of the row's first field. `widthMap` raises `ValueError`
  ## on an unknown abbreviation.
  # NOTE(review): presumably `data` is indexed by code point and has at
  # most `maxCP + 1` rows -- confirm against `parseUDDNoDups`.
  let neutral = widthMap("N")
  result = newSeq[int](maxCP + 1)
  fill(result, neutral)
  for cp, fields in data:
    if fields.len > 0:
      result[cp] = widthMap(fields[0])
|
|
|
|
proc parse(filePath: string): seq[int] =
  ## Runs `parseUDDNoDups` on `filePath` and converts its output with
  ## `parseWidth`.
  let rows = parseUDDNoDups(filePath)
  parseWidth(rows)
|
|
|
|
proc build(data: seq[int]): Stages[int] =
  ## Thin wrapper: passes `data` to `buildTwoStageTable` and returns the
  ## resulting stages.
  result = data.buildTwoStageTable()
|
|
|
|
# Text of the generated module. The `$#` placeholders are filled
# positionally by the `%` call in the `when isMainModule` block: six enum
# ordinals, then the two `prettyTable` outputs, then the block size.
# NOTE(review): as it appears here the literal has no indentation and
# contains `|` separator lines -- this looks like a copy artifact; confirm
# against the upstream file before regenerating output from it.
const dataTemplate = """## This is auto-generated. Do not modify it
|
|
|
|
type
|
|
UnicodeWidth* = enum
|
|
uwdtAmbiguous = $#
|
|
uwdtFull = $#
|
|
uwdtHalf = $#
|
|
uwdtNarrow = $#
|
|
uwdtWide = $#
|
|
uwdtNeutral = $#
|
|
|
|
const
|
|
widthsIndices* = [
|
|
$#
|
|
]
|
|
widthsData* = [
|
|
$#
|
|
]
|
|
|
|
blockSize* = $#
|
|
"""
|
|
|
|
when isMainModule:
  # Generator entry point: parse the East Asian Width data file, build the
  # two-stage table, and write the rendered template to the output module.
  let
    tables = build(parse("./gen/UCD/EastAsianWidth.txt"))
    outFile = open("./src/unicodedb/widths_data.nim", fmWrite)
  try:
    # Order must match the `$#` placeholders in `dataTemplate`.
    let fields = [
      $ord(uwdtAmbiguous),
      $ord(uwdtFull),
      $ord(uwdtHalf),
      $ord(uwdtNarrow),
      $ord(uwdtWide),
      $ord(uwdtNeutral),
      prettyTable(tables.stage1, 15, "'i8"),
      prettyTable(tables.stage2, 15, "'i8"),
      $tables.blockSize]
    outFile.write(dataTemplate % fields)
  finally:
    # Always release the file handle, even if formatting or writing fails.
    outFile.close()
|