multicodec/validate.py

113 lines
3.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
2019-01-18 13:47:49 +00:00
import csv
import sys
2019-04-21 21:05:19 +00:00
import re
2019-01-18 13:47:49 +00:00
# Some names are intentional duplicates of one another.  Each set below
# groups names that are aliases of each other.
ALIAS_TABLE = [{"ipfs", "p2p"}]
def check(fname='table.csv'):
    """Validate the codec table CSV and report every problem found.

    The first row is treated as the header; it is not validated itself and
    only defines the offset at which each column's content must start.
    Every later row must:

    * start each column's content at the same offset as the header
      (an empty trailing column is exempt),
    * contain exactly five fields, the first being the name and the third
      the code,
    * have a name matching ``[a-z][a-z0-9_-]+``,
    * have a code written as ``0x`` followed by lower-case hex byte pairs,
    * not have a code lower than the code of the previous row,
    * not reuse a name, and not reuse a code unless both names belong to
      the same set in ``ALIAS_TABLE``,
    * not use a code in the Private Use Area ``0x300000``-``0x3fffff``.

    Each failure is printed to stderr as ``row N: message`` (``N`` is the
    zero-based row index, the header being row 0) and validation continues
    with the next row, so a single run reports all bad rows.

    Args:
        fname: Path of the CSV file to validate.

    Returns:
        True if no row failed validation, False otherwise.

    Raises:
        OSError: If the file cannot be opened.
        csv.Error: If the file is not well-formed CSV (the reader runs in
            strict mode); this is not reported per row.
    """
    class CheckError(Exception):
        """A validation failure affecting a single row."""

    # Map every aliased name to the whole set of names it may share a code with.
    aliases = {}
    for nameset in ALIAS_TABLE:
        for name in nameset:
            aliases[name] = nameset

    # Compiled once here instead of on every row.
    code_pattern = re.compile(r"^\s*0x([0-9a-f][0-9a-f])+$")
    name_pattern = re.compile(r"^[a-z][a-z0-9_-]+$")

    success = True
    # newline="" hands line-ending handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open(fname, newline="") as table:
        tablereader = csv.reader(table, strict=True, skipinitialspace=False)
        codes = {}          # code -> first name seen with that code
        names = {}          # name -> code
        headerOffsets = []  # content start offset of header columns 1..n
        lastCode = -1

        for line, row in enumerate(tablereader):
            try:
                # Check the padding of each column.  ``offset`` is the running
                # length of all fields so far (separators excluded), so
                # ``thisOffset`` is where this column's content begins.
                offset = 0
                for col, item in enumerate(row):
                    le = len(item)
                    if col == 0:  # first column 0 has no padding
                        offset = le
                        continue
                    offset = offset + le
                    thisOffset = offset - len(item.lstrip())
                    if line == 0:  # header line sets the standard
                        headerOffsets.append(thisOffset)
                    elif col < len(headerOffsets) or le != 0:
                        # A non-empty column past the end of the header has
                        # nothing to be aligned with; report it instead of
                        # failing with an IndexError on the lookup below.
                        if col > len(headerOffsets):
                            raise CheckError(
                                f"column {col} has no matching header column"
                            )
                        if thisOffset != headerOffsets[col - 1]:
                            raise CheckError(f"bad spacing at column {col}")

                # Skip the header
                if line == 0:
                    continue

                # Check for invalid rows
                if len(row) != 5:
                    raise CheckError(f"expected 5 items, got {len(row)}")

                [name, _, code, _, _] = row

                # Check for a name
                if not name:
                    raise CheckError(f"empty protocol name for code '{code}'")

                # Check code format
                if not code_pattern.match(code):
                    raise CheckError(
                        f"code for '{name}' does not look like a byte sequence: '{code}'"
                    )

                # Check name format
                if not name_pattern.match(name):
                    raise CheckError(f"name '{name}' violates naming restrictions")

                # Parse the code (int() tolerates the leading padding)
                try:
                    code = int(code, 16)
                except ValueError as e:
                    raise CheckError(
                        f"failed to parse code '{code}' for '{name}': {e}"
                    ) from e

                # Check codes are ascending.  Remember the previous code
                # before updating lastCode so the message can show it.
                previousCode = lastCode
                lastCode = code
                if code < previousCode:
                    raise CheckError(
                        f"code {code} is out of order, previous code was {previousCode}"
                    )

                # Finally, check for duplicates
                if name in names:
                    raise CheckError(
                        f"found duplicate {name}: {code} and {names[name]}"
                    )
                names[name] = code

                if code in codes:
                    dup = codes[code]
                    if name in aliases and dup in aliases[name]:
                        # Skip aliased names
                        continue
                    raise CheckError(
                        f"found duplicate for code {hex(code)} "
                        f"for '{dup}' and '{name}'"
                    )
                codes[code] = name

                # Reserved Code Range: Private Use Area.
                # Do not permit any codes in this range.
                if 0x300000 <= code < 0x400000:
                    raise CheckError(
                        f"found code in Private Use Area: {hex(code)} with name '{name}'"
                    )
            except CheckError as e:
                success = False
                print(f"row {line}: {e}", file=sys.stderr)

    return success
if __name__ == "__main__":
    # Report the validation outcome to the calling process through the
    # exit status: 0 when check() succeeds, 1 when it reports problems.
    sys.exit(0 if check() else 1)