multicodec/import-mime.py

139 lines
3.7 KiB
Python
Executable File

#!/usr/bin/env python
import csv
import os
import urllib.request
import xml.etree.ElementTree as ET
# Base code of the block reserved for each top-level media type. The blocks
# are laid out back to back, 0x10000 codes apart, starting at 0x200000 in the
# order listed here.
sections = {
    top_level: 0x200000 + (slot << 16)
    for slot, top_level in enumerate((
        "application",
        "audio",
        "font",
        "image",
        "message",
        "model",
        "multipart",
        "text",
        "video",
    ))
}

# Prefix -> namespace URI mapping used for the ElementTree path queries.
ns = {"a": "http://www.iana.org/assignments"}

# Location of the IANA media-types registry in XML form.
source = "https://www.iana.org/assignments/media-types/media-types.xml"
class Table(list):
    """A CSV file held in memory as a list of rows (each a sequence of str).

    Cells are read with the padding after each delimiter stripped, and are
    written back padded so that the columns line up.
    """

    def __init__(self, fname='table.csv'):
        """Load every row of the CSV file *fname*.

        The path is remembered so that save() writes back to the same file.
        """
        super().__init__()
        self._fname = fname
        # newline='' hands line-ending handling to the csv module, as its
        # documentation requires for files passed to reader()/writer().
        with open(fname, newline='', encoding='utf-8') as table:
            # skipinitialspace drops the alignment padding that save()
            # inserts after each delimiter.
            self.extend(csv.reader(table, skipinitialspace=True))

    def save(self):
        """Write the rows back to the file with the columns aligned.

        Every cell except the first in a row is prefixed with enough spaces
        to pad the previous cell to its column's width, plus one separating
        space. The output goes to "<fname>.tmp" first and then replaces the
        original, so a failed write does not truncate the existing table.
        """
        # Widest cell per column index. Every index that occurs in any row
        # gets an entry, including columns whose cells are all empty.
        widths = {}
        for row in self:
            for i, cell in enumerate(row):
                widths[i] = max(widths.get(i, 0), len(cell))

        formatted = (
            [
                cell if i == 0
                else " " * (1 + widths[i - 1] - len(row[i - 1])) + cell
                for i, cell in enumerate(row)
            ]
            for row in self
        )

        tmpfname = self._fname + ".tmp"
        # NOTE(review): csv.writer's default line terminator is "\r\n";
        # confirm that is the line ending wanted for the table file.
        with open(tmpfname, 'w', newline='', encoding='utf-8') as table:
            csv.writer(table).writerows(formatted)
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises on Windows when the destination exists.
        os.replace(tmpfname, self._fname)
def formatCode(code: int) -> str:
    """Render *code* as a "0x"-prefixed, zero-padded lowercase hex string.

    The padding is two hex digits for every 7-bit group needed to hold the
    value (at least one group for zero), so the width tracks how many 7-bit
    groups the code occupies rather than how many 8-bit bytes.
    """
    if code >= 0:
        # ceil(bit_length / 7), with zero still taking one group.
        groups = max(1, (code.bit_length() + 6) // 7)
    else:
        # Negative values get no padding at all.
        groups = 0
    return "0x" + format(code, f"0{groups * 2}x")
def _mime_row_range(table):
    """Locate the contiguous run of "mimetype" rows in *table*.

    Returns (start, end) such that table[start:end] is that run. When the
    table has no mimetype rows both values are len(table), so assigning to
    the slice appends. Row 0 (the header) is never considered.

    Raises RuntimeError if a mimetype row appears after the run has ended,
    and ValueError (from unpacking) if a scanned row does not have exactly
    four cells.
    """
    start = end = len(table)
    for index, (_, tag, _, _) in enumerate(table[1:], start=1):
        if tag == "mimetype":
            start = index
            break
    for index, (_, tag, _, _) in enumerate(table[start:], start=start):
        if tag != "mimetype":
            end = index
            break
    for (_, tag, _, _) in table[end:]:
        if tag == "mimetype":
            raise RuntimeError(
                "did not expect an mimetype out of the mime range")
    return start, end


def _read_assigned(rows):
    """Validate the existing mimetype *rows* and index them by name.

    Returns (assigned, last_code). assigned maps each name to its
    (code, description). last_code maps each section to the highest code
    used so far in it, or to the section's base code when it has no entries.

    Raises RuntimeError when a name has an unknown top-level type or more
    than one "/", when a code lies outside its section's block, or when the
    codes within a section are not consecutive in table order.
    """
    assigned = {}
    last_code = sections.copy()
    for name, _, code, description in rows:
        code = int(code, 16)
        assigned[name] = (code, description)
        parts = name.split('/')
        section = parts[0]
        if section not in sections:
            raise RuntimeError(f"unknown mime base type {name}")
        if len(parts) == 1:
            # A name without a subtype is kept as-is but takes no part in
            # the per-section sequence check.
            continue
        if len(parts) != 2:
            raise RuntimeError(f"invalid mimetype {name}")
        last_code[section] += 1
        if (code & 0xff0000) != sections[section]:
            raise RuntimeError("wrong section for type")
        if last_code[section] != code:
            raise RuntimeError(
                f"expected code 0x{last_code[section]:x}, got 0x{code:x}")
    return assigned, last_code


def _fetch_registered_mimetypes():
    """Download the registry at `source` and list its media-type names.

    Returns the text of every registry/record/file element in document
    order. Raises RuntimeError if the document root is not the expected
    "media-types" registry.
    """
    with urllib.request.urlopen(source) as f:
        root = ET.parse(f).getroot()
    if root.get("id") != "media-types":
        raise RuntimeError("expected root node to have id 'media-types'")
    return [node.text
            for node in root.iterfind('./a:registry/a:record/a:file', ns)]


def _assign_new_codes(mimetypes, assigned, last_code):
    """Give every name in *mimetypes* not yet in *assigned* the next code.

    Both *assigned* and *last_code* are updated in place. New entries get an
    empty description. Names are validated like the existing table rows, so
    a bad registry entry fails with RuntimeError instead of a bare
    KeyError/ValueError or a code that spills into the next section.
    """
    for mimetype in mimetypes:
        if mimetype in assigned:
            continue
        # An element without text has .text == None; treat it as malformed.
        section, slash, _ = (mimetype or "").partition('/')
        if not slash:
            raise RuntimeError(f"invalid mimetype {mimetype}")
        if section not in sections:
            raise RuntimeError(f"unknown mime base type {mimetype}")
        code = last_code[section] + 1
        if (code & 0xff0000) != sections[section]:
            raise RuntimeError(
                f"no codes left in section {section} for {mimetype}")
        last_code[section] = code
        assigned[mimetype] = (code, "")


def main():
    """Merge the IANA media-type registry into the mimetype rows of table.csv.

    Existing mimetype rows keep their codes and descriptions. Every registry
    name that is not in the table yet gets the next free code in its
    top-level type's section. The mimetype rows are then rewritten sorted by
    code and the table is saved. Any validation failure raises before the
    file is touched.
    """
    table = Table("table.csv")
    mime_start, mime_end = _mime_row_range(table)
    assigned, last_code = _read_assigned(table[mime_start:mime_end])
    _assign_new_codes(_fetch_registered_mimetypes(), assigned, last_code)

    ordered = sorted(assigned.items(), key=lambda entry: entry[1][0])
    table[mime_start:mime_end] = [
        [name, "mimetype", formatCode(code), description]
        for name, (code, description) in ordered
    ]
    table.save()


if __name__ == "__main__":
    main()