475 lines
19 KiB
Python
475 lines
19 KiB
Python
|
#!/usr/bin/python3
|
||
|
#
|
||
|
# Copyright (c) 2016 The Khronos Group Inc.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
|
||
|
# Used for automatic reflow of Vulkan spec to satisfy the agreed layout to
|
||
|
# minimize git churn. Most of the logic has to to with detecting asciidoc
|
||
|
# markup or block types that *shouldn't* be reflowed (tables, code) and
|
||
|
# ignoring them. It's very likely there are many asciidoc constructs not yet
|
||
|
# accounted for in the script, our usage of asciidoc markup is intentionally
|
||
|
# somewhat limited.
|
||
|
|
||
|
# Usage: reflow.py [-overwrite] [-out dir] [-suffix str] files
|
||
|
# -overwrite updates in place (usually not desired, and risky)
|
||
|
# -out specifies directory to create output file in, default 'out'
|
||
|
# -suffix specifies suffix to add to output files, default ''
|
||
|
# files are asciidoc source files from the Vulkan spec to reflow.
|
||
|
|
||
|
# For error and file-loading interfaces only
|
||
|
from reflib import *
|
||
|
|
||
|
import argparse, copy, os, pdb, re, string, sys
|
||
|
|
||
|
# Markup that always ends a paragraph
|
||
|
# empty line or whitespace
|
||
|
# [block options]
|
||
|
# [[anchor]]
|
||
|
# // comment
|
||
|
# <<<< page break
|
||
|
# :attribute-setting
|
||
|
# macro-directive::terms
|
||
|
# + standalone list item continuation
|
||
|
# label:: labelled list - label must be standalone
|
||
|
endPara = re.compile('^( *|\[.*\]|//.*|<<<<|:.*|[a-z]+::.*|\+|.*::)$')
|
||
|
|
||
|
# Markup that's OK in a contiguous paragraph but otherwise passed through
|
||
|
# .anything
|
||
|
# === Section Titles
|
||
|
endParaContinue = re.compile('^(\..*|=+ .*)$')
|
||
|
|
||
|
# Markup for block delimiters whose contents *should* be reformatted
|
||
|
# -- (exactly two) (open block)
|
||
|
# **** (4 or more) (sidebar block - why do we have these?!)
|
||
|
# ==== (4 or more) (example block)
|
||
|
# ____ (4 or more) (quote block)
|
||
|
blockReflow = re.compile('^(--|[*=_]{4,})$')
|
||
|
|
||
|
# Markup for block delimiters whose contents should *not* be reformatted
|
||
|
# |=== (3 or more) (table)
|
||
|
# ++++ (4 or more) (passthrough block)
|
||
|
# .... (4 or more) (literal block)
|
||
|
# //// (4 or more) (comment block)
|
||
|
# ---- (4 or more) (listing block)
|
||
|
# **** (4 or more) (sidebar block)
|
||
|
blockPassthrough = re.compile('^(\|={3,}|[-+./]{4,})$')
|
||
|
|
||
|
# Markup for introducing bullet points (hanging paragraphs)
|
||
|
# * bullet
|
||
|
# ** bullet
|
||
|
# -- bullet
|
||
|
# . bullet
|
||
|
beginBullet = re.compile('^( *[*-]+ | *\. )')
|
||
|
|
||
|
# Text that (may) not end sentences
|
||
|
|
||
|
# A single letter followed by a period, typically a middle initial.
|
||
|
endInitial = re.compile('^[[:upper:]]\.$')
|
||
|
# An abbreviation, which doesn't (usually) end a line.
|
||
|
endAbbrev = re.compile('^(e\.g|i\.e|c\.f)\.$')
|
||
|
|
||
|
# State machine for reflowing.
|
||
|
#
|
||
|
# blockStack - The last element is a line with the asciidoc block delimiter
|
||
|
# that's currently in effect, such as
|
||
|
# '--', '----', '****', '======', or '+++++++++'.
|
||
|
# This affects whether or not the block contents should be formatted.
|
||
|
# reflowStack - The last element is True or False if the current blockStack
|
||
|
# contents should be reflowed
|
||
|
# margin - margin to reflow text to
|
||
|
# para - list of lines in the paragraph being accumulated. When this is
|
||
|
# non-empty, there is a current paragraph.
|
||
|
# leadIndent - indent level (in spaces) of the first line of a paragraph.
|
||
|
# hangIndent - indent level of the remaining lines of a paragraph.
|
||
|
# file - file pointer to write to
|
||
|
# filename - base name of file being read from
|
||
|
# lineNumber - line number being read from the input file
|
||
|
# breakPeriod - True if justification should break to a new line after
|
||
|
# the end of a sentence
|
||
|
# breakInitial - True if justification should break to a new
|
||
|
class ReflowState:
|
||
|
"""Represents the state of the reflow operation"""
|
||
|
def __init__(self, filename, margin = 76, file = sys.stdout, breakPeriod = True):
|
||
|
self.blockStack = [ None ]
|
||
|
self.reflowStack = [ True ]
|
||
|
self.margin = margin
|
||
|
self.para = []
|
||
|
self.leadIndent = 0
|
||
|
self.hangIndent = 0
|
||
|
self.file = file
|
||
|
self.filename = filename
|
||
|
self.lineNumber = 0
|
||
|
self.breakPeriod = breakPeriod
|
||
|
self.breakInitial = True
|
||
|
|
||
|
def incrLineNumber(self):
|
||
|
self.lineNumber = self.lineNumber + 1
|
||
|
|
||
|
# Print an array of lines with newlines already present
|
||
|
def printLines(self, lines):
|
||
|
logDiag(':: printLines:', len(lines), 'lines: ', lines[0], end='')
|
||
|
for line in lines:
|
||
|
print(line, file=self.file, end='')
|
||
|
|
||
|
# Returns True if word ends with a sentence-period, False otherwise. Allows
|
||
|
# for contraction cases which won't end a line:
|
||
|
# - A single letter (if breakInitial is True)
|
||
|
# - Abbreviations: 'c.f.', 'e.g.', 'i.e.' (or mixed-case versions)
|
||
|
def endSentence(self,word):
|
||
|
if (word[-1:] != '.' or
|
||
|
endAbbrev.match(word, re.IGNORECASE) or
|
||
|
(self.breakInitial and endInitial.match(word))):
|
||
|
return False
|
||
|
else:
|
||
|
return True
|
||
|
|
||
|
# Reflow the current paragraph, respecting the paragraph lead and
|
||
|
# hanging indentation levels. The algorithm also respects trailing '+'
|
||
|
# signs that indicate imbedded newlines, and will not reflow a very long
|
||
|
# word immediately after a bullet point.
|
||
|
def reflowPara(self):
|
||
|
logDiag('reflowPara lead indent = ', self.leadIndent,
|
||
|
'hangIndent =', self.hangIndent,
|
||
|
'para:', self.para[0], end='')
|
||
|
|
||
|
# Total words processed (we care about the *first* word vs. others)
|
||
|
wordCount = 0
|
||
|
|
||
|
# Tracks the *previous* word processed. It must not be empty.
|
||
|
prevWord = ' '
|
||
|
|
||
|
#import pdb; pdb.set_trace()
|
||
|
|
||
|
for line in self.para:
|
||
|
line = line.rstrip()
|
||
|
words = line.split()
|
||
|
|
||
|
# logDiag('reflowPara: input line =', line)
|
||
|
numWords = len(words) - 1
|
||
|
|
||
|
for i in range(0, numWords + 1):
|
||
|
word = words[i]
|
||
|
wordLen = len(word)
|
||
|
wordCount += 1
|
||
|
|
||
|
endEscape = False
|
||
|
if (i == numWords and word == '+'):
|
||
|
# Trailing ' +' must stay on the same line
|
||
|
endEscape = word
|
||
|
# logDiag('reflowPara last word of line =', word, 'prevWord =', prevWord, 'endEscape =', endEscape)
|
||
|
else:
|
||
|
True
|
||
|
# logDiag('reflowPara wordCount =', wordCount, 'word =', word, 'prevWord =', prevWord)
|
||
|
|
||
|
if wordCount == 1:
|
||
|
# The first word of the paragraph is treated specially.
|
||
|
# The loop logic becomes trickier if all this code is
|
||
|
# done prior to looping over lines and words, so all the
|
||
|
# setup logic is done here.
|
||
|
|
||
|
outPara = []
|
||
|
outLine = ''.ljust(self.leadIndent) + word
|
||
|
outLineLen = self.leadIndent + wordLen
|
||
|
|
||
|
# If the paragraph begins with a bullet point, generate
|
||
|
# a hanging indent level if there isn't one already.
|
||
|
if beginBullet.match(self.para[0]):
|
||
|
bulletPoint = True
|
||
|
if len(self.para) > 1:
|
||
|
logDiag('reflowPara first line matches bullet point',
|
||
|
'but indent already hanging @ input line',
|
||
|
self.lineNumber)
|
||
|
else:
|
||
|
logDiag('reflowPara first line matches bullet point -'
|
||
|
'single line, assuming hangIndent @ input line',
|
||
|
self.lineNumber)
|
||
|
self.hangIndent = outLineLen + 1
|
||
|
else:
|
||
|
bulletPoint = False
|
||
|
else:
|
||
|
# Possible actions to take with this word
|
||
|
#
|
||
|
# addWord - add word to current line
|
||
|
# closeLine - append line and start a new (null) one
|
||
|
# startLine - add word to a new line
|
||
|
|
||
|
# Default behavior if all the tests below fail is to add
|
||
|
# this word to the current line, and keep accumulating
|
||
|
# that line.
|
||
|
(addWord, closeLine, startLine) = (True, False, False)
|
||
|
|
||
|
# How long would this line be if the word were added?
|
||
|
newLen = outLineLen + 1 + wordLen
|
||
|
|
||
|
# Are we on the first word following a bullet point?
|
||
|
firstBullet = (wordCount == 2 and bulletPoint)
|
||
|
|
||
|
if (endEscape):
|
||
|
# If the new word ends the input line with ' +',
|
||
|
# add it to the current line.
|
||
|
|
||
|
(addWord, closeLine, startLine) = (True, True, False)
|
||
|
elif newLen > self.margin:
|
||
|
if firstBullet:
|
||
|
# If the word follows a bullet point, add it to
|
||
|
# the current line no matter its length.
|
||
|
|
||
|
(addWord, closeLine, startLine) = (True, True, False)
|
||
|
else:
|
||
|
# The word overflows, so add it to a new line.
|
||
|
|
||
|
(addWord, closeLine, startLine) = (False, True, True)
|
||
|
elif (self.breakPeriod and
|
||
|
(wordCount > 2 or not firstBullet) and
|
||
|
self.endSentence(prevWord)):
|
||
|
# If the previous word ends a sentence and
|
||
|
# breakPeriod is set, start a new line.
|
||
|
# The complicated logic allows for leading bullet
|
||
|
# points which are periods (implicitly numbered lists).
|
||
|
# @@@ But not yet for explicitly numbered lists.
|
||
|
|
||
|
(addWord, closeLine, startLine) = (False, True, True)
|
||
|
|
||
|
# Add a word to the current line
|
||
|
if addWord:
|
||
|
if outLine:
|
||
|
outLine += ' ' + word
|
||
|
outLineLen = newLen
|
||
|
else:
|
||
|
# Fall through to startLine case if there's no
|
||
|
# current line yet.
|
||
|
startLine = True
|
||
|
|
||
|
# Add current line to the output paragraph. Force
|
||
|
# starting a new line, although we don't yet know if it
|
||
|
# will ever have contents.
|
||
|
if closeLine:
|
||
|
if outLine:
|
||
|
outPara.append(outLine + '\n')
|
||
|
outLine = None
|
||
|
|
||
|
# Start a new line and add a word to it
|
||
|
if startLine:
|
||
|
outLine = ''.ljust(self.hangIndent) + word
|
||
|
outLineLen = self.hangIndent + wordLen
|
||
|
|
||
|
# Track the previous word, for use in breaking at end of
|
||
|
# a sentence
|
||
|
prevWord = word
|
||
|
|
||
|
# Add this line to the output paragraph.
|
||
|
if (outLine):
|
||
|
outPara.append(outLine + '\n')
|
||
|
|
||
|
return outPara
|
||
|
|
||
|
# Emit a paragraph, possibly reflowing it depending on the block
|
||
|
# context. Reset the paragraph accumulator.
|
||
|
def emitPara(self):
|
||
|
if self.para != []:
|
||
|
if self.reflowStack[-1]:
|
||
|
self.printLines(self.reflowPara())
|
||
|
else:
|
||
|
self.printLines(self.para)
|
||
|
|
||
|
# Reset the paragraph, including its indentation level
|
||
|
self.para = []
|
||
|
self.leadIndent = 0
|
||
|
self.hangIndent = 0
|
||
|
|
||
|
# 'line' ends a paragraph and should itself be emitted.
|
||
|
# line may be None to indicate EOF or other exception.
|
||
|
def endPara(self, line):
|
||
|
logDiag('endPara line', self.lineNumber, ': emitting paragraph')
|
||
|
|
||
|
# Emit current paragraph, this line, and reset tracker
|
||
|
self.emitPara()
|
||
|
|
||
|
if line:
|
||
|
self.printLines( [ line ] )
|
||
|
|
||
|
# 'line' ends a paragraph (unless there's already a paragraph being
|
||
|
# accumulated, e.g. len(para) > 0 - currently not implemented)
|
||
|
def endParaContinue(self, line):
|
||
|
self.endPara(line)
|
||
|
|
||
|
# 'line' begins or ends a block. If beginning a block, tag whether or
|
||
|
# not to reflow the contents.
|
||
|
def endBlock(self, line, reflow = False):
|
||
|
self.endPara(line)
|
||
|
|
||
|
if self.blockStack[-1] == line:
|
||
|
logDiag('endBlock line', self.lineNumber,
|
||
|
': popping block end depth:', len(self.blockStack),
|
||
|
':', line, end='')
|
||
|
self.blockStack.pop()
|
||
|
self.reflowStack.pop()
|
||
|
else:
|
||
|
# Start a block
|
||
|
self.blockStack.append(line)
|
||
|
self.reflowStack.append(reflow)
|
||
|
|
||
|
logDiag('endBlock reflow =', reflow, ' line', self.lineNumber,
|
||
|
': pushing block start depth', len(self.blockStack),
|
||
|
':', line, end='')
|
||
|
|
||
|
# 'line' begins or ends a block. The paragraphs in the block *should* be
|
||
|
# reformatted (e.g. a NOTE).
|
||
|
def endParaBlockReflow(self, line):
|
||
|
self.endBlock(line, reflow = True)
|
||
|
|
||
|
# 'line' begins or ends a block. The paragraphs in the block should
|
||
|
# *not* be reformatted (e.g. a NOTE).
|
||
|
def endParaBlockPassthrough(self, line):
|
||
|
self.endBlock(line, reflow = False)
|
||
|
|
||
|
# 'line' starts or continues a paragraph.
|
||
|
# Paragraphs may have "hanging indent", e.g.
|
||
|
# * Bullet point...
|
||
|
# ... continued
|
||
|
# In this case, when the higher indentation level ends, so does the
|
||
|
# paragraph.
|
||
|
def addLine(self, line):
|
||
|
logDiag('addLine line', self.lineNumber, ':', line, end='')
|
||
|
|
||
|
# See https://stackoverflow.com/questions/13648813/what-is-the-pythonic-way-to-count-the-leading-spaces-in-a-string
|
||
|
indent = len(line) - len(line.lstrip())
|
||
|
|
||
|
# A hanging paragraph ends due to a less-indented line.
|
||
|
if self.para != [] and indent < self.hangIndent:
|
||
|
logDiag('addLine: line reduces indentation, emit paragraph')
|
||
|
self.emitPara()
|
||
|
|
||
|
# A bullet point (or something that looks like one) always ends the
|
||
|
# current paragraph.
|
||
|
if beginBullet.match(line):
|
||
|
logDiag('addLine: line matches beginBullet, emit paragraph')
|
||
|
self.emitPara()
|
||
|
|
||
|
if self.para == []:
|
||
|
# Begin a new paragraph
|
||
|
self.para = [ line ]
|
||
|
self.leadIndent = indent
|
||
|
self.hangIndent = indent
|
||
|
else:
|
||
|
# Add a line to a paragraph. Increase the hanging indentation
|
||
|
# level - once.
|
||
|
if self.hangIndent == self.leadIndent:
|
||
|
self.hangIndent = indent
|
||
|
self.para.append(line)
|
||
|
|
||
|
def reflowFile(filename, overwrite, outDir, suffix):
|
||
|
logDiag('reflow: filename', filename)
|
||
|
|
||
|
lines = loadFile(filename)
|
||
|
if (lines == None):
|
||
|
return
|
||
|
|
||
|
# Output file handle and reflow object for this file. There are no race
|
||
|
# conditions on overwriting the input, but it's not recommended unless
|
||
|
# you have backing store such as git.
|
||
|
|
||
|
if overwrite:
|
||
|
outFilename = filename
|
||
|
else:
|
||
|
outFilename = outDir + '/' + os.path.basename(filename) + suffix
|
||
|
|
||
|
try:
|
||
|
fp = open(outFilename, 'w')
|
||
|
except:
|
||
|
logWarn('Cannot open output file', filename, ':', sys.exc_info()[0])
|
||
|
return None
|
||
|
|
||
|
state = ReflowState(filename, file = fp)
|
||
|
|
||
|
for line in lines:
|
||
|
state.incrLineNumber()
|
||
|
|
||
|
# The logic here is broken. If we're in a non-reflowable block and
|
||
|
# this line *doesn't* end the block, it should always be
|
||
|
# accumulated.
|
||
|
|
||
|
if endPara.match(line):
|
||
|
# Ending a paragraph. Emit the current paragraph, if any, and
|
||
|
# prepare to begin a new paragraph.
|
||
|
|
||
|
state.endPara(line)
|
||
|
elif endParaContinue.match(line):
|
||
|
# For now, always just end the paragraph.
|
||
|
# Could check see if len(para) > 0 to accumulate.
|
||
|
|
||
|
state.endParaContinue(line)
|
||
|
elif blockReflow.match(line):
|
||
|
# Starting or ending a block whose contents may be reflowed.
|
||
|
# Blocks cannot be nested.
|
||
|
|
||
|
state.endParaBlockReflow(line)
|
||
|
elif blockPassthrough.match(line):
|
||
|
# Starting or ending a block whose contents must not be reflowed.
|
||
|
# These are tables, etc. Blocks cannot be nested.
|
||
|
|
||
|
state.endParaBlockPassthrough(line)
|
||
|
else:
|
||
|
# Just accumulate a line to the current paragraph. Watch out for
|
||
|
# hanging indents / bullet-points and track that indent level.
|
||
|
|
||
|
state.addLine(line)
|
||
|
|
||
|
# Cleanup at end of file
|
||
|
state.endPara(None)
|
||
|
|
||
|
# Sanity check on block nesting
|
||
|
if len(state.blockStack) > 1:
|
||
|
logWarn('file', filename,
|
||
|
'mismatched asciidoc block delimiters at EOF:',
|
||
|
state.blockStack[-1])
|
||
|
|
||
|
fp.close()
|
||
|
|
||
|
# Patterns used to recognize interesting lines in an asciidoc source file.
|
||
|
# These patterns are only compiled once.
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
parser = argparse.ArgumentParser()
|
||
|
|
||
|
parser.add_argument('-diag', action='store', dest='diagFile',
|
||
|
help='Set the diagnostic file')
|
||
|
parser.add_argument('-warn', action='store', dest='warnFile',
|
||
|
help='Set the warning file')
|
||
|
parser.add_argument('-log', action='store', dest='logFile',
|
||
|
help='Set the log file for both diagnostics and warnings')
|
||
|
parser.add_argument('-overwrite', action='store_true',
|
||
|
help='Overwrite input filenames instead of writing different output filenames')
|
||
|
parser.add_argument('-out', action='store', dest='outDir',
|
||
|
default='out',
|
||
|
help='Set the output directory in which updated files are generated (default: out)')
|
||
|
parser.add_argument('-suffix', action='store', dest='suffix',
|
||
|
default='',
|
||
|
help='Set the suffix added to updated file names (default: none)')
|
||
|
parser.add_argument('files', metavar='filename', nargs='*',
|
||
|
help='a filename to reflow text in')
|
||
|
parser.add_argument('--version', action='version', version='%(prog)s 1.0')
|
||
|
|
||
|
results = parser.parse_args()
|
||
|
|
||
|
setLogFile(True, True, results.logFile)
|
||
|
setLogFile(True, False, results.diagFile)
|
||
|
setLogFile(False, True, results.warnFile)
|
||
|
|
||
|
if (results.overwrite):
|
||
|
logWarn('reflow.py: will overwrite all input files')
|
||
|
|
||
|
for file in results.files:
|
||
|
reflowFile(file, results.overwrite, results.outDir, results.suffix)
|