Iterate over all folders, combining them all into one big book.

This commit is contained in:
Jacques Wagener 2018-02-23 13:12:59 +02:00
parent 41a84da96e
commit 6d3641559e
No known key found for this signature in database
GPG Key ID: 6ADD11FE56C539DC
6 changed files with 43 additions and 34 deletions

1
.gitignore vendored
View File

@ -108,3 +108,4 @@ venv.bak/
# mypy
.mypy_cache/
texts/

Binary file not shown.

76
main.py
View File

@ -94,13 +94,11 @@ def replace_style(el, style_renaming, document_id):
stylename = el.attributes.get(('urn:oasis:names:tc:opendocument:xmlns:text:1.0', 'style-name'))
if stylename and style_renaming.get(stylename):
print('Replacing style-name: %s with %s' % (stylename, style_renaming.get(stylename)))
el.attributes[('urn:oasis:names:tc:opendocument:xmlns:text:1.0', 'style-name')] = \
style_renaming[stylename]
parent_stylename = el.attributes.get(('urn:oasis:names:tc:opendocument:xmlns:text:1.0', 'parent-style-name'))
if parent_stylename and style_renaming.get(parent_stylename):
print('Replacing parent-style-name: %s with: %s' % (parent_stylename, style_renaming.get(parent_stylename)))
el.attributes[('urn:oasis:names:tc:opendocument:xmlns:text:1.0', 'parent-style-name')] = \
style_renaming[parent_stylename]
@ -174,48 +172,58 @@ def main():
http = credentials.authorize(httplib2.Http())
service = discovery.build('drive', 'v3', http=http)
# TODO Impement pagination
# TODO Implement recursive directories
results = service.files().list(
q="mimeType = 'application/vnd.google-apps.document' and '{0}' in parents".format(DRIVE_ID), # for subdirectories later
folder_results = service.files().list(
q="mimeType = 'application/vnd.google-apps.folder' and '{0}' in parents".format(DRIVE_ID),
includeTeamDriveItems=True, corpora='teamDrive', supportsTeamDrives=True, teamDriveId=DRIVE_ID,
orderBy='createdTime', pageSize=25, fields="nextPageToken, files(id, name)").execute()
items = results.get('files', [])
if not items:
print('No files found.')
else:
print('Files:')
if os.path.exists(TEXTS_DIR):
# Clear out texts directory so it's easier to see which documents are changed in git commit
# (yes I know storing binaries in git is bad)
shutil.rmtree(TEXTS_DIR, onerror=remove_readonly)
).execute()
folders = folder_results.get('files', [])
document_id = 0
# Clear temp texts directory.
if os.path.exists(TEXTS_DIR):
shutil.rmtree(TEXTS_DIR, onerror=remove_readonly)
os.makedirs(TEXTS_DIR)
# Hacky but to prevent step iterator issue when mergning documents
doc_stream = get_document(service, items[0]['id'])
doc_file = os.path.join(TEXTS_DIR, "{0}.odt".format(items[0]['name']))
with open(doc_file, 'wb') as out:
out.write(doc_stream.getvalue())
book_of_status = OpenDocumentText()
book_of_status = None
for folder in folders:
folder_name = folder['name']
results = service.files().list(
q="mimeType = 'application/vnd.google-apps.document' and '{0}' in parents".format(folder['id']),
includeTeamDriveItems=True, corpora='teamDrive', supportsTeamDrives=True, teamDriveId=DRIVE_ID,
orderBy='createdTime', pageSize=25, fields="nextPageToken, files(id, name)"
).execute()
files = results.get('files', [])
# withbreak = Style(name="WithBreak", parentstylename="Standard", family="paragraph")
# withbreak.addElement(ParagraphProperties(breakbefore="page"))
# book_of_status.automaticstyles.addElement(withbreak)
if not files:
print('No files found.')
else:
print('Folder: {}'.format(folder_name))
print('Files:')
for doc_file in files[:]:
document_id = 0
for item in items[:]:
print('{0} ({1})'.format(item['name'], item['id']))
doc_stream = get_document(service, item['id'])
doc_file = os.path.join(TEXTS_DIR, "{0}.odt".format(item['name']))
with open(doc_file, 'wb') as out:
out.write(doc_stream.getvalue())
print('{0} ({1}) [{2}]'.format(doc_file['name'], doc_file['id'], document_id))
doc_stream = get_document(service, doc_file['id'])
doc_file = os.path.join(TEXTS_DIR, "{0}.odt".format(document_id))
with open(doc_file, 'wb') as out:
out.write(doc_stream.getvalue())
book_of_status = merge(doc_stream, book_of_status, str(document_id))
document_id += 1
# OpenDocumentText() can not be trusted to be compatible with
# the WebODF viewer rendering, causing 'Step iterator must be on a step' error.
# Instead use the first document as our source.
if document_id == 0:
book_of_status = load(doc_stream)
else:
book_of_status = merge(doc_stream, book_of_status, str(document_id))
document_id += 1
# Doesn't work well
if book_of_status:
book_of_status = replace_tokens(book_of_status)
book_of_status.save("book-of-status.odt")
if __name__ == '__main__':
main()
# book_of_status.save("book-of-status.odt")

Binary file not shown.

Binary file not shown.

Binary file not shown.