*** WIP ***

Moving files to the filesystem
Tools for reading/writing metadata for categories and specs
This commit is contained in:
mike cullerton 2021-12-16 08:41:49 -05:00
parent e62a99af96
commit 338352017b
4 changed files with 430 additions and 1 deletions

View File

@ -20,7 +20,7 @@ from crc import session, app
from crc.api.common import ApiError from crc.api.common import ApiError
from crc.models.data_store import DataStoreModel from crc.models.data_store import DataStoreModel
from crc.models.file import FileType, FileDataModel, FileModel, LookupFileModel, LookupDataModel from crc.models.file import FileType, FileDataModel, FileModel, LookupFileModel, LookupDataModel
from crc.models.workflow import WorkflowSpecModel, WorkflowModel, WorkflowSpecDependencyFile, WorkflowLibraryModel from crc.models.workflow import WorkflowSpecModel, WorkflowModel, WorkflowSpecDependencyFile, WorkflowLibraryModel, WorkflowSpecModelSchema, WorkflowSpecCategoryModel, WorkflowSpecCategoryModelSchema
from crc.services.cache_service import cache from crc.services.cache_service import cache
from crc.services.user_service import UserService from crc.services.user_service import UserService
import re import re
@ -593,3 +593,81 @@ class FileService(object):
else: else:
raise ApiError(code='bad_keep', raise ApiError(code='bad_keep',
message='You must keep at least 1 version') message='You must keep at least 1 version')
# @staticmethod
# def write_file_to_system(file, category):
# file_path = os.path.join(app.root_path, '..', 'files')
#
# print(f'write_file_to_system: file_path: {file_path}')
@staticmethod
def write_file_to_system(file_model):
    """Export the newest stored version of a workflow-spec file to disk.

    Files are written below ``<app.root_path>/../files`` using the layout
    ``<category name>/<spec display name>/<file name>``.  For specs that
    belong to a category, JSON dumps of the category and of the spec are
    written next to their directories as ``<display name>.json``.

    Files attached to a workflow (rather than a spec) are skipped, and
    reference files / unclassified files are only reported on stdout.

    :param file_model: the FileModel row describing the file to export.
    """
    sync_file_root = os.path.join(app.root_path, '..', 'files')

    def write_json(json_file_path, json_text):
        # Make sure the directory holding the json file exists, then write it.
        os.makedirs(os.path.dirname(json_file_path), exist_ok=True)
        with open(json_file_path, 'w') as j_handle:
            j_handle.write(json_text)

    def process_category(category):
        # Dump the category model to <root>/<category display name>.json
        write_json(os.path.join(sync_file_root, f'{category.display_name}.json'),
                   WorkflowSpecCategoryModelSchema().dumps(category))

    def process_workflow_spec(workflow_spec, category_name_string):
        # Dump the spec model to <root>/<category>/<spec display name>.json
        # (this also creates the category directory).
        write_json(os.path.join(sync_file_root, category_name_string,
                                f'{workflow_spec.display_name}.json'),
                   WorkflowSpecModelSchema().dumps(workflow_spec))

    if file_model.workflow_spec_id is None:
        if file_model.workflow_id is not None:
            # we have a workflow file -- not exported (yet)
            return
        if file_model.is_reference:
            # we have a reference file?
            print(f'Reference file: {file_model.name}')
        else:
            print(f'Not processed: {file_model.name}')
        return

    # we have a workflow spec file
    workflow_spec_model = session.query(WorkflowSpecModel). \
        filter(WorkflowSpecModel.id == file_model.workflow_spec_id). \
        first()
    if not workflow_spec_model:
        return

    category_name = None
    if workflow_spec_model.category_id is not None:
        category_model = session.query(WorkflowSpecCategoryModel). \
            filter(WorkflowSpecCategoryModel.id == workflow_spec_model.category_id). \
            first()
        if category_model is None:
            # .first() found no row for category_id; without the category
            # there is no directory name to export into.
            print(f'Not processed: {file_model.name}')
            return
        process_category(category_model)
        process_workflow_spec(workflow_spec_model, category_model.display_name)
        category_name = category_model.display_name
    elif workflow_spec_model.is_master_spec:
        category_name = 'Master Specification'
    elif workflow_spec_model.library:
        category_name = 'Library Specs'

    if category_name is None:
        return

    file_data_model = session.query(FileDataModel). \
        filter(FileDataModel.file_model_id == file_model.id). \
        order_by(desc(FileDataModel.version)). \
        first()
    if file_data_model is None:
        # No stored data for this file, so there is nothing to write.
        print(f'Not processed: {file_model.name}')
        return

    file_path = os.path.join(sync_file_root,
                             category_name,
                             workflow_spec_model.display_name,
                             file_model.name)
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, 'wb') as f_handle:
        f_handle.write(file_data_model.data)
# print(f'write_file_to_system: file_path: {file_path}')

View File

@ -0,0 +1,192 @@
"""Move files to filesystem
Revision ID: 7225d990740e
Revises: 44dd9397c555
Create Date: 2021-12-14 10:52:50.785342
"""
import json
from alembic import op
import sqlalchemy as sa
from crc import app, db, session
# from crc.models.file import FileModel, FileDataModel
# from crc.models.workflow import WorkflowSpecModel, WorkflowSpecModelSchema, WorkflowSpecCategoryModel, WorkflowSpecCategoryModelSchema
# import os
#
# from crc.services.file_service import FileService
# revision identifiers, used by Alembic.
# `revision` is this migration's ID and `down_revision` the migration it
# revises (both match the module docstring above).
revision = '7225d990740e'
down_revision = '44dd9397c555'
branch_labels = None
depends_on = None
class TempCategoryModel(db.Model):
    """ORM mapping for the ``temp_category`` table.

    NOTE(review): presumably a staging copy of the workflow spec category
    metadata used while this migration runs -- confirm intended use.
    """
    __tablename__ = 'temp_category'
    # Integer primary key.
    id = db.Column(db.Integer, primary_key=True)
    display_name = db.Column(db.String)
    display_order = db.Column(db.Integer)
    admin = db.Column(db.Boolean)
class TempSpecModel(db.Model):
    """ORM mapping for the ``temp_spec`` table.

    NOTE(review): presumably a staging copy of the workflow spec metadata
    used while this migration runs -- confirm intended use.
    """
    __tablename__ = 'temp_spec'
    # String primary key.
    id = db.Column(db.String, primary_key=True)
    display_name = db.Column(db.String)
    display_order = db.Column(db.Integer, nullable=True)
    description = db.Column(db.Text)
    # Reference the temporary category table: temp_tables() creates the
    # temp_spec.category_id foreign key against temp_category.id, so the
    # mapping must target the same table (and its model) rather than
    # workflow_spec_category.
    category_id = db.Column(db.Integer, db.ForeignKey('temp_category.id'), nullable=True)
    category = db.relationship("TempCategoryModel")
    is_master_spec = db.Column(db.Boolean, default=False)
    standalone = db.Column(db.Boolean, default=False)
    library = db.Column(db.Boolean, default=False)
#
# def process_directory(directory):
# files = []
# directories = []
# directory_items = os.scandir(directory)
# for item in directory_items:
# if item.is_dir():
# directories.append(item)
# elif item.is_file():
# files.append(item)
#
# return files, directories
#
#
# def process_workflow_spec(json_file, directory):
# file_path = os.path.join(directory, json_file)
#
# with open(file_path, 'r') as f_open:
# data = f_open.read()
# data_obj = json.loads(data)
# workflow_spec_model = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.display_name==data_obj['display_name']).first()
# if not workflow_spec_model:
# workflow_spec_model = WorkflowSpecModel(display_name=data_obj['display_name'],
# description=data_obj['description'],
# is_master_spec=data_obj['is_master_spec'],
# category_id=data_obj['category_id'],
# display_order=data_obj['display_order'],
# standalone=data_obj['standalone'],
# library=data_obj['library'])
# session.add(workflow_spec_model)
#
# # session.commit()
#
# print(f'process_workflow_spec: workflow_spec_model: {workflow_spec_model}')
# return workflow_spec_model
#
#
# def process_workflow_spec_files():
# pass
#
#
# def process_category(json_file, root):
# print(f'process_category: json_file: {json_file}')
# file_path = os.path.join(root, json_file)
#
# with open(file_path, 'r') as f_open:
# data = f_open.read()
# data_obj = json.loads(data)
# category = session.query(TempCategoryModel).filter(
# TempCategoryModel.display_name == data_obj['display_name']).first()
# if not category:
# category = TempCategoryModel(display_name=data_obj['display_name'],
# display_order=data_obj['display_order'],
# admin=data_obj['admin'])
# session.add(category)
# else:
# category.display_order = data_obj['display_order']
# category.admin = data_obj['admin']
# # print(data)
# print(f'process_category: category: {category}')
#
# session.commit()
# return category
#
#
# def process_workflow_spec_directory(spec_directory):
# print(f'process_workflow_spec_directory: {spec_directory}')
# files, directories = process_directory(spec_directory)
#
# for file in files:
# print(f'process_workflow_spec_directory: file: {file}')
#
#
# def process_category_directory(category_directory):
# print(f'process_category_directory: {category_directory}')
# files, directories = process_directory(category_directory)
#
# for file in files:
# if file.name.endswith('.json'):
# workflow_spec = process_workflow_spec(file, category_directory)
#
# for workflow_spec_directory in directories:
# directory_path = os.path.join(category_directory, workflow_spec_directory)
# process_workflow_spec_directory(directory_path)
#
#
# def process_root_directory(root_directory):
#
# files, directories = process_directory(root_directory)
# for file in files:
# if file.name.endswith('.json'):
# category_model = process_category(file, root_directory)
#
# for directory in directories:
# directory_path = os.path.join(root_directory, directory)
# process_category_directory(directory_path)
#
#
# def update_file_metadata_from_filesystem(root_directory):
# process_root_directory(root_directory)
def temp_tables():
    """Create the temporary ``temp_category`` and ``temp_spec`` tables.

    The column definitions follow TempCategoryModel and TempSpecModel so the
    ORM classes can be used against the tables created here.
    """
    op.create_table('temp_category',
                    sa.Column('id', sa.Integer(), nullable=False),
                    sa.Column('display_name', sa.String(), nullable=True),
                    # Integer, to agree with TempCategoryModel.display_order.
                    sa.Column('display_order', sa.Integer(), nullable=True),
                    sa.Column('admin', sa.Boolean(), nullable=True),
                    sa.PrimaryKeyConstraint('id')
                    )
    # No separate 'category' column: on the model, `category` is an ORM
    # relationship resolved through category_id, not a stored column, so
    # the single foreign key on category_id is all that is needed.
    op.create_table('temp_spec',
                    sa.Column('id', sa.String(), nullable=False),
                    sa.Column('display_name', sa.String()),
                    sa.Column('description', sa.Text()),
                    sa.Column('is_master_spec', sa.Boolean(), nullable=True),
                    sa.Column('category_id', sa.Integer(), nullable=True),
                    sa.Column('display_order', sa.Integer(), nullable=True),
                    sa.Column('standalone', sa.Boolean(), nullable=True),
                    sa.Column('library', sa.Boolean(), nullable=True),
                    sa.ForeignKeyConstraint(['category_id'], ['temp_category.id'], ),
                    sa.PrimaryKeyConstraint('id')
                    )
def upgrade():
    """Write every non-archived file stored in the database to the filesystem."""
    # The module-level imports of these names are commented out, so import
    # them here; otherwise this function fails with NameError.
    from crc.models.file import FileModel
    from crc.services.file_service import FileService

    for file_model in session.query(FileModel).all():
        # `is not True` on purpose: rows whose archived flag is NULL are
        # exported as well.
        if file_model.archived is not True:
            FileService.write_file_to_system(file_model)
    print('upgrade: done: ')
def downgrade():
    """Create the temp tables and reload file metadata from the filesystem."""
    # `os` is not imported at module level in this migration (the import is
    # commented out), so bring it into scope here.
    import os

    temp_tables()
    print(f'temp category count: {session.query(TempCategoryModel).count()}')
    # Update DB from the filesystem
    sync_file_root = os.path.join(app.root_path, '..', 'files')
    # NOTE(review): update_file_metadata_from_filesystem only exists in this
    # module as commented-out code; it has to be restored (or imported)
    # before this downgrade can run.
    update_file_metadata_from_filesystem(sync_file_root)
    print('downgrade: ')

View File

@ -0,0 +1,125 @@
from tests.base_test import BaseTest
from crc import app, session
from crc.models.file import FileModel, FileDataModel
from crc.models.workflow import WorkflowSpecModel, WorkflowSpecModelSchema, WorkflowSpecCategoryModel, WorkflowSpecCategoryModelSchema
from crc.services.workflow_service import WorkflowService
import os
import json
from crc.services.file_service import FileService
def process_directory(directory):
    """Split the immediate children of *directory* into files and directories.

    :param directory: path of the directory to scan (not recursive).
    :returns: ``(files, directories)`` -- two lists of ``os.DirEntry``
        objects, each sorted by entry name so callers see a stable order.
        Entries that are neither a file nor a directory are ignored.
    """
    files = []
    directories = []
    # Use scandir as a context manager so the underlying directory handle
    # is released as soon as the scan finishes.
    with os.scandir(directory) as directory_items:
        for item in directory_items:
            if item.is_dir():
                directories.append(item)
            elif item.is_file():
                files.append(item)
    files.sort(key=lambda entry: entry.name)
    directories.sort(key=lambda entry: entry.name)
    return files, directories
def process_workflow_spec(json_file, directory):
    """Load a workflow spec json dump and make sure the spec exists in the DB.

    :param json_file: name (or directory entry) of the spec's json file.
    :param directory: the category directory that contains *json_file*.
    :returns: the WorkflowSpecModel found by display name, or the newly
        created one.
    """
    file_path = os.path.join(directory, json_file)
    with open(file_path, 'r') as f_open:
        data_obj = json.load(f_open)

    workflow_spec_model = session.query(WorkflowSpecModel). \
        filter(WorkflowSpecModel.display_name == data_obj['display_name']). \
        first()
    if not workflow_spec_model:
        # The spec json lives inside a directory named after its category,
        # so resolve the category from the directory name.  (Looking it up
        # by the spec's own display_name never matches the category.)
        category_name = os.path.basename(os.path.normpath(os.fspath(directory)))
        category_id = session.query(WorkflowSpecCategoryModel.id). \
            filter(WorkflowSpecCategoryModel.display_name == category_name). \
            scalar()
        workflow_spec_model = WorkflowSpecModel(id=data_obj['id'],
                                                display_name=data_obj['display_name'],
                                                description=data_obj['description'],
                                                is_master_spec=data_obj['is_master_spec'],
                                                category_id=category_id,
                                                display_order=data_obj['display_order'],
                                                standalone=data_obj['standalone'],
                                                library=data_obj['library'])
        session.add(workflow_spec_model)
        session.commit()

    print(f'process_workflow_spec: workflow_spec_model: {workflow_spec_model}')
    return workflow_spec_model
def process_workflow_spec_files():
    """Placeholder: importing the files of a workflow spec is not implemented."""
    return None
def process_category(json_file, root):
    """Create or update a workflow spec category from its json dump.

    The category is matched on ``display_name``; an existing row gets its
    ``display_order`` and ``admin`` values refreshed, otherwise a new row is
    added.  The session is committed before returning.

    :param json_file: name (or directory entry) of the category json file.
    :param root: directory that contains *json_file*.
    :returns: the WorkflowSpecCategoryModel that was created or updated.
    """
    print(f'process_category: json_file: {json_file}')
    with open(os.path.join(root, json_file), 'r') as handle:
        payload = json.loads(handle.read())

    name = payload['display_name']
    category = (session.query(WorkflowSpecCategoryModel)
                .filter(WorkflowSpecCategoryModel.display_name == name)
                .first())
    if category:
        category.display_order = payload['display_order']
        category.admin = payload['admin']
    else:
        category = WorkflowSpecCategoryModel(display_name=name,
                                             display_order=payload['display_order'],
                                             admin=payload['admin'])
        session.add(category)

    print(f'process_category: category: {category}')
    session.commit()
    return category
def process_workflow_spec_directory(spec_directory):
    """Report every file found directly inside a workflow spec directory.

    Only prints for now; sub-directories are ignored.
    """
    print(f'process_workflow_spec_directory: {spec_directory}')
    spec_files, _ = process_directory(spec_directory)
    for entry in spec_files:
        print(f'process_workflow_spec_directory: file: {entry}')
def process_category_directory(category_directory):
    """Process one category directory.

    Every ``*.json`` file directly inside the directory is passed to
    process_workflow_spec, and every sub-directory is passed to
    process_workflow_spec_directory.

    :param category_directory: path of the category directory.
    """
    print(f'process_category_directory: {category_directory}')
    files, directories = process_directory(category_directory)

    for file in files:
        if file.name.endswith('.json'):
            process_workflow_spec(file, category_directory)

    for workflow_spec_directory in directories:
        # A DirEntry already carries its path (scan directory + name), so
        # use it directly; joining the entry onto category_directory again
        # only yields the right path when category_directory is absolute.
        process_workflow_spec_directory(workflow_spec_directory.path)
def process_root_directory(root_directory):
    """Process the root of the exported file tree.

    Every ``*.json`` file directly inside the root is passed to
    process_category, then the category display order cleanup is run, and
    finally every sub-directory is passed to process_category_directory.

    :param root_directory: path of the sync root directory.
    """
    files, directories = process_directory(root_directory)

    for file in files:
        if file.name.endswith('.json'):
            process_category(file, root_directory)
    # Run the cleanup once, after all category json files were processed.
    WorkflowService.cleanup_workflow_spec_category_display_order()

    for directory in directories:
        # A DirEntry already carries its path (scan directory + name), so
        # use it directly; joining the entry onto root_directory again only
        # yields the right path when root_directory is absolute.
        process_category_directory(directory.path)
def update_file_metadata_from_filesystem(root_directory):
    """Entry point: sync category/spec metadata from the tree at *root_directory*.

    Delegates all work to process_root_directory.
    """
    process_root_directory(root_directory)
class TestFilesFromFilesystem(BaseTest):
    """Runs the filesystem -> database metadata sync against the example data."""

    def test_files_from_filesystem(self):
        """Load the example data, then run the sync on ``<app root>/../files``."""
        self.load_example_data()
        sync_root = os.path.join(app.root_path, '..', 'files')
        update_file_metadata_from_filesystem(sync_root)
        print('test_files_from_filesystem')

View File

@ -0,0 +1,34 @@
from tests.base_test import BaseTest
from crc import app, session
from crc.models.file import FileModel, FileDataModel
from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel
from crc.services.file_service import FileService
import os
class TestFilesToFilesystem(BaseTest):
    """Placeholder test for exporting database files to the filesystem."""

    def test_files_to_filesystem(self):
        """Currently only prints a marker; the draft below is disabled."""
        # Disabled draft of the export steps, kept for reference:
        #
        # # category = filename = ''
        # # data = 'asdf'
        # self.load_example_data()
        #
        # file_model = session.query(FileModel).first()
        # # filename = file_model.name
        # file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model.id).first()
        # if file_model.workflow_spec_id is None:
        #     file_model.workflow_spec_id = 'random_fact'
        # workflow_spec_model = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == file_model.workflow_spec_id).first()
        # if workflow_spec_model.category_id is None:
        #     workflow_spec_model.category_id = 0
        # category_model = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.id == workflow_spec_model.category_id).first()
        # file_path = os.path.join(app.root_path, '..', 'files', category_model.display_name, file_model.name)
        # os.makedirs(os.path.dirname(file_path), exist_ok=True)
        #
        # with open(file_path, 'wb') as f_handle:
        #     f_handle.write(file_data_model.data)
        marker = 'test_files_to_filesystem'
        print(marker)