Make sure that when we pull from the remote, any file that is new locally but not present remotely gets deleted.

Also: add several packages to requirements.txt that should have been there in the first place.
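The heart of the change is the selection in sync_changed_files below: a file flagged new on the local side only has no remote counterpart, so it is deleted; everything else is created or updated from the remote copy. A minimal sketch of that mask, assuming a DataFrame with a boolean 'new' column and a 'location' column as built by get_changed_files (the sample rows are hypothetical):

import pandas as pd

# Hypothetical stand-in for get_changed_files(remote, spec_id, as_df=True)
changedfiles = pd.DataFrame({
    'filename': ['a.bpmn', 'b.dmn', 'c.docx'],
    'new': [True, False, True],
    'location': ['local', 'remote', 'remote'],
}).set_index('filename')

# new + local means the file does not exist remotely: schedule it for deletion
deletefiles = changedfiles[(changedfiles['new'] == True) & (changedfiles['location'] == 'local')]
# everything else is pulled from the remote
updatefiles = changedfiles[~((changedfiles['new'] == True) & (changedfiles['location'] == 'local'))]

assert list(deletefiles.index) == ['a.bpmn']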
Kelly McDonald 2020-12-08 13:42:01 -05:00
parent 44c72115ae
commit 0e1aa59fa1
4 changed files with 95 additions and 4 deletions


@@ -152,6 +152,38 @@ paths:
items:
$ref: "#/components/schemas/Study"
/workflow_spec/{workflow_spec_id}/files/sync:
get:
operationId: crc.api.workflow.sync_changed_files
summary: Provides a list of files that were updated
security: [] # Disable security for this endpoint only - we'll sanity check
# in the endpoint
parameters:
- name: workflow_spec_id
in: path
required: true
description: The workflow_spec id
schema:
type: string
- name: remote
in: query
required: true
description: The remote endpoint
schema:
type: string
tags:
- Workflow Spec States
responses:
'200':
description: An array of files that were updated, with last touched date and file signature.
content:
application/json:
schema:
type: array
items:
$ref: "#/components/schemas/Study"
/workflow_spec/{workflow_spec_id}/files/diff:
get:
operationId: crc.api.workflow.get_changed_files
@@ -334,6 +366,7 @@ paths:
get:
operationId: crc.api.workflow.get_workflow_specification
summary: Returns a single workflow specification
security: []
tags:
- Workflow Specifications
responses:
@@ -578,7 +611,7 @@ paths:
responses:
'204':
description: The file has been removed.
/file/{md5_hash}/data:
/file/{md5_hash}/hash_data:
parameters:
- name: md5_hash
in: path
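For reference, this is how a client might exercise the new sync route and the renamed hash_data route. The host, spec id, and the md5_hash field on the returned records are illustrative assumptions, not part of this commit:

import requests

remote = 'staging.example.org'   # hypothetical remote instance
spec_id = 'top_level_workflow'   # hypothetical workflow spec id

# sync this spec's files from the remote; the response lists the files touched
files = requests.get('http://localhost:5000/v1.0/workflow_spec/' + spec_id + '/files/sync',
                     params={'remote': remote}).json()

# fetch one file's raw content by md5 hash through the renamed route
if files:
    content = requests.get('http://localhost:5000/v1.0/file/' + files[0]['md5_hash'] + '/hash_data').content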


@@ -1,6 +1,7 @@
import hashlib
import json
import uuid
from io import StringIO
from hashlib import md5
import pandas as pd
@@ -319,8 +320,56 @@ def get_changed_workflows(remote):
# return the list as a dict, let swagger convert it to json
return output.reset_index().to_dict(orient='records')
def sync_all_changed_files(remote):
pass
def get_changed_files(remote,workflow_spec_id):
def sync_changed_files(remote,workflow_spec_id):
# make sure that spec is local before syncing files
remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id)
specdict = json.loads(remotespectext.text)
localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first()
if localspec is None:
localspec = WorkflowSpecModel()
localspec.id = workflow_spec_id
if specdict['category'] is None:
localspec.category = None
else:
localspec.category = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.id
== specdict['category']['id']).first()
localspec.display_order = specdict['display_order']
localspec.display_name = specdict['display_name']
localspec.name = specdict['name']
localspec.description = specdict['description']
session.add(localspec)
changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True)
# everything except files that are new on the local side gets created or updated from the remote
updatefiles = changedfiles[~((changedfiles['new']==True) & (changedfiles['location']=='local'))]
# files that are new locally have no remote counterpart - they get deleted
deletefiles = changedfiles[((changedfiles['new']==True) & (changedfiles['location']=='local'))]
for delfile in deletefiles.reset_index().to_dict(orient='records'):
currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
FileModel.name == delfile['filename']).first()
FileService.delete_file(currentfile.id)
for updatefile in updatefiles.reset_index().to_dict(orient='records'):
currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
FileModel.name == updatefile['filename']).first()
if not currentfile:
currentfile = FileModel()
currentfile.name = updatefile['filename']
currentfile.workflow_spec_id = workflow_spec_id
currentfile.date_created = updatefile['date_created']
currentfile.type = updatefile['type']
currentfile.primary = updatefile['primary']
currentfile.content_type = updatefile['content_type']
currentfile.primary_process_id = updatefile['primary_process_id']
session.add(currentfile)
response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data')
FileService.update_file(currentfile,response.content,updatefile['type'])
session.commit()
def get_changed_files(remote,workflow_spec_id,as_df=False):
"""
gets a remote endpoint - gets the files for a workflow_spec on both
local and remote and determines what files have been change and returns a list of those
@@ -369,7 +418,10 @@ def get_changed_files(remote,workflow_spec_id):
changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True
changedfiles = changedfiles.replace({pd.np.nan: None})
# return the list as a dict, let swagger convert it to json
return changedfiles.reset_index().to_dict(orient='records')
if as_df:
return changedfiles
else:
return changedfiles.reset_index().to_dict(orient='records')
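sync_all_changed_files is left as a stub above. One plausible way it could be filled in later, assuming get_changed_workflows(remote) returns records carrying a workflow_spec_id key (an assumption for illustration, not something this commit defines):

def sync_all_changed_files(remote):
    # hypothetical sketch: sync the files of every spec the remote reports as changed
    workflows = get_changed_workflows(remote)
    for workflow in workflows:
        sync_changed_files(remote, workflow['workflow_spec_id'])
    return [wf['workflow_spec_id'] for wf in workflows]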


@@ -69,7 +69,7 @@ class WorkflowStatus(enum.Enum):
class WorkflowSpecDependencyFile(db.Model):
"""Connects a workflow to the version of the specification files it depends on to execute"""
"""Connects to a workflow to test the version of the specification files it depends on to execute"""
file_data_id = db.Column(db.Integer, db.ForeignKey(FileDataModel.id), primary_key=True)
workflow_id = db.Column(db.Integer, db.ForeignKey("workflow.id"), primary_key=True)
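Because both columns are primary keys, each (file_data_id, workflow_id) pair is stored at most once, and dependency lookups are plain composite-key queries. A hedged sketch, assuming session and the models are imported as elsewhere in the app:

# hypothetical query: every spec-file version a given workflow depends on
deps = session.query(WorkflowSpecDependencyFile) \
    .filter(WorkflowSpecDependencyFile.workflow_id == workflow.id) \
    .all()
file_data_ids = [dep.file_data_id for dep in deps]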


@@ -22,8 +22,10 @@ docutils==0.16
docxtpl==0.9.2
et-xmlfile==1.0.1
flask==1.1.2
flask-admin==1.5.7
flask-bcrypt==0.7.1
flask-cors==3.0.8
flask-mail==0.9.1
flask-marshmallow==0.12.0
flask-migrate==2.5.3
flask-restful==0.3.8
@@ -55,17 +57,21 @@ pandas==1.0.3
psycopg2-binary==2.8.5
pyasn1==0.4.8
pycparser==2.20
PyGithub==1.53
pygments==2.6.1
pyjwt==1.7.1
pyparsing==2.4.7
pyrsistent==0.16.0
python-box==5.2.0
python-dateutil==2.8.1
python-docx==0.8.10
python-editor==1.0.4
python-Levenshtein==0.12.0
pytz==2020.1
pyyaml==5.3.1
recommonmark==0.6.0
requests==2.23.0
sentry-sdk==0.14.4
six==1.14.0
snowballstemmer==2.0.0
soupsieve==2.0.1