Allow sync to work even if the local set of workflow specifications is completely empty.

This commit is contained in:
Dan 2021-01-08 13:23:01 -05:00
parent b0dc834682
commit 72a73c1fc4
2 changed files with 22 additions and 2 deletions

View File

@ -99,6 +99,13 @@ def load_example_rrt_data():
ExampleDataLoader.clean_db()
ExampleDataLoader().load_rrt()
@app.cli.command()
def load_reference_files():
    """Load the example reference documents."""
    # NOTE: Click uses this docstring as the command's --help text, so it
    # should describe this command rather than the general example-data loader.
    # The import is kept at function scope so the example-data module is only
    # imported when this command is actually run.
    from example_data import ExampleDataLoader

    ExampleDataLoader().load_reference_documents()
@app.cli.command()
def clear_db():
"""Load example data into the database."""

View File

@ -37,6 +37,14 @@ def get_changed_workflows(remote,as_df=False):
# get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index
local = get_all_spec_state_dataframe().reset_index()
if local.empty:
# return the list as a dict, let swagger convert it to json
remote_workflows['new'] = True
if as_df:
return remote_workflows
else:
return remote_workflows.reset_index().to_dict(orient='records')
# merge these on workflow spec id and hash - this will
# make two different date columns date_x and date_y
different = remote_workflows.merge(local,
@ -44,8 +52,7 @@ def get_changed_workflows(remote,as_df=False):
left_on=['workflow_spec_id','md5_hash'],
how = 'outer' ,
indicator=True).loc[lambda x : x['_merge']!='both']
if len(different)==0:
return []
# each line has a tag on it - if was in the left or the right,
# label it so we know if that was on the remote or local machine
different.loc[different['_merge']=='left_only','location'] = 'remote'
@ -74,6 +81,8 @@ def get_changed_workflows(remote,as_df=False):
changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True
output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])]
# return the list as a dict, let swagger convert it to json
if as_df:
return output
@ -295,6 +304,10 @@ def get_all_spec_state_dataframe():
'date_created':file.date_created})
df = pd.DataFrame(filelist)
# If the file list is empty, return an empty data frame
if df.empty:
return df
# get a distinct list of file_model_id's with the most recent file_data retained
df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy()