From 92aa1b971dfa0c7a05dd19149a01d33c9923ed0b Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Wed, 2 Dec 2020 16:04:00 -0500 Subject: [PATCH 01/22] commit before removing big long SQL --- crc/api.yml | 17 ++++++++++++ crc/api/workflow.py | 67 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index ad22677e..44138510 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -100,6 +100,23 @@ paths: type: array items: $ref: "#/components/schemas/Study" + /workflow_spec/all: + get: + operationId: crc.api.workflow.get_all_spec_state + summary: Provides a list of workflow specs and their signature + tags: + - Workflow Spec States + responses: + '200': + description: An array of workflow specs, with last touched date and file signature. + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Study" + + /study/all: get: operationId: crc.api.study.all_studies diff --git a/crc/api/workflow.py b/crc/api/workflow.py index a148328a..37a5059a 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -1,11 +1,15 @@ +import hashlib +import json import uuid +from hashlib import md5 +import pandas as pd from SpiffWorkflow.util.deep_merge import DeepMerge from flask import g -from crc import session, app +from crc import session, db from crc.api.common import ApiError, ApiErrorSchema from crc.models.api_models import WorkflowApi, WorkflowApiSchema, NavigationItem, NavigationItemSchema -from crc.models.file import FileModel, LookupDataSchema +from crc.models.file import FileModel, LookupDataSchema, FileDataModel from crc.models.study import StudyModel, WorkflowMetadata from crc.models.task_event import TaskEventModel, TaskEventModelSchema, TaskEvent, TaskEventSchema from crc.models.workflow import WorkflowModel, WorkflowSpecModelSchema, WorkflowSpecModel, WorkflowSpecCategoryModel, \ @@ -257,3 +261,62 @@ def _verify_user_and_role(processor, spiff_task): raise ApiError.from_task("permission_denied", f"This task must be completed by '{allowed_users}', " f"but you are {user.uid}", spiff_task) +def join_uuids(uuids): + """Joins a pandas Series of uuids and combines them in one hash""" + combined_uuids = ''.join([str(uuid) for uuid in uuids.sort_values()]) # ensure that values are always + # in the same order + return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes + +def get_all_spec_state(): +# big_nasty_sql = """ +# +# with first as ( +# select file_model_id,version,max(version) over (partition by file_model_id) maxversion,f +# .name, +# workflow_spec_id,date_created,md5_hash +# from file f join file_data fd on fd. 
+#             file_model_id = f.id
+#             where workflow_spec_id is not null
+#             order by workflow_spec_id,file_model_id,version desc),
+#     second as (select name,workflow_spec_id,date_created,md5_hash from first
+#                where maxversion = version)
+#     select row_to_json(t) workflow_spec_state
+#     from (
+#     select workflow_spec_id,max(date_created) last_update,md5(max(
+#     (
+#     select array_to_json(array_agg(row_to_json(d)))
+#     from (select name,md5_hash from second where second.workflow_spec_id = second2.workflow_spec_id) d
+#
+#     )::text)) as files_hash
+#     from second as second2
+#     group by 1
+#     ) t
+#     """
+#     json_results = []
+#     result = db.engine.execute(big_nasty_sql)
+    x = session.query(FileDataModel).join(FileModel)
+
+    # there might be a cleaner way of getting a data frame from some of the
+    # fields in the ORM - but this works OK
+    filelist = []
+    for file in x:
+        filelist.append({'file_model_id':file.file_model_id,
+                         'workflow_spec_id': file.file_model.workflow_spec_id,
+                         'md5_hash':file.md5_hash,
+                         'filename':file.file_model.name,
+                         'date_created':file.date_created})
+    df = pd.DataFrame(filelist)
+
+    # get a distinct list of file_model_id's with the most recent file_data retained
+    df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy()
+
+    # take that list and then group by workflow_spec and retain the most recently touched file
+    # and make a consolidated hash of the md5_checksums - this acts as a 'thumbprint' for each
+    # workflow spec
+    df = df.groupby('workflow_spec_id').agg({'date_created':'max',
+                                             'md5_hash':join_uuids}).copy()
+
+    df = df.reset_index()[['workflow_spec_id','date_created','md5_hash']].copy()
+
+    return df.to_json(orient='records')
+
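The thumbprint idea in this first patch is worth spelling out: every file version already carries an md5, so a workflow spec can be signed by sorting the per-file hashes and hashing the concatenation - the signature is stable no matter what order the rows come back in, and it changes whenever any file changes. A rough standalone sketch of the same technique on hand-built data (illustrative only, not code from the patch):

    import hashlib
    import pandas as pd

    def join_uuids(uuids):
        # sort first so the combined hash is independent of row order
        combined = ''.join(str(u) for u in uuids.sort_values())
        return hashlib.md5(combined.encode('utf8')).hexdigest()

    files = pd.DataFrame([
        {'workflow_spec_id': 'spec_a', 'md5_hash': 'aaa', 'date_created': '2020-12-01'},
        {'workflow_spec_id': 'spec_a', 'md5_hash': 'bbb', 'date_created': '2020-12-02'},
        {'workflow_spec_id': 'spec_b', 'md5_hash': 'ccc', 'date_created': '2020-12-02'},
    ])
    # one row per spec: newest touch date plus a hash of the per-file hashes
    thumbprints = files.groupby('workflow_spec_id').agg(
        {'date_created': 'max', 'md5_hash': join_uuids})
    print(thumbprints.reset_index().to_json(orient='records'))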
From 0e8913434abe61eddc21e92f4485bd92b4e904b0 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Thu, 3 Dec 2020 08:44:15 -0500
Subject: [PATCH 02/22] refactor a bit

---
 crc/api/workflow.py | 40 ++++++++++++----------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index 37a5059a..7f517e22 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -267,33 +267,16 @@ def join_uuids(uuids):
                                                            # in the same order
     return hashlib.md5(combined_uuids.encode('utf8')).hexdigest()  # make a hash of the hashes

+@cross_origin()  # allow all cross origin requests so we can hit it with a dev box
 def get_all_spec_state():
-#     big_nasty_sql = """
-#
-#     with first as (
-#         select file_model_id,version,max(version) over (partition by file_model_id) maxversion,f
-#         .name,
-#         workflow_spec_id,date_created,md5_hash
-#         from file f join file_data fd on fd.
-#             file_model_id = f.id
-#             where workflow_spec_id is not null
-#             order by workflow_spec_id,file_model_id,version desc),
-#     second as (select name,workflow_spec_id,date_created,md5_hash from first
-#                where maxversion = version)
-#     select row_to_json(t) workflow_spec_state
-#     from (
-#     select workflow_spec_id,max(date_created) last_update,md5(max(
-#     (
-#     select array_to_json(array_agg(row_to_json(d)))
-#     from (select name,md5_hash from second where second.workflow_spec_id = second2.workflow_spec_id) d
-#
-#     )::text)) as files_hash
-#     from second as second2
-#     group by 1
-#     ) t
-#     """
-#     json_results = []
-#     result = db.engine.execute(big_nasty_sql)
+    df = get_all_spec_state_dataframe()
+    return df.reset_index().to_json(orient='records')
+
+def get_all_spec_state_dataframe():
+    """
+    Return a list of all workflow specs along with last updated date and a
+    thumbprint of all of the files that are used for that workflow_spec
+    """
     x = session.query(FileDataModel).join(FileModel)

     # there might be a cleaner way of getting a data frame from some of the
     # fields in the ORM - but this works OK
     filelist = []
     for file in x:
         filelist.append({'file_model_id':file.file_model_id,
                          'workflow_spec_id': file.file_model.workflow_spec_id,
                          'md5_hash':file.md5_hash,
                          'filename':file.file_model.name,
                          'date_created':file.date_created})
     df = pd.DataFrame(filelist)

     # get a distinct list of file_model_id's with the most recent file_data retained
     df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy()

     # take that list and then group by workflow_spec and retain the most recently touched file
     # and make a consolidated hash of the md5_checksums - this acts as a 'thumbprint' for each
     # workflow spec
     df = df.groupby('workflow_spec_id').agg({'date_created':'max',
                                              'md5_hash':join_uuids}).copy()
+    # get only the columns we are really interested in returning
+    df = df[['date_created','md5_hash']].copy()
+    # convert dates to string
+    df['date_created'] = df['date_created'].astype('str')

-    df = df.reset_index()[['workflow_spec_id','date_created','md5_hash']].copy()

-    return df.to_json(orient='records')
+    return df

From bcb45a59c8e42a65dcbd93d3c861273626d854d1 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Thu, 3 Dec 2020 08:46:34 -0500
Subject: [PATCH 03/22] allow cors

---
 crc/api/workflow.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index 7f517e22..a5f2d3b1 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -20,7 +20,7 @@ from crc.services.study_service import StudyService
 from crc.services.user_service import UserService
 from crc.services.workflow_processor import WorkflowProcessor
 from crc.services.workflow_service import WorkflowService
-
+from flask_cors import cross_origin

 def all_specifications():
     schema = WorkflowSpecModelSchema(many=True)
@@ -267,7 +267,7 @@ def join_uuids(uuids):
                                                            # in the same order
     return hashlib.md5(combined_uuids.encode('utf8')).hexdigest()  # make a hash of the hashes

-@cross_origin()  # allow all cross origin requests so we can hit it with a dev box
+@cross_origin()  # allow even dev boxes to hit this without restrictions
 def get_all_spec_state():
     df = get_all_spec_state_dataframe()
     return df.reset_index().to_json(orient='records')
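The @cross_origin() decorator used here is the stock flask-cors one; applied per view it opens just that endpoint to requests from any origin, rather than loosening CORS for the whole app. A minimal sketch of the same pattern on a toy app (assumed standalone setup, not part of this repo):

    from flask import Flask
    from flask_cors import cross_origin

    app = Flask(__name__)

    @app.route('/status')
    @cross_origin()  # adds Access-Control-Allow-Origin: * to this view only
    def status():
        return {'ok': True}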
From 0f59d3de096f2f938e601d6ef75ffc62611f5e3c Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Thu, 3 Dec 2020 14:45:57 -0500
Subject: [PATCH 04/22] add endpoint that gets a record for all changed /new
 workflow_specs at a remote endpoint

---
 crc/api.yml         | 28 ++++++++++++++++++++++++++++
 crc/api/workflow.py | 39 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/crc/api.yml b/crc/api.yml
index 44138510..959d6061 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -100,10 +100,38 @@ paths:
             type: array
             items:
               $ref: "#/components/schemas/Study"
+  /workflow_spec/diff:
+    get:
+      operationId: crc.api.workflow.get_changed_workflows
+      summary: Provides a list of workflow specs and their signature
+      security: []  # Disable security for this endpoint only - we'll sanity check
+                    # in the endpoint
+      parameters:
+        - name: remote
+          in: query
+          required: true
+          description: The remote endpoint
+          schema:
+            type: string
+      tags:
+        - Workflow Spec States
+      responses:
+        '200':
+          description: An array of workflow specs, with last touched date and file signature.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/Study"
+
+
   /workflow_spec/all:
     get:
       operationId: crc.api.workflow.get_all_spec_state
       summary: Provides a list of workflow specs and their signature
+      security: []  # Disable security for this endpoint only - we'll sanity check
+                    # in the endpoint
       tags:
         - Workflow Spec States
       responses:
diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index a5f2d3b1..d899d4fb 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -21,6 +21,7 @@ from crc.services.study_service import StudyService
 from crc.services.user_service import UserService
 from crc.services.workflow_processor import WorkflowProcessor
 from crc.services.workflow_service import WorkflowService
 from flask_cors import cross_origin
+import requests

 def all_specifications():
     schema = WorkflowSpecModelSchema(many=True)
@@ -267,10 +268,44 @@ def join_uuids(uuids):
                                                            # in the same order
     return hashlib.md5(combined_uuids.encode('utf8')).hexdigest()  # make a hash of the hashes

+def get_changed_workflows(remote):
+    """
+    gets a remote endpoint - gets the workflows and then
+    determines what workflows are different from the remote endpoint
+    """
+    x = requests.get('http://'+remote+'/v1.0/workflow_spec/all')
+
+    # This is probably very insecure and may allow cross site attacks - fix later
+    remote = pd.DataFrame(eval(x.text))
+    local = get_all_spec_state_dataframe().reset_index()
+    different = remote.merge(local, right_on=['workflow_spec_id','md5_hash'], left_on=['workflow_spec_id',
+                             'md5_hash'], how = 'outer' , indicator=True).loc[
+        lambda x : x['_merge']!='both']
+
+    different.loc[different['_merge']=='left_only','location'] = 'remote'
+    different.loc[different['_merge']=='right_only','location'] = 'local'
+    #changedfiles = different.copy()
+    index = different['date_created_x'].isnull()
+    different.loc[index,'date_created_x'] = different[index]['date_created_y']
+    different = different[['workflow_spec_id','date_created_x','location']].copy()
+    different.columns=['workflow_spec_id','date_created','location']
+    changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first()
+    remote_spec_ids = remote[['workflow_spec_id']]
+    local_spec_ids = local[['workflow_spec_id']]
+
+    left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])]
+    right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])]
+    changedfiles['new'] = False
+    changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True
+    output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])]
+    return output.reset_index().to_dict(orient='records')
+
+
 def get_all_spec_state():
     df = get_all_spec_state_dataframe()
-    return df.reset_index().to_json(orient='records')
+    return df.reset_index().to_dict(orient='records')
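At this point the diff endpoint can be exercised directly; it is still unauthenticated (the API key only arrives in a later patch). A sketch of a client call, with placeholder host names rather than real deployments:

    import requests

    # ask the local box how it differs from a (hypothetical) remote box
    resp = requests.get('http://localhost:5000/v1.0/workflow_spec/diff',
                        params={'remote': 'remote.example.com'})
    for row in resp.json():
        # 'location' says which side has the newer copy; 'new' flags specs
        # that only exist on the remote side
        print(row['workflow_spec_id'], row['date_created'], row['location'], row['new'])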
From 10dce542ec612027ba2d2a8d995f4c48410708d0 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Thu, 3 Dec 2020 15:27:45 -0500
Subject: [PATCH 05/22] documentation change

---
 crc/api/workflow.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index d899d4fb..a65f6dda 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -304,6 +304,11 @@ def get_changed_workflows(remote):


 def get_all_spec_state():
+    """
+    Return a list of all workflow specs along with last updated date and a
+    thumbprint of all of the files that are used for that workflow_spec
+    Convert into a dict list from a dataframe
+    """
     df = get_all_spec_state_dataframe()
     return df.reset_index().to_dict(orient='records')

@@ -311,6 +316,7 @@ def get_all_spec_state_dataframe():
     """
     Return a list of all workflow specs along with last updated date and a
     thumbprint of all of the files that are used for that workflow_spec
+    Return a dataframe
     """
     x = session.query(FileDataModel).join(FileModel)

From 3e8d4ca7c942af25f5a3f8f0608417068462de5b Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Fri, 4 Dec 2020 10:23:03 -0500
Subject: [PATCH 06/22] Add some inline documentation to make the process more
 clear

---
 crc/api/workflow.py | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index a65f6dda..726aa78e 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -273,31 +273,50 @@ def get_changed_workflows(remote):
     gets a remote endpoint - gets the workflows and then
     determines what workflows are different from the remote endpoint
     """
-    x = requests.get('http://'+remote+'/v1.0/workflow_spec/all')
+    response = requests.get('http://'+remote+'/v1.0/workflow_spec/all')

     # This is probably very insecure and may allow cross site attacks - fix later
-    remote = pd.DataFrame(eval(x.text))
+    remote = pd.DataFrame(json.loads(response.text))
+    # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index
     local = get_all_spec_state_dataframe().reset_index()
-    different = remote.merge(local, right_on=['workflow_spec_id','md5_hash'], left_on=['workflow_spec_id',
-                             'md5_hash'], how = 'outer' , indicator=True).loc[
-        lambda x : x['_merge']!='both']

+    # merge these on workflow spec id and hash - this will
+    # make two different date columns date_x and date_y
+    different = remote.merge(local,
+                             right_on=['workflow_spec_id','md5_hash'],
+                             left_on=['workflow_spec_id','md5_hash'],
+                             how = 'outer' ,
+                             indicator=True).loc[lambda x : x['_merge']!='both']
+
+    # each line has a tag on it - if it was in the left or the right,
+    # label it so we know if that was on the remote or local machine
     different.loc[different['_merge']=='left_only','location'] = 'remote'
     different.loc[different['_merge']=='right_only','location'] = 'local'
-    #changedfiles = different.copy()
+
+    # this takes the different date_created_x and date_created_y columns and
+    # combines them back into one date_created column
     index = different['date_created_x'].isnull()
     different.loc[index,'date_created_x'] = different[index]['date_created_y']
     different = different[['workflow_spec_id','date_created_x','location']].copy()
     different.columns=['workflow_spec_id','date_created','location']
+
+    # our different list will have multiple entries for a workflow if there is a version on either side
+    # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
     changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first()
+
+    # get an exclusive or list of workflow ids - that is we want lists of files that are
+    # on one machine or the other, but not both
     remote_spec_ids = remote[['workflow_spec_id']]
     local_spec_ids = local[['workflow_spec_id']]
     left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])]
     right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])]
+
+    # flag files as new that are only on the remote box and remove the files that are only on the local box
     changedfiles['new'] = False
     changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True
     output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])]
+
+    # return the list as a dict, let swagger convert it to json
     return output.reset_index().to_dict(orient='records')
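The outer merge with indicator=True that these comments describe is easiest to see on toy frames. A rough sketch with made-up hashes (not CR Connect code):

    import pandas as pd

    remote = pd.DataFrame({'workflow_spec_id': ['a', 'b'], 'md5_hash': ['h1', 'h2']})
    local  = pd.DataFrame({'workflow_spec_id': ['b', 'c'], 'md5_hash': ['h2', 'h3']})
    merged = remote.merge(local, on=['workflow_spec_id', 'md5_hash'],
                          how='outer', indicator=True)
    # _merge is 'both' for rows identical on both sides, 'left_only' for
    # remote-only rows and 'right_only' for local-only rows
    print(merged.loc[merged['_merge'] != 'both'])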
From d41d018fe34fa91569c5c64225c69622514e0201 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Fri, 4 Dec 2020 11:49:07 -0500
Subject: [PATCH 07/22] For a given workflow - find the files that are
 different from a remote endpoint for the same workflow

---
 crc/api.yml         | 61 +++++++++++++++++++++++++++++++++
 crc/api/workflow.py | 81 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 140 insertions(+), 2 deletions(-)

diff --git a/crc/api.yml b/crc/api.yml
index 959d6061..afeb2bc2 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -126,10 +126,69 @@ paths:
           $ref: "#/components/schemas/Study"

+  /workflow_spec/{workflow_spec_id}/files:
+    get:
+      operationId: crc.api.workflow.get_workflow_spec_files
+      summary: Provides a list of workflow specs and their signature
+      security: []  # Disable security for this endpoint only - we'll sanity check
+                    # in the endpoint
+      parameters:
+        - name: workflow_spec_id
+          in: path
+          required: true
+          description: The workflow_spec id
+          schema:
+            type: string
+
+      tags:
+        - Workflow Spec States
+      responses:
+        '200':
+          description: An array of workflow specs, with last touched date and file signature.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/Study"
+
+  /workflow_spec/{workflow_spec_id}/files/diff:
+    get:
+      operationId: crc.api.workflow.get_changed_files
+      summary: Provides a list of workflow specs and their signature
+      security: []  # Disable security for this endpoint only - we'll sanity check
+                    # in the endpoint
+      parameters:
+        - name: workflow_spec_id
+          in: path
+          required: true
+          description: The workflow_spec id
+          schema:
+            type: string
+        - name: remote
+          in: query
+          required: true
+          description: The remote endpoint
+          schema:
+            type: string
+
+      tags:
+        - Workflow Spec States
+      responses:
+        '200':
+          description: An array of workflow specs, with last touched date and file signature.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/Study"
+
+
   /workflow_spec/all:
     get:
       operationId: crc.api.workflow.get_all_spec_state
-      summary: Provides a list of workflow specs and their signature
+      summary: Provides a list of files for a workflow spec
       security: []  # Disable security for this endpoint only - we'll sanity check
                     # in the endpoint
       tags:
diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index 726aa78e..9484d183 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -320,9 +320,88 @@ def get_changed_workflows(remote):
     # return the list as a dict, let swagger convert it to json
     return output.reset_index().to_dict(orient='records')

+def get_changed_files(remote,workflow_spec_id):
+    """
+    gets a remote endpoint - gets the files for a workflow_spec on both
+    local and remote and determines what files have been changed and returns a list of those
+    files
+    """
+    response = requests.get('http://'+remote+'/v1.0/workflow_spec/'+workflow_spec_id+'/files')
+    # This is probably very insecure and may allow cross site attacks - fix later
+    remote = pd.DataFrame(json.loads(response.text))
+    # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index
+    local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index()
+    local['md5_hash'] = local['md5_hash'].astype('str')
+    different = remote.merge(local,
+                             right_on=['filename','md5_hash'],
+                             left_on=['filename','md5_hash'],
+                             how = 'outer' ,
+                             indicator=True).loc[lambda x : x['_merge']!='both']
+
+    # each line has a tag on it - if it was in the left or the right,
+    # label it so we know if that was on the remote or local machine
+    different.loc[different['_merge']=='left_only','location'] = 'remote'
+    different.loc[different['_merge']=='right_only','location'] = 'local'
+
+    # this takes the different date_created_x and date_created_y columns and
+    # combines them back into one date_created column
+    index = different['date_created_x'].isnull()
+    different.loc[index,'date_created_x'] = different[index]['date_created_y']
+    different = different[['date_created_x','filename','location']].copy()
+
+    different.columns=['date_created','filename','location']
+    # our different list will have multiple entries for a workflow if there is a version on either side
+    # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
+    changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first()
+
+    # get an exclusive or list of workflow ids - that is we want lists of files that are
+    # on one machine or the other, but not both
+    remote_spec_ids = remote[['filename']]
+    local_spec_ids = local[['filename']]
+    left = remote_spec_ids[~remote_spec_ids['filename'].isin(local_spec_ids['filename'])]
+    right = local_spec_ids[~local_spec_ids['filename'].isin(remote_spec_ids['filename'])]
+    changedfiles['new'] = False
+    changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True
+    changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True
+
+    # return the list as a dict, let swagger convert it to json
+    return changedfiles.reset_index().to_dict(orient='records')
+
+

 def get_all_spec_state():
     """
     Return a list of all workflow specs along with last updated date and a
     thumbprint of all of the files that are used for that workflow_spec
     Convert into a dict list from a dataframe
     """
     df = get_all_spec_state_dataframe()
     return df.reset_index().to_dict(orient='records')

+
+def get_workflow_spec_files(workflow_spec_id):
+    """
+    Return a list of all workflow specs along with last updated date and a
+    thumbprint of all of the files that are used for that workflow_spec
+    Convert into a dict list from a dataframe
+    """
+    df = get_workflow_spec_files_dataframe(workflow_spec_id)
+    return df.reset_index().to_dict(orient='records')
+
+
+def get_workflow_spec_files_dataframe(workflowid):
+    """
+    Return a list of all files for a workflow_spec along with last updated date and a
+    hash so we can determine file differences for a changed workflow on a box.
+    Return a dataframe
+    """
+    x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id==workflowid)
+    # there might be a cleaner way of getting a data frame from some of the
+    # fields in the ORM - but this works OK
+    filelist = []
+    for file in x:
+        filelist.append({'file_model_id':file.file_model_id,
+                         'workflow_spec_id': file.file_model.workflow_spec_id,
+                         'md5_hash':file.md5_hash,
+                         'filename':file.file_model.name,
+                         'date_created':file.date_created})
+    df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
+    df['date_created'] = df['date_created'].astype('str')
+    return df
+
+
+
 def get_all_spec_state_dataframe():
     """
     Return a list of all workflow specs along with last updated date and a
     thumbprint of all of the files that are used for that workflow_spec
     Return a dataframe
     """
     x = session.query(FileDataModel).join(FileModel)
-
     # there might be a cleaner way of getting a data frame from some of the
     # fields in the ORM - but this works OK
     filelist = []
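One subtlety in get_changed_files above: the local md5_hash column is cast to str before the merge. The local hashes do not come out of the ORM as plain strings (the join_uuids docstring treats them as uuids), while the remote side arrives as JSON strings, and pandas will not match merge keys of differing types. A small demonstration on hand-built frames (illustrative only):

    import uuid
    import pandas as pd

    local = pd.DataFrame({'md5_hash': [uuid.UUID('f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b')]})
    remote = pd.DataFrame({'md5_hash': ['f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b']})

    # no rows match while one side holds UUID objects and the other strings
    print(local.merge(remote, on='md5_hash', how='inner').shape[0])  # 0
    local['md5_hash'] = local['md5_hash'].astype('str')
    print(local.merge(remote, on='md5_hash', how='inner').shape[0])  # 1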
From cad613cf6395c5fd3fbfb03e5f6f14b205fe0294 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Fri, 4 Dec 2020 12:00:02 -0500
Subject: [PATCH 08/22] Fix problem when method is run for a workflow that is
 non-existent locally

---
 crc/api/workflow.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index 9484d183..983097a6 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -405,6 +405,8 @@ def get_workflow_spec_files_dataframe(workflowid):
                          'md5_hash':file.md5_hash,
                          'filename':file.file_model.name,
                          'date_created':file.date_created})
+    if len(filelist) == 0:
+        return pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created'])
     df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
     df['date_created'] = df['date_created'].astype('str')
     return df
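The guard added here matters because a frame built from an empty list has no columns at all, so the sort_values('date_created') on the next line would raise a KeyError instead of returning an empty result; building the empty frame with explicit columns keeps the downstream code path alive. A quick illustration:

    import pandas as pd

    empty = pd.DataFrame([])
    # empty.sort_values('date_created')  # would raise KeyError: 'date_created'

    guarded = pd.DataFrame(columns=['file_model_id', 'md5_hash', 'date_created'])
    print(guarded.sort_values('date_created'))  # no rows, but the columns exist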
From f26a8615a431dbd4a31e448cd29c3b4048a8a4af Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Mon, 7 Dec 2020 08:49:38 -0500
Subject: [PATCH 09/22] Get more file details so we can fill out everything
 locally and also add a method to download the file by md5_hash

---
 crc/api.yml         | 23 +++++++++++++++++++++++
 crc/api/file.py     |  5 ++++-
 crc/api/workflow.py | 28 ++++++++++++++++++++++------
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/crc/api.yml b/crc/api.yml
index afeb2bc2..b35ad793 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -578,6 +578,29 @@ paths:
       responses:
         '204':
           description: The file has been removed.
+  /file/{md5_hash}/data:
+    parameters:
+      - name: md5_hash
+        in: path
+        required: true
+        description: The md5 hash of the file requested
+        schema:
+          type: string
+    get:
+      operationId: crc.api.file.get_file_data_by_hash
+      summary: Returns only the file contents
+      security: []  # Disable security for this endpoint only.
+      tags:
+        - Files
+      responses:
+        '200':
+          description: Returns the actual file
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+                example: ''
   /file/{file_id}/data:
     parameters:
       - name: file_id
diff --git a/crc/api/file.py b/crc/api/file.py
index 5cf54221..4f0b655f 100644
--- a/crc/api/file.py
+++ b/crc/api/file.py
@@ -6,7 +6,7 @@ from flask import send_file

 from crc import session
 from crc.api.common import ApiError
-from crc.models.file import FileSchema, FileModel, File, FileModelSchema
+from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.file_service import FileService

@@ -99,6 +99,9 @@ def update_file_data(file_id):
     file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
     return FileSchema().dump(to_file_api(file_model))

+def get_file_data_by_hash(md5_hash):
+    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
+    return get_file_data(filedatamodel.file_model_id)

 def get_file_data(file_id, version=None):
     file_data = FileService.get_file_data(file_id, version)
diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index 983097a6..f9c6ac58 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -345,11 +345,15 @@ def get_changed_files(remote,workflow_spec_id):

     # this takes the different date_created_x and date_created_y columns and
     # combines them back into one date_created column
-    index = different['date_created_x'].isnull()
-    different.loc[index,'date_created_x'] = different[index]['date_created_y']
-    different = different[['date_created_x','filename','location']].copy()
+    dualfields = ['date_created','type','primary','content_type','primary_process_id']
+    for merge in dualfields:
+        index = different[merge+'_x'].isnull()
+        different.loc[index,merge+'_x'] = different[index][merge+'_y']

-    different.columns=['date_created','filename','location']
+    fieldlist = [fld+'_x' for fld in dualfields]
+    different = different[ fieldlist + ['md5_hash','filename','location']].copy()
+
+    different.columns=dualfields+['md5_hash','filename','location']
     # our different list will have multiple entries for a workflow if there is a version on either side
     # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
     changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first()
@@ -363,7 +367,7 @@ def get_changed_files(remote,workflow_spec_id):
     changedfiles['new'] = False
     changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True
     changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True
-
+    changedfiles = changedfiles.replace({pd.np.nan: None})
     # return the list as a dict, let swagger convert it to json
     return changedfiles.reset_index().to_dict(orient='records')

@@ -404,9 +408,21 @@ def get_workflow_spec_files_dataframe(workflowid):
                          'workflow_spec_id': file.file_model.workflow_spec_id,
                          'md5_hash':file.md5_hash,
                          'filename':file.file_model.name,
+                         'type':file.file_model.type.name,
+                         'primary':file.file_model.primary,
+                         'content_type':file.file_model.content_type,
+                         'primary_process_id':file.file_model.primary_process_id,
                          'date_created':file.date_created})
     if len(filelist) == 0:
-        return pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created'])
+        return pd.DataFrame(columns=['file_model_id',
+                                     'workflow_spec_id',
+                                     'md5_hash',
+                                     'filename',
+                                     'type',
+                                     'primary',
+                                     'content_type',
+                                     'primary_process_id',
+                                     'date_created'])
     df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
     df['date_created'] = df['date_created'].astype('str')
     return df
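With the hash endpoint in place, a sync client can fetch any file version knowing only the md5_hash reported by the files/diff listing. A sketch with a placeholder host and a hash borrowed from the schema examples later in this series (note a later patch renames the path to /hash_data):

    import requests

    md5_hash = 'f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b'  # as reported by the diff listing
    response = requests.get('http://localhost:5000/v1.0/file/' + md5_hash + '/data')
    with open('data_security_plan.dmn', 'wb') as handle:
        handle.write(response.content)  # raw bytes, per the octet-stream response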
From 44c72115ae1cb2ff1134202485f112c55a349267 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Mon, 7 Dec 2020 08:50:20 -0500
Subject: [PATCH 10/22] Make sure we get the file we intended

---
 crc/api/file.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crc/api/file.py b/crc/api/file.py
index 4f0b655f..861f9f04 100644
--- a/crc/api/file.py
+++ b/crc/api/file.py
@@ -101,7 +101,7 @@ def update_file_data(file_id):

 def get_file_data_by_hash(md5_hash):
     filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
-    return get_file_data(filedatamodel.file_model_id)
+    return get_file_data(filedatamodel.file_model_id,version=filedatamodel.version)

 def get_file_data(file_id, version=None):
     file_data = FileService.get_file_data(file_id, version)

From 0e1aa59fa18373e87aeb15a09c031499e5124caa Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Tue, 8 Dec 2020 13:42:01 -0500
Subject: [PATCH 11/22] Make a change to make sure that if there is a new file
 locally that is not present remotely when we pull from the remote, the new
 local file gets deleted. Also: add several things to the requirements.txt
 that should have been there in the first place.

---
 crc/api.yml             | 35 +++++++++++++++++++++++-
 crc/api/workflow.py     | 56 +++++++++++++++++++++++++++++++++++++++--
 crc/models/workflow.py  |  2 +-
 deploy/requirements.txt |  6 +++++
 4 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/crc/api.yml b/crc/api.yml
index b35ad793..3aa1f5d8 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -152,6 +152,38 @@ paths:
         items:
           $ref: "#/components/schemas/Study"

+  /workflow_spec/{workflow_spec_id}/files/sync:
+    get:
+      operationId: crc.api.workflow.sync_changed_files
+      summary: Provides a list of files that were updated
+      security: []  # Disable security for this endpoint only - we'll sanity check
+                    # in the endpoint
+      parameters:
+        - name: workflow_spec_id
+          in: path
+          required: true
+          description: The workflow_spec id
+          schema:
+            type: string
+        - name: remote
+          in: query
+          required: true
+          description: The remote endpoint
+          schema:
+            type: string
+
+      tags:
+        - Workflow Spec States
+      responses:
+        '200':
+          description: An array of workflow specs, with last touched date and file signature.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/Study"
+
   /workflow_spec/{workflow_spec_id}/files/diff:
     get:
       operationId: crc.api.workflow.get_changed_files
@@ -334,6 +366,7 @@ paths:
     get:
       operationId: crc.api.workflow.get_workflow_specification
       summary: Returns a single workflow specification
+      security: []
      tags:
        - Workflow Specifications
      responses:
@@ -578,7 +611,7 @@ paths:
       responses:
         '204':
           description: The file has been removed.
- /file/{md5_hash}/data: + /file/{md5_hash}/hash_data: parameters: - name: md5_hash in: path diff --git a/crc/api/workflow.py b/crc/api/workflow.py index f9c6ac58..fb0c1e4b 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -1,6 +1,7 @@ import hashlib import json import uuid +from io import StringIO from hashlib import md5 import pandas as pd @@ -319,8 +320,56 @@ def get_changed_workflows(remote): # return the list as a dict, let swagger convert it to json return output.reset_index().to_dict(orient='records') +def sync_all_changed_files(remote): + pass -def get_changed_files(remote,workflow_spec_id): +def sync_changed_files(remote,workflow_spec_id): + # make sure that spec is local before syncing files + remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id) + specdict = json.loads(remotespectext.text) + localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() + if localspec is None: + localspec = WorkflowSpecModel() + localspec.id = workflow_spec_id + if specdict['category'] == None: + localspec.category = None + else: + localspec.category = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.id + == specdict['category']['id']).first() + localspec.display_order = specdict['display_order'] + localspec.display_name = specdict['display_name'] + localspec.name = specdict['name'] + localspec.description = specdict['description'] + session.add(localspec) + + changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True) + updatefiles = changedfiles[~((changedfiles['new']==True) & (changedfiles['location']=='local'))] + deletefiles = changedfiles[((changedfiles['new']==True) & (changedfiles['location']=='local'))] + for delfile in deletefiles.reset_index().to_dict(orient='records'): + currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, + FileModel.name == delfile['filename']).first() + FileService.delete_file(currentfile.id) + for updatefile in updatefiles.reset_index().to_dict(orient='records'): + currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, + FileModel.name == updatefile['filename']).first() + if not currentfile: + currentfile = FileModel() + currentfile.name = updatefile['filename'] + currentfile.workflow_spec_id = workflow_spec_id + + currentfile.date_created = updatefile['date_created'] + currentfile.type = updatefile['type'] + currentfile.primary = updatefile['primary'] + currentfile.content_type = updatefile['content_type'] + currentfile.primary_process_id = updatefile['primary_process_id'] + session.add(currentfile) + + response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data') + FileService.update_file(currentfile,response.content,updatefile['type']) + session.commit() + + +def get_changed_files(remote,workflow_spec_id,as_df=False): """ gets a remote endpoint - gets the files for a workflow_spec on both local and remote and determines what files have been change and returns a list of those @@ -369,7 +418,10 @@ def get_changed_files(remote,workflow_spec_id): changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True changedfiles = changedfiles.replace({pd.np.nan: None}) # return the list as a dict, let swagger convert it to json - return changedfiles.reset_index().to_dict(orient='records') + if as_df: + return changedfiles + else: + return changedfiles.reset_index().to_dict(orient='records') diff --git a/crc/models/workflow.py 
b/crc/models/workflow.py
index 718dfccf..0da32aec 100644
--- a/crc/models/workflow.py
+++ b/crc/models/workflow.py
@@ -69,7 +69,7 @@ class WorkflowStatus(enum.Enum):


 class WorkflowSpecDependencyFile(db.Model):
-    """Connects a workflow to the version of the specification files it depends on to execute"""
+    """Connects to a workflow to test the version of the specification files it depends on to execute"""
     file_data_id = db.Column(db.Integer, db.ForeignKey(FileDataModel.id), primary_key=True)
     workflow_id = db.Column(db.Integer, db.ForeignKey("workflow.id"), primary_key=True)

diff --git a/deploy/requirements.txt b/deploy/requirements.txt
index 420a888f..af827f27 100644
--- a/deploy/requirements.txt
+++ b/deploy/requirements.txt
@@ -22,8 +22,10 @@ docutils==0.16
 docxtpl==0.9.2
 et-xmlfile==1.0.1
 flask==1.1.2
+flask-admin==1.5.7
 flask-bcrypt==0.7.1
 flask-cors==3.0.8
+flask-mail==0.9.1
 flask-marshmallow==0.12.0
 flask-migrate==2.5.3
 flask-restful==0.3.8
@@ -55,17 +57,21 @@ pandas==1.0.3
 psycopg2-binary==2.8.5
 pyasn1==0.4.8
 pycparser==2.20
+PyGithub==1.53
 pygments==2.6.1
 pyjwt==1.7.1
 pyparsing==2.4.7
 pyrsistent==0.16.0
+python-box==5.2.0
 python-dateutil==2.8.1
 python-docx==0.8.10
 python-editor==1.0.4
+python-Levenshtein==0.12.0
 pytz==2020.1
 pyyaml==5.3.1
 recommonmark==0.6.0
 requests==2.23.0
+sentry-sdk==0.14.4
 six==1.14.0
 snowballstemmer==2.0.0
 soupsieve==2.0.1

From c57b17df1ed1beabdde7084eaba72f25013b4870 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Wed, 9 Dec 2020 12:13:17 -0500
Subject: [PATCH 12/22] Add a robust way of adding an API key, update examples
 and documentation for swagger API and add the ability to completely sync the
 local system from the remote system.

---
 config/default.py   |   8 ++
 crc/api.yml         | 177 +++++++++++++++++++++++++++++++++++++-------
 crc/api/workflow.py |  56 +++++++++++---
 3 files changed, 204 insertions(+), 37 deletions(-)

diff --git a/config/default.py b/config/default.py
index 13fc79ab..44651fd5 100644
--- a/config/default.py
+++ b/config/default.py
@@ -6,6 +6,14 @@ basedir = os.path.abspath(os.path.dirname(__file__))

 JSON_SORT_KEYS = False  # CRITICAL.  Do not sort the data when returning values to the front end.

+# The API_TOKEN is used to ensure that the
+# workflow sync can work without a lot of
+# back and forth.
+# you may want to change this to something simple for testing!!
+# NB, if you change this in the local endpoint, +# it needs to be changed in the remote endpoint as well +API_TOKEN = 'af95596f327c9ecc007b60414fc84b61' + NAME = "CR Connect Workflow" FLASK_PORT = environ.get('PORT0') or environ.get('FLASK_PORT', default="5000") CORS_ALLOW_ORIGINS = re.split(r',\s*', environ.get('CORS_ALLOW_ORIGINS', default="localhost:4200, localhost:5002")) diff --git a/crc/api.yml b/crc/api.yml index 3aa1f5d8..2789e249 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -100,11 +100,12 @@ paths: type: array items: $ref: "#/components/schemas/Study" - /workflow_spec/diff: + /workflow_spec/pullall: get: - operationId: crc.api.workflow.get_changed_workflows - summary: Provides a list of workflow specs and their signature - security: [] # Disable security for this endpoint only - we'll sanity check + operationId: crc.api.workflow.sync_all_changed_workflows + summary: Sync all workflows that have changed on the remote side and provide a list of the results + security: + - ApiKeyAuth : [] # in the endpoint parameters: - name: remote @@ -117,21 +118,51 @@ paths: - Workflow Spec States responses: '200': - description: An array of workflow specs, with last touched date and file signature. + description: An array of workflow specs that were synced from remote. content: application/json: schema: type: array items: - $ref: "#/components/schemas/Study" + type: string + example : ['top_level_workflow','3b495037-f7d4-4509-bf58-cee41c0c6b0e'] + + + + + /workflow_spec/diff: + get: + operationId: crc.api.workflow.get_changed_workflows + summary: Provides a list of workflow that differ from remote and if it is new or not + security : + - ApiKeyAuth : [] + # in the endpoint + parameters: + - name: remote + in: query + required: true + description: The remote endpoint + schema: + type: string + tags: + - Workflow Spec States + responses: + '200': + description: An array of workflow specs, with last touched date and which one is most recent. + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/WorkflowSpecDiffList" /workflow_spec/{workflow_spec_id}/files: get: operationId: crc.api.workflow.get_workflow_spec_files - summary: Provides a list of workflow specs and their signature - security: [] # Disable security for this endpoint only - we'll sanity check - # in the endpoint + summary: Provides a list of files for a workflow spec on this machine. + security : + - ApiKeyAuth : [] parameters: - name: workflow_spec_id in: path @@ -144,20 +175,20 @@ paths: - Workflow Spec States responses: '200': - description: An array of workflow specs, with last touched date and file signature. + description: An array of files for a workflow spec on the local system, with details. content: application/json: schema: type: array items: - $ref: "#/components/schemas/Study" + $ref: "#/components/schemas/WorkflowSpecFilesList" /workflow_spec/{workflow_spec_id}/files/sync: get: operationId: crc.api.workflow.sync_changed_files - summary: Provides a list of files that were updated - security: [] # Disable security for this endpoint only - we'll sanity check - # in the endpoint + summary: Syncs files from a workflow on a remote system and provides a list of files that were updated + security : + - ApiKeyAuth : [] parameters: - name: workflow_spec_id in: path @@ -176,20 +207,23 @@ paths: - Workflow Spec States responses: '200': - description: An array of workflow specs, with last touched date and file signature. + description: A list of files that were synced for the workflow. 
          content:
            application/json:
              schema:
                type: array
                items:
                  type : string
                  example : ["data_security_plan.dmn",'some_other_file.xml']


   /workflow_spec/{workflow_spec_id}/files/diff:
     get:
       operationId: crc.api.workflow.get_changed_files
-      summary: Provides a list of workflow specs and their signature
+      summary: Provides a list of files for a workflow specs that differ from remote and their signature
+      security :
+        - ApiKeyAuth : []
       parameters:
         - name: workflow_spec_id
           in: path
           required: true
           description: The workflow_spec id
           schema:
             type: string
         - name: remote
           in: query
           required: true
           description: The remote endpoint
           schema:
             type: string
       tags:
         - Workflow Spec States
       responses:
         '200':
-          description: An array of workflow specs, with last touched date and file signature.
+          description: An array of files that are different from remote, with last touched date and file signature.
           content:
             application/json:
               schema:
                 type: array
                 items:
-                  $ref: "#/components/schemas/Study"
+                  $ref: "#/components/schemas/WorkflowSpecFilesDiff"


   /workflow_spec/all:
     get:
       operationId: crc.api.workflow.get_all_spec_state
-      summary: Provides a list of files for a workflow spec
+      summary: Provides a list of workflow specs, last update date and thumbprint
+      security:
+        - ApiKeyAuth : []
       tags:
         - Workflow Spec States
       responses:
         '200':
           description: An array of workflow specs, with last touched date and file signature.
           content:
             application/json:
               schema:
                 type: array
                 items:
-                  $ref: "#/components/schemas/Study"
+                  $ref: "#/components/schemas/WorkflowSpecAll"


   /study/all:
     get:
       operationId: crc.api.study.all_studies
       scheme: bearer
       bearerFormat: JWT
       x-bearerInfoFunc: crc.api.user.verify_token_admin
+    ApiKeyAuth :
+      type : apiKey
+      in : header
+      name : X-CR-API-KEY
+      x-apikeyInfoFunc: crc.api.workflow.verify_token
+
   schemas:
     User:
       properties:
         id:
           type: string
+    WorkflowSpecDiffList:
+      properties:
+        workflow_spec_id:
+          type: string
+          example : top_level_workflow
+        date_created :
+          type: string
+          example : 2020-12-09 16:55:12.951500+00:00
+        location :
+          type : string
+          example : remote
+        new :
+          type : boolean
+          example : false
+    WorkflowSpecFilesList:
+      properties:
+        file_model_id:
+          type : integer
+          example : 171
+        workflow_spec_id :
+          type: string
+          example : top_level_workflow
+        filename :
+          type: string
+          example : data_security_plan.dmn
+        date_created :
+          type: string
+          example : 2020-12-01 13:58:12.420333+00:00
+        type:
+          type : string
+          example : dmn
+        primary :
+          type : boolean
+          example : false
+        content_type:
+          type: string
+          example : text/xml
+        primary_process_id:
+          type : string
+          example : null
+        md5_hash:
+          type: string
+          example: f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b
+
+
+    WorkflowSpecFilesDiff:
+      properties:
+        filename :
+          type: string
+          example : data_security_plan.dmn
+        date_created :
+          type: string
+          example : 2020-12-01 13:58:12.420333+00:00
+        type:
+          type : string
+          example : dmn
+        primary :
+          type : boolean
+          example : false
+        content_type:
+          type: string
+          example : text/xml
+        primary_process_id:
+          type : string
+          example : null
+        md5_hash:
+          type: string
+          example: f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b
+        location:
+          type : string
+          example : remote
+        new:
+          type: boolean
+          example : false
+
+    WorkflowSpecAll:
+      properties:
+        workflow_spec_id :
+          type: string
+          example : acaf1258-43b4-437e-8846-f612afa66811
+        date_created :
+          type: string
+          example : 2020-12-01 13:58:12.420333+00:00
+        md5_hash:
+          type: string
+          example: c30fd597f21715018eab12f97f9d4956
     Study:
       properties:
         id:
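Since the ApiKeyAuth scheme above is just a shared-secret header, any client holding the configured token can drive the sync endpoints. A hedged sketch of a client call (placeholder hosts; the token is the testing default from config/default.py in this patch, not a production value):

    import requests

    token = 'af95596f327c9ecc007b60414fc84b61'  # must match API_TOKEN on the server
    response = requests.get(
        'http://localhost:5000/v1.0/workflow_spec/diff',
        params={'remote': 'remote.example.com'},
        headers={'X-CR-API-KEY': token})
    for spec in response.json():
        print(spec['workflow_spec_id'], spec['location'], spec['new'])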
diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index fb0c1e4b..cc290435 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -7,7 +7,7 @@ from hashlib import md5
 import pandas as pd
 from SpiffWorkflow.util.deep_merge import DeepMerge
 from flask import g
-from crc import session, db
+from crc import session, db, app
 from crc.api.common import ApiError, ApiErrorSchema
 from crc.models.api_models import WorkflowApi, WorkflowApiSchema, NavigationItem, NavigationItemSchema
 from crc.models.file import FileModel, LookupDataSchema, FileDataModel
@@ -269,12 +269,19 @@ def join_uuids(uuids):
                                                            # in the same order
     return hashlib.md5(combined_uuids.encode('utf8')).hexdigest()  # make a hash of the hashes

-def get_changed_workflows(remote):
+def verify_token(token, required_scopes):
+    if token == app.config['API_TOKEN']:
+        return {'scope':['any']}
+    else:
+        raise ApiError("permission_denied","API Token information is not correct")
+
+
+def get_changed_workflows(remote,as_df=False):
     """
     gets a remote endpoint - gets the workflows and then
     determines what workflows are different from the remote endpoint
     """
-    response = requests.get('http://'+remote+'/v1.0/workflow_spec/all')
+    response = requests.get('http://'+remote+'/v1.0/workflow_spec/all',headers={'X-CR-API-KEY':app.config['API_TOKEN']})

     # This is probably very insecure and may allow cross site attacks - fix later
     remote = pd.DataFrame(json.loads(response.text))
@@ -318,14 +325,25 @@ def get_changed_workflows(remote):
     output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])]

     # return the list as a dict, let swagger convert it to json
-    return output.reset_index().to_dict(orient='records')
+    if as_df:
+        return output
+    else:
+        return output.reset_index().to_dict(orient='records')
+
+
+def sync_all_changed_workflows(remote):
+
+    workflowsdf = get_changed_workflows(remote,as_df=True)
+    workflows = workflowsdf.reset_index().to_dict(orient='records')
+    for workflow in workflows:
+        sync_changed_files(remote,workflow['workflow_spec_id'])
+    return [x['workflow_spec_id'] for x in workflows]

-def sync_all_changed_files(remote):
-    pass

 def sync_changed_files(remote,workflow_spec_id):
     # make sure that spec is local before syncing files
-    remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id)
+    remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id,
+                                  headers={'X-CR-API-KEY': app.config['API_TOKEN']})
     specdict = json.loads(remotespectext.text)
     localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first()
     if localspec is None:
         localspec = WorkflowSpecModel()
         localspec.id = workflow_spec_id
@@ -343,13 +361,20 @@ def sync_changed_files(remote,workflow_spec_id):
         session.add(localspec)

     changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True)
+    if len(changedfiles)==0:
+        return []
     updatefiles = changedfiles[~((changedfiles['new']==True) & (changedfiles['location']=='local'))]
+    updatefiles = updatefiles.reset_index().to_dict(orient='records')

     deletefiles = changedfiles[((changedfiles['new']==True) & (changedfiles['location']=='local'))]
-    for delfile in deletefiles.reset_index().to_dict(orient='records'):
+    deletefiles = deletefiles.reset_index().to_dict(orient='records')
+
+    for delfile in deletefiles:
         currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
                                                       FileModel.name == delfile['filename']).first()
         FileService.delete_file(currentfile.id)
-    for updatefile in updatefiles.reset_index().to_dict(orient='records'):
+    for updatefile in updatefiles:
         currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
                                                       FileModel.name == updatefile['filename']).first()
         if not currentfile:
             currentfile = FileModel()
             currentfile.name = updatefile['filename']
             currentfile.workflow_spec_id = workflow_spec_id

         currentfile.date_created = updatefile['date_created']
         currentfile.type = updatefile['type']
         currentfile.primary = updatefile['primary']
         currentfile.content_type = updatefile['content_type']
         currentfile.primary_process_id = updatefile['primary_process_id']
         session.add(currentfile)

-        response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data')
+        response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data',
+                                headers={'X-CR-API-KEY': app.config['API_TOKEN']})
         FileService.update_file(currentfile,response.content,updatefile['type'])
     session.commit()
+    return [x['filename'] for x in updatefiles]


 def get_changed_files(remote,workflow_spec_id,as_df=False):
@@ -375,7 +402,8 @@ def get_changed_files(remote,workflow_spec_id,as_df=False):
     local and remote and determines what files have been changed and returns a list of those
     files
     """
-    response = requests.get('http://'+remote+'/v1.0/workflow_spec/'+workflow_spec_id+'/files')
+    response = requests.get('http://'+remote+'/v1.0/workflow_spec/'+workflow_spec_id+'/files',
+                            headers={'X-CR-API-KEY':app.config['API_TOKEN']})
     # This is probably very insecure and may allow cross site attacks - fix later
     remote = pd.DataFrame(json.loads(response.text))
     # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index
@@ -386,7 +414,11 @@ def get_changed_files(remote,workflow_spec_id,as_df=False):
                              left_on=['filename','md5_hash'],
                              how = 'outer' ,
                              indicator=True).loc[lambda x : x['_merge']!='both']
-
+    if len(different) == 0:
+        if as_df:
+            return different
+        else:
+            return []
     # each line has a tag on it - if it was in the left or the right,
     # label it so we know if that was on the remote or local machine
     different.loc[different['_merge']=='left_only','location'] = 'remote'

From e377a05deaec8a372a6c52a397a48baaff073ee5 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Wed, 9 Dec 2020 13:50:52 -0500
Subject: [PATCH 13/22] Add some punctuation

---
 crc/api.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crc/api.yml b/crc/api.yml
index 2789e249..78436178 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -220,7 +220,7 @@ paths:
   /workflow_spec/{workflow_spec_id}/files/diff:
     get:
       operationId: crc.api.workflow.get_changed_files
-      summary: Provides a list of files for a workflow specs that differ from remote and their signature
+      summary: Provides a list of files for a workflow specs that differ from remote and their signature.
       security :
         - ApiKeyAuth : []

From a8203ed01ddd86a48904c22caabda5d3acc3718a Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Thu, 10 Dec 2020 10:06:21 -0500
Subject: [PATCH 14/22] save changes before refactor

---
 config/default.py   |  2 +-
 crc/api/workflow.py | 20 +++++++++++++++++---
 example_data.py     |  1 +
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/config/default.py b/config/default.py
index 44651fd5..1570811c 100644
--- a/config/default.py
+++ b/config/default.py
@@ -12,7 +12,7 @@ JSON_SORT_KEYS = False  # CRITICAL.  Do not sort the data when returning values
 # you may want to change this to something simple for testing!!
# NB, if you change this in the local endpoint, # it needs to be changed in the remote endpoint as well -API_TOKEN = 'af95596f327c9ecc007b60414fc84b61' +API_TOKEN = environ.get('API_TOKEN', default = 'af95596f327c9ecc007b60414fc84b61') NAME = "CR Connect Workflow" FLASK_PORT = environ.get('PORT0') or environ.get('FLASK_PORT', default="5000") diff --git a/crc/api/workflow.py b/crc/api/workflow.py index cc290435..8da5c88a 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -352,8 +352,17 @@ def sync_changed_files(remote,workflow_spec_id): if specdict['category'] == None: localspec.category = None else: - localspec.category = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.id - == specdict['category']['id']).first() + localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name + == specdict['category']['name']).first() + if localcategory == None: + #category doesn't exist - lets make it + localcategory = WorkflowSpecCategoryModel() + localcategory.name = specdict['category']['name'] + localcategory.display_name = specdict['category']['display_name'] + localcategory.display_order = specdict['category']['display_order'] + session.add(localcategory) + localspec.category = localcategory + localspec.display_order = specdict['display_order'] localspec.display_name = specdict['display_name'] localspec.name = specdict['name'] @@ -372,7 +381,12 @@ def sync_changed_files(remote,workflow_spec_id): for delfile in deletefiles: currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, FileModel.name == delfile['filename']).first() - FileService.delete_file(currentfile.id) + + # it is more appropriate to archive the file than delete + # due to the fact that we might have workflows that are using the + # file data + currentfile.archived = True + session.add(currentfile) for updatefile in updatefiles: currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, diff --git a/example_data.py b/example_data.py index d9b4c73b..a46112a2 100644 --- a/example_data.py +++ b/example_data.py @@ -67,6 +67,7 @@ class ExampleDataLoader: display_order=6 ), ] + db.session.execute("select setval('workflow_spec_category_id_seq',7);") db.session.add_all(categories) db.session.commit() From 3f56dfe48477677206b01e303084ab355ee4a077 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Thu, 10 Dec 2020 10:46:23 -0500 Subject: [PATCH 15/22] Move all workflow sync stuff into new file Make changes to api naming scheme add some error checking around endpoints for missing/invalid endpoints --- crc/api.yml | 52 +++--- crc/api/workflow.py | 305 ----------------------------------- crc/api/workflow_sync.py | 336 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 362 insertions(+), 331 deletions(-) create mode 100644 crc/api/workflow_sync.py diff --git a/crc/api.yml b/crc/api.yml index 78436178..41e76f90 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -100,9 +100,9 @@ paths: type: array items: $ref: "#/components/schemas/Study" - /workflow_spec/pullall: + /workflow_sync/pullall: get: - operationId: crc.api.workflow.sync_all_changed_workflows + operationId: crc.api.workflow_sync.sync_all_changed_workflows summary: Sync all workflows that have changed on the remote side and provide a list of the results security: - ApiKeyAuth : [] @@ -115,7 +115,7 @@ paths: schema: type: string tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: An array of workflow specs that were synced 
from remote. @@ -130,9 +130,9 @@ paths: - /workflow_spec/diff: + /workflow_sync/diff: get: - operationId: crc.api.workflow.get_changed_workflows + operationId: crc.api.workflow_sync.get_changed_workflows summary: Provides a list of workflow that differ from remote and if it is new or not security : - ApiKeyAuth : [] @@ -145,7 +145,7 @@ paths: schema: type: string tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: An array of workflow specs, with last touched date and which one is most recent. @@ -157,9 +157,9 @@ paths: $ref: "#/components/schemas/WorkflowSpecDiffList" - /workflow_spec/{workflow_spec_id}/files: + /workflow_sync/{workflow_spec_id}/files: get: - operationId: crc.api.workflow.get_workflow_spec_files + operationId: crc.api.workflow_sync.get_workflow_spec_files summary: Provides a list of files for a workflow spec on this machine. security : - ApiKeyAuth : [] @@ -172,7 +172,7 @@ paths: type: string tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: An array of files for a workflow spec on the local system, with details. @@ -183,9 +183,9 @@ paths: items: $ref: "#/components/schemas/WorkflowSpecFilesList" - /workflow_spec/{workflow_spec_id}/files/sync: + /workflow_sync/{workflow_spec_id}/files/sync: get: - operationId: crc.api.workflow.sync_changed_files + operationId: crc.api.workflow_sync.sync_changed_files summary: Syncs files from a workflow on a remote system and provides a list of files that were updated security : - ApiKeyAuth : [] @@ -204,7 +204,7 @@ paths: type: string tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: A list of files that were synced for the workflow. @@ -217,9 +217,9 @@ paths: example : ["data_security_plan.dmn",'some_other_file.xml'] - /workflow_spec/{workflow_spec_id}/files/diff: + /workflow_sync/{workflow_spec_id}/files/diff: get: - operationId: crc.api.workflow.get_changed_files + operationId: crc.api.workflow_sync.get_changed_files summary: Provides a list of files for a workflow specs that differ from remote and their signature. security : - ApiKeyAuth : [] @@ -239,7 +239,7 @@ paths: type: string tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: An array of files that are different from remote, with last touched date and file signature. @@ -251,15 +251,15 @@ paths: $ref: "#/components/schemas/WorkflowSpecFilesDiff" - /workflow_spec/all: + /workflow_sync/all: get: - operationId: crc.api.workflow.get_all_spec_state + operationId: crc.api.workflow_sync.get_all_spec_state summary: Provides a list of workflow specs, last update date and thumbprint security: - ApiKeyAuth : [] tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: An array of workflow specs, with last touched date and file signature. @@ -547,7 +547,7 @@ paths: description: The workflow spec category has been removed. 
/file: parameters: - - name: workflow_spec_id + - name: workflow_sync_id in: query required: false description: The unique id of a workflow specification @@ -1353,7 +1353,7 @@ components: type : apiKey in : header name : X-CR-API-KEY - x-apikeyInfoFunc: crc.api.workflow.verify_token + x-apikeyInfoFunc: crc.api.workflow_sync.verify_token schemas: User: @@ -1380,7 +1380,7 @@ components: type: string WorkflowSpecDiffList: properties: - workflow_spec_id: + workflow_sync_id: type: string example : top_level_workflow date_created : @@ -1397,7 +1397,7 @@ components: file_model_id: type : integer example : 171 - workflow_spec_id : + workflow_sync_id : type: string example : top_level_workflow filename : @@ -1455,7 +1455,7 @@ components: WorkflowSpecAll: properties: - workflow_spec_id : + workflow_sync_id : type: string example : acaf1258-43b4-437e-8846-f612afa66811 date_created : @@ -1587,7 +1587,7 @@ components: content_type: type: string example: "application/xml" - workflow_spec_id: + workflow_sync_id: type: string example: "random_fact" x-nullable: true @@ -1609,7 +1609,7 @@ components: $ref: "#/components/schemas/NavigationItem" next_task: $ref: "#/components/schemas/Task" - workflow_spec_id: + workflow_sync_id: type: string spec_version: type: string @@ -1625,7 +1625,7 @@ components: example: id: 291234 status: 'user_input_required' - workflow_spec_id: 'random_fact' + workflow_sync_id: 'random_fact' spec_version: 'v1.1 [22,23]' is_latest_spec: True next_task: diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 8da5c88a..d14daf11 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -1,11 +1,4 @@ -import hashlib -import json import uuid -from io import StringIO -from hashlib import md5 - -import pandas as pd -from SpiffWorkflow.util.deep_merge import DeepMerge from flask import g from crc import session, db, app from crc.api.common import ApiError, ApiErrorSchema @@ -21,8 +14,6 @@ from crc.services.study_service import StudyService from crc.services.user_service import UserService from crc.services.workflow_processor import WorkflowProcessor from crc.services.workflow_service import WorkflowService -from flask_cors import cross_origin -import requests def all_specifications(): schema = WorkflowSpecModelSchema(many=True) @@ -263,299 +254,3 @@ def _verify_user_and_role(processor, spiff_task): raise ApiError.from_task("permission_denied", f"This task must be completed by '{allowed_users}', " f"but you are {user.uid}", spiff_task) -def join_uuids(uuids): - """Joins a pandas Series of uuids and combines them in one hash""" - combined_uuids = ''.join([str(uuid) for uuid in uuids.sort_values()]) # ensure that values are always - # in the same order - return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes - -def verify_token(token, required_scopes): - if token == app.config['API_TOKEN']: - return {'scope':['any']} - else: - raise ApiError("permission_denied","API Token information is not correct") - - -def get_changed_workflows(remote,as_df=False): - """ - gets a remote endpoint - gets the workflows and then - determines what workflows are different from the remote endpoint - """ - response = requests.get('http://'+remote+'/v1.0/workflow_spec/all',headers={'X-CR-API-KEY':app.config['API_TOKEN']}) - - # This is probably very and may allow cross site attacks - fix later - remote = pd.DataFrame(json.loads(response.text)) - # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index - local = 
get_all_spec_state_dataframe().reset_index() - - # merge these on workflow spec id and hash - this will - # make two different date columns date_x and date_y - different = remote.merge(local, - right_on=['workflow_spec_id','md5_hash'], - left_on=['workflow_spec_id','md5_hash'], - how = 'outer' , - indicator=True).loc[lambda x : x['_merge']!='both'] - - # each line has a tag on it - if was in the left or the right, - # label it so we know if that was on the remote or local machine - different.loc[different['_merge']=='left_only','location'] = 'remote' - different.loc[different['_merge']=='right_only','location'] = 'local' - - # this takes the different date_created_x and date-created_y columns and - # combines them back into one date_created column - index = different['date_created_x'].isnull() - different.loc[index,'date_created_x'] = different[index]['date_created_y'] - different = different[['workflow_spec_id','date_created_x','location']].copy() - different.columns=['workflow_spec_id','date_created','location'] - - # our different list will have multiple entries for a workflow if there is a version on either side - # we want to grab the most recent one, so we sort and grab the most recent one for each workflow - changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first() - - # get an exclusive or list of workflow ids - that is we want lists of files that are - # on one machine or the other, but not both - remote_spec_ids = remote[['workflow_spec_id']] - local_spec_ids = local[['workflow_spec_id']] - left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])] - right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])] - - # flag files as new that are only on the remote box and remove the files that are only on the local box - changedfiles['new'] = False - changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True - output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])] - - # return the list as a dict, let swagger convert it to json - if as_df: - return output - else: - return output.reset_index().to_dict(orient='records') - - -def sync_all_changed_workflows(remote): - - workflowsdf = get_changed_workflows(remote,as_df=True) - workflows = workflowsdf.reset_index().to_dict(orient='records') - for workflow in workflows: - sync_changed_files(remote,workflow['workflow_spec_id']) - return [x['workflow_spec_id'] for x in workflows] - - -def sync_changed_files(remote,workflow_spec_id): - # make sure that spec is local before syncing files - remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id, - headers={'X-CR-API-KEY': app.config['API_TOKEN']}) - specdict = json.loads(remotespectext.text) - localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() - if localspec is None: - localspec = WorkflowSpecModel() - localspec.id = workflow_spec_id - if specdict['category'] == None: - localspec.category = None - else: - localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name - == specdict['category']['name']).first() - if localcategory == None: - #category doesn't exist - lets make it - localcategory = WorkflowSpecCategoryModel() - localcategory.name = specdict['category']['name'] - localcategory.display_name = specdict['category']['display_name'] - localcategory.display_order = 
specdict['category']['display_order'] - session.add(localcategory) - localspec.category = localcategory - - localspec.display_order = specdict['display_order'] - localspec.display_name = specdict['display_name'] - localspec.name = specdict['name'] - localspec.description = specdict['description'] - session.add(localspec) - - changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True) - if len(changedfiles)==0: - return [] - updatefiles = changedfiles[~((changedfiles['new']==True) & (changedfiles['location']=='local'))] - updatefiles = updatefiles.reset_index().to_dict(orient='records') - - deletefiles = changedfiles[((changedfiles['new']==True) & (changedfiles['location']=='local'))] - deletefiles = deletefiles.reset_index().to_dict(orient='records') - - for delfile in deletefiles: - currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, - FileModel.name == delfile['filename']).first() - - # it is more appropriate to archive the file than delete - # due to the fact that we might have workflows that are using the - # file data - currentfile.archived = True - session.add(currentfile) - - for updatefile in updatefiles: - currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, - FileModel.name == updatefile['filename']).first() - if not currentfile: - currentfile = FileModel() - currentfile.name = updatefile['filename'] - currentfile.workflow_spec_id = workflow_spec_id - - currentfile.date_created = updatefile['date_created'] - currentfile.type = updatefile['type'] - currentfile.primary = updatefile['primary'] - currentfile.content_type = updatefile['content_type'] - currentfile.primary_process_id = updatefile['primary_process_id'] - session.add(currentfile) - - response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data', - headers={'X-CR-API-KEY': app.config['API_TOKEN']}) - FileService.update_file(currentfile,response.content,updatefile['type']) - session.commit() - return [x['filename'] for x in updatefiles] - - -def get_changed_files(remote,workflow_spec_id,as_df=False): - """ - gets a remote endpoint - gets the files for a workflow_spec on both - local and remote and determines what files have been change and returns a list of those - files - """ - response = requests.get('http://'+remote+'/v1.0/workflow_spec/'+workflow_spec_id+'/files', - headers={'X-CR-API-KEY':app.config['API_TOKEN']}) - # This is probably very and may allow cross site attacks - fix later - remote = pd.DataFrame(json.loads(response.text)) - # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index - local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index() - local['md5_hash'] = local['md5_hash'].astype('str') - different = remote.merge(local, - right_on=['filename','md5_hash'], - left_on=['filename','md5_hash'], - how = 'outer' , - indicator=True).loc[lambda x : x['_merge']!='both'] - if len(different) == 0: - if as_df: - return different - else: - return [] - # each line has a tag on it - if was in the left or the right, - # label it so we know if that was on the remote or local machine - different.loc[different['_merge']=='left_only','location'] = 'remote' - different.loc[different['_merge']=='right_only','location'] = 'local' - - # this takes the different date_created_x and date-created_y columns and - # combines them back into one date_created column - dualfields = ['date_created','type','primary','content_type','primary_process_id'] - for merge in 
dualfields: - index = different[merge+'_x'].isnull() - different.loc[index,merge+'_x'] = different[index][merge+'_y'] - - fieldlist = [fld+'_x' for fld in dualfields] - different = different[ fieldlist + ['md5_hash','filename','location']].copy() - - different.columns=dualfields+['md5_hash','filename','location'] - # our different list will have multiple entries for a workflow if there is a version on either side - # we want to grab the most recent one, so we sort and grab the most recent one for each workflow - changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first() - - # get an exclusive or list of workflow ids - that is we want lists of files that are - # on one machine or the other, but not both - remote_spec_ids = remote[['filename']] - local_spec_ids = local[['filename']] - left = remote_spec_ids[~remote_spec_ids['filename'].isin(local_spec_ids['filename'])] - right = local_spec_ids[~local_spec_ids['filename'].isin(remote_spec_ids['filename'])] - changedfiles['new'] = False - changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True - changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True - changedfiles = changedfiles.replace({pd.np.nan: None}) - # return the list as a dict, let swagger convert it to json - if as_df: - return changedfiles - else: - return changedfiles.reset_index().to_dict(orient='records') - - - -def get_all_spec_state(): - """ - Return a list of all workflow specs along with last updated date and a - thumbprint of all of the files that are used for that workflow_spec - Convert into a dict list from a dataframe - """ - df = get_all_spec_state_dataframe() - return df.reset_index().to_dict(orient='records') - - -def get_workflow_spec_files(workflow_spec_id): - """ - Return a list of all workflow specs along with last updated date and a - thumbprint of all of the files that are used for that workflow_spec - Convert into a dict list from a dataframe - """ - df = get_workflow_spec_files_dataframe(workflow_spec_id) - return df.reset_index().to_dict(orient='records') - - -def get_workflow_spec_files_dataframe(workflowid): - """ - Return a list of all files for a workflow_spec along with last updated date and a - hash so we can determine file differences for a changed workflow on a box. 
- Return a dataframe - """ - x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id==workflowid) - # there might be a cleaner way of getting a data frome from some of the - # fields in the ORM - but this works OK - filelist = [] - for file in x: - filelist.append({'file_model_id':file.file_model_id, - 'workflow_spec_id': file.file_model.workflow_spec_id, - 'md5_hash':file.md5_hash, - 'filename':file.file_model.name, - 'type':file.file_model.type.name, - 'primary':file.file_model.primary, - 'content_type':file.file_model.content_type, - 'primary_process_id':file.file_model.primary_process_id, - 'date_created':file.date_created}) - if len(filelist) == 0: - return pd.DataFrame(columns=['file_model_id', - 'workflow_spec_id', - 'md5_hash', - 'filename', - 'type', - 'primary', - 'content_type', - 'primary_process_id', - 'date_created']) - df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last() - df['date_created'] = df['date_created'].astype('str') - return df - - - -def get_all_spec_state_dataframe(): - """ - Return a list of all workflow specs along with last updated date and a - thumbprint of all of the files that are used for that workflow_spec - Return a dataframe - """ - x = session.query(FileDataModel).join(FileModel) - # there might be a cleaner way of getting a data frome from some of the - # fields in the ORM - but this works OK - filelist = [] - for file in x: - filelist.append({'file_model_id':file.file_model_id, - 'workflow_spec_id': file.file_model.workflow_spec_id, - 'md5_hash':file.md5_hash, - 'filename':file.file_model.name, - 'date_created':file.date_created}) - df = pd.DataFrame(filelist) - - # get a distinct list of file_model_id's with the most recent file_data retained - df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy() - - # take that list and then group by workflow_spec and retain the most recently touched file - # and make a consolidated hash of the md5_checksums - this acts as a 'thumbprint' for each - # workflow spec - df = df.groupby('workflow_spec_id').agg({'date_created':'max', - 'md5_hash':join_uuids}).copy() - # get only the columns we are really interested in returning - df = df[['date_created','md5_hash']].copy() - # convert dates to string - df['date_created'] = df['date_created'].astype('str') - return df - diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py new file mode 100644 index 00000000..98f04236 --- /dev/null +++ b/crc/api/workflow_sync.py @@ -0,0 +1,336 @@ +import hashlib +import json +import pandas as pd +import requests +from crc import session, app +from crc.api.common import ApiError +from crc.models.file import FileModel, FileDataModel +from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel +from crc.services.file_service import FileService + + +def join_uuids(uuids): + """Joins a pandas Series of uuids and combines them in one hash""" + combined_uuids = ''.join([str(uuid) for uuid in uuids.sort_values()]) # ensure that values are always + # in the same order + return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes + +def verify_token(token, required_scopes): + if token == app.config['API_TOKEN']: + return {'scope':['any']} + else: + raise ApiError("permission_denied", "API Token information is not correct") + + +def get_changed_workflows(remote,as_df=False): + """ + gets a remote endpoint - gets the workflows and then + determines what workflows are different from the remote 
endpoint + """ + try: + response = requests.get('http://'+remote+'/v1.0/workflow_sync/all',headers={'X-CR-API-KEY':app.config['API_TOKEN']}) + except: + raise ApiError("endpoint error", 'had a problem connecting to '+remote) + if response.status_code != 200: + raise ApiError("endpoint error", response.text) + + remote = pd.DataFrame(json.loads(response.text)) + + # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index + local = get_all_spec_state_dataframe().reset_index() + + # merge these on workflow spec id and hash - this will + # make two different date columns date_x and date_y + different = remote.merge(local, + right_on=['workflow_spec_id','md5_hash'], + left_on=['workflow_spec_id','md5_hash'], + how = 'outer' , + indicator=True).loc[lambda x : x['_merge']!='both'] + if len(different)==0: + return [] + # each line has a tag on it - if was in the left or the right, + # label it so we know if that was on the remote or local machine + different.loc[different['_merge']=='left_only','location'] = 'remote' + different.loc[different['_merge']=='right_only','location'] = 'local' + + # this takes the different date_created_x and date-created_y columns and + # combines them back into one date_created column + index = different['date_created_x'].isnull() + different.loc[index,'date_created_x'] = different[index]['date_created_y'] + different = different[['workflow_spec_id','date_created_x','location']].copy() + different.columns=['workflow_spec_id','date_created','location'] + + # our different list will have multiple entries for a workflow if there is a version on either side + # we want to grab the most recent one, so we sort and grab the most recent one for each workflow + changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first() + + # get an exclusive or list of workflow ids - that is we want lists of files that are + # on one machine or the other, but not both + remote_spec_ids = remote[['workflow_spec_id']] + local_spec_ids = local[['workflow_spec_id']] + left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])] + right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])] + + # flag files as new that are only on the remote box and remove the files that are only on the local box + changedfiles['new'] = False + changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True + output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])] + + # return the list as a dict, let swagger convert it to json + if as_df: + return output + else: + return output.reset_index().to_dict(orient='records') + + +def sync_all_changed_workflows(remote): + + workflowsdf = get_changed_workflows(remote,as_df=True) + if len(workflowsdf) ==0: + return [] + workflows = workflowsdf.reset_index().to_dict(orient='records') + for workflow in workflows: + sync_changed_files(remote,workflow['workflow_spec_id']) + return [x['workflow_spec_id'] for x in workflows] + + +def sync_changed_files(remote,workflow_spec_id): + # make sure that spec is local before syncing files + try: + remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id, + headers={'X-CR-API-KEY': app.config['API_TOKEN']}) + except: + raise ApiError("endpoint error", 'had a problem connecting to '+remote) + + if remotespectext.status_code != 200: + raise ApiError("endpoint error", response.text) + + specdict = 
json.loads(remotespectext.text) + + localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() + if localspec is None: + localspec = WorkflowSpecModel() + localspec.id = workflow_spec_id + if specdict['category'] == None: + localspec.category = None + else: + localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name + == specdict['category']['name']).first() + if localcategory == None: + #category doesn't exist - lets make it + localcategory = WorkflowSpecCategoryModel() + localcategory.name = specdict['category']['name'] + localcategory.display_name = specdict['category']['display_name'] + localcategory.display_order = specdict['category']['display_order'] + session.add(localcategory) + localspec.category = localcategory + + localspec.display_order = specdict['display_order'] + localspec.display_name = specdict['display_name'] + localspec.name = specdict['name'] + localspec.description = specdict['description'] + session.add(localspec) + + changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True) + if len(changedfiles)==0: + return [] + updatefiles = changedfiles[~((changedfiles['new']==True) & (changedfiles['location']=='local'))] + updatefiles = updatefiles.reset_index().to_dict(orient='records') + + deletefiles = changedfiles[((changedfiles['new']==True) & (changedfiles['location']=='local'))] + deletefiles = deletefiles.reset_index().to_dict(orient='records') + + for delfile in deletefiles: + currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, + FileModel.name == delfile['filename']).first() + + # it is more appropriate to archive the file than delete + # due to the fact that we might have workflows that are using the + # file data + currentfile.archived = True + session.add(currentfile) + + for updatefile in updatefiles: + currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, + FileModel.name == updatefile['filename']).first() + if not currentfile: + currentfile = FileModel() + currentfile.name = updatefile['filename'] + currentfile.workflow_spec_id = workflow_spec_id + + currentfile.date_created = updatefile['date_created'] + currentfile.type = updatefile['type'] + currentfile.primary = updatefile['primary'] + currentfile.content_type = updatefile['content_type'] + currentfile.primary_process_id = updatefile['primary_process_id'] + session.add(currentfile) + try: + response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data', + headers={'X-CR-API-KEY': app.config['API_TOKEN']}) + except: + raise ApiError("endpoint error", 'had a problem connecting to ' + remote) + + if response.status_code != 200: + raise ApiError("endpoint error", response.text) + + FileService.update_file(currentfile,response.content,updatefile['type']) + session.commit() + return [x['filename'] for x in updatefiles] + + +def get_changed_files(remote,workflow_spec_id,as_df=False): + """ + gets a remote endpoint - gets the files for a workflow_spec on both + local and remote and determines what files have been change and returns a list of those + files + """ + try: + response = requests.get('http://'+remote+'/v1.0/workflow_sync/'+workflow_spec_id+'/files', + headers={'X-CR-API-KEY':app.config['API_TOKEN']}) + except: + raise ApiError("endpoint error", 'had a problem connecting to '+remote) + + if response.status_code != 200: + raise ApiError("endpoint error", response.text) + + # This is probably very and may allow 
cross site attacks - fix later + remote = pd.DataFrame(json.loads(response.text)) + # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index + local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index() + local['md5_hash'] = local['md5_hash'].astype('str') + different = remote.merge(local, + right_on=['filename','md5_hash'], + left_on=['filename','md5_hash'], + how = 'outer' , + indicator=True).loc[lambda x : x['_merge']!='both'] + if len(different) == 0: + if as_df: + return different + else: + return [] + # each line has a tag on it - if was in the left or the right, + # label it so we know if that was on the remote or local machine + different.loc[different['_merge']=='left_only','location'] = 'remote' + different.loc[different['_merge']=='right_only','location'] = 'local' + + # this takes the different date_created_x and date-created_y columns and + # combines them back into one date_created column + dualfields = ['date_created','type','primary','content_type','primary_process_id'] + for merge in dualfields: + index = different[merge+'_x'].isnull() + different.loc[index,merge+'_x'] = different[index][merge+'_y'] + + fieldlist = [fld+'_x' for fld in dualfields] + different = different[ fieldlist + ['md5_hash','filename','location']].copy() + + different.columns=dualfields+['md5_hash','filename','location'] + # our different list will have multiple entries for a workflow if there is a version on either side + # we want to grab the most recent one, so we sort and grab the most recent one for each workflow + changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first() + + # get an exclusive or list of workflow ids - that is we want lists of files that are + # on one machine or the other, but not both + remote_spec_ids = remote[['filename']] + local_spec_ids = local[['filename']] + left = remote_spec_ids[~remote_spec_ids['filename'].isin(local_spec_ids['filename'])] + right = local_spec_ids[~local_spec_ids['filename'].isin(remote_spec_ids['filename'])] + changedfiles['new'] = False + changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True + changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True + changedfiles = changedfiles.replace({pd.np.nan: None}) + # return the list as a dict, let swagger convert it to json + if as_df: + return changedfiles + else: + return changedfiles.reset_index().to_dict(orient='records') + + + +def get_all_spec_state(): + """ + Return a list of all workflow specs along with last updated date and a + thumbprint of all of the files that are used for that workflow_spec + Convert into a dict list from a dataframe + """ + df = get_all_spec_state_dataframe() + return df.reset_index().to_dict(orient='records') + + +def get_workflow_spec_files(workflow_spec_id): + """ + Return a list of all workflow specs along with last updated date and a + thumbprint of all of the files that are used for that workflow_spec + Convert into a dict list from a dataframe + """ + df = get_workflow_spec_files_dataframe(workflow_spec_id) + return df.reset_index().to_dict(orient='records') + + +def get_workflow_spec_files_dataframe(workflowid): + """ + Return a list of all files for a workflow_spec along with last updated date and a + hash so we can determine file differences for a changed workflow on a box. 
+ Return a dataframe + """ + x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id==workflowid) + # there might be a cleaner way of getting a data frome from some of the + # fields in the ORM - but this works OK + filelist = [] + for file in x: + filelist.append({'file_model_id':file.file_model_id, + 'workflow_spec_id': file.file_model.workflow_spec_id, + 'md5_hash':file.md5_hash, + 'filename':file.file_model.name, + 'type':file.file_model.type.name, + 'primary':file.file_model.primary, + 'content_type':file.file_model.content_type, + 'primary_process_id':file.file_model.primary_process_id, + 'date_created':file.date_created}) + if len(filelist) == 0: + return pd.DataFrame(columns=['file_model_id', + 'workflow_spec_id', + 'md5_hash', + 'filename', + 'type', + 'primary', + 'content_type', + 'primary_process_id', + 'date_created']) + df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last() + df['date_created'] = df['date_created'].astype('str') + return df + + + +def get_all_spec_state_dataframe(): + """ + Return a list of all workflow specs along with last updated date and a + thumbprint of all of the files that are used for that workflow_spec + Return a dataframe + """ + x = session.query(FileDataModel).join(FileModel) + # there might be a cleaner way of getting a data frome from some of the + # fields in the ORM - but this works OK + filelist = [] + for file in x: + filelist.append({'file_model_id':file.file_model_id, + 'workflow_spec_id': file.file_model.workflow_spec_id, + 'md5_hash':file.md5_hash, + 'filename':file.file_model.name, + 'date_created':file.date_created}) + df = pd.DataFrame(filelist) + + # get a distinct list of file_model_id's with the most recent file_data retained + df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy() + + # take that list and then group by workflow_spec and retain the most recently touched file + # and make a consolidated hash of the md5_checksums - this acts as a 'thumbprint' for each + # workflow spec + df = df.groupby('workflow_spec_id').agg({'date_created':'max', + 'md5_hash':join_uuids}).copy() + # get only the columns we are really interested in returning + df = df[['date_created','md5_hash']].copy() + # convert dates to string + df['date_created'] = df['date_created'].astype('str') + return df + From 993c7bc76e6480d4d9e0ef6fa3ea41f2db4fafe1 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Fri, 11 Dec 2020 08:29:37 -0500 Subject: [PATCH 16/22] fixed error on api.yml from search / replace --- crc/api.yml | 14 +++---- deploy/requirements.txt | 93 +++++++++++++++++++---------------------- 2 files changed, 51 insertions(+), 56 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index 41e76f90..ce02307f 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -547,7 +547,7 @@ paths: description: The workflow spec category has been removed. 
/file: parameters: - - name: workflow_sync_id + - name: workflow_spec_id in: query required: false description: The unique id of a workflow specification @@ -1380,7 +1380,7 @@ components: type: string WorkflowSpecDiffList: properties: - workflow_sync_id: + workflow_spec_id: type: string example : top_level_workflow date_created : @@ -1397,7 +1397,7 @@ components: file_model_id: type : integer example : 171 - workflow_sync_id : + workflow_spec_id : type: string example : top_level_workflow filename : @@ -1455,7 +1455,7 @@ components: WorkflowSpecAll: properties: - workflow_sync_id : + workflow_spec_id : type: string example : acaf1258-43b4-437e-8846-f612afa66811 date_created : @@ -1587,7 +1587,7 @@ components: content_type: type: string example: "application/xml" - workflow_sync_id: + workflow_spec_id: type: string example: "random_fact" x-nullable: true @@ -1609,7 +1609,7 @@ components: $ref: "#/components/schemas/NavigationItem" next_task: $ref: "#/components/schemas/Task" - workflow_sync_id: + workflow_spec_id: type: string spec_version: type: string @@ -1625,7 +1625,7 @@ components: example: id: 291234 status: 'user_input_required' - workflow_sync_id: 'random_fact' + workflow_spec_id: 'random_fact' spec_version: 'v1.1 [22,23]' is_latest_spec: True next_task: diff --git a/deploy/requirements.txt b/deploy/requirements.txt index af827f27..b9ab7b4c 100644 --- a/deploy/requirements.txt +++ b/deploy/requirements.txt @@ -1,81 +1,76 @@ alabaster==0.7.12 -alembic==1.4.2 -amqp==2.5.2 +alembic==1.4.3 aniso8601==8.0.0 -attrs==19.3.0 -babel==2.8.0 -bcrypt==3.1.7 -beautifulsoup4==4.9.1 -billiard==3.6.3.0 +attrs==20.3.0 +babel==2.9.0 +bcrypt==3.2.0 +beautifulsoup4==4.9.3 blinker==1.4 -celery==4.4.2 -certifi==2020.4.5.1 -cffi==1.14.0 +certifi==2020.11.8 +cffi==1.14.4 chardet==3.0.4 click==7.1.2 -clickclick==1.2.2 +clickclick==20.10.2 commonmark==0.9.1 -configparser==5.0.0 connexion==2.7.0 -coverage==5.1 +coverage==5.3 +deprecated==1.2.10 docutils==0.16 -docxtpl==0.9.2 +docxtpl==0.11.2 et-xmlfile==1.0.1 flask==1.1.2 flask-admin==1.5.7 flask-bcrypt==0.7.1 -flask-cors==3.0.8 +flask-cors==3.0.9 flask-mail==0.9.1 -flask-marshmallow==0.12.0 +flask-marshmallow==0.14.0 flask-migrate==2.5.3 flask-restful==0.3.8 -flask-sqlalchemy==2.4.1 -flask-sso==0.4.0 -future==0.18.2 -httpretty==1.0.2 -idna==2.9 +flask-sqlalchemy==2.4.4 +gunicorn==20.0.4 +httpretty==1.0.3 +idna==2.10 imagesize==1.2.0 -importlib-metadata==1.6.0 -inflection==0.4.0 +inflection==0.5.1 itsdangerous==1.1.0 jdcal==1.4.1 jinja2==2.11.2 jsonschema==3.2.0 -kombu==4.6.8 -ldap3==2.7 -lxml==4.5.1 -mako==1.1.2 +ldap3==2.8.1 +lxml==4.6.2 +mako==1.1.3 +markdown==3.3.3 markupsafe==1.1.1 -marshmallow==3.6.0 +marshmallow==3.9.1 marshmallow-enum==1.5.1 -marshmallow-sqlalchemy==0.23.0 -numpy==1.18.4 -openapi-spec-validator==0.2.8 -openpyxl==3.0.3 +marshmallow-sqlalchemy==0.24.1 +numpy==1.19.4 +openapi-spec-validator==0.2.9 +openpyxl==3.0.5 packaging==20.4 -pandas==1.0.3 -psycopg2-binary==2.8.5 +pandas==1.1.4 +psycopg2-binary==2.8.6 pyasn1==0.4.8 pycparser==2.20 -PyGithub==1.53 -pygments==2.6.1 +pygithub==1.53 +pygments==2.7.2 pyjwt==1.7.1 pyparsing==2.4.7 -pyrsistent==0.16.0 +pyrsistent==0.17.3 python-box==5.2.0 python-dateutil==2.8.1 python-docx==0.8.10 python-editor==1.0.4 -python-Levenshtein==0.12.0 -pytz==2020.1 +python-levenshtein==0.12.0 +pytz==2020.4 pyyaml==5.3.1 recommonmark==0.6.0 -requests==2.23.0 +requests==2.25.0 sentry-sdk==0.14.4 -six==1.14.0 +six==1.15.0 snowballstemmer==2.0.0 soupsieve==2.0.1 -sphinx==3.0.3 +sphinx==3.3.1 
sphinxcontrib-applehelp==1.0.2
 sphinxcontrib-devhelp==1.0.2
 sphinxcontrib-htmlhelp==1.0.3
@@ -83,14 +78,14 @@
 sphinxcontrib-jsmath==1.0.1
 sphinxcontrib-qthelp==1.0.3
 sphinxcontrib-serializinghtml==1.1.4
 spiffworkflow
-sqlalchemy==1.3.17
-swagger-ui-bundle==0.0.6
-urllib3==1.25.9
-vine==1.3.0
-waitress==1.4.3
+sqlalchemy==1.3.20
+swagger-ui-bundle==0.0.8
+urllib3==1.26.2
+waitress==1.4.4
 webob==1.8.6
 webtest==2.0.35
 werkzeug==1.0.1
+wrapt==1.12.1
+wtforms==2.3.3
 xlrd==1.2.0
-xlsxwriter==1.2.8
-zipp==3.1.0
+xlsxwriter==1.3.7

From 9eea26e019de87a96a09baa66f27e3a9dd3d4472 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Fri, 11 Dec 2020 08:34:59 -0500
Subject: [PATCH 17/22] add workflow_sync test

---
 crc/api/workflow_sync.py    |  8 ++++----
 tests/base_test.py          |  8 ++++++++
 tests/test_workflow_sync.py | 18 ++++++++++++++++++
 3 files changed, 30 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_workflow_sync.py

diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py
index 98f04236..c4256a68 100644
--- a/crc/api/workflow_sync.py
+++ b/crc/api/workflow_sync.py
@@ -31,7 +31,7 @@ def get_changed_workflows(remote,as_df=False):
         response = requests.get('http://'+remote+'/v1.0/workflow_sync/all',headers={'X-CR-API-KEY':app.config['API_TOKEN']})
     except:
         raise ApiError("endpoint error", 'had a problem connecting to '+remote)
-    if response.status_code != 200:
+    if not response.ok:
         raise ApiError("endpoint error", response.text)
 
     remote = pd.DataFrame(json.loads(response.text))
@@ -102,7 +102,7 @@ def sync_changed_files(remote,workflow_spec_id):
     except:
         raise ApiError("endpoint error", 'had a problem connecting to '+remote)
 
-    if remotespectext.status_code != 200:
+    if not remotespectext.ok:
         raise ApiError("endpoint error", response.text)
 
     specdict = json.loads(remotespectext.text)
@@ -170,7 +170,7 @@ def sync_changed_files(remote,workflow_spec_id):
     except:
         raise ApiError("endpoint error", 'had a problem connecting to ' + remote)
 
-    if response.status_code != 200:
+    if not response.ok:
         raise ApiError("endpoint error", response.text)
 
     FileService.update_file(currentfile,response.content,updatefile['type'])
@@ -190,7 +190,7 @@ def get_changed_files(remote,workflow_spec_id,as_df=False):
     except:
         raise ApiError("endpoint error", 'had a problem connecting to '+remote)
 
-    if response.status_code != 200:
+    if not response.ok:
         raise ApiError("endpoint error", response.text)
 
     # This is probably very and may allow cross site attacks - fix later
diff --git a/tests/base_test.py b/tests/base_test.py
index af899917..e32bbd9b 100644
--- a/tests/base_test.py
+++ b/tests/base_test.py
@@ -204,6 +204,14 @@ class BaseTest(unittest.TestCase):
             data = myfile.read()
         return data
 
+    @staticmethod
+    def workflow_sync_response(file_name):
+        filepath = os.path.join(app.root_path, '..', 'tests', 'data', 'workflow_sync_responses', file_name)
+        with open(filepath, 'r') as myfile:
+            data = myfile.read()
+        return data
+
+
     def assert_success(self, rv, msg=""):
         try:
             data = json.loads(rv.get_data(as_text=True))
diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py
new file mode 100644
index 00000000..9a739c48
--- /dev/null
+++ b/tests/test_workflow_sync.py
@@ -0,0 +1,18 @@
+from unittest.mock import patch
+
+from crc import app
+from tests.base_test import BaseTest
+from crc.api.workflow_sync import get_all_spec_state, get_changed_workflows
+import json
+
+class TestWorkflowSync(BaseTest):
+
+    @patch('crc.api.workflow_sync.requests.get')
+    def test_get_no_changes(self, mock_get):
+        othersys = 
get_all_spec_state() + mock_get.return_value.ok = True + mock_get.return_value.text = json.dumps(othersys) + response = get_changed_workflows('somewhere over the rainbow') + self.assertIsNotNone(response) + self.assertEqual(response,[]) + From 3a1160efac2348748b083d1b9dd4f83c11aee2b0 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Fri, 11 Dec 2020 11:41:32 -0500 Subject: [PATCH 18/22] refactored calls into a service --- crc/api.yml | 3 +- crc/api/workflow_sync.py | 51 ++++++-------------------- tests/study/test_study_api.py | 2 +- tests/test_workflow_sync.py | 68 +++++++++++++++++++++++++++++++---- 4 files changed, 76 insertions(+), 48 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index ce02307f..0939daa5 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -657,7 +657,8 @@ paths: get: operationId: crc.api.file.get_file_data_by_hash summary: Returns only the file contents - security: [] # Disable security for this endpoint only. + security: + - ApiKeyAuth: [] tags: - Files responses: diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py index c4256a68..6a64fe7d 100644 --- a/crc/api/workflow_sync.py +++ b/crc/api/workflow_sync.py @@ -7,6 +7,7 @@ from crc.api.common import ApiError from crc.models.file import FileModel, FileDataModel from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel from crc.services.file_service import FileService +from crc.services.workflow_sync import WorkflowSyncService def join_uuids(uuids): @@ -27,21 +28,16 @@ def get_changed_workflows(remote,as_df=False): gets a remote endpoint - gets the workflows and then determines what workflows are different from the remote endpoint """ - try: - response = requests.get('http://'+remote+'/v1.0/workflow_sync/all',headers={'X-CR-API-KEY':app.config['API_TOKEN']}) - except: - raise ApiError("endpoint error", 'had a problem connecting to '+remote) - if not response.ok: - raise ApiError("endpoint error", response.text) - remote = pd.DataFrame(json.loads(response.text)) + remote_workflows_list = WorkflowSyncService.get_all_remote_workflows(remote) + remote_workflows = pd.DataFrame(remote_workflows_list) # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index local = get_all_spec_state_dataframe().reset_index() # merge these on workflow spec id and hash - this will # make two different date columns date_x and date_y - different = remote.merge(local, + different = remote_workflows.merge(local, right_on=['workflow_spec_id','md5_hash'], left_on=['workflow_spec_id','md5_hash'], how = 'outer' , @@ -66,7 +62,7 @@ def get_changed_workflows(remote,as_df=False): # get an exclusive or list of workflow ids - that is we want lists of files that are # on one machine or the other, but not both - remote_spec_ids = remote[['workflow_spec_id']] + remote_spec_ids = remote_workflows[['workflow_spec_id']] local_spec_ids = local[['workflow_spec_id']] left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])] right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])] @@ -96,16 +92,8 @@ def sync_all_changed_workflows(remote): def sync_changed_files(remote,workflow_spec_id): # make sure that spec is local before syncing files - try: - remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id, - headers={'X-CR-API-KEY': app.config['API_TOKEN']}) - except: - raise ApiError("endpoint error", 'had a problem connecting to '+remote) - if not remotespectext.ok: - raise 
ApiError("endpoint error", response.text) - - specdict = json.loads(remotespectext.text) + specdict = WorkflowSyncService.get_remote_workfow_spec(remote,workflow_spec_id) localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() if localspec is None: @@ -164,16 +152,8 @@ def sync_changed_files(remote,workflow_spec_id): currentfile.content_type = updatefile['content_type'] currentfile.primary_process_id = updatefile['primary_process_id'] session.add(currentfile) - try: - response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data', - headers={'X-CR-API-KEY': app.config['API_TOKEN']}) - except: - raise ApiError("endpoint error", 'had a problem connecting to ' + remote) - - if not response.ok: - raise ApiError("endpoint error", response.text) - - FileService.update_file(currentfile,response.content,updatefile['type']) + content = WorkflowSyncService.get_remote_file_by_hash(remote,updatefile['md5_hash']) + FileService.update_file(currentfile,content,updatefile['type']) session.commit() return [x['filename'] for x in updatefiles] @@ -184,21 +164,12 @@ def get_changed_files(remote,workflow_spec_id,as_df=False): local and remote and determines what files have been change and returns a list of those files """ - try: - response = requests.get('http://'+remote+'/v1.0/workflow_sync/'+workflow_spec_id+'/files', - headers={'X-CR-API-KEY':app.config['API_TOKEN']}) - except: - raise ApiError("endpoint error", 'had a problem connecting to '+remote) - - if not response.ok: - raise ApiError("endpoint error", response.text) - - # This is probably very and may allow cross site attacks - fix later - remote = pd.DataFrame(json.loads(response.text)) + remote_file_list = WorkflowSyncService.get_remote_workflow_spec_files(remote,workflow_spec_id) + remote_files = pd.DataFrame(remote_file_list) # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index() local['md5_hash'] = local['md5_hash'].astype('str') - different = remote.merge(local, + different = remote_files.merge(local, right_on=['filename','md5_hash'], left_on=['filename','md5_hash'], how = 'outer' , diff --git a/tests/study/test_study_api.py b/tests/study/test_study_api.py index e7d33a9d..df734086 100644 --- a/tests/study/test_study_api.py +++ b/tests/study/test_study_api.py @@ -165,7 +165,7 @@ class TestStudyApi(BaseTest): self.assertEqual(study_event.comment, update_comment) self.assertEqual(study_event.user_uid, self.test_uid) - @patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators') # mock_studies + @patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators') # mock_investigators @patch('crc.services.protocol_builder.ProtocolBuilderService.get_required_docs') # mock_docs @patch('crc.services.protocol_builder.ProtocolBuilderService.get_study_details') # mock_details @patch('crc.services.protocol_builder.ProtocolBuilderService.get_studies') # mock_studies diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py index 9a739c48..f2c68719 100644 --- a/tests/test_workflow_sync.py +++ b/tests/test_workflow_sync.py @@ -1,19 +1,75 @@ from unittest.mock import patch -from crc import app +from crc import db from tests.base_test import BaseTest from crc.api.workflow_sync import get_all_spec_state, get_changed_workflows +from crc.models.workflow import WorkflowSpecModel import json -pip +from datetime import datetime +from 
crc.services.file_service import FileService class TestWorkflowSync(BaseTest): - @patch('crc.api.workflow_sync.requests.get') + @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') def test_get_no_changes(self, mock_get): + self.load_example_data() othersys = get_all_spec_state() - mock_get.return_value.ok = True - mock_get.return_value.text = json.dumps(othersys) - response = get_changed_workflows('somewhere over the rainbow') + mock_get.return_value = othersys + response = get_changed_workflows('localhost:0000') # not actually used due to mock self.assertIsNotNone(response) self.assertEqual(response,[]) + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') + def test_remote_workflow_change(self, mock_get): + self.load_example_data() + othersys = get_all_spec_state() + othersys[1]['date_created'] = str(datetime.now()) + othersys[1]['md5_hash'] = '12345' + mock_get.return_value = othersys + response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock + self.assertIsNotNone(response) + self.assertEqual(len(response),1) + self.assertEqual(response[0]['workflow_spec_id'], 'random_fact') + self.assertEqual(response[0]['location'], 'remote') + self.assertEqual(response[0]['new'], False) + + + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') + def test_remote_workflow_has_new(self, mock_get): + self.load_example_data() + othersys = get_all_spec_state() + othersys.append({'workflow_spec_id':'my_new_workflow', + 'date_created':str(datetime.now()), + 'md5_hash': '12345'}) + mock_get.return_value = othersys + response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock + self.assertIsNotNone(response) + self.assertEqual(len(response),1) + self.assertEqual(response[0]['workflow_spec_id'],'my_new_workflow') + self.assertEqual(response[0]['location'], 'remote') + self.assertEqual(response[0]['new'], True) + + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') + def test_local_workflow_has_new(self, mock_get): + self.load_example_data() + + othersys = get_all_spec_state() + mock_get.return_value = othersys + wf_spec = WorkflowSpecModel() + wf_spec.id = 'abcdefg' + wf_spec.display_name = 'New Workflow - Yum!!' 
+        wf_spec.name = 'my_new_workflow'
+        wf_spec.description = 'yep - its a new workflow'
+        wf_spec.category_id = 0
+        wf_spec.display_order = 0
+        db.session.add(wf_spec)
+        db.session.commit()
+        FileService.add_workflow_spec_file(wf_spec,'dummyfile.txt','text',b'this is a test')
+        # note: a spec that is new ONLY locally is not reported as a change here -
+        # get_changed_workflows() reports remote-side differences, so this is empty
+        response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock
+        self.assertIsNotNone(response)
+        self.assertEqual(response,[])

From 55e6f5b753f7ad12f4d7e719161824c206af4d79 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Fri, 11 Dec 2020 11:42:00 -0500
Subject: [PATCH 19/22] refactored calls into a service - forgot to add the
 actual service file

---
 crc/services/workflow_sync.py | 57 +++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 crc/services/workflow_sync.py

diff --git a/crc/services/workflow_sync.py b/crc/services/workflow_sync.py
new file mode 100644
index 00000000..e2026746
--- /dev/null
+++ b/crc/services/workflow_sync.py
@@ -0,0 +1,57 @@
+import json
+from json import JSONDecodeError
+from typing import List, Optional
+
+import requests
+
+from crc import app
+from crc.api.common import ApiError
+
+
+class WorkflowSyncService(object):
+
+    @staticmethod
+    def get_remote_file_by_hash(remote,md5_hash):
+        url = remote+'/v1.0/file/'+md5_hash+'/hash_data'
+        return WorkflowSyncService.__make_request(url,return_contents=True)
+
+    @staticmethod
+    def get_remote_workflow_spec_files(remote,workflow_spec_id):
+        url = remote+'/v1.0/workflow_sync/'+workflow_spec_id+'/files'
+        return WorkflowSyncService.__make_request(url)
+
+    @staticmethod
+    def get_remote_workflow_spec(remote, workflow_spec_id):
+        """
+        this just gets the details of a workflow spec from the
+        remote side.
+
+        FIXME: for testing I had changed the security on the API endpoint
+        below so that I could run it - I need to restore the security on this
+        and make a new workflow_sync endpoint that just calls this same function
+        so that I can use the API_TOKEN rather than the other token setup
+        """
+        url = remote+'/v1.0/workflow-specification/'+workflow_spec_id
+        return WorkflowSyncService.__make_request(url)
+
+    @staticmethod
+    def get_all_remote_workflows(remote):
+        url = remote + '/v1.0/workflow_sync/all'
+        return WorkflowSyncService.__make_request(url)
+
+    @staticmethod
+    def __make_request(url,return_contents=False):
+        try:
+            response = requests.get(url,headers={'X-CR-API-KEY':app.config['API_TOKEN']})
+        except requests.exceptions.RequestException:
+            raise ApiError("workflow_sync_error", 'had a problem connecting to '+url)
+        if response.ok and response.text:
+            if return_contents:
+                return response.content
+            else:
+                return json.loads(response.text)
+        else:
+            raise ApiError("workflow_sync_error",
+                           "Received an invalid response from the remote CR Connect instance (status %s): %s "
+                           "when calling url '%s'." %
+                           (response.status_code, response.text, url))

From adc4dc4453392cbae7665a6e0e0b36fe90eb3b5a Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Fri, 11 Dec 2020 12:03:41 -0500
Subject: [PATCH 20/22] redid the api so that nothing is using open security -
 added a new endpoint for getting a workflow spec that uses the alternate
 API_TOKEN security, and left the original endpoint as it was.
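
The new /v1.0/workflow_sync/{workflow_spec_id}/spec endpoint returns the same
WorkflowSpec payload as /v1.0/workflow-specification/{workflow_spec_id}, but it
is guarded by the X-CR-API-KEY header rather than the user token. A rough
sketch of how a remote instance can now fetch a spec - the host, spec id and
token below are illustrative placeholders, not part of this change:

    import requests

    # The X-CR-API-KEY header must match the API_TOKEN configured on the
    # machine being queried; 'localhost:5000' and the token value here are
    # hypothetical examples.
    response = requests.get('http://localhost:5000/v1.0/workflow_sync/random_fact/spec',
                            headers={'X-CR-API-KEY': 'af95596f327c9ecc007b60414fc84b61'})
    response.raise_for_status()
    spec = response.json()  # same payload as /workflow-specification/random_fact
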
---
 crc/api.yml                   | 24 +++++++++++++++++++++++-
 crc/api/workflow_sync.py      |  4 ++++
 crc/services/workflow_sync.py |  7 +------
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/crc/api.yml b/crc/api.yml
index 0939daa5..a4b799ea 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -156,6 +156,29 @@ paths:
           items:
             $ref: "#/components/schemas/WorkflowSpecDiffList"
 
+  /workflow_sync/{workflow_spec_id}/spec:
+    parameters:
+      - name: workflow_spec_id
+        in: path
+        required: true
+        description: The unique id of the workflow specification to retrieve.
+        schema:
+          type: string
+    get:
+      operationId: crc.api.workflow_sync.get_sync_workflow_specification
+      summary: Returns a single workflow specification
+      security:
+        - ApiKeyAuth: []
+      tags:
+        - Workflow Sync API
+      responses:
+        '200':
+          description: Workflow specification.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/WorkflowSpec"
+
 
   /workflow_sync/{workflow_spec_id}/files:
     get:
@@ -401,7 +424,6 @@ paths:
     get:
       operationId: crc.api.workflow.get_workflow_specification
       summary: Returns a single workflow specification
-      security: []
       tags:
         - Workflow Specifications
       responses:
diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py
index 6a64fe7d..4915af0f 100644
--- a/crc/api/workflow_sync.py
+++ b/crc/api/workflow_sync.py
@@ -8,8 +8,12 @@ from crc.models.file import FileModel, FileDataModel
 from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel
 from crc.services.file_service import FileService
 from crc.services.workflow_sync import WorkflowSyncService
+from crc.api.workflow import get_workflow_specification
 
 
+def get_sync_workflow_specification(workflow_spec_id):
+    return get_workflow_specification(workflow_spec_id)
+
 def join_uuids(uuids):
     """Joins a pandas Series of uuids and combines them in one hash"""
     combined_uuids = ''.join([str(uuid) for uuid in uuids.sort_values()])  # ensure that values are always
diff --git a/crc/services/workflow_sync.py b/crc/services/workflow_sync.py
index e2026746..26796413 100644
--- a/crc/services/workflow_sync.py
+++ b/crc/services/workflow_sync.py
@@ -25,13 +25,8 @@ class WorkflowSyncService(object):
         """
         this just gets the details of a workflow spec from the
         remote side.
-
-        FIXME: for testing I had changed the security on the API endpoint
-        below so that I could run it - I need to restore the security on this
-        and make a new workflow_sync endpoint that just calls this same function
-        so that I can use the API_TOKEN rather than the other token setup
         """
-        url = remote+'/v1.0/workflow-specification/'+workflow_spec_id
+        url = remote+'/v1.0/workflow_sync/'+workflow_spec_id+'/spec'
         return WorkflowSyncService.__make_request(url)
 
     @staticmethod

From ee3ee9fd4a7439b69285b6089d2d151aac4f8f52 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Mon, 14 Dec 2020 10:27:40 -0500
Subject: [PATCH 21/22] Added tests to cover most of the use cases and code,
 and a bunch of stuff to make the mocks happy

---
 crc/api/workflow_sync.py                      |   6 +-
 tests/base_test.py                            |   2 +-
 tests/data/random_fact/random_fact2.bpmn      | 200 ++++++++++++++++++
 .../workflow_sync_responses/random_fact2.bpmn | 104 +++++++++
 tests/test_workflow_sync.py                   |  70 +++++-
 5 files changed, 377 insertions(+), 5 deletions(-)
 create mode 100644 tests/data/random_fact/random_fact2.bpmn
 create mode 100644 tests/data/workflow_sync_responses/random_fact2.bpmn

diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py
index 4915af0f..68cd9fd2 100644
--- a/crc/api/workflow_sync.py
+++ b/crc/api/workflow_sync.py
@@ -97,7 +97,7 @@ def sync_all_changed_workflows(remote):
 
 def sync_changed_files(remote,workflow_spec_id):
     # make sure that spec is local before syncing files
-    specdict = WorkflowSyncService.get_remote_workfow_spec(remote,workflow_spec_id)
+    specdict = WorkflowSyncService.get_remote_workflow_spec(remote,workflow_spec_id)
 
     localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first()
     if localspec is None:
@@ -173,6 +173,8 @@ def get_changed_files(remote,workflow_spec_id,as_df=False):
     # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index
     local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index()
     local['md5_hash'] = local['md5_hash'].astype('str')
+    remote_files['md5_hash'] = remote_files['md5_hash'].astype('str')
+
     different = remote_files.merge(local,
                       right_on=['filename','md5_hash'],
                       left_on=['filename','md5_hash'],
@@ -205,7 +207,7 @@ def get_changed_files(remote,workflow_spec_id,as_df=False):
 
     # get an exclusive or list of workflow ids - that is we want lists of files that are
     # on one machine or the other, but not both
-    remote_spec_ids = remote[['filename']]
+    remote_spec_ids = remote_files[['filename']]
     local_spec_ids = local[['filename']]
     left = remote_spec_ids[~remote_spec_ids['filename'].isin(local_spec_ids['filename'])]
     right = local_spec_ids[~local_spec_ids['filename'].isin(remote_spec_ids['filename'])]
diff --git a/tests/base_test.py b/tests/base_test.py
index e32bbd9b..4f2306a2 100644
--- a/tests/base_test.py
+++ b/tests/base_test.py
@@ -207,7 +207,7 @@ class BaseTest(unittest.TestCase):
     @staticmethod
     def workflow_sync_response(file_name):
         filepath = os.path.join(app.root_path, '..', 'tests', 'data', 'workflow_sync_responses', file_name)
-        with open(filepath, 'r') as myfile:
+        with open(filepath, 'rb') as myfile:
             data = myfile.read()
         return data
diff --git a/tests/data/random_fact/random_fact2.bpmn b/tests/data/random_fact/random_fact2.bpmn
new file mode 100644
index 00000000..bd0a9ef5
--- /dev/null
+++ b/tests/data/random_fact/random_fact2.bpmn
@@ -0,0 +1,200 @@
+[200 lines of BPMN 2.0 XML elided - the angle-bracket markup did not survive
+ this rendering of the patch. The file adds a second copy of the two-task
+ "random fact" workflow (a user task that sets Fact.type to cat, norris, or
+ buzzword, a script task that calls fact_service(), and a closing task); the
+ first task's documentation element carries a long markdown sample - headings,
+ horizontal rules, emphasis, blockquotes, nested lists, tables, links and
+ images - used to exercise markdown rendering.]
diff --git a/tests/data/workflow_sync_responses/random_fact2.bpmn b/tests/data/workflow_sync_responses/random_fact2.bpmn
new file mode 100644
index 00000000..f502a238
--- /dev/null
+++ b/tests/data/workflow_sync_responses/random_fact2.bpmn
@@ -0,0 +1,104 @@
+[104 lines of BPMN 2.0 XML elided - the angle-bracket markup did not survive
+ this rendering of the patch. This is the remote-side variant of
+ random_fact2.bpmn that the mocked sync response serves up via
+ workflow_sync_response(); it is the same workflow with a much shorter
+ documentation sample beginning "# h1 Heading 8-)" and carrying the marker
+ text NEW_FILE_ADDED.]
diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py
index f2c68719..eb0cd1b5 100644
--- a/tests/test_workflow_sync.py
+++ b/tests/test_workflow_sync.py
@@ -2,12 +2,18 @@
 from unittest.mock import patch
 
 from crc import db
 from tests.base_test import BaseTest
-from crc.api.workflow_sync import get_all_spec_state, get_changed_workflows
+from crc.api.workflow_sync import get_all_spec_state, \
+    get_changed_workflows, \
+    get_workflow_spec_files, \
+    get_changed_files, \
+    get_workflow_specification, \
+    sync_changed_files
 from crc.models.workflow import WorkflowSpecModel
-import json
 from datetime import datetime
 from crc.services.file_service import FileService
+
+
 class TestWorkflowSync(BaseTest):
 
     @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows')
@@ -73,3 +79,78 @@ class TestWorkflowSync(BaseTest):
         response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock
         self.assertIsNotNone(response)
         self.assertEqual(response,[])
+
+
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
+    def test_file_differences(self, mock_get):
+        self.load_example_data()
+        othersys = get_workflow_spec_files('random_fact')
+        othersys[1]['date_created'] = str(datetime.now())
+        othersys[1]['md5_hash'] = '12345'
+        mock_get.return_value = othersys
+        response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock
+        self.assertIsNotNone(response)
+        self.assertEqual(len(response),1)
+        self.assertEqual(response[0]['filename'], 'random_fact2.bpmn')
+        self.assertEqual(response[0]['location'], 'remote')
+        self.assertEqual(response[0]['new'], False)
+
+
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash')
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec')
+    def test_file_sync(self, workflow_mock, spec_files_mock, file_data_mock):
+        self.load_example_data()
+        remote_workflow = get_workflow_specification('random_fact')
+        self.assertEqual(remote_workflow['display_name'],'Random Fact')
+        remote_workflow['description'] = 'This Workflow came from Remote'
+        remote_workflow['display_name'] = 'Remote Workflow'
+        workflow_mock.return_value = remote_workflow
+        othersys = get_workflow_spec_files('random_fact')
+        othersys[1]['date_created'] = str(datetime.now())
+        othersys[1]['md5_hash'] = '12345'
+        spec_files_mock.return_value = othersys
+        file_data_mock.return_value = self.workflow_sync_response('random_fact2.bpmn')
+        response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock
+        self.assertIsNotNone(response)
+        self.assertEqual(len(response),1)
+        self.assertEqual(response[0], 'random_fact2.bpmn')
+        files = FileService.get_spec_data_files('random_fact')
+        md5sums = [str(f.md5_hash) for f in files]
+        self.assertIn('21bb6f9e-0af7-0ab2-0fc7-ec0f94787e58', md5sums)
+        new_local_workflow = get_workflow_specification('random_fact')
+        self.assertEqual(new_local_workflow['display_name'],'Remote Workflow')
+
+
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec')
+    def test_file_deleted(self, workflow_mock, spec_files_mock):
+        self.load_example_data()
+        remote_workflow = get_workflow_specification('random_fact')
+        workflow_mock.return_value = remote_workflow
+        othersys = get_workflow_spec_files('random_fact')
+        del(othersys[1])
+        spec_files_mock.return_value = othersys
+        response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock
+        self.assertIsNotNone(response)
+        # when we delete a local file, we do not return that it was deleted - just
+        # a list of updated files. We may want to change this in the future.
+        self.assertEqual(len(response),0)
+        files = FileService.get_spec_data_files('random_fact')
+        self.assertEqual(len(files),1)

From d8ac20b1c33da1bec8e01a8f2ba455b3d3243835 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Mon, 14 Dec 2020 10:37:16 -0500
Subject: [PATCH 22/22] I added a second file to the 'random_fact' test
 workflow, so a files API test that used to see 2 files now sees 3 - nothing
 to see here, move along

---
 tests/files/test_files_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/files/test_files_api.py b/tests/files/test_files_api.py
index 02feb8d0..958989d1 100644
--- a/tests/files/test_files_api.py
+++ b/tests/files/test_files_api.py
@@ -46,7 +46,7 @@ class TestFilesApi(BaseTest):
                            content_type="application/json", headers=self.logged_in_headers())
         self.assert_success(rv)
         json_data = json.loads(rv.get_data(as_text=True))
-        self.assertEqual(2, len(json_data))
+        self.assertEqual(3, len(json_data))
 
 
     def test_create_file(self):
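A note on the stacked `@patch` decorators in the tests above: decorators apply bottom-up, so the decorator *closest* to the function supplies the *first* mock parameter after `self`, and the top decorator supplies the last. Getting that backwards produces confusing failures where the wrong call is mocked. A self-contained sketch of the ordering - the class and method names here are invented for illustration, not part of the codebase:

```python
from unittest import TestCase
from unittest.mock import patch


class RemoteService:
    """Stand-in for a service with two remote calls (hypothetical)."""
    @staticmethod
    def get_spec():
        raise RuntimeError('would hit the network')

    @staticmethod
    def get_files():
        raise RuntimeError('would hit the network')


class DecoratorOrderExample(TestCase):
    # Decorators apply bottom-up: the decorator nearest the function
    # produces the first mock argument after self.
    @patch.object(RemoteService, 'get_files')   # outermost -> files_mock (2nd arg)
    @patch.object(RemoteService, 'get_spec')    # innermost -> spec_mock (1st arg)
    def test_order(self, spec_mock, files_mock):
        spec_mock.return_value = {'id': 'random_fact'}
        files_mock.return_value = []
        self.assertEqual(RemoteService.get_spec(), {'id': 'random_fact'})
        self.assertEqual(RemoteService.get_files(), [])
```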