check diff output to make sure it looks reasonable

2024-06-27 17:25:54 -04:00 · 2024-06-27 17:25:54 -04:00 · 03beaf8936
parent 0ca371149f
commit 03beaf8936
3 changed files with 35 additions and 9 deletions
--- a/docs/appendices/bpmn_terminology.md
+++ b/docs/appendices/bpmn_terminology.md
@@ -84,7 +84,7 @@ There are four types of Gateways: Exclusive, Parallel, Inclusive, and Event-Base

 ## Intermediate Event

-This is an event that occurs within the middle of a process, neither at the start nor the end.
+This is an event that occurs in the middle of a process, neither at the start nor the end.
 It can be connected to other tasks through connectors or placed on the border of a task.
 It evaluates conditions and circumstances, triggering events and enabling the initiation of alternative paths within the process.

@@ -99,7 +99,7 @@ These are subdivisions within a Pool that are utilized to assign activities to s
 ## Merge

 This is the process in which two or more parallel Sequence Flow paths converge into a single path, achieved either through multiple incoming Sequence Flows or by utilizing an Exclusive Gateway.
-This merging of paths is also commonly referred to as an "OR-Join".
+This merging of paths is also commonly referred to as an "OR-Join."

 ## Message

--- a/docs/bin/gpt-proofread.py
+++ b/docs/bin/gpt-proofread.py
@@ -69,20 +69,22 @@ def read_file(file_path):
    with open(file_path, "r") as f:
        return f.read()

+
 def split_content(content, chunk_size=13000):
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
    return splitter.split_text(content)

+
 def process_chunk(doc, chat_prompt, retries=3, chunk_index=0):
    for attempt in range(retries):
        result = llm.invoke(chat_prompt.format_prompt(text=doc).to_messages())
        edited_result_content = result.content
        if 0.95 * len(doc) <= len(edited_result_content) <= 1.05 * len(doc):
-
            return edited_result_content
        print(f"Retry {attempt + 1} for chunk due to size mismatch.")
    raise ValueError("Failed to process chunk after retries.")

+
 def write_to_temp_file(temp_file_path, docs, chat_prompt):
    os.makedirs("/tmp/proof-edits", exist_ok=True)
    with open(temp_file_path, "w") as f:
@@ -90,12 +92,27 @@ def write_to_temp_file(temp_file_path, docs, chat_prompt):
            edited_result_content = process_chunk(doc, chat_prompt, chunk_index=i)
            f.write(edited_result_content + "\n")

+
+def analyze_diff(diff_file_path):
+    diff_content = read_file(diff_file_path)
+    analysis_prompt = f"""
+You are an expert technical editor. Please analyze the following diff and ensure it looks like a successful copy edit of a markdown file. Provide feedback if there are any issues or if it looks good, just reply with the single word: good
+
+Diff:
+{diff_content}
+"""
+    result = llm.invoke([HumanMessage(content=analysis_prompt)])
+    return result.content
+
+
 def process_file(input_file):
    content = read_file(input_file)
    docs = split_content(content)
    print(f"Split into {len(docs)} docs")

-    chat_prompt = ChatPromptTemplate.from_messages([system_prompt, human_message_prompt])
+    chat_prompt = ChatPromptTemplate.from_messages(
+        [system_prompt, human_message_prompt]
+    )
    os.makedirs("/tmp/proof-edits", exist_ok=True)
    temp_output_file = "/tmp/proof-edits/edited_output.md"

@@ -109,11 +126,20 @@ def process_file(input_file):
        edited_content = f.read()

    # Generate and save the diff for the whole file
-    diff = difflib.unified_diff(original_content.splitlines(), edited_content.splitlines(), lineterm='')
+    diff = difflib.unified_diff(
+        original_content.splitlines(), edited_content.splitlines(), lineterm=""
+    )
    with open("/tmp/proof-edits/diff_file.diff", "w") as diff_file:
-        diff_file.write('\n'.join(diff))
-    os.replace(temp_output_file, input_file)
-    print(f"Edited file saved as {input_file}")
+        diff_file.write("\n".join(diff))
+
+    # Analyze the diff
+    analysis_result = analyze_diff("/tmp/proof-edits/diff_file.diff")
+
+    if analysis_result.lower().strip() == "good":
+        os.replace(temp_output_file, input_file)
+        print(f"Edited file saved as {input_file}")
+    else:
+        print(f"The diff looked suspect. Diff analysis result: {analysis_result}")


 if __name__ == "__main__":
--- a/docs/documentation/documentation.md
+++ b/docs/documentation/documentation.md
@@ -2,7 +2,7 @@

 This documentation is currently hosted live at [Spiff-Arena's ReadTheDocs](https://spiff-arena.readthedocs.io/en/latest/).

-Please set aside a couple of hours to work through this, as getting this setup correctly once is 10,000 times better than having problems every day for the rest of your life.
+Please set aside a couple of hours to work through this, as getting this set up correctly once is 10,000 times better than having problems every day for the rest of your life.

 ## Our Methodology