diff --git a/docs/appendices/bpmn_terminology.md b/docs/appendices/bpmn_terminology.md index f0bbbaed9..1d8e39257 100644 --- a/docs/appendices/bpmn_terminology.md +++ b/docs/appendices/bpmn_terminology.md @@ -84,7 +84,7 @@ There are four types of Gateways: Exclusive, Parallel, Inclusive, and Event-Base ## Intermediate Event -This is an event that occurs within the middle of a process, neither at the start nor the end. +This is an event that occurs in the middle of a process, neither at the start nor the end. It can be connected to other tasks through connectors or placed on the border of a task. It evaluates conditions and circumstances, triggering events and enabling the initiation of alternative paths within the process. @@ -99,7 +99,7 @@ These are subdivisions within a Pool that are utilized to assign activities to s ## Merge This is the process in which two or more parallel Sequence Flow paths converge into a single path, achieved either through multiple incoming Sequence Flows or by utilizing an Exclusive Gateway. -This merging of paths is also commonly referred to as an "OR-Join". +This merging of paths is also commonly referred to as an "OR-Join." ## Message diff --git a/docs/bin/gpt-proofread.py b/docs/bin/gpt-proofread.py index afeb3b110..91557bc0c 100644 --- a/docs/bin/gpt-proofread.py +++ b/docs/bin/gpt-proofread.py @@ -69,20 +69,22 @@ def read_file(file_path): with open(file_path, "r") as f: return f.read() + def split_content(content, chunk_size=13000): splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0) return splitter.split_text(content) + def process_chunk(doc, chat_prompt, retries=3, chunk_index=0): for attempt in range(retries): result = llm.invoke(chat_prompt.format_prompt(text=doc).to_messages()) edited_result_content = result.content if 0.95 * len(doc) <= len(edited_result_content) <= 1.05 * len(doc): - return edited_result_content print(f"Retry {attempt + 1} for chunk due to size mismatch.") raise ValueError("Failed to process chunk after retries.") + def write_to_temp_file(temp_file_path, docs, chat_prompt): os.makedirs("/tmp/proof-edits", exist_ok=True) with open(temp_file_path, "w") as f: @@ -90,12 +92,27 @@ def write_to_temp_file(temp_file_path, docs, chat_prompt): edited_result_content = process_chunk(doc, chat_prompt, chunk_index=i) f.write(edited_result_content + "\n") + +def analyze_diff(diff_file_path): + diff_content = read_file(diff_file_path) + analysis_prompt = f""" +You are an expert technical editor. Please analyze the following diff and ensure it looks like a successful copy edit of a markdown file. Provide feedback if there are any issues or if it looks good, just reply with the single word: good + +Diff: +{diff_content} +""" + result = llm.invoke([HumanMessage(content=analysis_prompt)]) + return result.content + + def process_file(input_file): content = read_file(input_file) docs = split_content(content) print(f"Split into {len(docs)} docs") - chat_prompt = ChatPromptTemplate.from_messages([system_prompt, human_message_prompt]) + chat_prompt = ChatPromptTemplate.from_messages( + [system_prompt, human_message_prompt] + ) os.makedirs("/tmp/proof-edits", exist_ok=True) temp_output_file = "/tmp/proof-edits/edited_output.md" @@ -109,11 +126,20 @@ def process_file(input_file): edited_content = f.read() # Generate and save the diff for the whole file - diff = difflib.unified_diff(original_content.splitlines(), edited_content.splitlines(), lineterm='') + diff = difflib.unified_diff( + original_content.splitlines(), edited_content.splitlines(), lineterm="" + ) with open("/tmp/proof-edits/diff_file.diff", "w") as diff_file: - diff_file.write('\n'.join(diff)) - os.replace(temp_output_file, input_file) - print(f"Edited file saved as {input_file}") + diff_file.write("\n".join(diff)) + + # Analyze the diff + analysis_result = analyze_diff("/tmp/proof-edits/diff_file.diff") + + if analysis_result.lower().strip() == "good": + os.replace(temp_output_file, input_file) + print(f"Edited file saved as {input_file}") + else: + print(f"The diff looked suspect. Diff analysis result: {analysis_result}") if __name__ == "__main__": diff --git a/docs/documentation/documentation.md b/docs/documentation/documentation.md index 3b475d5c2..c22638750 100644 --- a/docs/documentation/documentation.md +++ b/docs/documentation/documentation.md @@ -2,7 +2,7 @@ This documentation is currently hosted live at [Spiff-Arena's ReadTheDocs](https://spiff-arena.readthedocs.io/en/latest/). -Please set aside a couple of hours to work through this, as getting this setup correctly once is 10,000 times better than having problems every day for the rest of your life. +Please set aside a couple of hours to work through this, as getting this set up correctly once is 10,000 times better than having problems every day for the rest of your life. ## Our Methodology