Compare commits

...

3 Commits

Author SHA1 Message Date
burnettk c088d8884e
update script 2024-06-28 08:36:39 -04:00
burnettk 5ed9263f4e
update script 2024-06-28 08:32:13 -04:00
burnettk d484633e17
update script 2024-06-28 08:26:17 -04:00
1 changed file with 18 additions and 7 deletions

View File

@@ -3,6 +3,7 @@
import sys
import os
import difflib
import os.path
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
@@ -36,9 +37,11 @@ human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
# - Do not leave any trailing spaces (handled by another script, though)
# - Never remove entire sentences (didn't seem necessary, since we said keep everything else exactly the same)
system_text = """You are proofreading and you will receive text that is almost exactly correct, but may contain errors. You should:
system_text = """You are proofreading a markdown document and you will receive text that is almost exactly correct, but may contain errors. You should:
- Fix spelling
- Not edit URLs
- Never touch a markdown link; these might look like: [Image label](images/Manual_instructions_panel.png)
- Improve grammar that is obviously wrong
- Fix awkward language if it is really bad
- Keep everything else exactly the same, including tone and voice
@@ -47,10 +50,14 @@ system_text = """You are proofreading and you will receive text that is almost e
- Output one line per sentence (same as input)
- Avoid putting multiple sentences on the same line
- Make sure you do not remove any headers at the beginning of the text (markdown headers begin with one or more # characters)
The markdown document follows. The output document's first line should probably match that of the input document, even if it is a markdown header.
"""
system_prompt = SystemMessage(content=system_text)
EDIT_DIR = "/tmp/edits"
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key is None:
keyfile = "oai.key"
@@ -98,6 +105,8 @@ def analyze_diff(diff_file_path):
analysis_prompt = f"""
You are an expert technical editor.
Please analyze the following diff and ensure it looks like a successful copy edit of a markdown file.
Editing URLs is not allowed; never touch a link like [Image label](images/Manual_instructions_panel.png)
It is not a successful edit if line one has been removed (editing is fine; removing is not).
It is not a successful edit if three or more lines in a row have been removed without replacement.
Edits or reformats are potentially good, but simply removing or adding a bunch of content is bad.
Provide feedback if there are any issues.
@@ -118,13 +127,13 @@ def process_file(input_file):
chat_prompt = ChatPromptTemplate.from_messages(
[system_prompt, human_message_prompt]
)
os.makedirs("/tmp/proof-edits", exist_ok=True)
os.makedirs(EDIT_DIR, exist_ok=True)
# Save the original content for diff generation
original_content = content
edited_content = get_edited_content(docs, chat_prompt)
temp_output_file = "/tmp/proof-edits/edited_output.md"
temp_output_file = f"{EDIT_DIR}/edited_output.md"
overall_result = None
if edited_content == original_content:
@@ -134,15 +143,17 @@ def process_file(input_file):
with open(temp_output_file, "w") as f:
f.write(edited_content)
# Generate and save the diff for the whole file
# Generate and save the diff for the whole file based on the basename of the input file
input_basename = os.path.basename(input_file)
diff_file_path = f"{EDIT_DIR}/{input_basename}.diff"
diff = difflib.unified_diff(
original_content.splitlines(), edited_content.splitlines(), lineterm=""
)
with open("/tmp/proof-edits/diff_file.diff", "w") as diff_file:
with open(diff_file_path, "w") as diff_file:
diff_file.write("\n".join(diff))
# Analyze the diff
analysis_result = analyze_diff("/tmp/proof-edits/diff_file.diff")
analysis_result = analyze_diff(diff_file_path)
if analysis_result.lower().strip() == "good":
os.replace(temp_output_file, input_file)
@@ -161,5 +172,5 @@ if __name__ == "__main__":
else:
input_file = sys.argv[1]
overall_result = process_file(input_file)
with open("/tmp/proof-edits/proofread_results.txt", "a") as f:
with open(f"{EDIT_DIR}/proofread_results.txt", "a") as f:
f.write(f"{input_file}: {overall_result}\n")