spiff-arena/SpiffWorkflow/util/levenshtein.py
Jon Herron 7c219fd731 Squashed 'SpiffWorkflow/' content from commit 63db3e4
git-subtree-dir: SpiffWorkflow
git-subtree-split: 63db3e45947ec66b8d0efc2c74064004f8ff482c
2022-10-12 10:19:53 -04:00

22 lines
565 B
Python

from difflib import ndiff
def distance(str1, str2):
    """Estimate the edit distance between two sequences via ``difflib.ndiff``.

    The diff is walked entry by entry.  Consecutive additions and removals
    that sit between two matching elements form one run; a run holding
    ``a`` additions and ``r`` removals costs ``max(a, r)``, so a paired
    removal/addition counts as a single substitution.

    Args:
        str1: First sequence, normally a string (diffed per character).
        str2: Second sequence, of the same kind as ``str1``.

    Returns:
        The summed cost of all runs as an ``int``; ``0`` when the diff
        contains no additions or removals.
    """
    pending = {"+": 0, "-": 0}
    total = 0  # deliberately not named ``distance``: avoids shadowing this function
    for edit_code, *_ in ndiff(str1, str2):
        if edit_code == " ":
            # A matching element closes the current run of edits.
            total += max(pending.values())
            pending = {"+": 0, "-": 0}
        elif edit_code in pending:
            pending[edit_code] += 1
        # Any other code (ndiff's "?" intraline hint lines, produced for
        # similar multi-character elements) is not an edit and is skipped
        # instead of raising KeyError.
    # Account for a run of edits that reaches the end of the diff.
    total += max(pending.values())
    return total
def most_similar(value, item_list, limit):
    """Return the leading ``limit`` entries of ``item_list`` nearest to ``value``.

    Each entry is scored with ``distance(value, entry)``; entries are
    returned in ascending score order.  The sort is stable, so entries with
    equal scores keep the order they had in ``item_list``.
    """
    def score(candidate):
        return distance(value, candidate)

    ranked = sorted(item_list, key=score)
    return ranked[:limit]