Add tool to calculate string distance
This commit is contained in:
parent
6147eef19b
commit
2b0593a51e
2 changed files with 41 additions and 0 deletions
|
@ -36,3 +36,25 @@ def size(size, unit=True):
|
||||||
return "%.3f %s" % (size / math.pow(1024,p), units[int(p)])
|
return "%.3f %s" % (size / math.pow(1024,p), units[int(p)])
|
||||||
else:
|
else:
|
||||||
return "%.3f" % (size / math.pow(1024,p))
|
return "%.3f" % (size / math.pow(1024,p))
|
||||||
|
|
||||||
|
|
||||||
|
def word_distance(str1, str2):
    """Return the Damerau-Levenshtein distance between two strings.

    The distance is the minimal number of single-character insertions,
    deletions, substitutions and transpositions of two adjacent characters
    needed to turn ``str1`` into ``str2``.  This is the restricted
    ("optimal string alignment") variant: a transposed pair is never
    edited again afterwards.

    Args:
        str1: the source string.
        str2: the target string.

    Returns:
        The edit distance as a non-negative integer (0 when the strings
        are equal).
    """

    # d[i][j] is the distance between str1[:i] and str2[:j].  Only the first
    # row and column of the initial values are meaningful (distance to the
    # empty prefix); every other cell is overwritten before it is read.
    d = [[i + j for j in range(len(str2) + 1)] for i in range(len(str1) + 1)]

    for i, c1 in enumerate(str1):
        for j, c2 in enumerate(str2):
            # i and j are 0-based indexes into the strings, while the matrix
            # is shifted by one, so the characters compared here are the
            # ones that end the prefixes described by d[i+1][j+1].
            cost = 0 if c1 == c2 else 1
            d[i+1][j+1] = min(
                d[i][j+1] + 1,     # deletion
                d[i+1][j] + 1,     # insertion
                d[i][j] + cost,    # substitution
            )
            # The last two characters of both prefixes are swapped.
            if i >= 1 and j >= 1 and c1 == str2[j-1] and str1[i-1] == c2:
                d[i+1][j+1] = min(
                    d[i+1][j+1],
                    d[i-1][j-1] + 1,   # transposition
                )

    return d[len(str1)][len(str2)]
|
||||||
|
|
19
nemubot/tools/test_human.py
Normal file
19
nemubot/tools/test_human.py
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from nemubot.tools.human import size, word_distance
|
||||||
|
|
||||||
|
class TestHuman(unittest.TestCase):
    """Test case for the helpers imported from nemubot.tools.human."""

    def test_Levenshtein(self):
        # (first string, second string, expected distance)
        expectations = (
            ("", "a", 1),
            ("a", "", 1),
            ("a", "a", 0),
            ("a", "b", 1),
            ("aa", "ba", 1),
            ("ba", "ab", 1),
            ("long", "short", 4),
        )
        for first, second, expected in expectations:
            self.assertEqual(word_distance(first, second), expected)

        # The distance must not depend on the order of the arguments.
        forward = word_distance("long", "short")
        backward = word_distance("short", "long")
        self.assertEqual(forward, backward)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
Loading…
Add table
Add a link
Reference in a new issue