Add tool to calculate string distance
This commit is contained in:
parent
6147eef19b
commit
2b0593a51e
2 changed files with 41 additions and 0 deletions
|
@ -36,3 +36,25 @@ def size(size, unit=True):
|
|||
return "%.3f %s" % (size / math.pow(1024,p), units[int(p)])
|
||||
else:
|
||||
return "%.3f" % (size / math.pow(1024,p))
|
||||
|
||||
|
||||
def word_distance(str1, str2):
    """Return the Damerau-Levenshtein distance between two strings.

    This is the restricted ("optimal string alignment") variant: the
    result is the minimum number of single-character insertions,
    deletions, substitutions and transpositions of two adjacent
    characters needed to turn ``str1`` into ``str2``.

    Both arguments only need to support ``len()`` and indexing; two
    empty strings are at distance 0.
    """
    len1, len2 = len(str1), len(str2)

    # d[i][j] is the distance between the prefixes str1[:i] and str2[:j].
    # Row 0 and column 0 (distance to the empty prefix) keep their initial
    # values; every other cell is overwritten by the loops below.
    d = [[i + j for j in range(len2 + 1)] for i in range(len1 + 1)]

    for i in range(len1):
        for j in range(len2):
            # i and j are 0-based positions in the strings, so the current
            # characters are str1[i] and str2[j]. Indexing with i-1 / j-1
            # here would wrap around to the last character when i or j is 0.
            cost = 0 if str1[i] == str2[j] else 1
            d[i+1][j+1] = min(
                d[i][j+1] + 1,   # deletion
                d[i+1][j] + 1,   # insertion
                d[i][j] + cost,  # substitution
            )
            swapped = (
                i >= 1 and j >= 1
                and str1[i] == str2[j-1]
                and str1[i-1] == str2[j]
            )
            if swapped:
                d[i+1][j+1] = min(
                    d[i+1][j+1],
                    d[i-1][j-1] + 1,  # transposition
                )

    return d[len1][len2]
|
||||
|
|
19
nemubot/tools/test_human.py
Normal file
19
nemubot/tools/test_human.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
import unittest
|
||||
|
||||
from nemubot.tools.human import size, word_distance
|
||||
|
||||
class TestHuman(unittest.TestCase):
    """Unit tests for helpers imported from nemubot.tools.human."""

    def test_Levenshtein(self):
        # (first string, second string, expected distance), checked in order.
        expectations = (
            ("", "a", 1),          # pure insertion
            ("a", "", 1),          # pure deletion
            ("a", "a", 0),         # identical strings
            ("a", "b", 1),         # single substitution
            ("aa", "ba", 1),       # substitution of the first character
            ("ba", "ab", 1),       # adjacent transposition
            ("long", "short", 4),
        )
        for left, right, wanted in expectations:
            self.assertEqual(word_distance(left, right), wanted)

        # The distance must not depend on the order of the arguments.
        forward = word_distance("long", "short")
        backward = word_distance("short", "long")
        self.assertEqual(forward, backward)
|
||||
|
||||
|
||||
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
Loading…
Add table
Add a link
Reference in a new issue