Source code for pm4py.algo.anonymization.pripel.util.trace_levenshtein

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
# This algorithm was copied at 16/Nov/2018 from
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python and applied to activity
# sequences
delimiter = "@"


[docs] def length(s): return s.count(delimiter) + 1
[docs] def enumerateSequence(s): list = s.split(delimiter) return enumerate(list, 0)
[docs] def trace_levenshtein(s1, s2): if length(s1) < length(s2): return trace_levenshtein(s2, s1) # len(s1) >= len(s2) if length(s2) == 0: return length(s1) previous_row = range(length(s2) + 1) for i, c1 in enumerateSequence(s1): current_row = [i + 1] for j, c2 in enumerateSequence(s2): insertions = previous_row[ j + 1] + 1 deletions = current_row[j] + 1 substitutions = previous_row[j] + (c1 != c2) current_row.append(min(insertions, deletions, substitutions)) previous_row = current_row return previous_row[-1]