# Source code for pm4py.algo.clustering.trace_attribute_driven.util.evaluation

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from scipy.spatial.distance import squareform
import numpy as np
from pm4py.algo.clustering.trace_attribute_driven.variants import act_dist_calc
from pm4py.algo.clustering.trace_attribute_driven.variants import suc_dist_calc
from pm4py.algo.clustering.trace_attribute_driven.leven_dist import (
    leven_dist_calc,
)
from pm4py.algo.clustering.trace_attribute_driven.dfg import dfg_dist



[docs]
def dfg_dis(loglist, percent, alpha):
    """
    Build the condensed pairwise distance vector of a list of logs from the
    two distances returned by ``dfg_dist.dfg_dist_calc``.

    Parameters
    -----------
    loglist
        Indexable collection of logs; every unordered pair is compared once.
    percent
        Not used by this function.
    alpha
        Weight applied to the first distance returned by ``dfg_dist_calc``;
        the second distance is weighted by ``1 - alpha``.

    Returns
    -----------
    y
        Result of ``scipy.spatial.distance.squareform`` applied to the
        symmetric, zero-diagonal matrix of weighted distances.
    """
    n_logs = len(loglist)
    matrix = np.zeros((n_logs, n_logs))

    # Only pairs with row < col are evaluated; each value is mirrored so the
    # matrix handed to squareform is symmetric.
    for row in range(n_logs):
        for col in range(row + 1, n_logs):
            act_part, dfg_part = dfg_dist.dfg_dist_calc(
                loglist[row], loglist[col]
            )
            weighted = act_part * alpha + dfg_part * (1 - alpha)
            matrix[row, col] = matrix[col, row] = weighted

    return squareform(matrix)




[docs]
def eval_avg_variant(loglist, percent, alpha):
    """
    Build the condensed pairwise distance vector of a list of logs by
    blending ``act_dist_calc.act_sim_percent_avg`` with
    ``suc_dist_calc.suc_sim_percent_avg``.

    Parameters
    -----------
    loglist
        Indexable collection of logs; every unordered pair is compared once.
    percent
        Value forwarded to both distance functions as their third and
        fourth positional argument.
    alpha
        Weight of the ``act_sim_percent_avg`` result; the
        ``suc_sim_percent_avg`` result is weighted by ``1 - alpha``.

    Returns
    -----------
    y
        Result of ``scipy.spatial.distance.squareform`` applied to the
        symmetric, zero-diagonal matrix of blended distances.
    """
    count = len(loglist)
    pairwise = np.zeros((count, count))

    for first in range(count):
        for second in range(first + 1, count):
            log_a = loglist[first]
            log_b = loglist[second]
            act_part = act_dist_calc.act_sim_percent_avg(
                log_a, log_b, percent, percent
            )
            suc_part = suc_dist_calc.suc_sim_percent_avg(
                log_a, log_b, percent, percent
            )
            blended = act_part * alpha + suc_part * (1 - alpha)
            # Store the value on both sides of the diagonal.
            pairwise[first, second] = pairwise[second, first] = blended

    return squareform(pairwise)




[docs]
def eval_DMM_variant(loglist, percent, alpha):
    """
    Build the condensed pairwise distance vector of a list of logs by
    blending ``act_dist_calc.act_sim_percent`` with
    ``suc_dist_calc.suc_sim_percent``.

    Parameters
    -----------
    loglist
        Indexable collection of logs; every unordered pair is compared once.
    percent
        Value forwarded to both distance functions as their third and
        fourth positional argument.
    alpha
        Weight of the ``act_sim_percent`` result; the ``suc_sim_percent``
        result is weighted by ``1 - alpha``.

    Returns
    -----------
    y
        Result of ``scipy.spatial.distance.squareform`` applied to the
        symmetric, zero-diagonal matrix of blended distances.
    """
    total = len(loglist)
    distances = np.zeros((total, total))

    for a in range(total):
        for b in range(a + 1, total):
            by_activity = act_dist_calc.act_sim_percent(
                loglist[a], loglist[b], percent, percent
            )
            by_succession = suc_dist_calc.suc_sim_percent(
                loglist[a], loglist[b], percent, percent
            )
            combined = by_activity * alpha + by_succession * (1 - alpha)
            # Mirror the entry so squareform receives a symmetric matrix.
            distances[a, b] = distances[b, a] = combined

    return squareform(distances)




[docs]
def eval_avg_leven(loglist, percent, alpha):
    """
    Build the condensed pairwise distance vector of a list of logs using
    ``leven_dist_calc.leven_dist_avg``.

    Parameters
    -----------
    loglist
        Indexable collection of logs; every unordered pair is compared once.
    percent
        Value forwarded to ``leven_dist_avg`` as its third and fourth
        positional argument.
    alpha
        Not used by this function.

    Returns
    -----------
    y
        Result of ``scipy.spatial.distance.squareform`` applied to the
        symmetric, zero-diagonal matrix of pairwise distances.
    """
    n_logs = len(loglist)
    matrix = np.zeros((n_logs, n_logs))

    for row in range(n_logs):
        for col in range(row + 1, n_logs):
            value = leven_dist_calc.leven_dist_avg(
                loglist[row], loglist[col], percent, percent
            )
            # Fill both triangles with the same value.
            matrix[row, col] = matrix[col, row] = value

    return squareform(matrix)




[docs]
def eval_DMM_leven(loglist, percent, alpha):
    """
    Build the condensed pairwise distance vector of a list of logs using
    ``leven_dist_calc.leven_dist``.

    Parameters
    -----------
    loglist
        Indexable collection of logs; every unordered pair is compared once.
    percent
        Value forwarded to ``leven_dist`` as its third and fourth
        positional argument.
    alpha
        Not used by this function.

    Returns
    -----------
    y
        Result of ``scipy.spatial.distance.squareform`` applied to the
        symmetric, zero-diagonal matrix of pairwise distances.
    """
    count = len(loglist)
    pairwise = np.zeros((count, count))

    for first in range(count):
        for second in range(first + 1, count):
            distance = leven_dist_calc.leven_dist(
                loglist[first], loglist[second], percent, percent
            )
            # Fill both triangles with the same value.
            pairwise[first, second] = pairwise[second, first] = distance

    return squareform(pairwise)