# Source code for pm4py.algo.conformance.footprints.util.evaluation

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from collections import Counter
from typing import List, Dict, Any


from enum import Enum


class Outputs(Enum):
    """
    Enumeration of string names for the entries of a footprints dictionary.

    Each member's ``value`` is the plain string used as dictionary key.
    Within this module only ``SEQUENCE`` and ``PARALLEL`` are referenced
    (by ``fp_precision``, which indexes the footprints with their values).
    NOTE(review): the remaining members are presumably consumed by other
    footprints-related modules - confirm against their callers.
    """

    DFG = "dfg"
    SEQUENCE = "sequence"
    PARALLEL = "parallel"
    START_ACTIVITIES = "start_activities"
    END_ACTIVITIES = "end_activities"
    ACTIVITIES = "activities"
    SKIPPABLE = "skippable"
    ACTIVITIES_ALWAYS_HAPPENING = "activities_always_happening"
    MIN_TRACE_LENGTH = "min_trace_length"
    TRACE = "trace"
# String keys used by the functions of this module to index the footprints
# dictionaries and the footprints conformance results.
DFG = "dfg"  # mapping whose values are summed as frequencies in fp_fitness
FOOTPRINTS_KEY = "footprints"  # conformance results: pairs looked up in the DFG
START_ACTIVITIES = "start_activities"
END_ACTIVITIES = "end_activities"
SEQUENCE = "sequence"
PARALLEL = "parallel"
# Per-trace entry of the conformance results, tested for truthiness in
# fp_fitness to count the fitting traces.
IS_FOOTPRINTS_FIT = "is_footprints_fit"
def fp_fitness(fp_log, fp_model, conf_results, parameters=None):
    """
    Calculates the footprints fitness provided the footprints of the log,
    and the result of footprints conformance (applied to the entire log)

    Parameters
    ---------------
    fp_log
        Footprints of the log (either a single dictionary, or a list of
        trace-based footprints, which is flattened before use)
    fp_model
        Footprints of the model (not read by this function; kept so that
        the signature stays unchanged)
    conf_results
        Footprints conformance (applied to the entire log); either a single
        dictionary, or a list of trace-based results
    parameters
        Parameters of the algorithm (not read by this function)

    Returns
    ---------------
    fitness
        Fitness value (between 0.0 and 1.0). When ``conf_results`` is a
        list, a dictionary is returned instead, with the keys
        ``"perc_fit_traces"`` (percentage of fitting traces) and
        ``"log_fitness"`` (the fitness value).
    """
    fit_traces = None
    if isinstance(conf_results, list):
        if conf_results:
            num_fit = sum(1 for x in conf_results if x[IS_FOOTPRINTS_FIT])
            fit_traces = num_fit / len(conf_results) * 100.0
        else:
            # no trace means no deviating trace: avoid the division by zero
            # and stay consistent with the fitness 1.0 of an empty DFG
            fit_traces = 100.0

    fp_log = flatten_fp(fp_log)
    conf_results = flatten_conf(conf_results)

    dfg = fp_log[DFG]
    num_sequence_log = len(fp_log[SEQUENCE])
    num_parallel_log = len(fp_log[PARALLEL])
    num_start_activities_log = len(fp_log[START_ACTIVITIES])
    num_end_activities_log = len(fp_log[END_ACTIVITIES])
    num_start_activities_dev = len(conf_results[START_ACTIVITIES])
    num_end_activities_dev = len(conf_results[END_ACTIVITIES])
    footprints = conf_results[FOOTPRINTS_KEY]

    if dfg:
        sum_dfg = float(sum(dfg.values()))
        # .get() so that a plain dict behaves like a Counter: a deviating
        # pair that is not in the DFG contributes a frequency of 0
        sum_dev = float(sum(dfg.get(x, 0) for x in footprints))

        num_relations_log = num_sequence_log + num_parallel_log
        num_boundaries_log = num_start_activities_log + num_end_activities_log
        num_boundaries_dev = num_start_activities_dev + num_end_activities_dev

        # frequency-weighted share of allowed relations, plus the count of
        # non-deviating start/end activities, over all the log elements
        fitness = (
            (1.0 - sum_dev / sum_dfg) * num_relations_log
            + (num_boundaries_log - num_boundaries_dev)
        ) / (num_relations_log + num_boundaries_log)
    else:
        # return fitness 1.0 if DFG is empty
        fitness = 1.0

    if fit_traces is not None:
        return {"perc_fit_traces": fit_traces, "log_fitness": fitness}
    return fitness
def fp_precision(fp_log, fp_model, parameters=None):
    """
    Computes the footprints-based precision from the footprints of the log
    and the footprints of the model.

    The precision is the share of the model's relations (sequence and
    parallel) that are also relations of the log.

    Parameters
    --------------
    fp_log
        Footprints of the log (single dictionary or list of trace-based
        footprints)
    fp_model
        Footprints of the model (single dictionary or list of footprints)
    parameters
        Parameters of the algorithm

    Returns
    -------------
    precision
        Precision value (between 0 and 1); 1.0 when the model has no
        sequence/parallel relation at all
    """
    parameters = {} if parameters is None else parameters

    log_footprints = flatten_fp(fp_log)
    model_footprints = flatten_fp(fp_model)

    sequence_key = Outputs.SEQUENCE.value
    parallel_key = Outputs.PARALLEL.value

    log_relations = log_footprints[sequence_key].union(
        log_footprints[parallel_key]
    )
    model_relations = model_footprints[sequence_key].union(
        model_footprints[parallel_key]
    )

    if not model_relations:
        # nothing is allowed by the model, hence nothing can be imprecise
        return 1.0

    shared_relations = log_relations.intersection(model_relations)
    return float(len(shared_relations)) / float(len(model_relations))
def flatten_fp(fp: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Flattens the trace-based footprints to the footprints of the overall log

    Parameters
    ---------------
    fp
        Trace-based footprints (list of dictionaries). Any object that is
        not a list is assumed to be already flattened and is returned as-is.

    Returns
    --------------
    log_fp
        Overall log footprints: the DFG frequencies are summed over the
        traces, while the sequence, parallel, start activities and end
        activities entries are the union over the traces
    """
    if not isinstance(fp, list):
        return fp

    dfg = Counter()
    sequence = set()
    parallel = set()
    start_activities = set()
    end_activities = set()

    for el in fp:
        for pair, count in el[DFG].items():
            dfg[pair] += count
        # in-place update: avoids re-copying the accumulated sets per trace
        sequence.update(el[SEQUENCE])
        parallel.update(el[PARALLEL])
        start_activities.update(el[START_ACTIVITIES])
        end_activities.update(el[END_ACTIVITIES])

    return {
        DFG: dfg,
        SEQUENCE: sequence,
        PARALLEL: parallel,
        START_ACTIVITIES: start_activities,
        END_ACTIVITIES: end_activities,
    }
def flatten_conf(conf: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Flattens the trace-based conformance checking results (obtained using
    footprints) to the conformance checking results on the overall log

    Parameters
    ----------------
    conf
        Trace-based conformance checking results (list of dictionaries).
        Any object that is not a list is assumed to be already flattened
        and is returned as-is.

    Returns
    ----------------
    log_conf
        Overall log conformance checking results: the footprints, start
        activities and end activities entries are the union over the traces
    """
    if not isinstance(conf, list):
        return conf

    footprints = set()
    start_activities = set()
    end_activities = set()

    for el in conf:
        # in-place update: avoids re-copying the accumulated sets per trace
        footprints.update(el[FOOTPRINTS_KEY])
        start_activities.update(el[START_ACTIVITIES])
        end_activities.update(el[END_ACTIVITIES])

    return {
        FOOTPRINTS_KEY: footprints,
        START_ACTIVITIES: start_activities,
        END_ACTIVITIES: end_activities,
    }