Source code for pm4py.algo.conformance.footprints.util.evaluation

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from collections import Counter
from typing import List, Dict, Any


from enum import Enum



[docs]
class Outputs(Enum):
    """
    String keys naming the entries of a footprints dictionary.

    Each member's ``value`` is the key itself; fp_precision in this module
    indexes footprints with ``Outputs.SEQUENCE.value`` and
    ``Outputs.PARALLEL.value``. The remaining members presumably name
    further entries of the same dictionary (to be confirmed against the
    code that produces the footprints).
    """
    DFG = "dfg"
    SEQUENCE = "sequence"
    PARALLEL = "parallel"
    START_ACTIVITIES = "start_activities"
    END_ACTIVITIES = "end_activities"
    ACTIVITIES = "activities"
    SKIPPABLE = "skippable"
    ACTIVITIES_ALWAYS_HAPPENING = "activities_always_happening"
    MIN_TRACE_LENGTH = "min_trace_length"
    TRACE = "trace"



# Dictionary keys used by the functions of this module to read footprints
# and footprints-conformance results. Where a key is also a member of
# Outputs, the string is the same as that member's value.

# Footprints entry read as a mapping whose values are summed up
DFG = "dfg"
# Conformance-results entry; its elements are looked up in the DFG mapping
FOOTPRINTS_KEY = "footprints"
# Read from both the footprints and the conformance results (sized with len)
START_ACTIVITIES = "start_activities"
END_ACTIVITIES = "end_activities"
# Footprints entries holding the sequence / parallel relations (merged as sets)
SEQUENCE = "sequence"
PARALLEL = "parallel"
# Per-trace conformance-results entry, tested for truthiness to count fit traces
IS_FOOTPRINTS_FIT = "is_footprints_fit"



[docs]
def fp_fitness(fp_log, fp_model, conf_results, parameters=None):
    """
    Calculates the footprints fitness provided the footprints of the log,
    and the result of footprints conformance (applied to the entire log)

    Parameters
    ---------------
    fp_log
        Footprints of the log: either a single dictionary, or a list of
        per-trace footprints (which is flattened before being used)
    fp_model
        Footprints of the model (kept in the signature, not read here)
    conf_results
        Footprints conformance results: either a single dictionary for the
        entire log, or a list with one dictionary per trace
    parameters
        Parameters of the algorithm (currently none is read)

    Returns
    ---------------
    fitness
        If conf_results is a single dictionary: the fitness value
        (expected between 0.0 and 1.0).
        If conf_results is a list: a dictionary with the keys
        "perc_fit_traces" (percentage of traces flagged as fit, 0-100) and
        "log_fitness" (the fitness value described above).
    """
    if parameters is None:
        parameters = {}

    # The percentage of fit traces is only available when the conformance
    # results are provided trace by trace.
    fit_traces = None
    if isinstance(conf_results, list):
        if conf_results:
            num_fit = sum(1 for x in conf_results if x[IS_FOOTPRINTS_FIT])
            fit_traces = num_fit / len(conf_results) * 100.0
        else:
            # No traces at all: nothing deviates. Report 100% instead of
            # dividing by zero, in line with the fitness of 1.0 that is
            # returned below for an empty DFG.
            fit_traces = 100.0

    fp_log = flatten_fp(fp_log)
    conf_results = flatten_conf(conf_results)

    dfg = fp_log[DFG]
    num_sequence_log = len(fp_log[SEQUENCE])
    num_parallel_log = len(fp_log[PARALLEL])
    num_start_activities_log = len(fp_log[START_ACTIVITIES])
    num_end_activities_log = len(fp_log[END_ACTIVITIES])
    num_start_activities_dev = len(conf_results[START_ACTIVITIES])
    num_end_activities_dev = len(conf_results[END_ACTIVITIES])
    footprints = conf_results[FOOTPRINTS_KEY]

    if dfg:
        sum_dfg = float(sum(dfg.values()))
        # .get(..., 0) makes a plain-dict DFG behave like the Counter built
        # by flatten_fp: a deviating pair missing from the DFG weighs 0
        # instead of raising KeyError.
        sum_dev = float(sum(dfg.get(x, 0) for x in footprints))

        # The share of non-deviating DFG weight is scaled by the number of
        # sequence/parallel relations; the non-deviating start/end
        # activities are added on top. The total is then normalised by the
        # overall number of relations and start/end activities of the log.
        fitness = (
            (1.0 - sum_dev / sum_dfg) * (num_sequence_log + num_parallel_log)
            + (
                num_start_activities_log
                + num_end_activities_log
                - num_start_activities_dev
                - num_end_activities_dev
            )
        ) / (
            num_sequence_log
            + num_parallel_log
            + num_start_activities_log
            + num_end_activities_log
        )
    else:
        # return fitness 1.0 if DFG is empty
        fitness = 1.0

    if fit_traces is not None:
        return {"perc_fit_traces": fit_traces, "log_fitness": fitness}

    return fitness




[docs]
def fp_precision(fp_log, fp_model, parameters=None):
    """
    Computes the footprints-based precision from the footprints of the log
    and the footprints of the model.

    The precision is the fraction of the model's sequence/parallel
    relations that also appear among the log's sequence/parallel relations.

    Parameters
    --------------
    fp_log
        Footprints of the log (a dictionary, or a list of per-trace
        footprints that is flattened first)
    fp_model
        Footprints of the model (same accepted forms as fp_log)
    parameters
        Parameters of the algorithm (currently none is read)

    Returns
    -------------
    precision
        Precision value (between 0 and 1); 1.0 when the model has no
        sequence/parallel relation
    """
    if parameters is None:
        parameters = {}

    log_footprints = flatten_fp(fp_log)
    model_footprints = flatten_fp(fp_model)

    sequence_key = Outputs.SEQUENCE.value
    parallel_key = Outputs.PARALLEL.value

    # all the relations (sequence or parallel) seen on each side
    log_relations = log_footprints[sequence_key].union(
        log_footprints[parallel_key]
    )
    model_relations = model_footprints[sequence_key].union(
        model_footprints[parallel_key]
    )

    # a model without relations cannot allow anything extra
    if not model_relations:
        return 1.0

    shared_relations = log_relations.intersection(model_relations)
    return float(len(shared_relations)) / float(len(model_relations))




[docs]
def flatten_fp(fp: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Merges a list of trace-based footprints into a single footprints
    dictionary for the overall log

    Parameters
    ---------------
    fp
        Trace-based footprints (one dictionary per trace). A value that
        is not a list is returned as it is.

    Returns
    --------------
    log_fp
        Overall log footprints: the DFG counts summed over the traces,
        and the union over the traces of the sequence, parallel, start
        activities and end activities entries
    """
    # nothing to merge when the footprints are not given per trace
    if not isinstance(fp, list):
        return fp

    dfg_counts = Counter()
    merged_sets = {
        key: set()
        for key in (SEQUENCE, PARALLEL, START_ACTIVITIES, END_ACTIVITIES)
    }

    for trace_fp in fp:
        for arc, count in trace_fp[DFG].items():
            dfg_counts[arc] += count
        # accumulate every set-valued entry in place
        for key, accumulated in merged_sets.items():
            accumulated.update(trace_fp[key])

    return {DFG: dfg_counts, **merged_sets}




[docs]
def flatten_conf(conf: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Merges the trace-based conformance checking results (obtained using
    footprints) into a single result for the overall log

    Parameters
    ----------------
    conf
        Trace-based conformance checking results (one dictionary per
        trace). A value that is not a list is returned as it is.

    Returns
    ----------------
    log_conf
        Overall log conformance checking results: the union over the
        traces of the footprints, start activities and end activities
        entries
    """
    # nothing to merge when the results are not given per trace
    if not isinstance(conf, list):
        return conf

    all_footprints = set()
    all_start_activities = set()
    all_end_activities = set()

    for trace_conf in conf:
        all_footprints.update(trace_conf[FOOTPRINTS_KEY])
        all_start_activities.update(trace_conf[START_ACTIVITIES])
        all_end_activities.update(trace_conf[END_ACTIVITIES])

    return {
        FOOTPRINTS_KEY: all_footprints,
        START_ACTIVITIES: all_start_activities,
        END_ACTIVITIES: all_end_activities,
    }