Source code for pm4py.statistics.traces.generic.common.case_duration

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
import numpy as np
import json
import logging
import importlib.util
from pm4py.util import exec_utils
from enum import Enum


class Parameters(Enum):
    """Parameter keys recognised by the case-duration KDE helpers."""

    # Number of points to include in the generated graph.
    GRAPH_POINTS = "graph_points"
    # Declared key; not read by the functions visible in this module.
    POINT_TO_SAMPLE = "points_to_sample"
def get_kde_caseduration(duration_values, parameters=None):
    """
    Gets the estimation of KDE density for the case durations calculated on the log/dataframe

    Parameters
    --------------
    duration_values
        Values of duration
    parameters
        Possible parameters of the algorithm, including:
            graph_points -> number of points to include in the graph

    Returns
    --------------
    x
        X-axis values to represent
    y
        Y-axis values to represent

    Raises
    --------------
    Exception
        If scipy is not installed
    """
    if not importlib.util.find_spec("scipy"):
        msg = "scipy is not available. graphs cannot be built!"
        logging.error(msg)
        raise Exception(msg)

    from scipy.stats import gaussian_kde

    duration_values = sorted(duration_values)

    # No data at all: nothing to plot.
    if not duration_values:
        return [[], []]

    # The list is sorted, so its ends are the smallest and largest durations.
    lowest, highest = duration_values[0], duration_values[-1]

    # A single value, or several identical values, has zero variance and
    # gaussian_kde cannot be fitted on it (singular covariance matrix).
    # Return a simple single-point representation instead.
    if len(duration_values) == 1 or lowest == highest:
        return [[lowest], [1.0]]

    # The number of graph points only matters once a curve is really sampled.
    if parameters is None:
        parameters = {}
    graph_points = exec_utils.get_param_value(
        Parameters.GRAPH_POINTS, parameters, 200
    )
    half_points = int(graph_points / 2)

    density = gaussian_kde(duration_values)

    # Half of the points are linearly spaced, the other half geometrically
    # spaced between the same extremes.
    linear_xs = list(np.linspace(lowest, highest, half_points))
    # The start is clamped to 0.001 because a geometric sequence cannot
    # start at zero.
    geometric_xs = list(
        np.geomspace(max(lowest, 0.001), highest, half_points)
    )
    xs = sorted(linear_xs + geometric_xs)

    return [xs, list(density(xs))]
def get_kde_caseduration_json(duration_values, parameters=None):
    """
    Gets the estimation of KDE density for the case durations calculated on the log/dataframe
    (expressed as JSON)

    Parameters
    --------------
    duration_values
        Values of duration
    parameters
        Possible parameters of the algorithm, including:
            graph_points: number of points to include in the graph

    Returns
    --------------
    json
        JSON representing the graph points
    """
    x, y = get_kde_caseduration(duration_values, parameters=parameters)
    # Pair every x value with its y value; each pair is serialized as a
    # two-element JSON array.
    return json.dumps(list(zip(x, y)))