Source code for pm4py.statistics.traces.generic.log.case_arrival

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or 
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from pm4py.util.xes_constants import DEFAULT_TIMESTAMP_KEY
import statistics
from pm4py.util.business_hours import BusinessHours
from pm4py.util import exec_utils, constants
from enum import Enum
from typing import Optional, Dict, Any, Union
from pm4py.objects.log.obj import EventLog
from pm4py.objects.conversion.log import converter as log_converter


class Parameters(Enum):
    """
    Keys that can be used in the ``parameters`` dictionary accepted by the
    functions of this module.

    Each member's value is the actual dictionary key; the members are passed
    to ``exec_utils.get_param_value`` to look the setting up.
    """

    # The following three keys are not read directly by the functions visible
    # in this module. The whole parameters dictionary is forwarded to
    # ``log_converter.apply``, so they may be consumed there (to be confirmed
    # against the converter).
    ATTRIBUTE_KEY = constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # Event attribute holding the timestamp; the functions in this module fall
    # back to DEFAULT_TIMESTAMP_KEY when it is not provided.
    TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
    CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
    # When truthy, gaps between timestamps are measured through BusinessHours
    # instead of plain timestamp subtraction (default in this module: False).
    BUSINESS_HOURS = "business_hours"
    # Forwarded to BusinessHours as ``business_hour_slots``
    # (default in this module: constants.DEFAULT_BUSINESS_HOUR_SLOTS).
    BUSINESS_HOUR_SLOTS = "business_hour_slots"
    # Forwarded to BusinessHours as ``workcalendar``
    # (default in this module: constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR).
    WORKCALENDAR = "workcalendar"
def get_case_arrival_avg(
    log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
) -> float:
    """
    Computes the mean gap between the start times of consecutive cases

    The start time of a case is the timestamp of its first event. Start
    times are sorted, the gap between each pair of neighbours is measured,
    and the mean of those gaps is returned.

    Parameters
    --------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> attribute of the log to be used as timestamp
            Parameters.BUSINESS_HOURS -> if truthy, gaps are measured with BusinessHours
            Parameters.BUSINESS_HOUR_SLOTS -> slots forwarded to BusinessHours
            Parameters.WORKCALENDAR -> work calendar forwarded to BusinessHours

    Returns
    --------------
    case_arrival_avg
        Average time elapsed between case starts (0.0 when fewer than two
        start times are available)
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(
        log,
        variant=log_converter.Variants.TO_EVENT_LOG,
        parameters=parameters,
    )

    use_business_hours = exec_utils.get_param_value(
        Parameters.BUSINESS_HOURS, parameters, False
    )
    slots = exec_utils.get_param_value(
        Parameters.BUSINESS_HOUR_SLOTS,
        parameters,
        constants.DEFAULT_BUSINESS_HOUR_SLOTS,
    )
    calendar = exec_utils.get_param_value(
        Parameters.WORKCALENDAR,
        parameters,
        constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR,
    )
    ts_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY
    )

    # Collect the timestamp of the first event of every usable trace; empty
    # traces and traces whose first event has no timestamp are skipped.
    starts = []
    for trace in log:
        if not trace:
            continue
        first_event = trace[0]
        if ts_key in first_event:
            starts.append(first_event[ts_key])
    starts.sort()

    # Pair every start time with its successor in chronological order.
    gaps = []
    for earlier, later in zip(starts, starts[1:]):
        if use_business_hours:
            interval = BusinessHours(
                earlier,
                later,
                business_hour_slots=slots,
                workcalendar=calendar,
            )
            gaps.append(interval.get_seconds())
        else:
            gaps.append((later - earlier).total_seconds())

    # statistics.mean rejects empty input, hence the explicit fallback.
    return statistics.mean(gaps) if gaps else 0.0
def get_case_dispersion_avg(
    log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
) -> float:
    """
    Gets the average time elapsed between case ends

    The end time of a case is the timestamp of its last event. End times
    are sorted, the gap between each pair of neighbours is measured, and the
    mean of those gaps is returned.

    Parameters
    --------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> attribute of the log to be used as timestamp
            Parameters.BUSINESS_HOURS -> if truthy, gaps are measured with BusinessHours
            Parameters.BUSINESS_HOUR_SLOTS -> slots forwarded to BusinessHours
            Parameters.WORKCALENDAR -> work calendar forwarded to BusinessHours

    Returns
    --------------
    case_dispersion_avg
        Average time elapsed between the completion of cases (0.0 when
        fewer than two end times are available)
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(
        log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters
    )

    business_hours = exec_utils.get_param_value(
        Parameters.BUSINESS_HOURS, parameters, False
    )
    business_hours_slots = exec_utils.get_param_value(
        Parameters.BUSINESS_HOUR_SLOTS,
        parameters,
        constants.DEFAULT_BUSINESS_HOUR_SLOTS,
    )
    workcalendar = exec_utils.get_param_value(
        Parameters.WORKCALENDAR,
        parameters,
        constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR,
    )
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY
    )

    # The guard must inspect the same event that is read (the LAST one).
    # Checking the first event instead would raise KeyError when only the
    # last event lacks the timestamp, and would wrongly discard traces whose
    # last event has a timestamp while the first does not.
    case_end_time = sorted(
        trace[-1][timestamp_key]
        for trace in log
        if trace and timestamp_key in trace[-1]
    )

    # Pair every end time with its successor in chronological order.
    case_diff_end_time = []
    for previous_end, next_end in zip(case_end_time, case_end_time[1:]):
        if business_hours:
            bh = BusinessHours(
                previous_end,
                next_end,
                business_hour_slots=business_hours_slots,
                workcalendar=workcalendar,
            )
            case_diff_end_time.append(bh.get_seconds())
        else:
            case_diff_end_time.append(
                (next_end - previous_end).total_seconds()
            )

    # statistics.mean raises on empty data, so fall back to 0.0 explicitly.
    if case_diff_end_time:
        return statistics.mean(case_diff_end_time)

    return 0.0