Source code for pm4py.algo.discovery.temporal_profile.variants.log

from enum import Enum
from typing import Optional, Dict, Any

from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.obj import EventLog
from pm4py.util import exec_utils, constants, xes_constants
from pm4py.util import typing
from pm4py.util.business_hours import BusinessHours


class Parameters(Enum):
    """
    Keys recognised in the ``parameters`` dictionary of this variant.

    The values of the first three members come from ``pm4py.util.constants``;
    the remaining ones are plain string keys.
    """

    # Attribute to use as activity.
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # Attribute to use as start timestamp of an event.
    START_TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY
    # Attribute to use as (completion) timestamp of an event.
    TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
    # Whether time differences are measured in business hours rather than
    # in total elapsed time.
    BUSINESS_HOURS = "business_hours"
    # Work schedule: list of (start, end) tuples, each expressed in seconds
    # since the start of the week.
    BUSINESS_HOUR_SLOTS = "business_hour_slots"
    # NOTE(review): this key is not read by ``apply`` in this module as
    # shown; its intended semantics cannot be established from here.
    WORKCALENDAR = "workcalendar"
def apply(
    log: EventLog, parameters: Optional[Dict[Any, Any]] = None
) -> typing.TemporalProfile:
    """
    Gets the temporal profile from the log.

    Implements the approach described in:
    Stertz, Florian, Jürgen Mangler, and Stefanie Rinderle-Ma. "Temporal
    Conformance Checking at Runtime based on Time-infused Process Models."
    arXiv preprint arXiv:2008.07262 (2020).

    For every pair of events (e_i, e_j) of the same trace with i < j, the time
    elapsed between the timestamp of e_i and the start timestamp of e_j is
    recorded under the key (activity of e_i, activity of e_j), provided that
    e_j does not start before e_i's timestamp. The recordings of each couple
    of activities are then summarised by their mean and standard deviation.

    Parameters
    ----------
    log
        Event log
    parameters
        Parameters, including:
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.START_TIMESTAMP_KEY => the attribute to use as start
          timestamp. Default: the attribute selected by
          Parameters.TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
        - Parameters.BUSINESS_HOURS => calculates the difference of time based
          on the business hours, not the total time. Default: False
        - Parameters.BUSINESS_HOUR_SLOTS => work schedule of the company,
          provided as a list of tuples where each tuple represents one time
          slot of business hours. One slot i.e. one tuple consists of one
          start and one end time given in seconds since week start, e.g.
          [
              (7 * 60 * 60, 17 * 60 * 60),
              ((24 + 7) * 60 * 60, (24 + 12) * 60 * 60),
              ((24 + 13) * 60 * 60, (24 + 17) * 60 * 60),
          ]
          meaning that business hours are Mondays 07:00 - 17:00 and Tuesdays
          07:00 - 12:00 and 13:00 - 17:00

    Returns
    -------
    temporal_profile
        Temporal profile of the log: a dictionary associating each observed
        couple of activities (a, b) to the tuple (mean, standard deviation)
        of the recorded times. When a couple has been observed only once, the
        single recording is returned with a standard deviation of 0.
    """
    if parameters is None:
        parameters = {}

    from statistics import mean, stdev

    log = log_converter.apply(
        log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters
    )

    business_hours = exec_utils.get_param_value(
        Parameters.BUSINESS_HOURS, parameters, False
    )
    business_hours_slots = exec_utils.get_param_value(
        Parameters.BUSINESS_HOUR_SLOTS,
        parameters,
        constants.DEFAULT_BUSINESS_HOUR_SLOTS,
    )

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY
    )
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY,
        parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY,
    )
    # Fall back to the *configured* timestamp attribute, not to the XES
    # default: a caller overriding only TIMESTAMP_KEY would otherwise have
    # start times read from a different (possibly missing) attribute.
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters, timestamp_key
    )

    # (source activity, target activity) -> list of recorded times in seconds
    diff_time_recordings = {}

    for trace in log:
        n_events = len(trace)
        for i in range(n_events - 1):
            source = trace[i]
            act_i = source[activity_key]
            time_i = source[timestamp_key].timestamp()
            for j in range(i + 1, n_events):
                target = trace[j]
                time_j = target[start_timestamp_key].timestamp()
                if time_j < time_i:
                    # The later event started before the earlier one
                    # completed: no (non-negative) distance to record.
                    continue
                if business_hours:
                    bh = BusinessHours(
                        source[timestamp_key],
                        target[start_timestamp_key],
                        business_hour_slots=business_hours_slots,
                    )
                    elapsed = bh.get_seconds()
                else:
                    elapsed = time_j - time_i
                couple = (act_i, target[activity_key])
                diff_time_recordings.setdefault(couple, []).append(elapsed)

    temporal_profile = {}
    for couple, recordings in diff_time_recordings.items():
        if len(recordings) > 1:
            temporal_profile[couple] = (mean(recordings), stdev(recordings))
        else:
            # statistics.stdev needs at least two data points; a single
            # observation is reported with zero deviation.
            temporal_profile[couple] = (recordings[0], 0)

    return temporal_profile