Source code for pm4py.objects.ocel.obj

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from enum import Enum

from pm4py.objects.ocel import constants
from pm4py.util import exec_utils, pandas_utils
import pandas as pd
import numpy as np
from collections import Counter
from copy import copy, deepcopy


[docs] class Parameters(Enum): EVENT_ID = constants.PARAM_EVENT_ID EVENT_ACTIVITY = constants.PARAM_EVENT_ACTIVITY EVENT_TIMESTAMP = constants.PARAM_EVENT_TIMESTAMP OBJECT_ID = constants.PARAM_OBJECT_ID OBJECT_TYPE = constants.PARAM_OBJECT_TYPE QUALIFIER = constants.PARAM_QUALIFIER CHANGED_FIELD = constants.PARAM_CHNGD_FIELD
[docs] class OCEL(object): def __init__( self, events=None, objects=None, relations=None, globals=None, parameters=None, o2o=None, e2e=None, object_changes=None, ): if parameters is None: parameters = {} self.event_id_column = exec_utils.get_param_value( Parameters.EVENT_ID, parameters, constants.DEFAULT_EVENT_ID ) self.object_id_column = exec_utils.get_param_value( Parameters.OBJECT_ID, parameters, constants.DEFAULT_OBJECT_ID ) self.object_type_column = exec_utils.get_param_value( Parameters.OBJECT_TYPE, parameters, constants.DEFAULT_OBJECT_TYPE ) self.event_activity = exec_utils.get_param_value( Parameters.EVENT_ACTIVITY, parameters, constants.DEFAULT_EVENT_ACTIVITY, ) self.event_timestamp = exec_utils.get_param_value( Parameters.EVENT_TIMESTAMP, parameters, constants.DEFAULT_EVENT_TIMESTAMP, ) self.qualifier = exec_utils.get_param_value( Parameters.QUALIFIER, parameters, constants.DEFAULT_QUALIFIER ) self.changed_field = exec_utils.get_param_value( Parameters.CHANGED_FIELD, parameters, constants.DEFAULT_CHNGD_FIELD ) if events is None: events = pandas_utils.instantiate_dataframe( { self.event_id_column: [], self.event_activity: [], self.event_timestamp: [], } ) if objects is None: objects = pandas_utils.instantiate_dataframe( {self.object_id_column: [], self.object_type_column: []} ) if relations is None: relations = pandas_utils.instantiate_dataframe( { self.event_id_column: [], self.event_activity: [], self.event_timestamp: [], self.object_id_column: [], self.object_type_column: [], } ) if globals is None: globals = {} if o2o is None: o2o = pandas_utils.instantiate_dataframe( { self.object_id_column: [], self.object_id_column + "_2": [], self.qualifier: [], } ) if e2e is None: e2e = pandas_utils.instantiate_dataframe( { self.event_id_column: [], self.event_id_column + "_2": [], self.qualifier: [], } ) if object_changes is None: object_changes = pandas_utils.instantiate_dataframe( { self.object_id_column: [], self.object_type_column: [], self.event_timestamp: [], self.changed_field: [], } ) if self.qualifier not in relations: relations[self.qualifier] = [None] * len(relations) self.events = events self.objects = objects self.relations = relations self.globals = globals self.o2o = o2o self.e2e = e2e self.object_changes = object_changes self.parameters = parameters
[docs] def get_extended_table( self, ot_prefix=constants.DEFAULT_OBJECT_TYPE_PREFIX_EXTENDED ) -> pd.DataFrame: """ Transforms the current OCEL data structure into a Pandas dataframe containing the events with their attributes and the related objects per object type. """ object_types = pandas_utils.format_unique( self.relations[self.object_type_column].unique() ) table = self.events.copy().set_index(self.event_id_column) for ot in object_types: table[ot_prefix + ot] = ( self.relations[self.relations[self.object_type_column] == ot] .groupby(self.event_id_column)[self.object_id_column] .agg(list) ) table = table.reset_index() return table
[docs] def get_summary(self) -> str: """ Gets a string summary of the object-centric event log """ ret = [] ret.append("Object-Centric Event Log (") ret.append("number of events: %d" % (len(self.events))) ret.append(", number of objects: %d" % (len(self.objects))) ret.append( ", number of activities: %d" % (self.events[self.event_activity].nunique()) ) ret.append( ", number of object types: %d" % (self.objects[self.object_type_column].nunique()) ) ret.append( ", events-objects relationships: %d)" % (len(self.relations)) ) ret.append("\n") ret.append( "Activities occurrences: " + str( Counter( self.events[self.event_activity].value_counts().to_dict() ) ) ) ret.append("\n") ret.append( "Object types occurrences (number of objects): " + str( Counter( self.objects[self.object_type_column] .value_counts() .to_dict() ) ) ) ret.append("\n") ret.append( "Unique activities per object type: " + str( Counter( self.relations.groupby(self.object_type_column)[ self.event_activity ] .nunique() .to_dict() ) ) ) ret.append("\n") ret.append( "Please use <THIS>.get_extended_table() to get a dataframe representation of the events related to the objects." ) return "".join(ret)
[docs] def is_ocel20(self): unique_qualifiers = [] if self.qualifier in self.relations.columns: unique_qualifiers = [ x for x in pandas_utils.format_unique( self.relations[self.qualifier].unique() ) if not self.__check_is_nan(x) ] return ( len(self.o2o) > 0 or len(self.object_changes) > 0 or len(unique_qualifiers) > 0 )
def __check_is_nan(self, x): try: if x is None: return True if np.isnan(x): return True except BaseException: return False def __str__(self): return str(self.get_summary()) def __repr__(self): return str(self.get_summary()) def __copy__(self): return OCEL( self.events, self.objects, self.relations, copy(self.globals), copy(self.parameters), copy(self.o2o), copy(self.e2e), copy(self.object_changes), ) def __deepcopy__(self, memo): return OCEL( self.events.copy(), self.objects.copy(), self.relations.copy(), deepcopy(self.globals), deepcopy(self.parameters), deepcopy(self.o2o), deepcopy(self.e2e), deepcopy(self.object_changes), ) def __eq__(self, other): if not isinstance(other, OCEL): return False return ( self.event_id_column == other.event_id_column and self.object_id_column == other.object_id_column and self.object_type_column == other.object_type_column and self.event_activity == other.event_activity and self.event_timestamp == other.event_timestamp and self.qualifier == other.qualifier and self.changed_field == other.changed_field and self.events.equals(other.events) and self.objects.equals(other.objects) and self.relations.equals(other.relations) and self.globals == other.globals and self.o2o.equals(other.o2o) and self.e2e.equals(other.e2e) and self.object_changes.equals(other.object_changes) and self.parameters == other.parameters ) def __hash__(self): return hash( ( self.event_id_column, self.object_id_column, self.object_type_column, self.event_activity, self.event_timestamp, self.qualifier, self.changed_field, self.events.to_string(), self.objects.to_string(), self.relations.to_string(), self.o2o.to_string(), self.e2e.to_string(), self.object_changes.to_string(), ) )