Source code for pm4py.objects.ocel.util.sampling

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from enum import Enum
from pm4py.util import exec_utils, pandas_utils
from pm4py.objects.ocel import constants
import random
from pm4py.objects.ocel.util import filtering_utils
from copy import copy
from pm4py.objects.ocel.obj import OCEL
from typing import Optional, Dict, Any


class Parameters(Enum):
    """
    Keys recognised in the ``parameters`` dictionary of the sampling
    functions of this module.
    """

    # Name of the column holding the object identifier
    # (read by sample_ocel_objects).
    OBJECT_ID = constants.PARAM_OBJECT_ID
    # Name of the column holding the event identifier
    # (read by sample_ocel_events).
    EVENT_ID = constants.PARAM_EVENT_ID
    # Upper bound on the number of events/objects kept by the sampling.
    NUM_ENTITIES = "num_entities"
def sample_ocel_events(
    ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None
) -> OCEL:
    """
    Keeps a random sample of the events of an object-centric event log

    Parameters
    ------------------
    ocel
        Object-centric event log
    parameters
        Parameters of the algorithm, including:
            - Parameters.EVENT_ID => event identifier
              (default: ocel.event_id_column)
            - Parameters.NUM_ENTITIES => maximum number of events to
              retain (default: 100); if the log contains fewer distinct
              events, all of them are kept; negative values are
              treated as 0

    Returns
    ------------------
    sampled_ocel
        Sampled object-centric event log
    """
    if parameters is None:
        parameters = {}

    event_id_column = exec_utils.get_param_value(
        Parameters.EVENT_ID, parameters, ocel.event_id_column
    )
    num_entities = exec_utils.get_param_value(
        Parameters.NUM_ENTITIES, parameters, 100
    )

    events = pandas_utils.format_unique(ocel.events[event_id_column].unique())

    # Clamp the sample size to [0, len(events)]. Without the lower bound a
    # negative NUM_ENTITIES would turn the slice below into events[:-k],
    # silently keeping all but k events instead of none.
    num_events = max(0, min(len(events), num_entities))

    # Shuffle in place and take a prefix: a uniform sample without
    # replacement of the distinct event identifiers.
    random.shuffle(events)
    picked_events = events[:num_events]

    # Work on a copy so that reassigning the events table below does not
    # rebind the attribute on the caller's object.
    ocel = copy(ocel)
    ocel.events = ocel.events[ocel.events[event_id_column].isin(picked_events)]

    # Hand the reduced events table to the filtering utilities, which
    # presumably align the remaining tables of the log with it.
    return filtering_utils.propagate_event_filtering(
        ocel, parameters=parameters
    )
def sample_ocel_objects(
    ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None
) -> OCEL:
    """
    Random samples the objects of the object-centric event log.
    Then, only the events related to at least one of these objects are
    filtered from the event log.
    As a note, the relationships between the different objects are
    probably going to be ruined by this sampling.

    Parameters
    -----------------
    ocel
        Object-centric event log
    parameters
        Parameters of the algorithm, including:
            - Parameters.OBJECT_ID => object identifier
              (default: ocel.object_id_column)
            - Parameters.NUM_ENTITIES => maximum number of objects to
              retain (default: 100); if the log contains fewer distinct
              objects, all of them are kept; negative values are
              treated as 0

    Returns
    ----------------
    sampled_ocel
        Sampled object-centric event log
    """
    if parameters is None:
        parameters = {}

    object_id_column = exec_utils.get_param_value(
        Parameters.OBJECT_ID, parameters, ocel.object_id_column
    )
    num_entities = exec_utils.get_param_value(
        Parameters.NUM_ENTITIES, parameters, 100
    )

    objects = pandas_utils.format_unique(
        ocel.objects[object_id_column].unique()
    )

    # Clamp the sample size to [0, len(objects)]. Without the lower bound a
    # negative NUM_ENTITIES would turn the slice below into objects[:-k],
    # silently keeping all but k objects instead of none.
    num_objects = max(0, min(len(objects), num_entities))

    # Shuffle in place and take a prefix: a uniform sample without
    # replacement of the distinct object identifiers.
    random.shuffle(objects)
    picked_objects = objects[:num_objects]

    # Work on a copy so that reassigning the objects table below does not
    # rebind the attribute on the caller's object.
    ocel = copy(ocel)
    ocel.objects = ocel.objects[
        ocel.objects[object_id_column].isin(picked_objects)
    ]

    # Hand the reduced objects table to the filtering utilities, which
    # presumably align the events and relations of the log with it.
    return filtering_utils.propagate_object_filtering(
        ocel, parameters=parameters
    )