Source code for pm4py.algo.querying.llm.abstractions.ocel_fea_descr

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''

from pm4py.objects.ocel.obj import OCEL
from typing import Optional, Dict, Any
from pm4py.util import exec_utils, constants
from enum import Enum
import numpy as np
import pandas as pd


class Parameters(Enum):
    """Keys accepted in the ``parameters`` dictionary of this module."""

    # Whether textual_abstraction_from_fea_df prepends the explanatory
    # notions about the object graphs (read with default True).
    INCLUDE_HEADER = "include_header"

    # Length threshold checked by textual_abstraction_from_fea_df before each
    # feature line is appended (default: constants.OPENAI_MAX_LEN).
    MAX_LEN = "max_len"

    # Forwarded by apply() as the ``debug`` argument of
    # pm4py.extract_ocel_features (read with default True).
    DEBUG = "debug"

    # Forwarded by apply() as the ``enable_object_lifecycle_paths`` argument
    # of pm4py.extract_ocel_features (read with default False).
    ENABLE_OBJECT_LIFECYCLE_PATHS = "enable_object_lifecycle_paths"
# Feature-name prefixes whose description does not depend on the remainder of
# the name. No prefix here is a prefix of another, so lookup order is
# irrelevant.
_STATIC_FEATURE_DESCRIPTIONS = (
    (
        "@@object_lifecycle_unq_act",
        "Number of unique activities in the lifecycle of the object",
    ),
    (
        "@@object_lifecycle_length",
        "Number of events in the lifecycle of the object",
    ),
    (
        "@@object_lifecycle_duration",
        "Duration of the lifecycle of the object",
    ),
    (
        "@@object_lifecycle_start_timestamp",
        "Start timestamp of the lifecycle of the object",
    ),
    (
        "@@object_lifecycle_end_timestamp",
        "Completion timestamp of the lifecycle of the object",
    ),
    (
        "@@object_degree_centrality",
        "Degree centrality of the object in the object interaction graph",
    ),
    (
        "@@object_general_interaction_graph",
        "Number of objects related in the object interaction graph",
    ),
    (
        "@@object_general_descendants_graph_descendants",
        "Number of objects which follow the current object in the object descendants graph",
    ),
    (
        "@@object_general_descendants_graph_ascendants",
        "Number of objects which precede the current object in the object descendants graph",
    ),
    # The two inheritance-graph entries mirror the descendants-graph ones:
    # ascendants precede the object, descendants follow it, and both name the
    # inheritance graph (the descendants entry previously named the
    # descendants graph and the directions were inverted).
    (
        "@@object_general_inheritance_graph_ascendants",
        "Number of objects which precede the current object in the object inheritance graph",
    ),
    (
        "@@object_general_inheritance_graph_descendants",
        "Number of objects which follow the current object in the object inheritance graph",
    ),
    (
        "@@object_cobirth",
        "Number of objects starting their lifecycle together with the current object",
    ),
    (
        "@@object_codeath",
        "Number of objects ending their lifecycle together with the current object",
    ),
)


def __transform_to_string(stru: str) -> str:
    """
    Translate a feature column name into a human-readable description.

    Parameters
    ----------
    stru
        Feature name, e.g. ``@@ocel_lif_activity_<activity>`` or
        ``@@ocel_lif_path_<act1>##<act2>``.

    Returns
    -------
    str
        The description associated with the recognised prefix, or ``stru``
        unchanged when the name is not recognised (including a path feature
        that lacks the ``##`` separator).
    """
    activity_prefix = "@@ocel_lif_activity_"
    if stru.startswith(activity_prefix):
        # Slice instead of split so an activity name that happens to contain
        # the prefix text is not truncated.
        return "Number of occurrences of the activity " + stru[len(activity_prefix):]

    for prefix, description in _STATIC_FEATURE_DESCRIPTIONS:
        if stru.startswith(prefix):
            return description

    interaction_prefix = "@@object_interaction_graph_"
    if stru.startswith(interaction_prefix):
        return (
            "Number of object of type "
            + stru[len(interaction_prefix):]
            + " related to the current object in the object interaction graph"
        )

    path_prefix = "@@ocel_lif_path_"
    if stru.startswith(path_prefix):
        activities = stru[len(path_prefix):].split("##")
        # A path needs both endpoints; a malformed name falls through and is
        # returned as-is rather than raising IndexError.
        if len(activities) >= 2:
            return (
                'Frequency of the path "'
                + activities[0]
                + '" -> "'
                + activities[1]
                + '" in the lifecycle of the object'
            )

    return stru
def textual_abstraction_from_fea_df(
    fea_df: pd.DataFrame, parameters: Optional[Dict[Any, Any]] = None
) -> str:
    """
    Build a textual summary of the columns of a feature dataframe.

    For every column, only the strictly positive values are considered;
    columns without any positive value are skipped. The remaining columns are
    listed in decreasing order of (number of positive values, standard
    deviation divided by the average, description), each with the quantiles
    (0, 0.25, 0.5, 0.75, 1) of its positive values.

    Parameters
    ----------
    fea_df
        Feature dataframe; the ``> 0`` filter is applied to every column.
    parameters
        Optional dictionary. Keys read here:
        - Parameters.INCLUDE_HEADER => prepend the explanatory notions
          (default: True)
        - Parameters.MAX_LEN => length threshold, checked before each
          feature line is appended (default: constants.OPENAI_MAX_LEN)

    Returns
    -------
    str
        The textual abstraction.
    """
    if parameters is None:
        parameters = {}

    with_header = exec_utils.get_param_value(
        Parameters.INCLUDE_HEADER, parameters, True
    )
    length_budget = exec_utils.get_param_value(
        Parameters.MAX_LEN, parameters, constants.OPENAI_MAX_LEN
    )

    summaries = []
    for column in fea_df.columns:
        values = fea_df[column]
        positives = values[values > 0]
        if len(positives) == 0:
            continue

        description = __transform_to_string(column)
        mean_value = np.average(positives)
        if mean_value == 0 or len(positives) == 1:
            dispersion = 0
        else:
            dispersion = np.std(positives) / mean_value
        summaries.append([description, len(positives), dispersion, positives])

    # Most populated features first; ties broken by dispersion, then by
    # description.
    summaries.sort(
        key=lambda entry: (entry[1], entry[2], entry[0]), reverse=True
    )

    pieces = ["\n"]
    if with_header:
        pieces.extend(
            [
                "Beforehand, a bit of notions.",
                "Given an object-centric event log, the object interaction graph connects objects that are related in at least an event.",
                "The object descendants graph connects objects related in at least an event, when the lifecycle of the second object starts after the lifecycle of the first.",
                "The object inheritance graph connects objects when there an event that ends the lifecycle of the first object and starts the lifecycle of the second one.",
                "\n\n",
            ]
        )
    pieces.append("Given the following features:\n\n")
    text = " ".join(pieces)

    for description, non_zero_count, _, positives in summaries:
        # The threshold is tested before appending, so the final text may
        # exceed it by at most one feature line.
        if len(text) >= length_budget:
            break
        quantiles = positives.quantile([0.0, 0.25, 0.5, 0.75, 1.0]).to_dict()
        text = (
            text
            + description
            + ": number of non-zero values: "
            + str(non_zero_count)
            + " ; quantiles of the non-zero: "
            + str(quantiles)
            + "\n"
        )

    return text
def apply(
    ocel: OCEL, obj_type: str, parameters: Optional[Dict[Any, Any]] = None
) -> str:
    """
    Produce the textual feature abstraction for one object type of an OCEL.

    The features are obtained through ``pm4py.extract_ocel_features`` (without
    the object identifier column) and then rendered with
    ``textual_abstraction_from_fea_df``, which receives the same
    ``parameters`` dictionary.

    Parameters
    ----------
    ocel
        Object-centric event log.
    obj_type
        Object type passed to the feature extraction.
    parameters
        Optional dictionary. Keys read here:
        - Parameters.DEBUG => forwarded as ``debug`` (default: True)
        - Parameters.ENABLE_OBJECT_LIFECYCLE_PATHS => forwarded as
          ``enable_object_lifecycle_paths`` (default: False)

    Returns
    -------
    str
        The textual abstraction.
    """
    if parameters is None:
        parameters = {}

    extraction_options = {
        "include_obj_id": False,
        "debug": exec_utils.get_param_value(
            Parameters.DEBUG, parameters, True
        ),
        "enable_object_lifecycle_paths": exec_utils.get_param_value(
            Parameters.ENABLE_OBJECT_LIFECYCLE_PATHS, parameters, False
        ),
    }

    # Imported inside the function, as in the original code.
    import pm4py

    features = pm4py.extract_ocel_features(
        ocel, obj_type, **extraction_options
    )
    return textual_abstraction_from_fea_df(features, parameters=parameters)