# Source code for pm4py.objects.log.util.pandas_log_wrapper

import pandas as pd
from typing import Optional, Dict, Any
from pm4py.util import constants, exec_utils
from enum import Enum
from collections.abc import Sequence


class Parameters(Enum):
    """Parameter keys understood by the dataframe wrappers in this module."""

    # Key used to look up the name of the column holding the case identifier.
    CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY

    # Key used to look up the prefix that marks case-level attribute columns.
    CASE_ATTRIBUTE_PREFIX = constants.CASE_ATTRIBUTE_PREFIX
class PandasTraceWrapper(Sequence):
    """Read-only, list-like view over the events of a single case.

    Every row of the wrapped dataframe is exposed as one event, returned as
    a plain ``dict`` mapping column name to value. Case-level attributes are
    read from the first row: each column whose name starts with the case
    attribute prefix is copied into ``attributes`` with that leading prefix
    removed.
    """

    def __init__(
        self,
        dataframe: pd.DataFrame,
        parameters: Optional[Dict[Any, Any]] = None,
    ):
        """Wrap ``dataframe`` as a trace.

        Parameters
        ----------
        dataframe
            Dataframe whose rows are the events of the trace, in order.
        parameters
            Optional parameter dictionary. ``Parameters.CASE_ATTRIBUTE_PREFIX``
            is looked up through ``exec_utils.get_param_value`` with
            ``constants.CASE_ATTRIBUTE_PREFIX`` passed as the fallback.
        """
        if parameters is None:
            parameters = {}

        self.parameters = parameters
        self.dataframe = dataframe
        self.case_attribute_prefix = exec_utils.get_param_value(
            Parameters.CASE_ATTRIBUTE_PREFIX,
            parameters,
            constants.CASE_ATTRIBUTE_PREFIX,
        )

        prefix = self.case_attribute_prefix
        # Positional access so that neither an empty frame nor a frame with
        # a non-default index makes construction fail.
        if len(self.dataframe) > 0:
            first_row = self.dataframe.iloc[0].to_dict()
        else:
            first_row = {}

        # Strip only the *leading* prefix; splitting on the prefix would
        # truncate names in which the prefix text occurs a second time.
        self.attributes = {
            column[len(prefix):]: value
            for column, value in first_row.items()
            if column.startswith(prefix)
        }

    def __getitem__(self, key):
        """Return one event (``dict``) or, for a slice, a list of events.

        Indexing follows the usual Python sequence rules: negative indices
        count from the end, open-ended slices are allowed, and an integer
        outside ``[-len, len)`` raises ``IndexError``.
        """
        if isinstance(key, slice):
            return self.dataframe.iloc[key].to_dict("records")

        size = len(self.dataframe)
        # Raising IndexError (instead of wrapping around) is what the
        # inherited Sequence mixins such as ``index`` rely on to terminate.
        if not -size <= key < size:
            raise IndexError("trace index out of range")
        return self.dataframe.iloc[key].to_dict()

    def __iter__(self):
        """Iterate over the events, each as a ``dict``."""
        return iter(self.dataframe.to_dict("records"))

    def __len__(self):
        """Return the number of events (rows) in the trace."""
        return len(self.dataframe)

    def _get_list(self):
        """Return all events as a list of ``dict`` records."""
        return self.dataframe.to_dict("records")

    # NOTE(review): presumably mirrors the ``_list`` attribute of the
    # in-memory trace objects so that code reading it keeps working - confirm.
    _list = property(_get_list)
class PandasLogWrapper(Sequence):
    """Read-only, list-like view of a dataframe as a sequence of traces.

    Permits to iterate over a Pandas dataframe and access its traces
    *without* a conversion to EventLog. The rows are grouped by the case
    identifier column; each group is exposed as a ``PandasTraceWrapper``
    that is built on demand from a copy of the group's rows.
    """

    def __init__(
        self,
        dataframe: pd.DataFrame,
        parameters: Optional[Dict[Any, Any]] = None,
    ):
        """Wrap ``dataframe`` as a log.

        Parameters
        ----------
        dataframe
            Dataframe containing the events of all cases.
        parameters
            Optional parameter dictionary. ``Parameters.CASE_ID_KEY`` is
            looked up through ``exec_utils.get_param_value`` with
            ``constants.CASE_CONCEPT_NAME`` passed as the fallback; the
            same dictionary is forwarded to every trace wrapper.
        """
        if parameters is None:
            parameters = {}

        self.parameters = parameters
        self.case_id_key = exec_utils.get_param_value(
            Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME
        )
        self.dataframe = dataframe
        # Mapping: case identifier -> index labels of that case's rows.
        self.grouped_dataframe = self.dataframe.groupby(
            self.case_id_key
        ).groups
        # Fixed ordering of the case identifiers, used for positional access.
        self.keys = list(self.grouped_dataframe)

        # Empty placeholders.
        # NOTE(review): presumably mirror the metadata attributes of the
        # in-memory event log object - confirm.
        self.attributes = {}
        self.extensions = {}
        self.omni_present = {}
        self.classifiers = {}
        self.properties = {}

    def _build_trace(self, case_id):
        """Build the trace wrapper for the rows belonging to ``case_id``."""
        # reset_index() relabels the copied rows 0..n-1; the previous index
        # is kept as an additional column of the trace dataframe.
        case_rows = (
            self.dataframe.loc[self.grouped_dataframe[case_id]]
            .copy()
            .reset_index()
        )
        return PandasTraceWrapper(case_rows, parameters=self.parameters)

    def __getitem__(self, key):
        """Return one trace or, for a slice, a list of traces.

        Indexing is delegated to the list of case identifiers, so negative
        indices and open-ended slices behave as on a ``list`` and an
        out-of-range integer raises ``IndexError`` (which the inherited
        Sequence mixins such as ``index`` rely on to terminate).
        """
        if isinstance(key, slice):
            return [self._build_trace(case_id) for case_id in self.keys[key]]
        return self._build_trace(self.keys[key])

    def __iter__(self):
        """Yield a trace wrapper for every case, built lazily."""
        for case_id in self.keys:
            yield self._build_trace(case_id)

    def __len__(self):
        """Return the number of cases in the log."""
        return len(self.grouped_dataframe)

    def _get_list(self):
        """Return the trace wrappers of all cases as a list."""
        return [self._build_trace(case_id) for case_id in self.keys]

    # NOTE(review): presumably mirrors the ``_list`` attribute of the
    # in-memory event log object - confirm.
    _list = property(_get_list)