Source code for pm4py.objects.log.util.pandas_log_wrapper
'''
PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.
Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
import pandas as pd
from typing import Optional, Dict, Any
from pm4py.util import constants, exec_utils
from enum import Enum
from collections.abc import Sequence
[docs]
class Parameters(Enum):
    """Keys accepted in the ``parameters`` dictionary of this module's wrappers.

    The member values are taken from ``pm4py.util.constants``.
    """

    # Selects the dataframe column that identifies the case of each event.
    CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
    # Selects the column-name prefix that marks case-level attributes.
    CASE_ATTRIBUTE_PREFIX = constants.CASE_ATTRIBUTE_PREFIX
[docs]
class PandasTraceWrapper(Sequence):
    """Read-only, list-like view of the events of one case.

    Every row of the wrapped dataframe is exposed as one event, i.e. a
    ``dict`` mapping column names to the values of that row. Columns whose
    name starts with the case attribute prefix are additionally collected
    (with the prefix removed) in ``attributes``, using the values of the
    first row.

    Parameters
    ----------
    dataframe
        Dataframe holding the events of a single case, one row per event.
    parameters
        Optional settings. ``Parameters.CASE_ATTRIBUTE_PREFIX`` overrides
        the prefix used to recognise case-level columns (default:
        ``constants.CASE_ATTRIBUTE_PREFIX``).
    """

    def __init__(
        self,
        dataframe: pd.DataFrame,
        parameters: Optional[Dict[Any, Any]] = None,
    ):
        if parameters is None:
            parameters = {}
        self.parameters = parameters
        self.dataframe = dataframe
        self.case_attribute_prefix = exec_utils.get_param_value(
            Parameters.CASE_ATTRIBUTE_PREFIX,
            parameters,
            constants.CASE_ATTRIBUTE_PREFIX,
        )
        prefix = self.case_attribute_prefix
        # Positional access does not depend on the index containing label 0;
        # an empty dataframe simply has no trace attributes.
        if len(self.dataframe) > 0:
            first_row = self.dataframe.iloc[0].to_dict()
        else:
            first_row = {}
        # Strip only the leading prefix so that names which contain the
        # prefix a second time are kept intact.
        self.attributes = {
            column[len(prefix):]: value
            for column, value in first_row.items()
            if column.startswith(prefix)
        }

    def __getitem__(self, key):
        """Return one event (``int`` key) or a list of events (``slice``).

        Indexing follows ``list`` semantics: negative positions count from
        the end, omitted slice bounds are allowed and slices are clipped to
        the available range.

        Raises
        ------
        IndexError
            If an integer position lies outside the trace. Raising here is
            what lets the ``Sequence`` mixin methods (e.g. ``index``)
            terminate.
        """
        if isinstance(key, slice):
            return self.dataframe.iloc[key].to_dict("records")
        size = len(self.dataframe)
        position = key + size if key < 0 else key
        if not 0 <= position < size:
            raise IndexError("trace index out of range")
        return self.dataframe.iloc[position].to_dict()

    def __iter__(self):
        """Iterate over the events in row order."""
        return iter(self.dataframe.to_dict("records"))

    def __len__(self):
        """Return the number of events (rows)."""
        return len(self.dataframe)

    def _get_list(self):
        """Return all events as a freshly built list of dictionaries."""
        return self.dataframe.to_dict("records")

    _list = property(_get_list)
[docs]
class PandasLogWrapper(Sequence):
    """Read-only, list-like view of a dataframe as a sequence of traces.

    Allows iterating over a Pandas dataframe and accessing its traces
    *without* converting it to an ``EventLog``. The rows are grouped by the
    case identifier column; each group is wrapped on demand in a
    ``PandasTraceWrapper``. Traces are ordered as the keys of
    ``DataFrame.groupby(...).groups``.

    Parameters
    ----------
    dataframe
        Dataframe with one row per event.
    parameters
        Optional settings. ``Parameters.CASE_ID_KEY`` overrides the name of
        the case identifier column (default:
        ``constants.CASE_CONCEPT_NAME``). The same dictionary is forwarded
        to every ``PandasTraceWrapper`` that is created.
    """

    def __init__(
        self,
        dataframe: pd.DataFrame,
        parameters: Optional[Dict[Any, Any]] = None,
    ):
        if parameters is None:
            parameters = {}
        self.parameters = parameters
        self.case_id_key = exec_utils.get_param_value(
            Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME
        )
        self.dataframe = dataframe
        # Mapping: case identifier -> index labels of the rows of that case.
        self.grouped_dataframe = self.dataframe.groupby(
            self.case_id_key
        ).groups
        # Fixed ordering of the case identifiers, used for positional access.
        self.keys = list(self.grouped_dataframe)
        # Log-level metadata containers, all initially empty.
        # NOTE(review): presumably mirror the attributes of EventLog - confirm.
        self.attributes = {}
        self.extensions = {}
        self.omni_present = {}
        self.classifiers = {}
        self.properties = {}

    def _build_trace(self, case_id):
        """Wrap the rows belonging to ``case_id`` in a PandasTraceWrapper."""
        case_rows = self.dataframe.loc[self.grouped_dataframe[case_id]]
        # reset_index() returns a new dataframe whose rows are renumbered
        # from 0 (the former index is kept as a column), so no explicit
        # copy is required beforehand.
        return PandasTraceWrapper(
            case_rows.reset_index(),
            parameters=self.parameters,
        )

    def __getitem__(self, key):
        """Return one trace (``int`` key) or a list of traces (``slice``).

        Positions refer to cases, not to events, and follow ``list``
        semantics: negative positions count from the end, omitted slice
        bounds are allowed and slices are clipped to the available range.

        Raises
        ------
        IndexError
            If an integer position lies outside the log. Raising here is
            what lets the ``Sequence`` mixin methods (e.g. ``index``)
            terminate.
        """
        if isinstance(key, slice):
            return [self._build_trace(case_id) for case_id in self.keys[key]]
        return self._build_trace(self.keys[key])

    def __iter__(self):
        """Yield one trace wrapper per case, built lazily."""
        for case_id in self.keys:
            yield self._build_trace(case_id)

    def __len__(self):
        """Return the number of cases."""
        return len(self.grouped_dataframe)

    def _get_list(self):
        """Return the trace wrappers of all cases as a new list."""
        return [self._build_trace(case_id) for case_id in self.keys]

    _list = property(_get_list)