# Source code for pm4py.objects.log.util.pandas_log_wrapper

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
import pandas as pd
from typing import Optional, Dict, Any
from pm4py.util import constants, exec_utils
from enum import Enum
from collections.abc import Sequence



# [docs]
class Parameters(Enum):
    """Keys looked up in the ``parameters`` dictionary of the wrappers below.

    The wrappers read them through ``exec_utils.get_param_value``; the member
    values come from ``pm4py.util.constants``.
    """

    # Read by PandasLogWrapper: the dataframe column used to group rows into
    # cases (falls back to constants.CASE_CONCEPT_NAME when not provided).
    CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
    # Read by PandasTraceWrapper: columns whose name starts with this prefix
    # are collected as trace attributes.
    CASE_ATTRIBUTE_PREFIX = constants.CASE_ATTRIBUTE_PREFIX




# [docs]
class PandasTraceWrapper(Sequence):
    """Read-only, sequence-like view of one trace stored in a dataframe.

    Every item is one row of the dataframe rendered as a ``dict`` that maps
    column names to values. ``attributes`` holds the values of the first row
    for all columns whose name starts with the case attribute prefix, keyed
    by the column name without that prefix.

    Indexing is positional and follows the usual Python sequence rules:
    negative indices count from the end, an out-of-range integer raises
    ``IndexError``, and slices accept open bounds, negative bounds and steps.
    """

    def __init__(
        self,
        dataframe: pd.DataFrame,
        parameters: Optional[Dict[Any, Any]] = None,
    ):
        """Wrap ``dataframe`` as a trace.

        Parameters
        ----------
        dataframe
            Rows (events) that make up the trace.
        parameters
            Optional settings. ``Parameters.CASE_ATTRIBUTE_PREFIX`` overrides
            the prefix that marks trace-level columns (default:
            ``constants.CASE_ATTRIBUTE_PREFIX``).
        """
        if parameters is None:
            parameters = {}

        self.parameters = parameters
        self.dataframe = dataframe
        self.case_attribute_prefix = exec_utils.get_param_value(
            Parameters.CASE_ATTRIBUTE_PREFIX,
            parameters,
            constants.CASE_ATTRIBUTE_PREFIX,
        )

        prefix = self.case_attribute_prefix
        # An empty dataframe has no first row to read the attributes from.
        # The row is taken by position so a non-default index also works.
        if len(self.dataframe) > 0:
            first_row = self.dataframe.iloc[0].to_dict()
        else:
            first_row = {}
        # Cut the prefix off the front instead of splitting on it: splitting
        # would also break the name at any later occurrence of the prefix.
        self.attributes = {
            name[len(prefix):]: value
            for name, value in first_row.items()
            if name.startswith(prefix)
        }

    def __getitem__(self, key):
        """Return the event at position ``key`` or a list of events.

        Parameters
        ----------
        key
            An integer position (negative values count from the end) or a
            ``slice``.

        Returns
        -------
        dict or list of dict
            One row as a dict for an integer key, a list of such dicts for a
            slice.

        Raises
        ------
        IndexError
            If an integer key lies outside ``[-len(self), len(self))``.
        """
        if isinstance(key, slice):
            # iloc applies standard Python slice semantics (open, negative
            # and out-of-range bounds, steps) on row positions.
            return self.dataframe.iloc[key].to_dict("records")

        size = len(self.dataframe)
        position = key + size if key < 0 else key
        # Raising here (rather than wrapping around) is what lets the
        # Sequence mixin methods such as index() terminate.
        if not 0 <= position < size:
            raise IndexError("trace index out of range")
        return self.dataframe.iloc[position].to_dict()

    def __iter__(self):
        """Iterate over the events, each rendered as a dict."""
        return iter(self.dataframe.to_dict("records"))

    def __len__(self):
        """Return the number of events (rows) of the trace."""
        return len(self.dataframe)

    def _get_list(self):
        """Return all events as a list of dicts."""
        return self.dataframe.to_dict("records")

    # Exposes the events as a plain list, rebuilt on every access.
    _list = property(_get_list)




# [docs]
class PandasLogWrapper(Sequence):
    """Sequence-like view of a dataframe as a collection of traces.

    Permits to iterate over a Pandas dataframe and access its traces
    *without* a conversion to EventLog. Rows are grouped by the case
    identifier column; every group is exposed as a ``PandasTraceWrapper``
    that is built anew on each access.

    Indexing follows the usual Python sequence rules over the list of case
    identifiers: negative indices count from the end, an out-of-range integer
    raises ``IndexError``, and slices accept open and negative bounds.
    """

    def __init__(
        self,
        dataframe: pd.DataFrame,
        parameters: Optional[Dict[Any, Any]] = None,
    ):
        """Group ``dataframe`` by case identifier.

        Parameters
        ----------
        dataframe
            Dataframe holding the events of all cases.
        parameters
            Optional settings. ``Parameters.CASE_ID_KEY`` names the column
            holding the case identifier (default:
            ``constants.CASE_CONCEPT_NAME``). The same dictionary is passed
            on to every ``PandasTraceWrapper`` created by this object.
        """
        if parameters is None:
            parameters = {}

        self.parameters = parameters
        self.case_id_key = exec_utils.get_param_value(
            Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME
        )
        self.dataframe = dataframe

        # Mapping: case identifier -> index labels of the rows of that case.
        self.grouped_dataframe = self.dataframe.groupby(
            self.case_id_key
        ).groups
        # Case identifiers in a fixed order; positions in this list are the
        # trace positions of the log.
        self.keys = list(self.grouped_dataframe)

        # Log-level containers, all initialised empty.
        # NOTE(review): presumably mirror the fields of EventLog - confirm.
        self.attributes = {}
        self.extensions = {}
        self.omni_present = {}
        self.classifiers = {}
        self.properties = {}

    def _build_trace(self, case_id):
        """Return a new ``PandasTraceWrapper`` over the rows of ``case_id``."""
        rows = self.dataframe.loc[self.grouped_dataframe[case_id]]
        # reset_index() gives the trace its own 0-based index; with pandas'
        # default settings the previous index is kept as a column.
        return PandasTraceWrapper(
            rows.copy().reset_index(),
            parameters=self.parameters,
        )

    def __getitem__(self, key):
        """Return the trace at position ``key`` or a list of traces.

        Parameters
        ----------
        key
            An integer position (negative values count from the end) or a
            ``slice``.

        Returns
        -------
        PandasTraceWrapper or list of PandasTraceWrapper
            One trace for an integer key, a list of traces for a slice.

        Raises
        ------
        IndexError
            If an integer key lies outside ``[-len(self), len(self))``.
        """
        # Indexing the list of case identifiers applies the standard list
        # semantics, including IndexError for out-of-range integers (needed
        # for the Sequence mixin methods such as index() to terminate).
        if isinstance(key, slice):
            return [self._build_trace(case_id) for case_id in self.keys[key]]
        return self._build_trace(self.keys[key])

    def __iter__(self):
        """Yield one ``PandasTraceWrapper`` per case, in ``keys`` order."""
        for case_id in self.keys:
            yield self._build_trace(case_id)

    def __len__(self):
        """Return the number of cases (traces)."""
        return len(self.grouped_dataframe)

    def _get_list(self):
        """Return all traces as a list of ``PandasTraceWrapper`` objects."""
        return [self._build_trace(case_id) for case_id in self.keys]

    # Exposes the traces as a plain list, rebuilt on every access.
    _list = property(_get_list)