Source code for pm4py.algo.concept_drift.algorithm
'''
PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.
Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from enum import Enum
from pm4py.algo.concept_drift.variants import bose
from pm4py.util import exec_utils
import pandas as pd
from pm4py.objects.log.obj import EventLog
from typing import Union, Dict, Any, Optional, Tuple, List
[docs]
class Variants(Enum):
BOSE = bose
[docs]
def apply(log: Union[EventLog, pd.DataFrame], variant=Variants.BOSE, parameters: Optional[Dict[Any, Any]] = None) -> \
Tuple[List[pd.DataFrame], List[int], List[float]]:
"""
Parameters
--------------
log
Event log or Pandas dataframe
variant
Variant of the algorithm (available: Variants.BOSE)
parameters
Variant-specific parameters
Returns
---------------
returned_sublogs : List[EventLog]
A list of sub-logs, where each sub-log is an EventLog object representing the cumulative segment of the original event log from the start up to each detected change point (and the final sub-log up to the end). Note: Due to a potential implementation issue, these sub-logs are not segments between change points but rather cumulative logs up to each change point.
change_timestamps : List[float]
A list of timestamps where concept drifts are detected. Each timestamp corresponds to the start time of the first trace in the sub-log where a change point occurs, based on case start timestamps.
p_values : List[float]
A list of p-values associated with each detected change point, indicating the statistical significance of the drift (lower values suggest stronger evidence of a change).
"""
return exec_utils.get_variant(variant).apply(log, parameters)