Source code for pm4py.algo.discovery.ocel.interleavings.variants.timestamp_interleavings

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from pm4py.algo.discovery.ocel.interleavings.utils import (
    merge_dataframe_rel_cases,
)
import pandas as pd
from typing import Optional, Dict, Any
from pm4py.util import exec_utils, constants, xes_constants, pandas_utils
from enum import Enum



[docs]
class Parameters(Enum):
    """
    Keys accepted in the ``parameters`` dictionary of :func:`apply`.

    The first group selects/names the columns of the input dataframes; the
    second group names the columns that :func:`apply` adds to its result.
    """

    # Activity column of the input dataframes
    # (apply falls back to xes_constants.DEFAULT_NAME_KEY).
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # Timestamp column of the input dataframes
    # (apply falls back to xes_constants.DEFAULT_TIMESTAMP_KEY).
    TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
    # Case identifier column. Not read by apply itself; the parameters
    # dictionary is forwarded unchanged to merge_dataframes, which
    # presumably consumes it -- verify there.
    CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
    # Suffix of the left-dataframe columns in the merged dataframe
    # (default "_LEFT").
    LEFT_SUFFIX = "left_suffix"
    # Suffix of the right-dataframe columns in the merged dataframe
    # (default "_RIGHT").
    RIGHT_SUFFIX = "right_suffix"
    # Event index column (apply falls back to constants.DEFAULT_INDEX_KEY).
    INDEX_KEY = "index_key"
    # Output column: activity of the source event (default "@@source_activity").
    SOURCE_ACTIVITY = "source_activity_param"
    # Output column: activity of the target event (default "@@target_activity").
    TARGET_ACTIVITY = "target_activity_param"
    # Output column: timestamp of the source event
    # (default "@@source_timestamp").
    SOURCE_TIMESTAMP = "source_timestamp_param"
    # Output column: timestamp of the target event
    # (default "@@target_timestamp").
    TARGET_TIMESTAMP = "target_timestamp_param"
    # Output column: index of the left-dataframe event (default "@@left_index").
    LEFT_INDEX = "left_index_param"
    # Output column: index of the right-dataframe event
    # (default "@@right_index").
    RIGHT_INDEX = "right_index_param"
    # Output column: direction of the interleaving, "LR" or "RL"
    # (default "@@direction").
    DIRECTION = "direction_param"
    # Output column: target timestamp minus source timestamp, converted with
    # pandas_utils.get_total_seconds (default "@@timestamp_diff").
    TIMESTAMP_DIFF = "timestamp_diff"




[docs]
def apply(
    left_df: pd.DataFrame,
    right_df: pd.DataFrame,
    case_relations: pd.DataFrame,
    parameters: Optional[Dict[Any, Any]] = None,
):
    """
    Computes the timestamp-based interleavings between two dataframes whose
    cases are related, in both directions (left to right and right to left).

    The two dataframes are first combined through
    ``merge_dataframe_rel_cases.merge_dataframes``. On the merged dataframe
    two selections are performed (``ts`` is the timestamp column, ``_2``
    marks the second set of event columns of each side):

    - "LR": ``ts_LEFT < ts_RIGHT < ts_2_LEFT < ts_2_RIGHT``;
      source = left event columns, target = right event columns.
    - "RL": ``ts_RIGHT < ts_LEFT < ts_2_RIGHT < ts_2_LEFT``;
      source = ``_2`` right event columns, target = ``_2`` left event columns.

    Parameters
    ------------------
    left_df
        Left dataframe
    right_df
        Right dataframe
    case_relations
        Dataframe associating the cases of the left dataframe
        (column: case:concept:name_LEFT) to the cases of the right dataframe
        (column: case:concept:name_RIGHT)
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
        - Parameters.CASE_ID_KEY => the attribute to use as case identifier
          (not read here; forwarded with the whole dictionary to the merge)
        - Parameters.LEFT_SUFFIX => suffix of the left columns (default: _LEFT)
        - Parameters.RIGHT_SUFFIX => suffix of the right columns (default: _RIGHT)
        - Parameters.INDEX_KEY => the index column in the dataframe
        - Parameters.SOURCE_ACTIVITY => output column, source activity
          (default: @@source_activity)
        - Parameters.TARGET_ACTIVITY => output column, target activity
          (default: @@target_activity)
        - Parameters.SOURCE_TIMESTAMP => output column, source timestamp
          (default: @@source_timestamp)
        - Parameters.TARGET_TIMESTAMP => output column, target timestamp
          (default: @@target_timestamp)
        - Parameters.LEFT_INDEX => output column, index of the left event
          (default: @@left_index)
        - Parameters.RIGHT_INDEX => output column, index of the right event
          (default: @@right_index)
        - Parameters.DIRECTION => output column, direction of the interleaving
          (LR: left to right; RL: right to left; default: @@direction)
        - Parameters.TIMESTAMP_DIFF => output column, difference between the
          target and the source timestamp (default: @@timestamp_diff)

    Returns
    -----------------
    interleavings_dataframe
        Concatenation of the LR and RL selections, sorted by the left and
        right event index columns, with the output columns listed above
    """
    if parameters is None:
        parameters = {}

    def _option(key, fallback):
        # Single place for the "parameter or default" lookup.
        return exec_utils.get_param_value(key, parameters, fallback)

    timestamp_key = _option(
        Parameters.TIMESTAMP_KEY, xes_constants.DEFAULT_TIMESTAMP_KEY
    )
    index_key = _option(Parameters.INDEX_KEY, constants.DEFAULT_INDEX_KEY)
    activity_key = _option(
        Parameters.ACTIVITY_KEY, xes_constants.DEFAULT_NAME_KEY
    )
    left_suffix = _option(Parameters.LEFT_SUFFIX, "_LEFT")
    right_suffix = _option(Parameters.RIGHT_SUFFIX, "_RIGHT")
    source_activity = _option(Parameters.SOURCE_ACTIVITY, "@@source_activity")
    target_activity = _option(Parameters.TARGET_ACTIVITY, "@@target_activity")
    source_timestamp = _option(
        Parameters.SOURCE_TIMESTAMP, "@@source_timestamp"
    )
    target_timestamp = _option(
        Parameters.TARGET_TIMESTAMP, "@@target_timestamp"
    )
    direction = _option(Parameters.DIRECTION, "@@direction")
    timestamp_diff = _option(Parameters.TIMESTAMP_DIFF, "@@timestamp_diff")
    left_index = _option(Parameters.LEFT_INDEX, "@@left_index")
    right_index = _option(Parameters.RIGHT_INDEX, "@@right_index")

    merged = merge_dataframe_rel_cases.merge_dataframes(
        left_df, right_df, case_relations, parameters=parameters
    )

    # Column names of the merged dataframe. The "_2" variants are the second
    # set of event columns per side (presumably the following event of the
    # same case -- see merge_dataframes).
    ts_left = timestamp_key + left_suffix
    ts_right = timestamp_key + right_suffix
    ts_left_2 = timestamp_key + "_2" + left_suffix
    ts_right_2 = timestamp_key + "_2" + right_suffix
    act_left = activity_key + left_suffix
    act_right = activity_key + right_suffix
    act_left_2 = activity_key + "_2" + left_suffix
    act_right_2 = activity_key + "_2" + right_suffix
    idx_left = index_key + left_suffix
    idx_right = index_key + right_suffix
    idx_left_2 = index_key + "_2" + left_suffix
    idx_right_2 = index_key + "_2" + right_suffix

    def _select(increasing_ts, copied_columns, label):
        # Keep the rows whose timestamps are strictly increasing along
        # increasing_ts, one comparison (and one filtering step) per
        # consecutive pair of columns.
        rows = merged
        for earlier, later in zip(increasing_ts, increasing_ts[1:]):
            rows = rows[rows[earlier] < rows[later]]
        # Expose the source/target information under the output column names.
        for out_column, in_column in copied_columns:
            rows[out_column] = rows[in_column]
        rows[direction] = label
        return rows

    # Left to right: left < right < left_2 < right_2.
    left_to_right = _select(
        (ts_left, ts_right, ts_left_2, ts_right_2),
        (
            (source_activity, act_left),
            (target_activity, act_right),
            (source_timestamp, ts_left),
            (target_timestamp, ts_right),
            (left_index, idx_left),
            (right_index, idx_right),
        ),
        "LR",
    )

    # Right to left: right < left < right_2 < left_2.
    # NOTE(review): LR reports the first-event columns whereas RL reports the
    # "_2" columns -- confirm that this asymmetry is intended.
    right_to_left = _select(
        (ts_right, ts_left, ts_right_2, ts_left_2),
        (
            (source_activity, act_right_2),
            (target_activity, act_left_2),
            (source_timestamp, ts_right_2),
            (target_timestamp, ts_left_2),
            (left_index, idx_left_2),
            (right_index, idx_right_2),
        ),
        "RL",
    )

    interleavings = pandas_utils.concat([left_to_right, right_to_left])
    interleavings = interleavings.sort_values([idx_left, idx_right])
    elapsed = interleavings[target_timestamp] - interleavings[source_timestamp]
    interleavings[timestamp_diff] = pandas_utils.get_total_seconds(elapsed)

    return interleavings