'''
PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.
Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from pm4py.algo.discovery.ocel.interleavings.utils import (
merge_dataframe_rel_cases,
)
import pandas as pd
from typing import Optional, Dict, Any
from pm4py.util import exec_utils, constants, xes_constants, pandas_utils
from enum import Enum
[docs]
class Parameters(Enum):
ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
LEFT_SUFFIX = "left_suffix"
RIGHT_SUFFIX = "right_suffix"
INDEX_KEY = "index_key"
SOURCE_ACTIVITY = "source_activity_param"
TARGET_ACTIVITY = "target_activity_param"
SOURCE_TIMESTAMP = "source_timestamp_param"
TARGET_TIMESTAMP = "target_timestamp_param"
LEFT_INDEX = "left_index_param"
RIGHT_INDEX = "right_index_param"
DIRECTION = "direction_param"
TIMESTAMP_DIFF = "timestamp_diff"
[docs]
def apply(
left_df: pd.DataFrame,
right_df: pd.DataFrame,
case_relations: pd.DataFrame,
parameters: Optional[Dict[Any, Any]] = None,
):
"""
Calculates the timestamp-based interleavings ongoing from the left/right to the right/left dataframe.
Parameters
------------------
left_df
Left dataframe
right_df
Right dataframe
case_relations
Dictionary associating the cases of the first dataframe (column: case:concept:name_LEFT) to the
cases of the second dataframe (column: case:concept:name_RIGHT)
parameters
Parameters of the algorithm, including:
- Parameters.ACTIVITY_KEY => the attribute to use as activity
- Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
- Parameters.CASE_ID_KEY => the attribute to use as case identifier
- Parameters.LEFT_SUFFIX => the suffix for the columns of the left dataframe
- Parameters.RIGHT_SUFFIX => the suffix for the columns of the right dataframe
- Parameters.INDEX_KEY => the index column in the dataframe
- Parameters.SOURCE_ACTIVITY => the source activity of the interleaving
- Parameters.TARGET_ACTIVITY => the target activity of the interleaving
- Parameters.SOURCE_TIMESTAMP => the source timestamp of the interleaving
- Parameters.TARGET_TIMESTAMP => the target timestamp of the interleaving
- Parameters.LEFT_INDEX => the index of the event of the left-dataframe in the interleaving
- Parameters.RIGHT_INDEX => the index of the event of the right-dataframe in the interleaving
- Parameters.DIRECTION => the direction of the interleaving (LR: left to right; RL: right to left)
- Parameters.TIMESTAMP_DIFF => the difference between the timestamps of the interleaving
Returns
-----------------
interleavings_dataframe
Sorted interleaving dataframe
"""
if parameters is None:
parameters = {}
timestamp_key = exec_utils.get_param_value(
Parameters.TIMESTAMP_KEY,
parameters,
xes_constants.DEFAULT_TIMESTAMP_KEY,
)
index_key = exec_utils.get_param_value(
Parameters.INDEX_KEY, parameters, constants.DEFAULT_INDEX_KEY
)
activity_key = exec_utils.get_param_value(
Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY
)
left_suffix = exec_utils.get_param_value(
Parameters.LEFT_SUFFIX, parameters, "_LEFT"
)
right_suffix = exec_utils.get_param_value(
Parameters.RIGHT_SUFFIX, parameters, "_RIGHT"
)
source_activity = exec_utils.get_param_value(
Parameters.SOURCE_ACTIVITY, parameters, "@@source_activity"
)
target_activity = exec_utils.get_param_value(
Parameters.TARGET_ACTIVITY, parameters, "@@target_activity"
)
source_timestamp = exec_utils.get_param_value(
Parameters.SOURCE_TIMESTAMP, parameters, "@@source_timestamp"
)
target_timestamp = exec_utils.get_param_value(
Parameters.TARGET_TIMESTAMP, parameters, "@@target_timestamp"
)
direction = exec_utils.get_param_value(
Parameters.DIRECTION, parameters, "@@direction"
)
timestamp_diff = exec_utils.get_param_value(
Parameters.TIMESTAMP_DIFF, parameters, "@@timestamp_diff"
)
left_index = exec_utils.get_param_value(
Parameters.LEFT_INDEX, parameters, "@@left_index"
)
right_index = exec_utils.get_param_value(
Parameters.RIGHT_INDEX, parameters, "@@right_index"
)
md = merge_dataframe_rel_cases.merge_dataframes(
left_df, right_df, case_relations, parameters=parameters
)
df1 = md[
md[timestamp_key + left_suffix] < md[timestamp_key + right_suffix]
]
df1 = df1[
df1[timestamp_key + right_suffix]
< df1[timestamp_key + "_2" + left_suffix]
]
df1 = df1[
df1[timestamp_key + "_2" + left_suffix]
< df1[timestamp_key + "_2" + right_suffix]
]
df1[source_activity] = df1[activity_key + left_suffix]
df1[target_activity] = df1[activity_key + right_suffix]
df1[source_timestamp] = df1[timestamp_key + left_suffix]
df1[target_timestamp] = df1[timestamp_key + right_suffix]
df1[left_index] = df1[index_key + left_suffix]
df1[right_index] = df1[index_key + right_suffix]
df1[direction] = "LR"
df2 = md[
md[timestamp_key + right_suffix] < md[timestamp_key + left_suffix]
]
df2 = df2[
df2[timestamp_key + left_suffix]
< df2[timestamp_key + "_2" + right_suffix]
]
df2 = df2[
df2[timestamp_key + "_2" + right_suffix]
< df2[timestamp_key + "_2" + left_suffix]
]
df2[source_activity] = df2[activity_key + "_2" + right_suffix]
df2[target_activity] = df2[activity_key + "_2" + left_suffix]
df2[source_timestamp] = df2[timestamp_key + "_2" + right_suffix]
df2[target_timestamp] = df2[timestamp_key + "_2" + left_suffix]
df2[left_index] = df2[index_key + "_2" + left_suffix]
df2[right_index] = df2[index_key + "_2" + right_suffix]
df2[direction] = "RL"
md = pandas_utils.concat([df1, df2])
md = md.sort_values([index_key + left_suffix, index_key + right_suffix])
md[timestamp_diff] = pandas_utils.get_total_seconds(
md[target_timestamp] - md[source_timestamp]
)
return md