'''
PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.
Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
import pandas as pd
from pm4py.objects.log.obj import EventLog, EventStream, Trace
from typing import Union, Optional, Dict, Tuple, List, Any
from pm4py.utils import get_properties, constants, check_is_pandas_dataframe
from pm4py.utils import __event_log_deprecation_warning
from pm4py.objects.ocel.obj import OCEL
from tempfile import NamedTemporaryFile
from copy import copy
from pm4py.objects.petri_net.obj import PetriNet, Marking
[docs]
def openai_query(
    prompt: str,
    api_key: Optional[str] = None,
    openai_model: Optional[str] = None,
    api_url: Optional[str] = None,
    **kwargs,
) -> str:
    """
    Sends a prompt to the OpenAI connector and returns the answer it produces.

    The extra keyword arguments are forwarded to the connector as parameters. The
    explicit options (``api_url``, ``api_key``, ``openai_model``) are added on top
    of them only when they are not ``None``; otherwise the corresponding key is
    simply not passed to the connector.

    :param prompt: The prompt to be executed.
    :param api_key: (Optional) OpenAI API key.
    :param openai_model: (Optional) OpenAI model to be used (documented default: "gpt-3.5-turbo").
    :param api_url: (Optional) OpenAI API URL.
    :param **kwargs: Additional parameters to pass to the OpenAI connector.
    :return: The response of the connector as a string.

    .. code-block:: python3

        import pm4py

        resp = pm4py.llm.openai_query('What is the result of 3+3?', api_key="sk-382393", openai_model="gpt-3.5-turbo")
        print(resp)
    """
    explicit_options = {
        "api_url": api_url,
        "api_key": api_key,
        "openai_model": openai_model,
    }

    # Work on a copy of the keyword arguments, then layer the provided options on top.
    connector_parameters = dict(kwargs)
    connector_parameters.update(
        (name, value) for name, value in explicit_options.items() if value is not None
    )

    from pm4py.algo.querying.llm.connectors import openai as openai_connector

    return openai_connector.apply(prompt, parameters=connector_parameters)
[docs]
def abstract_dfg(
    log_obj: Union[pd.DataFrame, EventLog, EventStream],
    max_len: int = constants.OPENAI_MAX_LEN,
    include_performance: bool = True,
    relative_frequency: bool = False,
    response_header: bool = True,
    primary_performance_aggregation: str = "mean",
    secondary_performance_aggregation: Optional[str] = None,
    activity_key: str = "concept:name",
    timestamp_key: str = "time:timestamp",
    case_id_key: str = "case:concept:name",
) -> str:
    """
    Builds the textual DFG (Directly-Follows Graph) abstraction of a traditional event log.

    The log properties are resolved from the given keys, enriched with the
    abstraction options below, and passed to the ``log_to_dfg_descr`` abstraction.

    :param log_obj: The log object to abstract.
    :param max_len: Maximum length of the resulting string (default: constants.OPENAI_MAX_LEN).
    :param include_performance: Whether the performance of the paths is part of the abstraction.
    :param relative_frequency: Whether the paths are reported with relative instead of absolute frequency.
    :param response_header: Whether a short header describing the abstraction precedes the paths.
    :param primary_performance_aggregation: Primary aggregation of the arc's performance (default: "mean"). Other options: "median", "min", "max", "sum", "stdev".
    :param secondary_performance_aggregation: (Optional) Secondary aggregation of the arc's performance (default: None). Other options: "mean", "median", "min", "max", "sum", "stdev".
    :param activity_key: The column name to be used as activity.
    :param timestamp_key: The column name to be used as timestamp.
    :param case_id_key: The column name to be used as case identifier.
    :return: The DFG abstraction as a string.

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
        print(pm4py.llm.abstract_dfg(log))
    """
    __event_log_deprecation_warning(log_obj)

    abstraction_options = {
        "max_len": max_len,
        "include_performance": include_performance,
        "relative_frequency": relative_frequency,
        "response_header": response_header,
        "primary_performance_aggregation": primary_performance_aggregation,
        "secondary_performance_aggregation": secondary_performance_aggregation,
    }

    parameters = get_properties(
        log_obj,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_key=case_id_key,
    )
    # The abstraction options take precedence over same-named log properties.
    parameters.update(abstraction_options)

    from pm4py.algo.querying.llm.abstractions import log_to_dfg_descr as dfg_abstraction

    return dfg_abstraction.apply(log_obj, parameters=parameters)
[docs]
def abstract_variants(
    log_obj: Union[pd.DataFrame, EventLog, EventStream],
    max_len: int = constants.OPENAI_MAX_LEN,
    include_performance: bool = True,
    relative_frequency: bool = False,
    response_header: bool = True,
    primary_performance_aggregation: str = "mean",
    secondary_performance_aggregation: Optional[str] = None,
    activity_key: str = "concept:name",
    timestamp_key: str = "time:timestamp",
    case_id_key: str = "case:concept:name",
) -> str:
    """
    Builds the textual variants abstraction of a traditional event log.

    The log properties are resolved from the given keys, enriched with the
    abstraction options below, and passed to the ``log_to_variants_descr`` abstraction.

    :param log_obj: The log object to abstract.
    :param max_len: Maximum length of the resulting string (default: constants.OPENAI_MAX_LEN).
    :param include_performance: Whether the performance of the variants is part of the abstraction.
    :param relative_frequency: Whether the variants are reported with relative instead of absolute frequency.
    :param response_header: Whether a short header describing the abstraction precedes the variants.
    :param primary_performance_aggregation: Primary aggregation of the variants' performance (default: "mean"). Other options: "median", "min", "max", "sum", "stdev".
    :param secondary_performance_aggregation: (Optional) Secondary aggregation of the variants' performance (default: None). Other options: "mean", "median", "min", "max", "sum", "stdev".
    :param activity_key: The column name to be used as activity.
    :param timestamp_key: The column name to be used as timestamp.
    :param case_id_key: The column name to be used as case identifier.
    :return: The variants abstraction as a string.

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
        print(pm4py.llm.abstract_variants(log))
    """
    __event_log_deprecation_warning(log_obj)

    abstraction_options = {
        "max_len": max_len,
        "include_performance": include_performance,
        "relative_frequency": relative_frequency,
        "response_header": response_header,
        "primary_performance_aggregation": primary_performance_aggregation,
        "secondary_performance_aggregation": secondary_performance_aggregation,
    }

    parameters = get_properties(
        log_obj,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_key=case_id_key,
    )
    # The abstraction options take precedence over same-named log properties.
    parameters.update(abstraction_options)

    from pm4py.algo.querying.llm.abstractions import log_to_variants_descr as variants_abstraction

    return variants_abstraction.apply(log_obj, parameters=parameters)
[docs]
def abstract_ocel(ocel: OCEL, include_timestamps: bool = True) -> str:
    """
    Builds the textual abstraction of an object-centric event log, listing its events and objects.

    :param ocel: The object-centric event log to abstract.
    :param include_timestamps: Whether timestamp information is part of the abstraction.
    :return: The OCEL abstraction as a string.

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel("tests/input_data/ocel/example_log.jsonocel")
        print(pm4py.llm.abstract_ocel(ocel))
    """
    from pm4py.algo.transformation.ocel.description import algorithm as ocel_describer

    return ocel_describer.apply(
        ocel,
        parameters={"include_timestamps": include_timestamps},
    )
[docs]
def abstract_ocel_ocdfg(
    ocel: OCEL,
    include_header: bool = True,
    include_timestamps: bool = True,
    max_len: int = constants.OPENAI_MAX_LEN,
) -> str:
    """
    Builds the textual representation of the object-centric directly-follows graph of an object-centric event log.

    :param ocel: The object-centric event log to abstract.
    :param include_header: Whether a header is part of the abstraction.
    :param include_timestamps: Whether timestamp information is part of the abstraction.
    :param max_len: Maximum length of the abstraction (default: constants.OPENAI_MAX_LEN).
    :return: The object-centric DFG abstraction as a string.

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel("tests/input_data/ocel/example_log.jsonocel")
        print(pm4py.llm.abstract_ocel_ocdfg(ocel))
    """
    abstraction_options = {
        "include_header": include_header,
        "include_timestamps": include_timestamps,
        "max_len": max_len,
    }

    from pm4py.algo.querying.llm.abstractions import ocel_ocdfg_descr as ocdfg_abstraction

    return ocdfg_abstraction.apply(ocel, parameters=abstraction_options)
[docs]
def abstract_ocel_features(
    ocel: OCEL,
    obj_type: str,
    include_header: bool = True,
    max_len: int = constants.OPENAI_MAX_LEN,
    debug: bool = False,
    enable_object_lifecycle_paths: bool = True,
) -> str:
    """
    Builds the textual representation of the features (and their values) of an object-centric event log.

    :param ocel: The object-centric event log to abstract.
    :param obj_type: The object type considered during the feature extraction.
    :param include_header: Whether a header is part of the abstraction.
    :param max_len: Maximum length of the abstraction (default: constants.OPENAI_MAX_LEN).
    :param debug: Enables the debugging mode, giving insights into the feature extraction steps.
    :param enable_object_lifecycle_paths: Enables the "lifecycle paths" feature in the abstraction.
    :return: The OCEL features abstraction as a string.

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel("tests/input_data/ocel/example_log.jsonocel")
        print(pm4py.llm.abstract_ocel_features(ocel, obj_type="Resource"))
    """
    abstraction_options = {
        "include_header": include_header,
        "max_len": max_len,
        "debug": debug,
        "enable_object_lifecycle_paths": enable_object_lifecycle_paths,
    }

    from pm4py.algo.querying.llm.abstractions import ocel_fea_descr as features_abstraction

    return features_abstraction.apply(ocel, obj_type, parameters=abstraction_options)
[docs]
def abstract_event_stream(
    log_obj: Union[pd.DataFrame, EventLog, EventStream],
    max_len: int = constants.OPENAI_MAX_LEN,
    response_header: bool = True,
    activity_key: str = "concept:name",
    timestamp_key: str = "time:timestamp",
    case_id_key: str = "case:concept:name",
) -> str:
    """
    Builds the textual event stream abstraction of a traditional event log.

    The log properties are resolved from the given keys, enriched with the
    abstraction options below, and passed to the ``stream_to_descr`` abstraction.

    :param log_obj: The log object to abstract.
    :param max_len: Maximum length of the resulting string (default: constants.OPENAI_MAX_LEN).
    :param response_header: Whether a short header describing the abstraction precedes the event stream.
    :param activity_key: The column name to be used as activity.
    :param timestamp_key: The column name to be used as timestamp.
    :param case_id_key: The column name to be used as case identifier.
    :return: The event stream abstraction as a string.

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
        print(pm4py.llm.abstract_event_stream(log))
    """
    __event_log_deprecation_warning(log_obj)

    parameters = get_properties(
        log_obj,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_key=case_id_key,
    )
    # The abstraction options take precedence over same-named log properties.
    parameters.update({"max_len": max_len, "response_header": response_header})

    from pm4py.algo.querying.llm.abstractions import stream_to_descr as stream_abstraction

    return stream_abstraction.apply(log_obj, parameters=parameters)
[docs]
def abstract_petri_net(net: PetriNet, im: Marking, fm: Marking, response_header: bool = True) -> str:
    """
    Builds the textual abstraction of a Petri net with its initial and final markings.

    :param net: The Petri net to abstract.
    :param im: The initial marking of the Petri net.
    :param fm: The final marking of the Petri net.
    :param response_header: Whether a header is part of the abstraction.
    :return: The Petri net abstraction as a string.

    .. code-block:: python3

        import pm4py

        net, im, fm = pm4py.read_pnml('tests/input_data/running-example.pnml')
        print(pm4py.llm.abstract_petri_net(net, im, fm))
    """
    from pm4py.algo.querying.llm.abstractions import net_to_descr as net_abstraction

    return net_abstraction.apply(
        net,
        im,
        fm,
        parameters={"response_header": response_header},
    )
[docs]
def abstract_log_attributes(
    log_obj: Union[pd.DataFrame, EventLog, EventStream],
    max_len: int = constants.OPENAI_MAX_LEN,
    activity_key: str = "concept:name",
    timestamp_key: str = "time:timestamp",
    case_id_key: str = "case:concept:name",
) -> str:
    """
    Builds the textual abstraction of the attributes of a log, reporting their names, types, and top values.

    The log properties are resolved from the given keys, enriched with ``max_len``,
    and passed to the ``log_to_cols_descr`` abstraction.

    :param log_obj: The log object whose attributes are abstracted.
    :param max_len: Maximum length of the resulting string (default: constants.OPENAI_MAX_LEN).
    :param activity_key: The column name to be used as activity.
    :param timestamp_key: The column name to be used as timestamp.
    :param case_id_key: The column name to be used as case identifier.
    :return: The log attributes abstraction as a string.

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
        print(pm4py.llm.abstract_log_attributes(log))
    """
    __event_log_deprecation_warning(log_obj)

    parameters = get_properties(
        log_obj,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_key=case_id_key,
    )
    # The length limit takes precedence over a same-named log property.
    parameters.update({"max_len": max_len})

    from pm4py.algo.querying.llm.abstractions import log_to_cols_descr as attributes_abstraction

    return attributes_abstraction.apply(log_obj, parameters=parameters)
[docs]
def abstract_log_features(
    log_obj: Union[pd.DataFrame, EventLog, EventStream],
    max_len: int = constants.OPENAI_MAX_LEN,
    include_header: bool = True,
    activity_key: str = "concept:name",
    timestamp_key: str = "time:timestamp",
    case_id_key: str = "case:concept:name",
) -> str:
    """
    Builds the textual abstraction of the machine learning features of a log, reporting the top features until the desired length is reached.

    The log properties are resolved from the given keys, enriched with the
    abstraction options below, and passed to the ``log_to_fea_descr`` abstraction.

    :param log_obj: The log object from which the features are extracted.
    :param max_len: Maximum length of the resulting string (default: constants.OPENAI_MAX_LEN).
    :param include_header: Whether a header is part of the abstraction.
    :param activity_key: The column name to be used as activity.
    :param timestamp_key: The column name to be used as timestamp.
    :param case_id_key: The column name to be used as case identifier.
    :return: The log features abstraction as a string.

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
        print(pm4py.llm.abstract_log_features(log))
    """
    __event_log_deprecation_warning(log_obj)

    parameters = get_properties(
        log_obj,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_key=case_id_key,
    )
    # The abstraction options take precedence over same-named log properties.
    parameters.update({"max_len": max_len, "include_header": include_header})

    from pm4py.algo.querying.llm.abstractions import log_to_fea_descr as features_abstraction

    return features_abstraction.apply(log_obj, parameters=parameters)
[docs]
def abstract_temporal_profile(
    temporal_profile: Dict[Tuple[str, str], Tuple[float, float]],
    include_header: bool = True,
) -> str:
    """
    Builds the textual abstraction of a temporal profile model.

    :param temporal_profile: The temporal profile model to abstract.
    :param include_header: Whether a header describing the temporal profile is part of the abstraction.
    :return: The temporal profile abstraction as a string.

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes", return_legacy_log_object=True)
        temporal_profile = pm4py.discover_temporal_profile(log)
        text_abstr = pm4py.llm.abstract_temporal_profile(temporal_profile, include_header=True)
        print(text_abstr)
    """
    from pm4py.algo.querying.llm.abstractions import tempprofile_to_descr as profile_abstraction

    return profile_abstraction.apply(
        temporal_profile,
        parameters={"include_header": include_header},
    )
[docs]
def abstract_case(
    case: Trace,
    include_case_attributes: bool = True,
    include_event_attributes: bool = True,
    include_timestamp: bool = True,
    include_header: bool = True,
    activity_key: str = "concept:name",
    timestamp_key: str = "time:timestamp",
) -> str:
    """
    Builds the textual abstraction of a single case of an event log.

    :param case: The case object to abstract.
    :param include_case_attributes: Whether the attributes at the case level are part of the abstraction.
    :param include_event_attributes: Whether the attributes at the event level are part of the abstraction.
    :param include_timestamp: Whether the event timestamps are part of the abstraction.
    :param include_header: Whether a header is part of the abstraction.
    :param activity_key: The column name to be used as activity.
    :param timestamp_key: The column name to be used as timestamp.
    :return: The case abstraction as a string.

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes", return_legacy_log_object=True)
        print(pm4py.llm.abstract_case(log[0]))
    """
    abstraction_options = {
        "include_case_attributes": include_case_attributes,
        "include_event_attributes": include_event_attributes,
        "include_timestamp": include_timestamp,
        "include_header": include_header,
        # The activity/timestamp keys are registered under the library-wide parameter constants.
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key,
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: timestamp_key,
    }

    from pm4py.algo.querying.llm.abstractions import case_to_descr as case_abstraction

    return case_abstraction.apply(case, parameters=abstraction_options)
[docs]
def abstract_declare(declare_model, include_header: bool = True) -> str:
    """
    Builds the textual abstraction of a DECLARE model.

    :param declare_model: The DECLARE model to abstract.
    :param include_header: Whether a header is part of the abstraction.
    :return: The DECLARE model abstraction as a string.

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes", return_legacy_log_object=True)
        declare_model = pm4py.discover_declare(log)
        print(pm4py.llm.abstract_declare(declare_model))
    """
    from pm4py.algo.querying.llm.abstractions import declare_to_descr as declare_abstraction

    return declare_abstraction.apply(
        declare_model,
        parameters={"include_header": include_header},
    )
[docs]
def abstract_log_skeleton(log_skeleton, include_header: bool = True) -> str:
    """
    Builds the textual abstraction of a log skeleton process model.

    :param log_skeleton: The log skeleton to abstract.
    :param include_header: Whether a header is part of the abstraction.
    :return: The log skeleton abstraction as a string.

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes", return_legacy_log_object=True)
        log_ske = pm4py.discover_log_skeleton(log)
        print(pm4py.llm.abstract_log_skeleton(log_ske))
    """
    from pm4py.algo.querying.llm.abstractions import logske_to_descr as skeleton_abstraction

    return skeleton_abstraction.apply(
        log_skeleton,
        parameters={"include_header": include_header},
    )
[docs]
def explain_visualization(vis_saver, *args, connector=openai_query, **kwargs) -> str:
    """
    Explains a process mining visualization using LLMs by saving it as a .png image and providing the image to the Large Language Model along with a description.

    The visualization is rendered to a temporary ``.png`` file, whose path is given
    to the connector through the ``image_path`` keyword argument. The same keyword
    arguments are passed both to the visualizer function and to the connector.
    The temporary image is removed once the connector has returned (or failed).

    :param vis_saver: The visualizer function used to save the visualization to disk. It is called with the positional arguments followed by the image path, and is expected to return a textual description (``None`` is treated as an empty description).
    :param args: Positional arguments required by the visualizer function.
    :param connector: (Optional) The connector method to communicate with the large language model (default: openai_query).
    :param **kwargs: Additional keyword arguments for the visualizer function or the connector (e.g., annotations, API key).
    :return: The explanation of the visualization as a string.

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("tests/input_data/running-example.xes")
        descr = pm4py.llm.explain_visualization(pm4py.save_vis_dotted_chart, log, api_key="sk-5HN", show_legend=False)
        print(descr)
    """
    import os
    from contextlib import suppress

    # Reserve a unique ".png" path. The placeholder file is deleted as soon as the
    # context exits, so that the visualizer can create the file at that path itself.
    with NamedTemporaryFile(suffix=".png") as placeholder:
        image_path = placeholder.name

    try:
        description = vis_saver(*args, image_path, **kwargs)
        if description is None:
            # A saver without a textual description must not break the prompt concatenation.
            description = ""

        # kwargs is a fresh dict for every call; copy it anyway so that the caller-visible
        # keyword arguments stay separate from the connector parameters.
        parameters = dict(kwargs)
        parameters["image_path"] = image_path

        return connector("Could you explain the included process mining visualization?\n\n" + description, **parameters)
    finally:
        # Best-effort cleanup of the rendered image: the file may not exist if the saver
        # failed, and a cleanup problem must not mask the original outcome.
        with suppress(OSError):
            os.remove(image_path)