Source code for pm4py.objects.ocel.importer.jsonocel.variants.classic

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
import json
from enum import Enum
from typing import Optional, Dict, Any

import pandas as pd

from pm4py.objects.ocel import constants
from pm4py.objects.ocel.obj import OCEL
from pm4py.objects.ocel.util import filtering_utils
from pm4py.objects.ocel.util import ocel_consistency
from pm4py.util import (
    exec_utils,
    dt_parsing,
    constants as pm4_constants,
    pandas_utils,
)
from pm4py.objects.log.util import dataframe_utils


[docs] class Parameters(Enum): EVENT_ID = constants.PARAM_EVENT_ID EVENT_ACTIVITY = constants.PARAM_EVENT_ACTIVITY EVENT_TIMESTAMP = constants.PARAM_EVENT_TIMESTAMP OBJECT_ID = constants.PARAM_OBJECT_ID OBJECT_TYPE = constants.PARAM_OBJECT_TYPE INTERNAL_INDEX = constants.PARAM_INTERNAL_INDEX ENCODING = "encoding"
[docs] def get_base_ocel(json_obj: Any, parameters: Optional[Dict[Any, Any]] = None): events = [] relations = [] objects = [] o2o = [] object_changes = [] event_id = exec_utils.get_param_value( Parameters.EVENT_ID, parameters, constants.DEFAULT_EVENT_ID ) event_activity = exec_utils.get_param_value( Parameters.EVENT_ACTIVITY, parameters, constants.DEFAULT_EVENT_ACTIVITY ) event_timestamp = exec_utils.get_param_value( Parameters.EVENT_TIMESTAMP, parameters, constants.DEFAULT_EVENT_TIMESTAMP, ) object_id = exec_utils.get_param_value( Parameters.OBJECT_ID, parameters, constants.DEFAULT_OBJECT_ID ) object_type = exec_utils.get_param_value( Parameters.OBJECT_TYPE, parameters, constants.DEFAULT_OBJECT_TYPE ) internal_index = exec_utils.get_param_value( Parameters.INTERNAL_INDEX, parameters, constants.DEFAULT_INTERNAL_INDEX ) parser = dt_parsing.parser.get() types_dict = {} for obj_id in json_obj[constants.OCEL_OBJECTS_KEY]: obj = json_obj[constants.OCEL_OBJECTS_KEY][obj_id] obj_type = obj[object_type] types_dict[obj_id] = obj_type dct = {object_id: obj_id, object_type: obj_type} for k, v in obj[constants.OCEL_OVMAP_KEY].items(): dct[k] = v if constants.OCEL_O2O_KEY in obj: this_rel_objs = obj[constants.OCEL_O2O_KEY] for newel in this_rel_objs: target_id = newel[object_id] qualifier = newel[constants.DEFAULT_QUALIFIER] o2o.append( { object_id: obj_id, object_id + "_2": target_id, constants.DEFAULT_QUALIFIER: qualifier, } ) objects.append(dct) for ev_id in json_obj[constants.OCEL_EVENTS_KEY]: ev = json_obj[constants.OCEL_EVENTS_KEY][ev_id] dct = { event_id: ev_id, event_timestamp: parser.apply(ev[event_timestamp]), event_activity: ev[event_activity], } for k, v in ev[constants.OCEL_VMAP_KEY].items(): dct[k] = v this_rel = {} for obj in ev[constants.OCEL_OMAP_KEY]: if obj in types_dict: this_rel[obj] = { event_id: ev_id, event_activity: ev[event_activity], event_timestamp: parser.apply(ev[event_timestamp]), object_id: obj, object_type: types_dict[obj], } if constants.OCEL_TYPED_OMAP_KEY in ev: for element in ev[constants.OCEL_TYPED_OMAP_KEY]: if object_id in element: key1 = element[object_id] if key1 in this_rel: this_rel[key1][constants.DEFAULT_QUALIFIER] = element[ constants.DEFAULT_QUALIFIER ] for obj in this_rel: relations.append(this_rel[obj]) events.append(dct) if constants.OCEL_OBJCHANGES_KEY in json_obj: object_changes = json_obj[constants.OCEL_OBJCHANGES_KEY] events = pandas_utils.instantiate_dataframe(events) objects = pandas_utils.instantiate_dataframe(objects) relations = pandas_utils.instantiate_dataframe(relations) events = pandas_utils.insert_index( events, internal_index, reset_index=False, copy_dataframe=False ) relations = pandas_utils.insert_index( relations, internal_index, reset_index=False, copy_dataframe=False ) events = events.sort_values([event_timestamp, internal_index]) relations = relations.sort_values([event_timestamp, internal_index]) del events[internal_index] del relations[internal_index] globals = {} globals[constants.OCEL_GLOBAL_LOG] = json_obj[constants.OCEL_GLOBAL_LOG] globals[constants.OCEL_GLOBAL_EVENT] = json_obj[ constants.OCEL_GLOBAL_EVENT ] globals[constants.OCEL_GLOBAL_OBJECT] = json_obj[ constants.OCEL_GLOBAL_OBJECT ] o2o = pandas_utils.instantiate_dataframe(o2o) if o2o else None object_changes = ( pandas_utils.instantiate_dataframe(object_changes) if object_changes else None ) if object_changes is not None and len(object_changes) > 0: object_changes = dataframe_utils.convert_timestamp_columns_in_df( object_changes, timest_format=pm4_constants.DEFAULT_XES_TIMESTAMP_PARSE_FORMAT, timest_columns=[event_timestamp], ) obj_id_map = objects[[object_id, object_type]].to_dict("records") obj_id_map = {x[object_id]: x[object_type] for x in obj_id_map} object_changes[object_type] = object_changes[object_id].map(obj_id_map) log = OCEL( events=events, objects=objects, relations=relations, o2o=o2o, object_changes=object_changes, globals=globals, parameters=parameters, ) return log
[docs] def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None) -> OCEL: """ Imports an object-centric event log from a JSON-OCEL file, using the default JSON backend of Python Parameters ----------------- file_path Path to the JSON-OCEL file parameters Parameters of the algorithm, including: - Parameters.EVENT_ID - Parameters.EVENT_ACTIVITY - Parameters.EVENT_TIMESTAMP - Parameters.OBJECT_ID - Parameters.OBJECT_TYPE - Parameters.INTERNAL_INDEX Returns ------------------ ocel Object-centric event log """ if parameters is None: parameters = {} encoding = exec_utils.get_param_value( Parameters.ENCODING, parameters, pm4_constants.DEFAULT_ENCODING ) F = open(file_path, "r", encoding=encoding) json_obj = json.load(F) F.close() log = get_base_ocel(json_obj, parameters=parameters) log = ocel_consistency.apply(log, parameters=parameters) log = filtering_utils.propagate_relations_filtering( log, parameters=parameters ) return log