Source code for pm4py.algo.anonymization.pripel.algorithm

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''

from enum import Enum
from typing import Optional, Dict, Any, Union

import pandas as pd

from pm4py.algo.anonymization.pripel.variants import pripel
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.obj import EventLog
from pm4py.util import exec_utils


[docs] class Variants(Enum): PRIPEL = pripel
DEFAULT_VARIANT = Variants.PRIPEL VERSIONS = {Variants.PRIPEL}
[docs] def apply(log: Union[EventLog, pd.DataFrame], trace_variant_query: Union[EventLog, pd.DataFrame], epsilon: float, variant=DEFAULT_VARIANT, parameters: Optional[Dict[Any, Any]] = None) -> EventLog: """ PRIPEL (Privacy-preserving event log publishing with contextual information) is a framework to publish event logs that fulfill differential privacy. PRIPEL ensures privacy on the level of individual cases instead of the complete log. This way, contextual information as well as the long tail process behaviour are preserved, which enables the application of a rich set of process analysis techniques. PRIPEL is described in: Fahrenkrog-Petersen, S.A., van der Aa, H., Weidlich, M. (2020). PRIPEL: Privacy-Preserving Event Log Publishing Including Contextual Information. In: Fahland, D., Ghidini, C., Becker, J., Dumas, M. (eds) Business Process Management. BPM 2020. Lecture Notes in Computer Science(), vol 12168. Springer, Cham. https://doi.org/10.1007/978-3-030-58666-9_7 Parameters ------------- log Event log trace_variant_query An anonymized trace variant distribution as an EventLog epsilon Strength of the differential privacy guarantee variant - Variants.PRIPEL parameters Parameters of the algorithm, including: -Parameters.BLOCKLIST -> Some event logs contain attributes that are equivalent to a case id. For privacy reasons, such attributes must be deleted from the anonymized log. We handle such attributes with this set. Returns ------------ anonymised_log Anonymised event log """ log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG) trace_variant_query = log_converter.apply(trace_variant_query, variant=log_converter.Variants.TO_EVENT_LOG) return exec_utils.get_variant(variant).apply(log, trace_variant_query, epsilon, parameters=parameters)