pm4py.algo.filtering.pandas.cases.case_filter module#

class pm4py.algo.filtering.pandas.cases.case_filter.Parameters(*values)[source]#

Bases: Enum

TIMESTAMP_KEY = 'pm4py:param:timestamp_key'#
CASE_ID_KEY = 'pm4py:param:case_id_key'#
BUSINESS_HOURS = 'business_hours'#
BUSINESS_HOUR_SLOTS = 'business_hour_slots'#
WORKCALENDAR = 'workcalendar'#
pm4py.algo.filtering.pandas.cases.case_filter.filter_on_ncases(df: DataFrame, case_id_glue: str = 'case:concept:name', max_no_cases: int = 1000)[source]#

Filter a dataframe, keeping at most the specified number of traces

Parameters:
  • df – Dataframe

  • case_id_glue – Case ID column in the CSV

  • max_no_cases – Maximum number of traces to keep

Returns:

Filtered dataframe

Return type:

DataFrame

pm4py.algo.filtering.pandas.cases.case_filter.filter_on_case_size(df0: DataFrame, case_id_glue: str = 'case:concept:name', min_case_size: int = 2, max_case_size=None)[source]#

Filter a dataframe, keeping only traces whose number of events is at least min_case_size and, if max_case_size is given, at most max_case_size

Parameters:
  • df0 – Dataframe

  • case_id_glue – Case ID column in the CSV

  • min_case_size – Minimum size of a case

  • max_case_size – Maximum case size

Returns:

Filtered dataframe

Return type:

DataFrame

pm4py.algo.filtering.pandas.cases.case_filter.filter_on_case_performance(df: DataFrame, case_id_glue: str = 'case:concept:name', timestamp_key: str = 'time:timestamp', min_case_performance: float = 0, max_case_performance: float = 10000000000, business_hours=False, business_hours_slots=[(25200, 61200), (111600, 147600), (198000, 234000), (284400, 320400), (370800, 406800)]) DataFrame[source]#

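Filter a dataframe on case performance, keeping only the cases whose performance lies between min_case_performance and max_case_performance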

Parameters:
  • df – Dataframe

  • case_id_glue – Case ID column in the CSV

  • timestamp_key – Timestamp column to use for the CSV

  • min_case_performance – Minimum case performance

  • max_case_performance – Maximum case performance

Returns:

Filtered dataframe

Return type:

DataFrame

pm4py.algo.filtering.pandas.cases.case_filter.filter_case_performance(df: DataFrame, min_case_performance: float = 0, max_case_performance: float = 10000000000, parameters: Dict[str | Parameters, Any] | None = None) DataFrame[source]#
pm4py.algo.filtering.pandas.cases.case_filter.apply(df, parameters=None)[source]#
pm4py.algo.filtering.pandas.cases.case_filter.apply_auto_filter(df, parameters=None)[source]#