Source code for rocelib.evaluations.DistanceEvaluator

import numpy as np

from rocelib.evaluations.RecourseEvaluator import RecourseEvaluator
from rocelib.evaluations.robustness_evaluations.Evaluator import Evaluator
from rocelib.lib.distance_functions.DistanceFunctions import euclidean



[docs]
class DistanceEvaluator(Evaluator):
    """
    An Evaluator class which evaluates the average distance of recourses (CEs) from their original instances

        ...

    Attributes / Properties
    -------

    task: Task
        Stores the Task for which we are evaluating the distance of CEs

    -------

    Methods
    -------

    evaluate(recourse_method, valid_val=1, distance_func=euclidean, column_name="target", subset=None) -> float:
        Returns the average distance of each x' from x. The distance function and the valid target value
        are supplied per call as arguments of evaluate().

    -------
    """

    def evaluate(self, recourse_method, valid_val=1, distance_func=euclidean, column_name="target", subset=None, **kwargs):
        """
        Determines the average distance of the CEs from their original instances
        @param recourse_method: key into self.task.CEs selecting which stored recourses to evaluate; the first
                                element of the stored entry is used as the DataFrame of CEs, expected to be in
                                the same row order as the instances they are compared against
        @param valid_val: int, what the target value of a valid counterfactual is defined as, default 1
                          (currently not used in the distance computation)
        @param distance_func: Function, called with two single-row DataFrames (CE row, original row) and
                              returning their distance, defaulted to euclidean
        @param column_name: name of target column, dropped from both DataFrames before comparison
        @param subset: optional DataFrame, contains the instances the CEs were generated for; when None the
                       negative instances of the task's dataset are used
        @param kwargs: other arguments (currently unused)
        @return: float, average distance of CEs from their original instances (np.mean of the per-row
                 distances, so NaN when there are no rows)
        @raise AssertionError: if the CEs and the original instances do not have the same shape
        @raise KeyError: if the CEs lack a feature column present in the original instances
        """
        recourses = self.task.CEs[recourse_method][0]

        # Strip bookkeeping columns so only feature columns take part in the distance
        ces = recourses.drop(columns=[column_name, "loss", "predicted"], errors='ignore')

        if subset is None:
            instances = self.task.dataset.get_negative_instances()
        else:
            instances = subset

        # Drop any extra target columns from the original instances
        instances = instances.drop(columns=[column_name, "predicted"], errors='ignore')

        # Align the CE columns (names and order) to the instances so rows are compared feature by feature
        ces = ces[instances.columns]

        # Raised explicitly rather than via `assert` so the check still runs under `python -O`
        if ces.shape != instances.shape:
            raise AssertionError(
                "DataFrames must have the same shape "
                f"(recourses: {ces.shape}, instances: {instances.shape})"
            )

        # Rows are paired by position; each is passed as a one-row DataFrame (slice, not Series)
        distances = [
            distance_func(ces.iloc[i:i + 1], instances.iloc[i:i + 1])
            for i in range(len(ces))
        ]

        # Calculate and return the average distance
        return np.mean(distances)