Source code for rocelib.evaluations.DistanceEvaluator

import numpy as np

from rocelib.evaluations.RecourseEvaluator import RecourseEvaluator
from rocelib.evaluations.robustness_evaluations.Evaluator import Evaluator
from rocelib.lib.distance_functions.DistanceFunctions import euclidean


class DistanceEvaluator(Evaluator):
    """
    An Evaluator class which evaluates the average distance of recourses
    (counterfactual explanations, CEs) from their original instances

        ...

    Attributes / Properties
    -------

    task: Task
        Stores the Task for which we are evaluating the distance of CEs.
        evaluate() reads ``self.task.CEs`` and ``self.task.dataset`` from it.
        NOTE(review): presumably assigned by the Evaluator base class, which
        is not visible in this file - confirm.

    -------

    Methods
    -------

    evaluate() -> float:
        Returns the average distance of each x' from x

    -------
    """

    def evaluate(self, recourse_method, valid_val=1, distance_func=euclidean, column_name="target",
                 subset=None, **kwargs):
        """
        Determines the average distance of the CEs from their original instances

        The CEs are read from ``self.task.CEs[recourse_method][0]`` and paired
        with the original instances row by row (by position, not by index
        label), so both must be in the same order.

        @param recourse_method: key into self.task.CEs selecting which stored CEs to evaluate
        @param valid_val: int, what the target value of a valid counterfactual is defined as,
                          default 1. Accepted for interface compatibility; not used by this method.
        @param distance_func: Function, called with two single-row DataFrames (CE row, original
                              row) and returning their distance, defaulted to euclidean
        @param column_name: name of target column, dropped from both DataFrames if present
        @param subset: optional DataFrame, the original instances the CEs were generated for;
                       when None, self.task.dataset.get_negative_instances() is used
        @param kwargs: other arguments (ignored)
        @return: float, average distance of CEs from their original instances
                 (NaN when there are no rows to compare)
        @raise AssertionError: if the CEs and the original instances do not have the same shape
        @raise KeyError: if the CEs lack a column that the original instances have
        """
        recourses = self.task.CEs[recourse_method][0]

        # Strip bookkeeping columns so that only feature columns take part in the distance.
        counterfactuals = recourses.drop(columns=[column_name, "loss", "predicted"], errors='ignore')

        if subset is None:
            originals = self.task.dataset.get_negative_instances()
        else:
            originals = subset

        # Drop any extra target columns from the original instances
        originals = originals.drop(columns=[column_name, "predicted"], errors='ignore')

        # Align the CE columns (selection and order) to those of the original instances
        counterfactuals = counterfactuals[originals.columns]

        # Raised explicitly rather than via `assert`, which is removed when Python runs
        # with -O; AssertionError is kept so existing callers see the same exception type.
        if counterfactuals.shape != originals.shape:
            raise AssertionError(
                "DataFrames must have the same shape: "
                f"CEs {counterfactuals.shape} vs original instances {originals.shape}"
            )

        # Each row is passed as a one-row DataFrame (iloc slice), which is the input
        # form distance_func is called with.
        distances = [
            distance_func(counterfactuals.iloc[i:i + 1], originals.iloc[i:i + 1])
            for i in range(len(counterfactuals))
        ]

        # np.mean of an empty list yields NaN plus a RuntimeWarning; return NaN directly.
        if not distances:
            return np.nan

        return np.mean(distances)