Source code for squadds.core.metrics

import logging
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd
from numpy import linalg as LA

# Configure the root logger with INFO level when this module is imported, so
# the informational notes emitted via ``logging.info`` below are not filtered out.
logging.basicConfig(level=logging.INFO)


class MetricStrategy(ABC):
    """Common interface for distance metrics between a target and simulated rows."""

    @abstractmethod
    def calculate(self, target_params: dict, row: pd.Series) -> float:
        """Return the distance between the target parameters and one DataFrame row.

        Args:
            target_params (dict): Dictionary of target parameters.
            row (pd.Series): A row from a DataFrame.

        Returns:
            float: Calculated distance.
        """
        raise NotImplementedError("This method should be overridden by subclass")

    def calculate_vectorized(self, target_params: dict, df: pd.DataFrame) -> pd.Series:
        """Return the distance for every row of ``df``.

        This base implementation is the generic fallback: it calls
        :meth:`calculate` once per row through ``DataFrame.apply``. Subclasses
        may override it with a column-wise implementation.

        Args:
            target_params (dict): Dictionary of target parameters.
            df (pd.DataFrame): The DataFrame containing rows to calculate distances for.

        Returns:
            pd.Series: Series of calculated distances.
        """

        def _row_distance(row):
            # Bind the target so ``apply`` only has to supply the row.
            return self.calculate(target_params, row)

        return df.apply(_row_distance, axis=1)
class EuclideanMetric(MetricStrategy):
    """Relative (target-normalised) Euclidean metric strategy.

    Every numeric target parameter contributes the squared relative error
    ``((x - x_target) / x_target) ** 2``; the distance is the square root of
    the sum of these contributions. Target values that are not ``int`` or
    ``float`` instances are ignored.
    """

    def calculate(self, target_params, df_row):
        """Calculate the relative Euclidean distance between target_params and df_row.

        The distance is calculated as::

            sqrt(sum_i (x_i - x_target_i)^2 / x_target_i^2)

        where ``x_i`` are the values in ``df_row`` and ``x_target_i`` are the
        numeric target parameters.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df_row (pd.Series): A single row from a DataFrame representing a
                set of parameters. Every numeric target key is looked up with
                ``df_row[column]``.

        Returns:
            float: The relative Euclidean distance.
        """
        distance = 0.0
        for column, target_value in target_params.items():
            if isinstance(target_value, (int, float)):  # Only numerical columns
                # NOTE: the error is normalised by the target, so a target
                # value of 0 results in a division by zero.
                distance += (df_row[column] - target_value) ** 2 / target_value**2
        return np.sqrt(distance)

    def calculate_vectorized(self, target_params: dict, df: pd.DataFrame) -> pd.Series:
        """Vectorized relative Euclidean distance calculation.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df (pd.DataFrame): Rows to calculate distances for. Numeric target
                keys that are not columns of ``df`` are skipped.

        Returns:
            pd.Series: One distance per row of ``df``. When no target
            parameter is both numeric and present in ``df``, a Series of
            zeros indexed like ``df`` is returned (never a bare scalar).
        """
        squared_terms = [
            ((df[column] - target_value) / target_value) ** 2
            for column, target_value in target_params.items()
            if isinstance(target_value, (int, float)) and column in df.columns
        ]
        if not squared_terms:
            # Nothing to compare: keep the documented Series return type.
            return pd.Series(0.0, index=df.index)
        # Same left-to-right accumulation as ``0.0 + term_1 + term_2 + ...``.
        return np.sqrt(sum(squared_terms, 0.0))
class ManhattanMetric(MetricStrategy):
    """Implements the Manhattan (L1) metric strategy.

    Only numeric target parameters take part in the distance; non-numeric
    targets (for example string labels) are ignored by both the per-row and
    the vectorized calculation.
    """

    # Types treated as numeric targets: Python numbers and NumPy numeric scalars.
    _NUMERIC_TYPES = (int, float, np.number)

    def calculate(self, target_params, df_row):
        """Calculate the Manhattan distance between target_params and df_row.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df_row (pd.Series): A single row from a DataFrame representing a
                set of parameters. Every numeric target key is looked up with
                ``df_row[key]``.

        Returns:
            float: The Manhattan distance (``0.0`` when there is no numeric
            target parameter).
        """
        numeric_keys = [
            key
            for key, value in target_params.items()
            if isinstance(value, self._NUMERIC_TYPES)
        ]
        target_vector = np.array([target_params[key] for key in numeric_keys])
        row_vector = np.array([df_row[key] for key in numeric_keys])
        return LA.norm(target_vector - row_vector, ord=1)

    def calculate_vectorized(self, target_params: dict, df: pd.DataFrame) -> pd.Series:
        """Vectorized Manhattan distance calculation.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df (pd.DataFrame): Rows to calculate distances for. Numeric target
                keys that are not columns of ``df`` are skipped.

        Returns:
            pd.Series: One distance per row of ``df``. When no target
            parameter is both numeric and present in ``df``, a Series of
            zeros indexed like ``df`` is returned (never a bare scalar).
        """
        abs_terms = [
            np.abs(df[column] - target_value)
            for column, target_value in target_params.items()
            if isinstance(target_value, self._NUMERIC_TYPES) and column in df.columns
        ]
        if not abs_terms:
            # Nothing to compare: keep the documented Series return type.
            return pd.Series(0.0, index=df.index)
        # Same left-to-right accumulation as ``0.0 + term_1 + term_2 + ...``.
        return sum(abs_terms, 0.0)
class ChebyshevMetric(MetricStrategy):
    """Implements the Chebyshev (L-infinity) metric strategy.

    Only numeric target parameters take part in the distance; non-numeric
    targets (for example string labels) are ignored by both the per-row and
    the vectorized calculation.
    """

    # Types treated as numeric targets: Python numbers and NumPy numeric scalars.
    _NUMERIC_TYPES = (int, float, np.number)

    def calculate(self, target_params, df_row):
        """Calculate the Chebyshev distance between target_params and df_row.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df_row (pd.Series): A single row from a DataFrame representing a
                set of parameters. Every numeric target key is looked up with
                ``df_row[key]``.

        Returns:
            float: The Chebyshev distance, i.e. the largest absolute
            difference over the numeric target parameters (``0.0`` when there
            is no numeric target parameter).
        """
        numeric_keys = [
            key
            for key, value in target_params.items()
            if isinstance(value, self._NUMERIC_TYPES)
        ]
        if not numeric_keys:
            # The infinity norm of an empty vector is a max over nothing and
            # would raise; report "no difference" instead, matching the
            # zero Series returned by calculate_vectorized.
            return 0.0
        target_vector = np.array([target_params[key] for key in numeric_keys])
        row_vector = np.array([df_row[key] for key in numeric_keys])
        return LA.norm(target_vector - row_vector, ord=np.inf)

    def calculate_vectorized(self, target_params: dict, df: pd.DataFrame) -> pd.Series:
        """Vectorized Chebyshev distance calculation.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df (pd.DataFrame): Rows to calculate distances for. Numeric target
                keys that are not columns of ``df`` are skipped.

        Returns:
            pd.Series: Per-row maximum absolute difference. When no target
            parameter is both numeric and present in ``df``, a Series of
            zeros indexed like ``df`` is returned.
        """
        abs_diffs = [
            np.abs(df[column] - target_value)
            for column, target_value in target_params.items()
            if isinstance(target_value, self._NUMERIC_TYPES) and column in df.columns
        ]
        if not abs_diffs:
            return pd.Series(0.0, index=df.index)
        # Fold the element-wise maximum across all compared columns.
        distance = abs_diffs[0]
        for abs_diff in abs_diffs[1:]:
            distance = np.maximum(distance, abs_diff)
        return distance
class WeightedEuclideanMetric(MetricStrategy):
    """Concrete class for the weighted Euclidean metric.

    The value returned is the weighted sum of squared relative errors,
    ``sum_i w_i * (x_target_i - x_i)^2 / x_target_i^2``; no square root is
    applied. Target values that are not ``int`` or ``float`` instances are
    ignored.
    """

    def __init__(self, weights: dict):
        """Initialize the weights.

        Args:
            weights (dict): Dictionary of weights for each parameter. May be
                ``None``, in which case a weight of 1 per target parameter is
                filled in on the first distance calculation. Parameters
                without an entry are weighted with 1.
        """
        self.weights = weights

    def calculate(self, target_params: dict, row: pd.Series) -> float:
        """Calculate the weighted squared relative distance for one DataFrame row.

        Args:
            target_params (dict): Dictionary of target parameters.
            row (pd.Series): A row from a DataFrame. A target parameter that
                is absent from ``row`` is treated as having the value 0.

        Returns:
            float: Weighted sum of squared relative errors (not square-rooted).
        """
        if self.weights is None:
            # Stored on the instance, so the note below is logged only once.
            self.weights = {key: 1 for key in target_params}
            logging.info(
                "\033[1mNOTE TO USER:\033[0m No metric weights provided. Using default weights of 1 for all parameters."
            )
        distance = 0
        for param, target_value in target_params.items():
            if isinstance(target_value, (int, float)):
                simulated_value = row.get(param, 0)
                weight = self.weights.get(param, 1)
                distance += weight * ((target_value - simulated_value) ** 2) / target_value**2
        return distance

    def calculate_vectorized(self, target_params: dict, df: pd.DataFrame) -> pd.Series:
        """Vectorized weighted squared relative distance calculation.

        Args:
            target_params (dict): Dictionary of target parameters.
            df (pd.DataFrame): Rows to calculate distances for. Numeric target
                keys that are not columns of ``df`` are skipped (unlike
                :meth:`calculate`, which substitutes 0 for a missing value).

        Returns:
            pd.Series: One value per row of ``df``. When no target parameter
            is both numeric and present in ``df``, a Series of zeros indexed
            like ``df`` is returned (never a bare scalar).
        """
        if self.weights is None:
            self.weights = {key: 1 for key in target_params}
        weighted_terms = [
            self.weights.get(param, 1) * (((target_value - df[param]) ** 2) / target_value**2)
            for param, target_value in target_params.items()
            if isinstance(target_value, (int, float)) and param in df.columns
        ]
        if not weighted_terms:
            # Nothing to compare: keep the documented Series return type.
            return pd.Series(0.0, index=df.index)
        # Same left-to-right accumulation as ``0.0 + term_1 + term_2 + ...``.
        return sum(weighted_terms, 0.0)
class CustomMetric(MetricStrategy):
    """Metric strategy that delegates the distance to a user-supplied function.

    Example Usage:
        To use a custom Manhattan distance metric, define the function as follows:

            def manhattan_distance(target, simulated):
                return sum(abs(target[key] - simulated.get(key, 0)) for key in target)

        Then, instantiate CustomMetric with this function:

            custom_metric = CustomMetric(manhattan_distance)
    """

    def __init__(self, custom_metric_func):
        """Store the user-defined metric function.

        Args:
            custom_metric_func (callable): User-defined custom metric function.
                It is called with two dictionaries (target parameters, row
                values) and should return a float.

        Raises:
            ValueError: If ``custom_metric_func`` is ``None``.
        """
        if custom_metric_func is None:
            raise ValueError("Must provide a custom metric function.")
        self.custom_metric_func = custom_metric_func

    def calculate(self, target_params, df_row):
        """Evaluate the user-defined metric for one row.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df_row (pd.Series): A single row from a DataFrame representing a
                set of parameters; it is converted to a dict before the call.

        Returns:
            float: Whatever the user-defined function returns for this row.
        """
        metric_func = self.custom_metric_func
        return metric_func(target_params, df_row.to_dict())