import logging
from abc import ABC, abstractmethod
import numpy as np
import pandas as pd
from numpy import linalg as LA
# Configure the root logger at import time with the INFO threshold so that the
# informational notices logged by the metrics in this module are not filtered out.
logging.basicConfig(level=logging.INFO)
class MetricStrategy(ABC):
    """Common interface for distance metrics between target parameters and data rows."""

    @abstractmethod
    def calculate(self, target_params: dict, row: pd.Series) -> float:
        """Return the distance between the target parameters and one DataFrame row.

        Args:
            target_params (dict): Dictionary of target parameters.
            row (pd.Series): A row from a DataFrame.

        Returns:
            float: Calculated distance.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError("This method should be overridden by subclass")

    def calculate_vectorized(self, target_params: dict, df: pd.DataFrame) -> pd.Series:
        """Return the distance of every row in ``df`` to the target parameters.

        This base implementation is a generic fallback: it calls
        :meth:`calculate` once per row. Subclasses may override it with a
        genuinely vectorized computation.

        Args:
            target_params (dict): Dictionary of target parameters.
            df (pd.DataFrame): The DataFrame containing rows to calculate distances for.

        Returns:
            pd.Series: Series of calculated distances.
        """

        def _row_distance(row):
            # Bind the targets so DataFrame.apply only has to supply the row.
            return self.calculate(target_params, row)

        return df.apply(_row_distance, axis=1)
class EuclideanMetric(MetricStrategy):
    """Relative (target-normalised) Euclidean distance metric.

    Every numeric target parameter contributes the squared relative deviation
    ``(x - x_target)**2 / x_target**2``; the distance is the square root of the
    sum of those contributions. Target values that are not numeric are ignored.

    Note:
        The deviation is divided by the target value, so a target of zero
        results in a division by zero.
    """

    # NumPy integer/floating scalars are listed explicitly because e.g.
    # ``np.int64`` is not a subclass of ``int`` and would otherwise be
    # skipped silently.
    _NUMERIC_TYPES = (int, float, np.integer, np.floating)

    def calculate(self, target_params, df_row):
        """Calculate the relative Euclidean distance between target_params and df_row.

        The distance is ``sqrt(sum_i (x_i - x_target_i)**2 / x_target_i**2)``,
        where ``x_i`` are the values in ``df_row`` and ``x_target_i`` the
        numeric target parameters.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df_row (pd.Series): A single row from a DataFrame representing a set of parameters.

        Returns:
            float: The relative Euclidean distance (0.0 if no target is numeric).

        Raises:
            KeyError: If a numeric target parameter has no entry in ``df_row``.
        """
        distance_sq = 0.0
        for column, target_value in target_params.items():
            if not isinstance(target_value, self._NUMERIC_TYPES):
                continue  # only numerical parameters take part
            distance_sq += (df_row[column] - target_value) ** 2 / target_value**2
        return np.sqrt(distance_sq)

    def calculate_vectorized(self, target_params: dict, df: pd.DataFrame) -> pd.Series:
        """Vectorized relative Euclidean distance for every row of ``df``.

        Unlike :meth:`calculate`, target parameters without a matching column
        in ``df`` are skipped instead of raising.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df (pd.DataFrame): Rows to compute distances for.

        Returns:
            pd.Series: One distance per row, indexed like ``df``.
        """
        # Start from a Series so the result is a Series even when no column
        # qualifies (a plain 0.0 accumulator would return a bare scalar).
        distance_sq = pd.Series(0.0, index=df.index)
        for column, target_value in target_params.items():
            if isinstance(target_value, self._NUMERIC_TYPES) and column in df.columns:
                diff_norm = (df[column] - target_value) / target_value
                distance_sq = distance_sq + diff_norm**2
        return np.sqrt(distance_sq)
class ManhattanMetric(MetricStrategy):
    """Manhattan (L1) distance metric over the numeric target parameters."""

    # NumPy integer/floating scalars are listed explicitly because e.g.
    # ``np.int64`` is not a subclass of ``int`` and would otherwise be
    # skipped silently.
    _NUMERIC_TYPES = (int, float, np.integer, np.floating)

    def calculate(self, target_params, df_row):
        """Calculate the Manhattan distance between target_params and df_row.

        Only numeric target parameters are compared, matching
        :meth:`calculate_vectorized`; non-numeric targets are ignored.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df_row (pd.Series): A single row from a DataFrame representing a set of parameters.

        Returns:
            float: The Manhattan distance (0.0 if no target is numeric).

        Raises:
            KeyError: If a numeric target parameter has no entry in ``df_row``.
        """
        numeric_keys = [
            key
            for key, value in target_params.items()
            if isinstance(value, self._NUMERIC_TYPES)
        ]
        target_vector = np.array([target_params[key] for key in numeric_keys])
        row_vector = np.array([df_row[key] for key in numeric_keys])
        return LA.norm(target_vector - row_vector, ord=1)

    def calculate_vectorized(self, target_params: dict, df: pd.DataFrame) -> pd.Series:
        """Vectorized Manhattan distance for every row of ``df``.

        Target parameters without a matching column in ``df`` are skipped.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df (pd.DataFrame): Rows to compute distances for.

        Returns:
            pd.Series: One distance per row, indexed like ``df``.
        """
        # Start from a Series so the result is a Series even when no column
        # qualifies (a plain 0.0 accumulator would be returned as a float).
        distance = pd.Series(0.0, index=df.index)
        for column, target_value in target_params.items():
            if isinstance(target_value, self._NUMERIC_TYPES) and column in df.columns:
                distance = distance + np.abs(df[column] - target_value)
        return distance
class ChebyshevMetric(MetricStrategy):
    """Chebyshev (L-infinity) distance metric over the numeric target parameters."""

    # NumPy integer/floating scalars are listed explicitly because e.g.
    # ``np.int64`` is not a subclass of ``int`` and would otherwise be
    # skipped silently.
    _NUMERIC_TYPES = (int, float, np.integer, np.floating)

    def calculate(self, target_params, df_row):
        """Calculate the Chebyshev distance between target_params and df_row.

        Only numeric target parameters are compared, matching
        :meth:`calculate_vectorized`; non-numeric targets are ignored.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df_row (pd.Series): A single row from a DataFrame representing a set of parameters.

        Returns:
            float: The largest absolute deviation (0.0 if no target is numeric).

        Raises:
            KeyError: If a numeric target parameter has no entry in ``df_row``.
        """
        numeric_keys = [
            key
            for key, value in target_params.items()
            if isinstance(value, self._NUMERIC_TYPES)
        ]
        if not numeric_keys:
            # The infinity norm takes a max, which is undefined for an empty
            # vector; return 0.0 like the vectorized path does.
            return 0.0
        target_vector = np.array([target_params[key] for key in numeric_keys])
        row_vector = np.array([df_row[key] for key in numeric_keys])
        return LA.norm(target_vector - row_vector, ord=np.inf)

    def calculate_vectorized(self, target_params: dict, df: pd.DataFrame) -> pd.Series:
        """Vectorized Chebyshev distance for every row of ``df``.

        Target parameters without a matching column in ``df`` are skipped.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df (pd.DataFrame): Rows to compute distances for.

        Returns:
            pd.Series: One distance per row, indexed like ``df``; all zeros
            when no target parameter qualifies.
        """
        distance = None
        for column, target_value in target_params.items():
            if isinstance(target_value, self._NUMERIC_TYPES) and column in df.columns:
                abs_diff = np.abs(df[column] - target_value)
                # Running element-wise maximum across the qualifying columns.
                distance = abs_diff if distance is None else np.maximum(distance, abs_diff)
        if distance is None:
            return pd.Series(0.0, index=df.index)
        return distance
class WeightedEuclideanMetric(MetricStrategy):
    """Weighted, target-normalised squared Euclidean metric.

    Every numeric target parameter contributes
    ``weight * (x_target - x)**2 / x_target**2``. The contributions are summed
    and returned as-is: no square root is taken, so the result is a weighted
    *squared* relative distance.

    Note:
        The deviation is divided by the squared target value, so a target of
        zero results in a division by zero.
    """

    # NumPy integer/floating scalars are listed explicitly because e.g.
    # ``np.int64`` is not a subclass of ``int`` and would otherwise be
    # skipped silently.
    _NUMERIC_TYPES = (int, float, np.integer, np.floating)

    def __init__(self, weights: dict = None):
        """Initialize the weights.

        Args:
            weights (dict): Dictionary of weights for each parameter. Parameters
                without an entry get weight 1. ``None`` (the default) means a
                weight of 1 for every parameter.
        """
        self.weights = weights

    def _ensure_weights(self, target_params: dict) -> dict:
        """Return the weight mapping, installing unit weights if none were given."""
        if self.weights is None:
            # Stored on the instance so the notice below is logged only once.
            self.weights = dict.fromkeys(target_params, 1)
            logging.info(
                "\033[1mNOTE TO USER:\033[0m No metric weights provided. Using default weights of 1 for all parameters."
            )
        return self.weights

    def calculate(self, target_params: dict, row: pd.Series) -> float:
        """Calculate the weighted squared relative distance for one DataFrame row.

        A parameter missing from ``row`` is treated as having the value 0.
        NOTE(review): :meth:`calculate_vectorized` skips parameters missing
        from the DataFrame instead, so the two paths differ in that case.

        Args:
            target_params (dict): Dictionary of target parameters.
            row (pd.Series): A row from a DataFrame.

        Returns:
            float: Weighted sum of squared relative deviations (0.0 if no
            target is numeric).
        """
        weights = self._ensure_weights(target_params)
        distance = 0.0
        for param, target_value in target_params.items():
            if not isinstance(target_value, self._NUMERIC_TYPES):
                continue  # only numerical parameters take part
            simulated_value = row.get(param, 0)
            weight = weights.get(param, 1)
            distance += weight * ((target_value - simulated_value) ** 2) / target_value**2
        return distance

    def calculate_vectorized(self, target_params: dict, df: pd.DataFrame) -> pd.Series:
        """Vectorized weighted squared relative distance for every row of ``df``.

        Target parameters without a matching column in ``df`` are skipped.

        Args:
            target_params (dict): Dictionary of target parameters.
            df (pd.DataFrame): Rows to compute distances for.

        Returns:
            pd.Series: One value per row, indexed like ``df``.
        """
        weights = self._ensure_weights(target_params)
        # Start from a Series so the result is a Series even when no column
        # qualifies (a plain 0.0 accumulator would be returned as a float).
        distance = pd.Series(0.0, index=df.index)
        for param, target_value in target_params.items():
            if isinstance(target_value, self._NUMERIC_TYPES) and param in df.columns:
                term = ((target_value - df[param]) ** 2) / target_value**2
                distance = distance + weights.get(param, 1) * term
        return distance
class CustomMetric(MetricStrategy):
    """Implements a custom metric strategy using a user-defined function.

    Example Usage:
        To use a custom Manhattan distance metric, define the function as follows:

            def manhattan_distance(target, simulated):
                return sum(abs(target[key] - simulated.get(key, 0)) for key in target)

        Then, instantiate CustomMetric with this function:

            custom_metric = CustomMetric(manhattan_distance)
    """

    def __init__(self, custom_metric_func):
        """Initialize CustomMetric with a custom metric function.

        Args:
            custom_metric_func (callable): User-defined custom metric function.
                The function should take two dictionaries as arguments and return a float.

        Raises:
            ValueError: If ``custom_metric_func`` is None.
            TypeError: If ``custom_metric_func`` is not callable.
        """
        if custom_metric_func is None:
            raise ValueError("Must provide a custom metric function.")
        if not callable(custom_metric_func):
            # Fail at construction rather than on the first distance computation.
            raise TypeError(
                f"custom_metric_func must be callable, got {type(custom_metric_func).__name__}."
            )
        self.custom_metric_func = custom_metric_func

    def calculate(self, target_params, df_row):
        """Calculate the custom metric between target_params and df_row.

        The row is converted to a plain dictionary before it is handed to the
        user-defined function.

        Args:
            target_params (dict): The target parameters as a dictionary.
            df_row (pd.Series): A single row from a DataFrame representing a set of parameters.

        Returns:
            float: Whatever the user-defined function returns for
            ``(target_params, df_row.to_dict())``.
        """
        return self.custom_metric_func(target_params, df_row.to_dict())