Module prefeitura_rio.metrics.agnostic
Framework-agnotic metrics implementations for the prefeitura_rio package.
Most of them can usually be imported from the sklearn.metrics
module.
Expand source code
# -*- coding: utf-8 -*-
"""
Framework-agnotic metrics implementations for the prefeitura_rio package.
Most of them can usually be imported from the `sklearn.metrics` module.
"""
try:
import numpy as np
from sklearn.metrics import (
brier_score_loss,
confusion_matrix,
fbeta_score,
mean_squared_error,
)
except ImportError:
pass
from prefeitura_rio.utils import assert_dependencies
@assert_dependencies(["numpy", "sklearn"], extras=["metrics"])
def brier(
y_true,
y_pred,
*,
sample_weight=None,
pos_label=None,
):
"""
Computes the Brier score loss.
Args:
y_true (array-like): The true values.
y_pred (array-like): The predicted values.
sample_weight (array-like, optional): The sample weights. Defaults to
`None`.
pos_label (int, optional): The positive label. Defaults to `None`.
Returns:
float: The Brier score.
"""
return brier_score_loss(y_true, y_pred, sample_weight=sample_weight, pos_label=pos_label)
@assert_dependencies(["numpy", "sklearn"], extras=["metrics"])
def carabetta(y_true, y_pred, alpha=5, beta=2):
"""
Computes the Carabetta score.
Args:
y_true (array-like): The true values.
y_pred (array-like): The predicted values.
alpha (float, optional): The alpha coefficient, which controls the weight of false-negatives
in the score. Defaults to `5`.
beta (float, optional): The beta coefficient, which controls the weight of false-positives
in the score. Defaults to `2`.
Returns:
float: The Carabetta score.
"""
y_true = np.array(y_true)
y_pred = np.array(y_pred)
return np.sum(alpha * y_true * (1 - y_pred) + beta * (1 - y_true) * y_pred)
@assert_dependencies(["numpy", "sklearn"], extras=["metrics"])
def fbeta(
y_true,
y_pred,
*,
beta,
labels=None,
pos_label=1,
average="binary",
sample_weight=None,
zero_division="warn",
threshold=None,
) -> float:
"""
Computes the F-beta score.
Args:
y_true (array-like): The true values.
y_pred (array-like): The predicted values.
beta (float): The beta value.
labels (array-like, optional): The labels to consider. If `None`, all
labels are considered. Defaults to `None`.
pos_label (int, optional): The positive label. Defaults to `1`.
average (str, optional): The averaging method. Defaults to `'binary'`.
sample_weight (array-like, optional): The sample weights. Defaults to
`None`.
zero_division (str, optional): The value to return when there is a
zero division. Defaults to `'warn'`.
threshold (float, optional): The threshold to use when converting
`y_pred` to binary. Defaults to `None`.
Returns:
float: The F-beta score.
"""
if threshold:
y_pred = np.array(y_pred) > threshold
try:
return fbeta_score(
y_true,
y_pred,
beta=beta,
labels=labels,
pos_label=pos_label,
average=average,
sample_weight=sample_weight,
zero_division=zero_division,
)
except ValueError as exc:
# Check for exception message
if "mix of binary and continuous" in str(exc):
raise ValueError(
"`y_true` and `y_pred` must be binary. If your `y_pred` is not"
" binary, try setting the `threshold` parameter."
) from exc
raise exc
@assert_dependencies(["numpy", "sklearn"], extras=["metrics"])
def mse(y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"):
"""
Computes the MSE score.
Args:
y_true (array-like): The true values.
y_pred (array-like): The predicted values.
sample_weight (array-like, optional): The sample weights. Defaults to
`None`.
multioutput (str, optional): The averaging method. Defaults to
`'uniform_average'`.
Returns:
float: The MSE score.
"""
return mean_squared_error(y_true, y_pred, sample_weight=sample_weight, multioutput=multioutput)
@assert_dependencies(["numpy", "sklearn"], extras=["metrics"])
def nash_sutcliffe(
y_true,
y_pred,
):
"""
Computes the Nash-Sutcliffe score.
Args:
y_true (array-like): The true values.
y_pred (array-like): The predicted values.
Returns:
float: The Nash-Sutcliffe score.
"""
y_true = np.array(y_true)
y_pred = np.array(y_pred)
return 1 - np.sum((y_true - y_pred) ** 2) / np.sum((y_true - np.mean(y_true)) ** 2)
@assert_dependencies(["numpy", "sklearn"], extras=["metrics"])
def pet(y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"):
"""
Computes the PET score.
Args:
y_true (array-like): The true values.
y_pred (array-like): The predicted values.
sample_weight (array-like, optional): The sample weights. Defaults to
`None`.
multioutput (str, optional): The averaging method. Defaults to
`'uniform_average'`.
Returns:
float: The PET score.
"""
return np.tanh(
mean_squared_error(y_true, y_pred, sample_weight=sample_weight, multioutput=multioutput)
)
@assert_dependencies(["numpy", "sklearn"], extras=["metrics"])
def rmse(y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"):
"""
Computes the RMSE score.
Args:
y_true (array-like): The true values.
y_pred (array-like): The predicted values.
sample_weight (array-like, optional): The sample weights. Defaults to
`None`.
multioutput (str, optional): The averaging method. Defaults to
`'uniform_average'`.
Returns:
float: The RMSE score.
"""
return np.sqrt(
mean_squared_error(y_true, y_pred, sample_weight=sample_weight, multioutput=multioutput)
)
@assert_dependencies(["numpy", "sklearn"], extras=["metrics"])
def sp(
y_true,
y_pred,
*,
labels=None,
sample_weight=None,
normalize=None,
threshold=None,
):
"""
Computes the SP score.
Args:
y_true (array-like): The true values.
y_pred (array-like): The predicted values.
labels (array-like, optional): List of labels to index the matrix. This may be used to
reorder or select a subset of labels. If None is given, those that appear at least
once in y_true or y_pred are used in sorted order.
sample_weight (array-like, optional): The sample weights. Defaults to
`None`.
normalize (str, optional): Normalizes confusion matrix over the true (rows), predicted
(columns) conditions or all the population. If None, confusion matrix will not be
normalized.
threshold (float, optional): The threshold to use when converting
`y_pred` to binary. Defaults to `None`.
Returns:
float: The SP score.
"""
if threshold:
y_pred = np.array(y_pred) > threshold
try:
conf_matrix = confusion_matrix(
y_true,
y_pred,
labels=labels,
sample_weight=sample_weight,
normalize=normalize,
)
true_negative = conf_matrix[0][0]
false_negative = conf_matrix[1][0]
true_positive = conf_matrix[1][1]
false_positive = conf_matrix[0][1]
fa = false_positive / (true_negative + false_positive + np.finfo(float).eps)
pd = true_positive / (true_positive + false_negative + np.finfo(float).eps)
sp = np.sqrt(np.sqrt(pd * (1 - fa)) * (0.5 * (pd + (1 - fa))))
return sp
except ValueError as exc:
# Check for exception message
if "mix of binary and continuous" in str(exc):
raise ValueError(
"`y_true` and `y_pred` must be binary. If your `y_pred` is not"
" binary, try setting the `threshold` parameter."
) from exc
raise exc