import einops as eo
import mlnext
import numpy as np
import numpy.typing as npt
from mlnext.utils import check_ndim
from mlnext.utils import check_shape
from .types import eTaPrecision
from .types import eTaRecall
from .utils import check_floats
# Small constant added to denominators in this module so that a zero
# denominator (e.g., no predictions or no labels) yields 0 instead of raising
# a division-by-zero warning / producing NaN.
eps = 1e-16
# Public names of this module.
__all__ = [
'eTaMetrics',
'evaluate_from_preds',
'evaluate_from_ranges',
]
[docs]
class eTaMetrics:
    """Defines the `enhanced time-aware (eTa)
    <https://dl.acm.org/doi/10.1145/3477314.3507024>`_ precision, recall, and
    f1. Moreover, we can also compute the point-wise and
    `point-adjusted <https://arxiv.org/abs/1802.03903>`_ versions.

    For a motivation to use eTaPR check out the documentation.

    Ranges are treated as inclusive on both ends, i.e., a range
    ``(start, end)`` covers ``end + 1 - start`` points.

    Attributes:
        preds (list[tuple[int, int]]): Predictions as a list of ranges.
        labels (list[tuple[int, int]]): Labels as a list of ranges.
        theta_p (float, optional): Precision threshold. Only those
          predictions who overlap with at least `theta_p` with a detected
          anomaly are counted as correct. Defaults to 0.5.
        theta_r (float, optional): Recall threshold. Only those anomalies
          which overlap at least `theta_r` with a correct prediction are
          counted as detected. Defaults to 0.1.
    """

    def __init__(
        self,
        preds: list[tuple[int, int]],
        labels: list[tuple[int, int]],
        *,
        theta_p: float = 0.5,
        theta_r: float = 0.1,
    ):
        # Validation is delegated to the project helper; it is called with
        # the bounds 0 and 1 (presumably rejecting values outside [0, 1]).
        check_floats(
            ('theta_p', theta_p),
            ('theta_r', theta_r),
            min=0,
            max=1,
        )

        self.theta_p = theta_p
        self.theta_r = theta_r

        self.preds = np.array(preds)
        self.labels = np.array(labels)

        # Number of points per range (ranges are inclusive). This is the
        # maximum overlap a range can achieve. For an empty input a single
        # zero is used as placeholder so that later sums stay well-defined.
        self._preds_max_score = (
            self.preds[:, 1] + 1 - self.preds[:, 0]
            if len(preds) > 0
            else np.zeros((1,))
        )
        self._labels_max_score = (
            self.labels[:, 1] + 1 - self.labels[:, 0]
            if len(labels) > 0
            else np.zeros((1,))
        )

        # Predictions are weighted by the square root of their length.
        self._pred_weights = np.sqrt(self._preds_max_score)

        # The original matrix is kept for the point-wise / point-adjusted
        # metrics; the copy is pruned in-place for the eTa metrics.
        self._overlap_score_mat_org = self._calculate_overlap_score_mat()
        self._overlap_score_mat = self._overlap_score_mat_org.copy()

        self._pruning()

    def _calculate_overlap_score_mat(self) -> np.ndarray:
        """Calculation of the overlap matrix (n_anomalies, n_predictions).

        Entry ``[i, j]`` is the number of points shared by anomaly ``i`` and
        prediction ``j``. Summing over ``axis=1`` yields, for each anomaly
        (row), its overlap with all predictions. Summing over ``axis=0``
        yields, for each prediction (column), its overlap with all anomalies.

        Returns:
            np.ndarray: Returns the overlap matrix or a ``(1, 1)`` matrix of
            zeros if there are no labels or no predictions.
        """
        if len(self.labels) == 0 or len(self.preds) == 0:
            return np.zeros((1, 1))

        # Broadcast labels along the rows and predictions along the columns
        # instead of materialising the repeated (l, p, 2) tensors.
        label_starts = self.labels[:, 0, np.newaxis]  # (l, 1)
        label_ends = self.labels[:, 1, np.newaxis]  # (l, 1)
        pred_starts = self.preds[np.newaxis, :, 0]  # (1, p)
        pred_ends = self.preds[np.newaxis, :, 1]  # (1, p)

        detected_starts = np.maximum(label_starts, pred_starts)
        detected_ends = np.minimum(label_ends, pred_ends)

        # Disjoint ranges produce a non-positive length which is clipped to 0.
        return np.clip(
            detected_ends + 1 - detected_starts,
            a_min=0,
            a_max=None,
        )

    def _pruning(self) -> None:
        """Pruning of the overlap matrix. In this process, we eliminate
        rows / cols from the matrix such that only predictions/anomalies remain
        which belong to the set of correct predictions and detected anomalies.

        Rows and columns are not removed but set to zero. The process is
        repeated until nothing changes because removing a prediction can
        push an anomaly below its threshold and vice versa.
        """
        if len(self.labels) == 0 or len(self.preds) == 0:
            return

        while True:
            # Anomalies that overlap, but less than `theta_r` of their length.
            labels_portion = (
                self._overlap_score_mat.sum(axis=1) / self._labels_max_score
            )
            weak_labels = (labels_portion < self.theta_r) & (
                labels_portion != 0.0
            )
            self._overlap_score_mat[weak_labels] = 0

            # Predictions that overlap, but less than `theta_p` of their
            # length (computed after the anomalies above were removed).
            preds_portion = (
                self._overlap_score_mat.sum(axis=0) / self._preds_max_score
            )
            weak_preds = (preds_portion < self.theta_p) & (
                preds_portion != 0.0
            )
            self._overlap_score_mat[:, weak_preds] = 0

            if not (weak_labels.any() or weak_preds.any()):
                break

    def recall(self) -> eTaRecall:
        """Calculates the `enhanced time-aware recall (eTaR)
        <https://dl.acm.org/doi/10.1145/3477314.3507024>`_. Recall answers the
        question of "How much of anomalies is detected?"

        The recall :math:`\\mathrm{RC}^\\mathrm{eTa}` is calculated as a
        combination of the detection score :math:`s^\\mathrm{RD}` and the
        portion score :math:`s^\\mathrm{RP}` as follows:

        .. math::

            \\mathrm{RC}^{\\mathrm{eTa}}(\\tilde{\\mathbf{y}}, \\mathbf{y})
            \\triangleq
            \\frac{1}{|\\mathcal{A}|}
            \\sum_{A_i \\in \\mathcal{A}}
            \\frac{
                s^{\\mathrm{RD}}(A_i) + s^{\\mathrm{RD}}(A_i)
                \\cdot s^{\\mathrm{RP}}(A_i)
            }{2}

        where :math:`\\tilde{\\mathbf{y}}` are the predictions,
        :math:`\\mathbf{y}` the labels, :math:`A_i` an anomaly, and
        :math:`\\mathcal{A}` the set of all anomalies. The recall
        :math:`\\mathrm{RC}^\\mathrm{eTa}` is the average over all anomaly
        segments :math:`\\mathcal{A}`, but only those anomalies
        :math:`A_i` contribute to the overall score which belong to the set
        of the detected anomalies :math:`\\mathcal{A}^D`. Thus, the recall is
        a measure of how well we can detect anomaly segments.

        The detection score :math:`s^\\mathrm{RD}` of an anomaly :math:`A_i`
        is defined as:

        .. math::

            s^{\\mathrm{RD}}(A_i) = \\begin{cases}
                1, & \\text{if $A_i \\in \\mathcal{A}^D$}\\\\
                0, & \\text{otherwise},
            \\end{cases}

        where :math:`\\mathcal{A}^D` is the set of detected anomalies. An
        anomaly :math:`A_i` belongs to this set, if the overlapped portion
        with a correct prediction :math:`P_j \\in \\mathcal{P}^C` is greater
        than :math:`\\theta_r`. Hence, the detection score
        :math:`s^\\mathrm{RD}` indicates whether an anomaly :math:`A_i` is
        detected or not.

        The portion score :math:`s^\\mathrm{RP}` is the proportion of an
        anomaly :math:`A_i` which intersects with a correct prediction
        :math:`P_j \\in \\mathcal{P}^C`. Mathematically defined as follows,

        .. math::

            s^{\\mathrm{RP}}(\\mathbf{A}_i) =
            \\frac{
                \\sum_{\\mathbf{P}_j \\in \\mathcal{P}^C}
                |\\mathbf{A}_i \\cap \\mathbf{P}_j|
            }{
                |\\mathbf{A}_i|
            }.

        Returns:
            eTaRecall: Returns a namedtuple containing the
              - recall
              - detection score
              - portion score
              - number of detected anomalies

            All values are zero if there are no labels or no predictions.
        """
        if len(self.labels) == 0 or len(self.preds) == 0:
            return eTaRecall(0.0, 0.0, 0.0, 0)

        # Fraction of each anomaly covered by the remaining (pruned)
        # predictions.
        rec_portion = self._overlap_score_mat.sum(axis=1) / (
            self._labels_max_score
        )

        detection_scores = np.where(rec_portion >= self.theta_r, 1.0, 0.0)
        detection_score = detection_scores.sum() / len(detection_scores)

        portion_scores = np.clip(rec_portion, a_min=0.0, a_max=1.0)
        portion_score = portion_scores.mean()

        recall = (
            (detection_scores + detection_scores * portion_scores) / 2
        ).mean()
        detected_segments = detection_scores.sum()

        return eTaRecall(
            recall,
            detection_score,
            portion_score,
            detected_segments,
        )

    def precision(self) -> eTaPrecision:
        """Calculates the `enhanced time-aware precision (eTaP)
        <https://dl.acm.org/doi/10.1145/3477314.3507024>`_. Precision
        answers the question of "How many predictions (for anomalies) concern
        real anomalies?".

        The precision :math:`\\mathrm{PR}^\\mathrm{eTa}` is calculated as a
        combination of the detection score :math:`s^\\mathrm{PD}` and the
        portion score :math:`s^\\mathrm{PP}` as follows:

        .. math::

            \\mathrm{PR}^{\\mathrm{eTa}}(\\tilde{\\mathbf{y}}, \\mathbf{y})
            \\triangleq
            \\sum_{P_j \\in \\mathcal{P}} \\left(
            \\frac{s^{\\mathrm{PD}}(P_j) +
            s^{\\mathrm{PD}}(P_j) \\cdot
            s^{\\mathrm{PP}}(P_j)}{2}
            \\right) \\cdot w_{p},

        where :math:`\\tilde{\\mathbf{y}}` are the predictions,
        :math:`\\mathbf{y}` the labels, :math:`P_j` a prediction,
        :math:`\\mathcal{P}` the set of all predictions and :math:`w_{p}` a
        weight for the prediction,

        .. math::

            w_p = \\frac{
                \\sqrt{|P_j|}
            }{
                \\sum_{P_i \\in \\mathcal{P}} \\sqrt{|P_i|}
            }

        The overall square roots of the lengths of all predictions
        :math:`\\sum_{\\mathbf{Q} \\in \\mathcal{P}} \\sqrt{|\\mathbf{Q}|}`
        restricts the precision score to the range [0, 1]. Furthermore, it
        penalizes the detection method for lengthy and frequent incorrect
        predictions.

        The detection score :math:`s^\\mathrm{PD}` of a prediction :math:`P_j`
        is defined as:

        .. math::

            s^{\\mathrm{PD}}(P_j) = \\begin{cases}
                1, & \\text{if $P_j \\in \\mathcal{P}^C$} \\\\
                0, & \\text{otherwise},
            \\end{cases}

        where :math:`\\mathcal{P}^C` is the set of correct predictions. A
        prediction :math:`P_j` belongs to this set, if at least
        :math:`\\theta_p` of the prediction :math:`P_j` overlaps with a
        detected anomaly :math:`A_i \\in \\mathcal{A}^D`.
        Thus, a prediction :math:`P_j` can only contribute if it is precise
        enough and belongs to the set of correct predictions
        :math:`\\mathcal{P}^C`. Over all predictions :math:`\\mathcal{P}`,
        it is the ratio of correct predictions :math:`\\mathcal{P}^C` to the
        number of all predictions :math:`\\mathcal{P}`, i.e.,
        :math:`\\frac{|\\mathcal{P}^C|}{|\\mathcal{P}|}`.

        The portion score :math:`s^\\mathrm{PP}` is proportion of the
        overlapping parts with a detected anomaly :math:`A_i`:

        .. math::

            s^\\mathrm{PP}(P_j) = \\frac{
                \\sum_{A_i \\in \\mathcal{A}} | A_i \\cap P_j |
            }{
                | P_j |
            }

        Thus, the precision :math:`\\mathrm{PR}^\\mathrm{eTa}` is a measure of
        the quality of the predictions. Only relevant predictions :math:`P_j`,
        i.e., whose overlapping portions are greater than :math:`\\theta_p`,
        can directly contribute to the overall score. However, incorrect
        predictions :math:`P_j \\notin \\mathcal{P}^C` can impact the score
        through the weighting scheme :math:`w_p`.

        Returns:
            eTaPrecision: Returns a namedtuple containing the
              - precision
              - detection score
              - portion score
              - number of correct predictions

            All values are zero if there are no labels or no predictions.
        """
        if len(self.labels) == 0 or len(self.preds) == 0:
            return eTaPrecision(0.0, 0.0, 0.0, 0)

        # Fraction of each prediction covered by the remaining (pruned)
        # anomalies.
        preds_portion = (
            self._overlap_score_mat.sum(axis=0) / self._preds_max_score
        )
        # Normalized weights (sum to 1).
        weight = self._pred_weights / self._pred_weights.sum()

        detection_scores = np.where(preds_portion >= self.theta_p, 1.0, 0.0)
        detection_score = (detection_scores * weight).sum()

        portion_scores = np.clip(preds_portion, a_min=0.0, a_max=1.0)
        portion_score = (portion_scores * weight).sum()

        precision = (detection_scores + detection_scores * portion_scores) / 2
        precision = (precision * weight).sum()
        correct_predictions = detection_scores.sum()

        return eTaPrecision(
            precision,
            detection_score,
            portion_score,
            correct_predictions,
        )

    def f1(self, precision: float, recall: float) -> float:
        """Calculates the F1 score from `precision` and `recall` as the
        harmonic mean:

        .. math::

            \\mathrm{F1} \\triangleq 2 \\frac{
                \\mathrm{PR} \\cdot \\mathrm{RC}
            }{
                \\mathrm{PR} + \\mathrm{RC}
            }

        Args:
            precision (float): Precision score.
            recall (float): Recall score.

        Returns:
            float: Returns the F1 score (0 if both scores are 0).
        """
        # eps keeps the denominator non-zero if both scores are 0.
        return (2 * precision * recall) / (precision + recall + eps)

    def scores(self) -> dict[str, float | int]:
        """Calculates the enhanced time-aware (eTa) scores. All keys in the
        result mapping are prefixed with ``eta/``.

        Returns:
            dict[str, float | int]: Returns a mapping containing:
              - ``eta/recall``: recall score
              - ``eta/recall_detection``: detection score of the recall
              - ``eta/recall_portion``: portion score of the recall
              - ``eta/detected_anomalies``: number of detected anomalies
              - ``eta/precision``: precision score
              - ``eta/precision_detection``: detection score of the precision
              - ``eta/precision_portion``: portion score of the precision
              - ``eta/correct_predictions``: number of correct predictions
              - ``eta/f1``: f1 score (harmonic mean of precision and recall)
              - ``eta/TP``: number of true positives (points counted)
              - ``eta/FP``: number of false positives (points counted)
              - ``eta/FN``: number of false negatives (points counted)
              - ``eta/wrong_predictions``: number of wrong predictions
              - ``eta/missed_anomalies``: number of undetected anomalies
              - ``eta/anomalies``: total number of anomalies
              - ``eta/segments``: fraction of detected anomalies
        """
        eTaR = self.recall()
        eTaP = self.precision()
        eTaF1 = self.f1(eTaP.value, eTaR.value)

        # Point counts based on the pruned overlap matrix.
        TP_point = self._overlap_score_mat.sum()
        FP_point = self._preds_max_score.sum() - TP_point
        FN_point = self._labels_max_score.sum() - TP_point

        # Predictions (columns) which still overlap after pruning.
        correct_predictions = np.count_nonzero(
            self._overlap_score_mat.sum(axis=0)
        )
        wrong_predictions = len(self.preds) - correct_predictions

        # Anomalies (rows) which still overlap after pruning. The number of
        # missed anomalies must be derived from this count and not from the
        # number of correct predictions, as predictions and anomalies are
        # not necessarily matched one-to-one.
        anomalies = len(self.labels)
        detected_anomalies = np.count_nonzero(
            self._overlap_score_mat.sum(axis=1)
        )
        missed_anomalies = anomalies - detected_anomalies
        segments = detected_anomalies / (anomalies + eps)

        return {
            **eTaR._asdict(),
            **eTaP._asdict(),
            'eta/f1': eTaF1,
            'eta/TP': TP_point,
            'eta/FP': FP_point,
            'eta/FN': FN_point,
            'eta/wrong_predictions': wrong_predictions,
            'eta/missed_anomalies': missed_anomalies,
            'eta/anomalies': anomalies,
            'eta/segments': segments,
        }

    def point_precision(self) -> float:
        """Calculates the point-wise precision score. Precision answers the
        question of "How many predictions (for anomalies) concern real
        anomalies?". In a point-wise manner, we categorize each prediction
        into true positives (TP), false positives (FP), true negatives (TN),
        and false negatives (FN). Then we can calculate the precision as
        ``TP / (TP + FP)``.

        Returns:
            float: Returns the point-wise precision.
        """
        # TP: overlapping points (unpruned); TP + FP: all predicted points.
        return self._overlap_score_mat_org.sum() / (
            self._preds_max_score.sum() + eps
        )

    def point_recall(self) -> float:
        """Calculates the point-wise recall score. Recall answers the question
        of "How much of anomalies is detected?". In a point-wise manner, we
        categorize each prediction into true positives (TP), false positives
        (FP), true negatives (TN), and false negatives (FN). Then we can
        calculate the recall as ``TP / (TP + FN)``.

        Returns:
            float: Returns the point-wise recall.
        """
        # TP: overlapping points (unpruned); TP + FN: all anomalous points.
        return self._overlap_score_mat_org.sum() / (
            self._labels_max_score.sum() + eps
        )

    def point_scores(self) -> dict[str, float | int]:
        """Calculates the point-wise (traditional) scores. Each data point can
        be categorized as either true positive (TP), false positive (FP),
        true negative (TN) or false negative (FN). Then, we can calculate the
        metrics as follows:

        .. math::
            :nowrap:

            \\begin{align*}
            \\mathrm{RC}^{\\mathrm{P}}(\\tilde{\\mathbf{y}}, \\mathbf{y}) &
            \\triangleq \\frac{\\mathrm{TP}}{\\mathrm{TP} + \\mathrm{FN}} \\\\
            \\mathrm{PR}^{\\mathrm{P}}(\\tilde{\\mathbf{y}}, \\mathbf{y}) &
            \\triangleq \\frac{\\mathrm{TP}}{\\mathrm{TP} + \\mathrm{FP}} \\\\
            \\mathrm{F1}^{\\mathrm{P}}(\\tilde{\\mathbf{y}}, \\mathbf{y}) &
            \\triangleq 2 \\frac{\\mathrm{PR}^{\\mathrm{P}} \\cdot
            \\mathrm{RC}^{\\mathrm{P}}}{\\mathrm{PR}^{\\mathrm{P}} +
            \\mathrm{RC}^{\\mathrm{P}}} = \\frac{2 \\mathrm{TP}}{2\\mathrm{TP}
            + \\mathrm{FP} + \\mathrm{FN}}\\\\
            \\mathrm{SEG}^{\\mathrm{P}}(\\tilde{\\mathbf{y}}, \\mathbf{y}) &
            \\triangleq
            \\sum_{\\mathbf{A}_i \\in \\mathcal{A}} \\mathbb{1}(
            \\sum_{\\mathbf{P}_j \\in \\mathcal{P}} |\\mathbf{P}_j \\cap
            \\mathbf{A}_i| > 0)
            \\end{align*}

        All keys in the return mapping are prefixed with ``point/``.

        Returns:
            dict[str, float | int]: Returns a mapping containing:
              - ``point/recall``: point-wise recall (TP / (TP + FN))
              - ``point/precision``: point-wise precision (TP / (TP + FP))
              - ``point/f1``: point-wise f1 score
              - ``point/TP``: number of true positives, correctly classified
                as 1
              - ``point/FP``: number of false positive, incorrectly
                classified as 1
              - ``point/FN``: number of false negatives, incorrectly
                classified as 0
              - ``point/anomalies``: total number of anomalies
              - ``point/detected_anomalies``: number of detected anomalies (at
                least one point detected)
              - ``point/segments``: fraction of detected anomalies
        """
        recall = self.point_recall()
        precision = self.point_precision()
        f1 = self.f1(precision, recall)

        # Point counts based on the original (unpruned) overlap matrix.
        TP = self._overlap_score_mat_org.sum()
        FP = self._preds_max_score.sum() - TP
        FN = self._labels_max_score.sum() - TP

        anomalies = len(self.labels)
        # An anomaly counts as detected if at least one point overlaps.
        detected_anomalies = np.where(
            self._overlap_score_mat_org.sum(axis=1) > 0, 1.0, 0
        ).sum()
        segments = detected_anomalies / (anomalies + eps)

        return {
            'point/recall': recall,
            'point/precision': precision,
            'point/f1': f1,
            'point/TP': TP,
            'point/FP': FP,
            'point/FN': FN,
            'point/anomalies': anomalies,
            'point/detected_anomalies': detected_anomalies,
            'point/segments': segments,
        }

    def point_adjust_precision(self) -> float:
        """Calculates the `point-adjusted <https://arxiv.org/abs/1802.03903>`_
        precision. Precision answers the question of how accurate our
        predictions are. The point-adjusted precision is calculated in the
        same way as the point-wise precision (TP / (TP + FP)). However, the
        predictions are adjusted before calculation using the ground-truth.
        All predictions for an anomaly are set to 1 if at least one correct
        prediction for that anomaly segment exists.

        Returns:
            float: Returns the point-adjust precision.
        """
        # Every anomaly with at least one overlapping point contributes its
        # full length to the true positives.
        TPs = (
            np.clip(self._overlap_score_mat_org.sum(axis=1), 0, 1)
            * self._labels_max_score
        ).sum()
        # Predicted points which do not overlap with any anomaly.
        FPs = self._preds_max_score.sum() - self._overlap_score_mat_org.sum()
        return TPs / (TPs + FPs + eps)

    def point_adjust_recall(self) -> float:
        """Calculates the `point-adjusted <https://arxiv.org/abs/1802.03903>`_
        recall. Recall answers the question of how much of anomaly is detected.
        The point-adjusted recall is calculated in the same way as the
        point-wise recall (TP / (TP + FN)). However, the predictions are
        adjusted before calculation using the ground-truth. All predictions
        for an anomaly are set to 1 if at least one correct
        prediction for that anomaly segment exists.

        Returns:
            float: Returns the point-adjusted recall.
        """
        # Every anomaly with at least one overlapping point contributes its
        # full length to the true positives.
        TPs = (
            np.clip(self._overlap_score_mat_org.sum(axis=1), 0, 1)
            * self._labels_max_score
        ).sum()
        return TPs / (self._labels_max_score.sum() + eps)

    def point_adjust_scores(self) -> dict[str, float]:
        """Calculates the `point-adjusted <https://arxiv.org/abs/1802.03903>`_
        recall, precision, and f1. The metrics are calculated in the same way
        as the point-wise scores but the predictions are adjusted before
        calculation using the ground-truth. All predictions for an anomaly are
        set to 1 if at least one correct prediction for that anomaly segment
        exists.

        Returns:
            dict[str, float]: Returns the point-adjusted scores:
              - ``point_adjust/recall``: point-adjusted recall
              - ``point_adjust/precision``: point-adjusted precision
              - ``point_adjust/f1``: point-adjusted f1
        """
        precision = self.point_adjust_precision()
        recall = self.point_adjust_recall()
        f1 = self.f1(precision, recall)

        return {
            'point_adjust/recall': recall,
            'point_adjust/precision': precision,
            'point_adjust/f1': f1,
        }

    @classmethod
    def from_preds(
        cls,
        y_hat: npt.ArrayLike,
        y: npt.ArrayLike,
        *,
        theta_p: float = 0.5,
        theta_r: float = 0.1,
    ) -> 'eTaMetrics':
        """Creates an instance from point-wise predictions and labels.

        Args:
            y_hat (npt.ArrayLike): Predictions (point-wise).
            y (npt.ArrayLike): Labels (point-wise).
            theta_p (float, optional): Precision threshold. Only those
              predictions who overlap with at least `theta_p` with a detected
              anomaly are counted as correct. Defaults to 0.5.
            theta_r (float, optional): Recall threshold. Only those anomalies
              which overlap at least `theta_r` with a correct prediction are
              counted as detected. Defaults to 0.1.

        Returns:
            eTaMetrics: Returns an instance (of the class the method is
            called on).
        """
        # Drop singleton dimensions, then require 1d inputs of equal shape.
        y, y_hat = np.squeeze(y), np.squeeze(y_hat)
        check_ndim(y, y_hat, ndim=1)
        check_shape(y, y_hat)

        # Convert the point-wise arrays to ranges.
        preds = mlnext.find_anomalies(y_hat)
        labels = mlnext.find_anomalies(y)

        # Use `cls` so that subclasses get an instance of their own type.
        return cls(preds, labels, theta_p=theta_p, theta_r=theta_r)
[docs]
def evaluate_from_preds(
    y_hat: npt.ArrayLike,
    y: npt.ArrayLike,
    *,
    theta_p: float = 0.5,
    theta_r: float = 0.1,
) -> dict[str, float | int]:
    """Calculates the `enhanced time-aware (eTa)
    <https://dl.acm.org/doi/10.1145/3477314.3507024>`_, point-wise, and
    `point-adjusted <https://arxiv.org/abs/1802.03903>`_ performance
    metrics (and some other miscellaneous metrics). To see how these
    metrics are calculated, check out the respective methods in
    :class:`.eTaMetrics`.

    Args:
        y_hat (npt.ArrayLike): Predictions (point-wise).
        y (npt.ArrayLike): Labels (point-wise).
        theta_p (float, optional): Precision threshold. Only those
          predictions who overlap with at least `theta_p` with a detected
          anomaly are counted as correct. Defaults to 0.5.
        theta_r (float, optional): Recall threshold. Only those anomalies
          which overlap at least `theta_r` with a correct prediction are
          counted as detected. Defaults to 0.1.

    Returns:
        dict[str, float | int]: Returns a mapping with all metrics:
          - ``eta/recall``: eTa recall score
          - ``eta/recall_detection``: detection score of the recall
          - ``eta/recall_portion``: portion score of the recall
          - ``eta/detected_anomalies``: number of detected anomalies
          - ``eta/precision``: eTa precision score
          - ``eta/precision_detection``: detection score of the precision
          - ``eta/precision_portion``: portion score of the precision
          - ``eta/correct_predictions``: number of correct predictions
          - ``eta/f1``: f1 score (harmonic mean of precision and recall)
          - ``eta/TP``: number of true positives (points counted)
          - ``eta/FP``: number of false positives (points counted)
          - ``eta/FN``: number of false negatives (points counted)
          - ``eta/wrong_predictions``: number of wrong predictions
          - ``eta/missed_anomalies``: number of undetected anomalies
          - ``eta/anomalies``: total number of anomalies
          - ``eta/segments``: percentage of detected anomalies
          - ``point/recall``: point-wise recall (TP / (TP + FN))
          - ``point/precision``: point-wise precision (TP / (TP + FP))
          - ``point/f1``: point-wise f1 score
          - ``point/TP``: number of true positives, correctly classified as 1
          - ``point/FP``: number of false positive, incorrectly classified
            as 1
          - ``point/FN``: number of false negatives, incorrectly classified
            as 0
          - ``point/anomalies``: total number of anomalies
          - ``point/detected_anomalies``: number of detected anomalies (at
            least one point detected)
          - ``point/segments``: percentage of detected anomalies
          - ``point_adjust/recall``: point-adjusted recall
          - ``point_adjust/precision``: point-adjusted precision
          - ``point_adjust/f1``: point-adjusted f1

    Example:
        >>> import faster_etapr
        >>> faster_etapr.evaluate_from_preds(
        ...     y_hat=[0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0],
        ...     y=    [0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1],
        ...     theta_p=0.5,
        ...     theta_r=0.1,
        ... )
        {
            'eta/recall': 0.3875,
            'eta/recall_detection': 0.5,
            'eta/recall_portion': 0.275,
            'eta/detected_anomalies': 2.0,
            'eta/precision': 0.46476766302377037,
            'eta/precision_detection': 0.46476766302377037,
            'eta/precision_portion': 0.46476766302377037,
            'eta/correct_predictions': 2.0,
            'eta/f1': 0.4226312395393011,
            'eta/TP': 4,
            'eta/FP': 5,
            'eta/FN': 7,
            'eta/wrong_predictions': 2,
            'eta/missed_anomalies': 2,
            'eta/anomalies': 4,
            'eta/segments': 0.499999999999875,
            'point/recall': 0.45454545454541323,
            'point/precision': 0.5555555555554939,
            'point/f1': 0.49999999999945494,
            'point/TP': 5,
            'point/FP': 4,
            'point/FN': 6,
            'point/anomalies': 4,
            'point/detected_anomalies': 3.0,
            'point/segments': 0.75,
            'point_adjust/recall': 0.9090909090909091,
            'point_adjust/precision': 0.7142857142857143,
            'point_adjust/f1': 0.7999999999995071
        }
    """
    # Convert the point-wise inputs to ranges and set up the metrics.
    eta = eTaMetrics.from_preds(
        y_hat=y_hat,
        y=y,
        theta_p=theta_p,
        theta_r=theta_r,
    )

    # Merge the three metric families into a single mapping.
    return {
        **eta.scores(),
        **eta.point_scores(),
        **eta.point_adjust_scores(),
    }
[docs]
def evaluate_from_ranges(
    preds: list[tuple[int, int]],
    labels: list[tuple[int, int]],
    *,
    theta_p: float = 0.5,
    theta_r: float = 0.1,
) -> dict[str, float | int]:
    """Calculates the `enhanced time-aware (eTa)
    <https://dl.acm.org/doi/10.1145/3477314.3507024>`_, point-wise, and
    `point-adjusted <https://arxiv.org/abs/1802.03903>`_ performance
    metrics (and some other miscellaneous metrics). To see how these
    metrics are calculated, check out the respective methods in
    :class:`.eTaMetrics`.

    Args:
        preds (list[tuple[int, int]]): Predictions as list of ranges.
        labels (list[tuple[int, int]]): Labels as list of ranges.
        theta_p (float, optional): Precision threshold. Only those
          predictions who overlap with at least `theta_p` with a detected
          anomaly are counted as correct. Defaults to 0.5.
        theta_r (float, optional): Recall threshold. Only those anomalies
          which overlap at least `theta_r` with a correct prediction are
          counted as detected. Defaults to 0.1.

    Returns:
        dict[str, float | int]: Returns a mapping with all metrics:
          - ``eta/recall``: eTa recall score
          - ``eta/recall_detection``: detection score of the recall
          - ``eta/recall_portion``: portion score of the recall
          - ``eta/detected_anomalies``: number of detected anomalies
          - ``eta/precision``: eTa precision score
          - ``eta/precision_detection``: detection score of the precision
          - ``eta/precision_portion``: portion score of the precision
          - ``eta/correct_predictions``: number of correct predictions
          - ``eta/f1``: f1 score (harmonic mean of precision and recall)
          - ``eta/TP``: number of true positives (points counted)
          - ``eta/FP``: number of false positives (points counted)
          - ``eta/FN``: number of false negatives (points counted)
          - ``eta/wrong_predictions``: number of wrong predictions
          - ``eta/missed_anomalies``: number of undetected anomalies
          - ``eta/anomalies``: total number of anomalies
          - ``eta/segments``: percentage of detected anomalies
          - ``point/recall``: point-wise recall (TP / (TP + FN))
          - ``point/precision``: point-wise precision (TP / (TP + FP))
          - ``point/f1``: point-wise f1 score
          - ``point/TP``: number of true positives, correctly classified as 1
          - ``point/FP``: number of false positive, incorrectly classified
            as 1
          - ``point/FN``: number of false negatives, incorrectly classified
            as 0
          - ``point/anomalies``: total number of anomalies
          - ``point/detected_anomalies``: number of detected anomalies (at
            least one point detected)
          - ``point/segments``: percentage of detected anomalies
          - ``point_adjust/recall``: point-adjusted recall
          - ``point_adjust/precision``: point-adjusted precision
          - ``point_adjust/f1``: point-adjusted f1

    Example:
        >>> import faster_etapr
        >>> faster_etapr.evaluate_from_ranges(
        ...     preds= [(1, 1), (3, 4), (7, 9), (11, 13)],
        ...     labels=[(1, 2), (5, 7), (10, 14), (16, 16)],
        ...     theta_p=0.5,
        ...     theta_r=0.1,
        ... )
        {
            'eta/recall': 0.3875,
            'eta/recall_detection': 0.5,
            'eta/recall_portion': 0.275,
            'eta/detected_anomalies': 2.0,
            'eta/precision': 0.46476766302377037,
            'eta/precision_detection': 0.46476766302377037,
            'eta/precision_portion': 0.46476766302377037,
            'eta/correct_predictions': 2.0,
            'eta/f1': 0.4226312395393011,
            'eta/TP': 4,
            'eta/FP': 5,
            'eta/FN': 7,
            'eta/wrong_predictions': 2,
            'eta/missed_anomalies': 2,
            'eta/anomalies': 4,
            'eta/segments': 0.499999999999875,
            'point/recall': 0.45454545454541323,
            'point/precision': 0.5555555555554939,
            'point/f1': 0.49999999999945494,
            'point/TP': 5,
            'point/FP': 4,
            'point/FN': 6,
            'point/anomalies': 4,
            'point/detected_anomalies': 3.0,
            'point/segments': 0.75,
            'point_adjust/recall': 0.9090909090909091,
            'point_adjust/precision': 0.7142857142857143,
            'point_adjust/f1': 0.7999999999995071
        }
    """
    # The inputs are already ranges, so the metrics are set up directly.
    eta = eTaMetrics(
        preds=preds,
        labels=labels,
        theta_p=theta_p,
        theta_r=theta_r,
    )

    # Merge the three metric families into a single mapping.
    return {
        **eta.scores(),
        **eta.point_scores(),
        **eta.point_adjust_scores(),
    }