Source code for faster_etapr.etapr

import einops as eo
import mlnext
import numpy as np
import numpy.typing as npt
from mlnext.utils import check_ndim
from mlnext.utils import check_shape

from .types import eTaPrecision
from .types import eTaRecall
from .utils import check_floats

# Small constant added to denominators throughout this module (F1,
# point-wise / point-adjusted scores, segment ratios) to avoid dividing by
# zero when there are no predictions or no labels.
eps = 1e-16

# Public API of this module.
__all__ = [
    'eTaMetrics',
    'evaluate_from_preds',
    'evaluate_from_ranges',
]



[docs]
class eTaMetrics:
    """Defines the `enhanced time-aware (eTa)
    <https://dl.acm.org/doi/10.1145/3477314.3507024>`_ precision, recall, and
    f1. Moreover, we can also compute the point-wise and
    `point-adjusted <https://arxiv.org/abs/1802.03903>`_ versions.

    For a motivation to use eTaPR check out the documentation.

    Attributes:
        preds (list[tuple[int, int]]): Predictions as a list of ranges.
        labels (list[tuple[int, int]]): Labels as a list of ranges.
        theta_p (float, optional): Precision threshold. Only those
              predictions who overlap with at least `theta_p` with a detected
              anomaly are counted as correct. Defaults to 0.5.
        theta_r (float, optional): Recall threshold. Only those anomalies
            which overlap at least `theta_r` with an correct prediction are
            counted as detected.  Defaults to 0.1.
    """

    def __init__(
        self,
        preds: list[tuple[int, int]],
        labels: list[tuple[int, int]],
        *,
        theta_p: float = 0.5,
        theta_r: float = 0.1,
    ):
        """Stores the ranges, builds the overlap matrix, and prunes it.

        Args:
            preds (list[tuple[int, int]]): Predictions as a list of
              ``(start, end)`` ranges; ``end`` is counted as part of the
              range (its length is ``end + 1 - start``).
            labels (list[tuple[int, int]]): Labels as a list of
              ``(start, end)`` ranges, same convention as ``preds``.
            theta_p (float, optional): Precision threshold, checked with
              ``check_floats`` against the bounds 0 and 1. Defaults to 0.5.
            theta_r (float, optional): Recall threshold, checked with
              ``check_floats`` against the bounds 0 and 1. Defaults to 0.1.
        """
        check_floats(
            ('theta_p', theta_p),
            ('theta_r', theta_r),
            min=0,
            max=1,
        )
        self.theta_p, self.theta_r = theta_p, theta_r

        self.preds = np.array(preds)
        self.labels = np.array(labels)

        # Number of points covered by each prediction. Without predictions a
        # single zero serves as placeholder so that later sums still work.
        if len(preds) > 0:
            pred_lengths = self.preds[:, 1] + 1 - self.preds[:, 0]
        else:
            pred_lengths = np.zeros((1,))

        # Predictions are weighted by the square root of their length.
        self._pred_weights = np.sqrt(pred_lengths)

        # Keep the unpruned matrix for the point-wise / point-adjusted
        # scores; the copy is the one that gets pruned for the eTa scores.
        self._overlap_score_mat_org = self._calculate_overlap_score_mat()
        self._overlap_score_mat = self._overlap_score_mat_org.copy()

        # Number of points covered by each label (same placeholder rule).
        if len(labels) > 0:
            label_lengths = self.labels[:, 1] + 1 - self.labels[:, 0]
        else:
            label_lengths = np.zeros((1,))

        self._labels_max_score = label_lengths
        self._preds_max_score = pred_lengths

        self._pruning()

    def _calculate_overlap_score_mat(self):
        """Calculation of the overlap matrix (n_anomalies, n_predictions).
        A row represents the overlap of predictions with one anomaly.
        If we add the values col-wise (sum(axis=1)), then we get overlap of
        an anomaly (each row) with all predictions. If we add the values
        row-wise (sum(axis=0)), then we get the overlap of a prediction
        (each col) with all anomalies.
        """

        len_l, len_p = len(self.labels), len(self.preds)

        if len_l == 0 or len_p == 0:
            return np.zeros((1, 1))

        labels_matrix = eo.repeat(self.labels, 'l r -> l p r', p=len_p)
        preds_matrix = eo.repeat(self.preds, 'p r -> l p r', l=len_l)

        detected_starts = np.maximum(
            labels_matrix[..., 0],
            preds_matrix[..., 0],
        )
        detected_ends = np.minimum(
            labels_matrix[..., 1],
            preds_matrix[..., 1],
        )

        overlap_score_mat = np.clip(
            detected_ends + 1 - detected_starts,
            a_min=0,
            a_max=None,
        )

        return overlap_score_mat

    def _pruning(self):
        """Pruning of the overlap matrix. In this process, we eliminate
        rows / cols from the matrix such that only predictions/anomalies remain
        which belong to the set of correct predictions and detected anomalies.
        """

        if len(self.labels) == 0 or len(self.preds) == 0:
            return

        while True:
            labels_portion = self._overlap_score_mat.sum(axis=1) / (
                self._labels_max_score
            )
            label_ids = list(
                set(np.where(labels_portion < self.theta_r)[0])
                - set(np.where(labels_portion == 0.0)[0])
            )
            if label_ids:
                self._overlap_score_mat[label_ids] = np.zeros(
                    (len(label_ids), self._overlap_score_mat.shape[1])
                )

            preds_portion = self._overlap_score_mat.sum(axis=0) / (
                self._preds_max_score
            )

            pred_ids = list(
                set(np.where(preds_portion < self.theta_p)[0])
                - set(np.where(preds_portion == 0.0)[0])
            )
            if pred_ids:
                self._overlap_score_mat[..., pred_ids] = np.zeros(
                    (self._overlap_score_mat.shape[0], len(pred_ids))
                )

            if len(label_ids) == 0 and len(pred_ids) == 0:
                break


[docs]
    def recall(self) -> eTaRecall:
        """Computes the `enhanced time-aware recall (eTaR)
        <https://dl.acm.org/doi/10.1145/3477314.3507024>`_. Recall answers
        the question "How much of the anomalies is detected?".

        The recall :math:`\\mathrm{RC}^\\mathrm{eTa}` combines the detection
        score :math:`s^\\mathrm{RD}` and the portion score
        :math:`s^\\mathrm{RP}`:

        .. math::

            \\mathrm{RC}^{\\mathrm{eTa}}(\\tilde{\\mathbf{y}}, \\mathbf{y})
            \\triangleq
            \\frac{1}{|\\mathcal{A}|}
            \\sum_{A_i \\in \\mathcal{A}}
            \\frac{
                s^{\\mathrm{RD}}(A_i) + s^{\\mathrm{RD}}(A_i)
                \\cdot s^{\\mathrm{RP}}(A_i)
            }{2}

        Here, :math:`\\tilde{\\mathbf{y}}` are the predictions,
        :math:`\\mathbf{y}` the labels, :math:`A_i` an anomaly, and
        :math:`\\mathcal{A}` the set of all anomalies. The score is averaged
        over all anomaly segments :math:`\\mathcal{A}`, but only anomalies
        from the set of detected anomalies :math:`\\mathcal{A}^D` contribute
        a non-zero term. Thus, the recall measures how well we can detect
        anomaly segments.

        The detection score :math:`s^\\mathrm{RD}` of an anomaly :math:`A_i`
        is defined as:

        .. math::

            s^{\\mathrm{RD}}(A_i) = \\begin{cases}
            1, & \\text{if $A_i \\in \\mathcal{A}^D$}\\\\
            0, & \\text{otherwise},
            \\end{cases}

        where an anomaly :math:`A_i` belongs to :math:`\\mathcal{A}^D` if the
        portion overlapped by correct predictions
        :math:`P_j \\in \\mathcal{P}^C` is at least :math:`\\theta_r`.

        The portion score :math:`s^\\mathrm{RP}` is the proportion of an
        anomaly :math:`A_i` that intersects with correct predictions
        :math:`P_j \\in \\mathcal{P}^C`:

        .. math::

            s^{\\mathrm{RP}}(\\mathbf{A}_i) =
            \\frac{
                \\sum_{\\mathbf{P}_j \\in \\mathcal{P}^C}
                |\\mathbf{A}_i \\cap \\mathbf{P}_j|
            }{
                |\\mathbf{A}_i|
            }.

        Returns:
            eTaRecall: Returns a namedtuple containing (in this order) the
              - recall
              - detection score (fraction of detected anomalies)
              - portion score (mean covered portion over all anomalies)
              - number of detected anomalies

            All entries are zero if there are no labels or no predictions.
        """
        if not (len(self.labels) and len(self.preds)):
            return eTaRecall(0.0, 0.0, 0.0, 0)

        # Covered portion of every anomaly, based on the pruned matrix.
        overlap_per_anomaly = self._overlap_score_mat.sum(axis=1)
        covered = overlap_per_anomaly / self._labels_max_score

        detected = (covered >= self.theta_r).astype(np.float64)
        portions = np.clip(covered, a_min=0.0, a_max=1.0)

        # Per-anomaly score; undetected anomalies contribute zero.
        per_anomaly = (detected + detected * portions) / 2

        return eTaRecall(
            per_anomaly.mean(),
            detected.sum() / len(detected),
            portions.mean(),
            detected.sum(),
        )



[docs]
    def precision(self) -> eTaPrecision:
        """Calculates the `enhanced time-aware precision (eTaP)
        <https://dl.acm.org/doi/10.1145/3477314.3507024>`_. Precision
        answers the question of "How many predictions (for anomalies) concern
        real anomalies?".

        The precision :math:`\\mathrm{PR}^\\mathrm{eTa}` is calculated as a
        combination of the detection score :math:`s^\\mathrm{PD}` and the
        portion score :math:`s^\\mathrm{PP}` as follows:

        .. math::

            \\mathrm{PR}^{\\mathrm{eTa}}(\\tilde{\\mathbf{y}}, \\mathbf{y})
            \\triangleq
            \\sum_{P_j \\in \\mathcal{P}} \\left(
                \\frac{s^{\\mathrm{PD}}(P_j) +
                s^{\\mathrm{PD}}(P_j) \\cdot
                s^{\\mathrm{PP}}(P_j)}{2}
            \\right) \\cdot w_{p},

        where :math:`\\tilde{\\mathbf{y}}` are the predictions,
        :math:`\\mathbf{y}` the labels, :math:`P_j` a prediction,
        :math:`\\mathcal{P}` the set of all predictions and :math:`w_{p}` a
        weight for the prediction,

        .. math::

            w_p = \\frac{
                \\sqrt{|P_j|}
            }{
                \\sum_{P_i \\in \\mathcal{P}} \\sqrt{|P_i|}
            }

        Normalizing by the sum of the square roots of the lengths of all
        predictions
        :math:`\\sum_{\\mathbf{Q} \\in \\mathcal{P}} \\sqrt{|\\mathbf{Q}|}`
        restricts the precision score to the range [0, 1]. Furthermore, it
        penalizes the detection method for lengthy and frequent incorrect
        predictions.

        The detection score :math:`s^\\mathrm{PD}` of a prediction :math:`P_j`
        is defined as:

        .. math::

            s^{\\mathrm{PD}}(P_j) = \\begin{cases}
            1, & \\text{if $P_j \\in \\mathcal{P}^C$} \\\\
            0, & \\text{otherwise},
            \\end{cases}

        where :math:`\\mathcal{P}^C` is the set of correct predictions. A
        prediction :math:`P_j` belongs to this set, if at least
        :math:`\\theta_p` of the prediction :math:`P_j` overlaps with a
        detected anomaly :math:`A_i \\in \\mathcal{A}^D`.

        Thus, a prediction :math:`P_j` can only contribute if it is precise
        enough and belongs to the set of correct predictions
        :math:`\\mathcal{P}^C`.

        The portion score :math:`s^\\mathrm{PP}` is the proportion of the
        prediction that overlaps with detected anomalies :math:`A_i`:

        .. math::

            s^\\mathrm{PP}(P_j) = \\frac{
                \\sum_{A_i \\in \\mathcal{A}} | A_i \\cap P_j |
            }{
                | P_j |
            }

        Thus, the precision :math:`\\mathrm{PR}^\\mathrm{eTa}` is a measure of
        the quality of the predictions. Only relevant predictions :math:`P_j`,
        i.e., whose overlapping portions are at least :math:`\\theta_p`,
        can directly contribute to the overall score. However, incorrect
        predictions :math:`P_j \\notin \\mathcal{P}^C` can impact the score
        through the weighting scheme :math:`w_p`.

        Returns:
            eTaPrecision: Returns a namedtuple containing (in this order) the
              - precision
              - detection score (weighted share of correct predictions)
              - portion score (weighted mean of the overlapping portions)
              - number of correct predictions

            All entries are zero if there are no labels or no predictions.
        """
        if len(self.labels) == 0 or len(self.preds) == 0:
            return eTaPrecision(0.0, 0.0, 0.0, 0)

        # Portion of every prediction that overlaps with anomalies, based on
        # the pruned matrix.
        preds_portion = (
            self._overlap_score_mat.sum(axis=0) / self._preds_max_score
        )
        # Normalized sqrt-length weights; they sum up to 1.
        weight = self._pred_weights / self._pred_weights.sum()

        detection_scores = np.where(preds_portion >= self.theta_p, 1.0, 0.0)
        detection_score = (detection_scores * weight).sum()

        portion_scores = np.clip(preds_portion, a_min=0.0, a_max=1.0)
        portion_score = (portion_scores * weight).sum()

        # Per-prediction score; incorrect predictions contribute zero.
        per_prediction = (
            detection_scores + detection_scores * portion_scores
        ) / 2
        precision = (per_prediction * weight).sum()

        correct_predictions = detection_scores.sum()

        return eTaPrecision(
            precision,
            detection_score,
            portion_score,
            correct_predictions,
        )



[docs]
    def f1(self, precision: float, recall: float) -> float:
        """Combines `precision` and `recall` into the F1 score, i.e., their
        harmonic mean:

        .. math::

            \\mathrm{F1} \\triangleq 2 \\frac{
                \\mathrm{PR} \\cdot \\mathrm{RC}
            }{
                \\mathrm{PR} + \\mathrm{RC}
            }

        The module constant ``eps`` is added to the denominator so that the
        result is 0 (instead of a division by zero) if both inputs are 0.

        Args:
            precision (float): Precision score.
            recall (float): Recall score.

        Returns:
            float: Returns the F1 score.
        """
        numerator = 2 * precision * recall
        denominator = precision + recall + eps
        return numerator / denominator



[docs]
    def scores(self) -> dict[str, float | int]:
        """Calculates the enhanced time-aware (eTa) scores. All keys in the
        result mapping are prefixed with ``eta/``.

        The entries of the recall and precision namedtuples are merged into
        the result via ``_asdict()``; the remaining entries are derived from
        the pruned overlap matrix.

        Returns:
            dict[str, float | int]: Returns a mapping containing:
              - ``eta/recall``: recall score
              - ``eta/recall_detection``: detection score of the recall
              - ``eta/recall_portion``: portion score of the recall
              - ``eta/detected_anomalies``: number of detected anomalies
              - ``eta/precision``: precision score
              - ``eta/precision_detection``: detection score of the precision
              - ``eta/precision_portion``: portion score of the precision
              - ``eta/correct_predictions``: number of correct predictions
              - ``eta/f1``: f1 score (harmonic mean of precision and recall)
              - ``eta/TP``: number of true positives (points counted)
              - ``eta/FP``: number of false positives (points counted)
              - ``eta/FN``: number of false negatives (points counted)
              - ``eta/wrong_predictions``: number of wrong predictions
              - ``eta/missed_anomalies``: number of undetected anomalies
              - ``eta/anomalies``: total number of anomalies
              - ``eta/segments``: percentage of detected anomalies
        """

        eTaR = self.recall()
        eTaP = self.precision()

        eTaF1 = self.f1(eTaP.value, eTaR.value)

        # Point-wise counts on the pruned matrix.
        TP_point = self._overlap_score_mat.sum()
        FP_point = self._preds_max_score.sum() - TP_point
        FN_point = self._labels_max_score.sum() - TP_point

        # After pruning, a prediction (column) with any remaining overlap is
        # a correct prediction.
        correct_predictions = np.count_nonzero(
            self._overlap_score_mat.sum(axis=0)
        )
        wrong_predictions = len(self.preds) - correct_predictions

        # After pruning, an anomaly (row) with any remaining overlap is a
        # detected anomaly.
        anomalies = len(self.labels)
        detected_anomalies = np.count_nonzero(
            self._overlap_score_mat.sum(axis=1)
        )
        # Missed anomalies must be counted against the detected anomalies,
        # not against the correct predictions: several correct predictions
        # may fall into the same anomaly, which would otherwise produce a
        # wrong (possibly negative) count.
        missed_anomalies = anomalies - detected_anomalies
        segments = detected_anomalies / (anomalies + eps)

        return {
            **eTaR._asdict(),
            **eTaP._asdict(),
            'eta/f1': eTaF1,
            'eta/TP': TP_point,
            'eta/FP': FP_point,
            'eta/FN': FN_point,
            'eta/wrong_predictions': wrong_predictions,
            'eta/missed_anomalies': missed_anomalies,
            'eta/anomalies': anomalies,
            'eta/segments': segments,
        }



[docs]
    def point_precision(self) -> float:
        """Computes the point-wise precision. Precision answers the question
        "How many predictions (for anomalies) concern real anomalies?".
        Point-wise, every data point is a true positive (TP), false positive
        (FP), true negative (TN), or false negative (FN), and the precision
        is ``TP / (TP + FP)``.

        TP is the total of the unpruned overlap matrix and ``TP + FP`` is the
        total number of predicted points (plus ``eps`` to avoid a division by
        zero).

        Returns:
            float: Returns the point-wise precision.
        """
        true_positives = self._overlap_score_mat_org.sum()
        predicted_points = self._preds_max_score.sum()
        return true_positives / (predicted_points + eps)



[docs]
    def point_recall(self) -> float:
        """Computes the point-wise recall. Recall answers the question "How
        much of the anomalies is detected?". Point-wise, every data point is
        a true positive (TP), false positive (FP), true negative (TN), or
        false negative (FN), and the recall is ``TP / (TP + FN)``.

        TP is the total of the unpruned overlap matrix and ``TP + FN`` is the
        total number of anomalous points (plus ``eps`` to avoid a division by
        zero).

        Returns:
            float: Returns the point-wise recall.
        """
        true_positives = self._overlap_score_mat_org.sum()
        anomalous_points = self._labels_max_score.sum()
        return true_positives / (anomalous_points + eps)



[docs]
    def point_scores(self) -> dict[str, float | int]:
        """Computes the point-wise (traditional) scores. Every data point is
        either a true positive (TP), false positive (FP), true negative (TN)
        or false negative (FN), from which the metrics follow as:

        .. math::
            :nowrap:

            \\begin{align*}
            \\mathrm{RC}^{\\mathrm{P}}(\\tilde{\\mathbf{y}}, \\mathbf{y}) &
            \\triangleq \\frac{\\mathrm{TP}}{\\mathrm{TP} + \\mathrm{FN}} \\\\

            \\mathrm{PR}^{\\mathrm{P}}(\\tilde{\\mathbf{y}}, \\mathbf{y}) &
            \\triangleq \\frac{\\mathrm{TP}}{\\mathrm{TP} + \\mathrm{FP}} \\\\

            \\mathrm{F1}^{\\mathrm{P}}(\\tilde{\\mathbf{y}}, \\mathbf{y}) &
            \\triangleq 2 \\frac{\\mathrm{PR}^{\\mathrm{P}} \\cdot
            \\mathrm{RC}^{\\mathrm{P}}}{\\mathrm{PR}^{\\mathrm{P}} +
            \\mathrm{RC}^{\\mathrm{P}}} = \\frac{2 \\mathrm{TP}}{2\\mathrm{TP}
            + \\mathrm{FP} + \\mathrm{FN}}\\\\

            \\mathrm{SEG}^{\\mathrm{P}}(\\tilde{\\mathbf{y}}, \\mathbf{y}) &
            \\triangleq
            \\sum_{\\mathbf{A}_i \\in \\mathcal{A}} \\mathbb{1}(
            \\sum_{\\mathbf{P}_j \\in \\mathcal{P}} |\\mathbf{P}_j \\cap
            \\mathbf{A}_i| > 0)
            \\end{align*}

        All values are based on the unpruned overlap matrix. All keys in the
        return mapping are prefixed with ``point/``.

        Returns:
            dict[str, float | int]: Returns a mapping containing:
              - ``point/recall``: point-wise recall (TP / (TP + FN))
              - ``point/precision``: point-wise precision (TP / (TP + FP))
              - ``point/f1``: point-wise f1 score
              - ``point/TP``: number of true positives, correctly classified
                as 1
              - ``point/FP``: number of false positive, incorrectly
                classified as 1
              - ``point/FN``: number of false negatives, incorrectly
                classified as 0
              - ``point/anomalies``: total number of anomalies
              - ``point/detected_anomalies``: number of detected anomalies (at
                least one point detected)
              - ``point/segments``: percentage of detected anomalies
        """
        overlap = self._overlap_score_mat_org

        result: dict[str, float | int] = {}
        result['point/recall'] = self.point_recall()
        result['point/precision'] = self.point_precision()
        result['point/f1'] = self.f1(
            result['point/precision'],
            result['point/recall'],
        )

        true_positives = overlap.sum()
        result['point/TP'] = true_positives
        result['point/FP'] = self._preds_max_score.sum() - true_positives
        result['point/FN'] = self._labels_max_score.sum() - true_positives

        # An anomaly counts as detected as soon as one of its points is hit.
        n_anomalies = len(self.labels)
        n_detected = (overlap.sum(axis=1) > 0).astype(np.float64).sum()

        result['point/anomalies'] = n_anomalies
        result['point/detected_anomalies'] = n_detected
        result['point/segments'] = n_detected / (n_anomalies + eps)

        return result



[docs]
    def point_adjust_precision(self) -> float:
        """Computes the `point-adjusted <https://arxiv.org/abs/1802.03903>`_
        precision. Precision answers the question of how accurate our
        predictions are. The formula is the same as for the point-wise
        precision (TP / (TP + FP)), but the predictions are first adjusted
        using the ground-truth: all points of an anomaly segment are treated
        as predicted if at least one point of that segment was predicted.

        Returns:
            float: Returns the point-adjusted precision.
        """
        overlap = self._overlap_score_mat_org

        # Clipping the per-anomaly overlap to [0, 1] marks each anomaly as
        # hit or not; a hit anomaly contributes all of its points as TPs.
        hit = np.clip(overlap.sum(axis=1), 0, 1)
        true_positives = (hit * self._labels_max_score).sum()

        # Predicted points that do not overlap with any anomaly.
        false_positives = self._preds_max_score.sum() - overlap.sum()

        return true_positives / (true_positives + false_positives + eps)



[docs]
    def point_adjust_recall(self) -> float:
        """Computes the `point-adjusted <https://arxiv.org/abs/1802.03903>`_
        recall. Recall answers the question of how much of the anomalies is
        detected. The formula is the same as for the point-wise recall
        (TP / (TP + FN)), but the predictions are first adjusted using the
        ground-truth: all points of an anomaly segment are treated as
        predicted if at least one point of that segment was predicted.

        Returns:
            float: Returns the point-adjusted recall.
        """
        # Clipping the per-anomaly overlap to [0, 1] marks each anomaly as
        # hit or not; a hit anomaly contributes all of its points as TPs.
        hit = np.clip(self._overlap_score_mat_org.sum(axis=1), 0, 1)
        true_positives = (hit * self._labels_max_score).sum()

        anomalous_points = self._labels_max_score.sum()
        return true_positives / (anomalous_points + eps)



[docs]
    def point_adjust_scores(self) -> dict[str, float]:
        """Computes the `point-adjusted <https://arxiv.org/abs/1802.03903>`_
        recall, precision, and f1. The formulas match the point-wise scores,
        but the predictions are first adjusted using the ground-truth: all
        points of an anomaly segment are treated as predicted if at least one
        point of that segment was predicted.

        Returns:
            dict[str, float]: Returns the point-adjusted scores:
              - ``point_adjust/recall``: point-adjusted recall
              - ``point_adjust/precision``: point-adjusted precision
              - ``point_adjust/f1``: point-adjusted f1
        """
        adjusted_precision = self.point_adjust_precision()
        adjusted_recall = self.point_adjust_recall()

        return {
            'point_adjust/recall': adjusted_recall,
            'point_adjust/precision': adjusted_precision,
            'point_adjust/f1': self.f1(adjusted_precision, adjusted_recall),
        }



[docs]
    @classmethod
    def from_preds(
        cls,
        y_hat: npt.ArrayLike,
        y: npt.ArrayLike,
        *,
        theta_p: float = 0.5,
        theta_r: float = 0.1,
    ) -> 'eTaMetrics':
        """Creates an instance from point-wise predictions and labels.

        Both inputs are squeezed, checked to be one-dimensional and of equal
        shape, and then converted to ranges with ``mlnext.find_anomalies``.

        Args:
            y_hat (npt.ArrayLike): Predictions (point-wise).
            y (npt.ArrayLike): Labels (point-wise).
            theta_p (float, optional): Precision threshold. Only those
              predictions which overlap with at least `theta_p` with a
              detected anomaly are counted as correct. Defaults to 0.5.
            theta_r (float, optional): Recall threshold. Only those anomalies
              which overlap at least `theta_r` with a correct prediction are
              counted as detected.  Defaults to 0.1.

        Returns:
            eTaMetrics: Returns an instance of the class this method is
            called on.
        """

        y, y_hat = np.squeeze(y), np.squeeze(y_hat)
        check_ndim(y, y_hat, ndim=1)
        check_shape(y, y_hat)

        preds = mlnext.find_anomalies(y_hat)
        labels = mlnext.find_anomalies(y)

        # Instantiate via ``cls`` so that subclasses get an instance of
        # their own type.
        return cls(preds, labels, theta_p=theta_p, theta_r=theta_r)





[docs]
def evaluate_from_preds(
    y_hat: npt.ArrayLike,
    y: npt.ArrayLike,
    *,
    theta_p: float = 0.5,
    theta_r: float = 0.1,
) -> dict[str, float | int]:
    """Computes the `enhanced time-aware (eTa)
    <https://dl.acm.org/doi/10.1145/3477314.3507024>`_, point-wise, and
    `point-adjusted <https://arxiv.org/abs/1802.03903>`_ performance
    metrics (and some other miscellaneous metrics) from point-wise
    predictions and labels. For details on how the metrics are calculated,
    see the respective methods of :class:`.eTaMetrics`.

    Args:
        y_hat (npt.ArrayLike): Predictions (point-wise).
        y (npt.ArrayLike): Labels (point-wise).
        theta_p (float, optional): Precision threshold. Only those
          predictions which overlap with at least `theta_p` with a detected
          anomaly are counted as correct. Defaults to 0.5.
        theta_r (float, optional): Recall threshold. Only those anomalies
          which overlap at least `theta_r` with a correct prediction are
          counted as detected.  Defaults to 0.1.

    Returns:
        dict[str, float | int]: Returns a mapping with all metrics:
          - ``eta/recall``: eTa recall score
          - ``eta/recall_detection``: detection score of the recall
          - ``eta/recall_portion``: portion score of the recall
          - ``eta/detected_anomalies``: number of detected anomalies
          - ``eta/precision``: eTa precision score
          - ``eta/precision_detection``: detection score of the precision
          - ``eta/precision_portion``: portion score of the precision
          - ``eta/correct_predictions``: number of correct predictions
          - ``eta/f1``: f1 score (harmonic mean of precision and recall)
          - ``eta/TP``: number of true positives (points counted)
          - ``eta/FP``: number of false positives (points counted)
          - ``eta/FN``: number of false negatives (points counted)
          - ``eta/wrong_predictions``: number of wrong predictions
          - ``eta/missed_anomalies``: number of undetected anomalies
          - ``eta/anomalies``: total number of anomalies
          - ``eta/segments``: percentage of detected anomalies
          - ``point/recall``: point-wise recall (TP / (TP + FN))
          - ``point/precision``: point-wise precision (TP / (TP + FP))
          - ``point/f1``: point-wise f1 score
          - ``point/TP``: number of true positives, correctly classified as 1
          - ``point/FP``: number of false positive, incorrectly classified as 1
          - ``point/FN``: number of false negatives, incorrectly classified as
            0
          - ``point/anomalies``: total number of anomalies
          - ``point/detected_anomalies``: number of detected anomalies (at
            least one point detected)
          - ``point/segments``: percentage of detected anomalies
          - ``point_adjust/recall``: point-adjusted recall
          - ``point_adjust/precision``: point-adjusted precision
          - ``point_adjust/f1``: point-adjusted f1

    Example:

        >>> import faster_etapr
        >>> faster_etapr.evaluate_from_preds(
        ...     y_hat=[0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0],
        ...     y=    [0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1],
        ...     theta_p=0.5,
        ...     theta_r=0.1,
        ... )
        {
            'eta/recall': 0.3875,
            'eta/recall_detection': 0.5,
            'eta/recall_portion': 0.275,
            'eta/detected_anomalies': 2.0,
            'eta/precision': 0.46476766302377037,
            'eta/precision_detection': 0.46476766302377037,
            'eta/precision_portion': 0.46476766302377037,
            'eta/correct_predictions': 2.0,
            'eta/f1': 0.4226312395393011,
            'eta/TP': 4,
            'eta/FP': 5,
            'eta/FN': 7,
            'eta/wrong_predictions': 2,
            'eta/missed_anomalies': 2,
            'eta/anomalies': 4,
            'eta/segments': 0.499999999999875,
            'point/recall': 0.45454545454541323,
            'point/precision': 0.5555555555554939,
            'point/f1': 0.49999999999945494,
            'point/TP': 5,
            'point/FP': 4,
            'point/FN': 6,
            'point/anomalies': 4,
            'point/detected_anomalies': 3.0,
            'point/segments': 0.75,
            'point_adjust/recall': 0.9090909090909091,
            'point_adjust/precision': 0.7142857142857143,
            'point_adjust/f1': 0.7999999999995071
        }

    """
    eta = eTaMetrics.from_preds(
        y_hat=y_hat,
        y=y,
        theta_p=theta_p,
        theta_r=theta_r,
    )

    # Merge the three score groups in the order eTa, point-wise,
    # point-adjusted.
    metrics: dict[str, float | int] = {}
    for compute in (eta.scores, eta.point_scores, eta.point_adjust_scores):
        metrics.update(compute())

    return metrics




[docs]
def evaluate_from_ranges(
    preds: list[tuple[int, int]],
    labels: list[tuple[int, int]],
    *,
    theta_p: float = 0.5,
    theta_r: float = 0.1,
) -> dict[str, float | int]:
    """Computes the `enhanced time-aware (eTa)
    <https://dl.acm.org/doi/10.1145/3477314.3507024>`_, point-wise, and
    `point-adjusted <https://arxiv.org/abs/1802.03903>`_ performance
    metrics (and some other miscellaneous metrics) from ranges of
    predictions and labels. For details on how the metrics are calculated,
    see the respective methods of :class:`.eTaMetrics`.

    Args:
        preds (list[tuple[int, int]]): Predictions as list of ranges.
        labels (list[tuple[int, int]]): Labels as list of ranges.
        theta_p (float, optional): Precision threshold. Only those
          predictions which overlap with at least `theta_p` with a detected
          anomaly are counted as correct. Defaults to 0.5.
        theta_r (float, optional): Recall threshold. Only those anomalies
          which overlap at least `theta_r` with a correct prediction are
          counted as detected.  Defaults to 0.1.

    Returns:
        dict[str, float | int]: Returns a mapping with all metrics:
          - ``eta/recall``: eTa recall score
          - ``eta/recall_detection``: detection score of the recall
          - ``eta/recall_portion``: portion score of the recall
          - ``eta/detected_anomalies``: number of detected anomalies
          - ``eta/precision``: eTa precision score
          - ``eta/precision_detection``: detection score of the precision
          - ``eta/precision_portion``: portion score of the precision
          - ``eta/correct_predictions``: number of correct predictions
          - ``eta/f1``: f1 score (harmonic mean of precision and recall)
          - ``eta/TP``: number of true positives (points counted)
          - ``eta/FP``: number of false positives (points counted)
          - ``eta/FN``: number of false negatives (points counted)
          - ``eta/wrong_predictions``: number of wrong predictions
          - ``eta/missed_anomalies``: number of undetected anomalies
          - ``eta/anomalies``: total number of anomalies
          - ``eta/segments``: percentage of detected anomalies
          - ``point/recall``: point-wise recall (TP / (TP + FN))
          - ``point/precision``: point-wise precision (TP / (TP + FP))
          - ``point/f1``: point-wise f1 score
          - ``point/TP``: number of true positives, correctly classified as 1
          - ``point/FP``: number of false positive, incorrectly classified as 1
          - ``point/FN``: number of false negatives, incorrectly classified as
            0
          - ``point/anomalies``: total number of anomalies
          - ``point/detected_anomalies``: number of detected anomalies (at
            least one point detected)
          - ``point/segments``: percentage of detected anomalies
          - ``point_adjust/recall``: point-adjusted recall
          - ``point_adjust/precision``: point-adjusted precision
          - ``point_adjust/f1``: point-adjusted f1

    Example:

        >>> import faster_etapr
        >>> faster_etapr.evaluate_from_ranges(
        ...     preds= [(1, 1), (3, 4), (7, 9), (11, 13)],
        ...     labels=[(1, 2), (5, 7), (10, 14), (16, 16)],
        ...     theta_p=0.5,
        ...     theta_r=0.1,
        ... )
        {
            'eta/recall': 0.3875,
            'eta/recall_detection': 0.5,
            'eta/recall_portion': 0.275,
            'eta/detected_anomalies': 2.0,
            'eta/precision': 0.46476766302377037,
            'eta/precision_detection': 0.46476766302377037,
            'eta/precision_portion': 0.46476766302377037,
            'eta/correct_predictions': 2.0,
            'eta/f1': 0.4226312395393011,
            'eta/TP': 4,
            'eta/FP': 5,
            'eta/FN': 7,
            'eta/wrong_predictions': 2,
            'eta/missed_anomalies': 2,
            'eta/anomalies': 4,
            'eta/segments': 0.499999999999875,
            'point/recall': 0.45454545454541323,
            'point/precision': 0.5555555555554939,
            'point/f1': 0.49999999999945494,
            'point/TP': 5,
            'point/FP': 4,
            'point/FN': 6,
            'point/anomalies': 4,
            'point/detected_anomalies': 3.0,
            'point/segments': 0.75,
            'point_adjust/recall': 0.9090909090909091,
            'point_adjust/precision': 0.7142857142857143,
            'point_adjust/f1': 0.7999999999995071
        }

    """
    eta = eTaMetrics(
        preds=preds,
        labels=labels,
        theta_p=theta_p,
        theta_r=theta_r,
    )

    # Merge the three score groups in the order eTa, point-wise,
    # point-adjusted.
    metrics: dict[str, float | int] = {}
    for compute in (eta.scores, eta.point_scores, eta.point_adjust_scores):
        metrics.update(compute())

    return metrics