# Source code for orion.evaluation.point

from orion.evaluation.common import _accuracy, _f1_score, _precision, _recall, _weighted_segment


def _point_partition(expected, observed, start=None, end=None):
    expected = set(expected)
    observed = set(observed)

    edge_start = min(expected.union(observed))
    if start is not None:
        edge_start = start

    edge_end = max(expected.union(observed))
    if end is not None:
        edge_end = end

    length = int(edge_end) - int(edge_start) + 1

    expected_parts = [0] * length
    observed_parts = [0] * length

    for edge in expected:
        expected_parts[edge - edge_start] = 1

    for edge in observed:
        observed_parts[edge - edge_start] = 1

    return expected_parts, observed_parts, None


def point_confusion_matrix(expected, observed, data=None, start=None, end=None):
    """Compute the confusion matrix between the ground truth and the detected anomalies.

    The timestamps are extracted from the inputs and handed, together with the
    range bounds, to ``_weighted_segment`` using ``_point_partition`` as the
    partition function.

    Args:
        expected (DataFrame or list of timestamps):
            Ground truth passed as a ``pandas.DataFrame`` or list containing
            one column: timestamp.
        observed (DataFrame or list of timestamps):
            Detected anomalies passed as a ``pandas.DataFrame`` or list containing
            one column: timestamp.
        data (DataFrame):
            Original data, passed as a ``pandas.DataFrame`` containing timestamp.
            Used to extract start and end. When given, it overrides any value
            passed as ``start`` or ``end``.
        start (int):
            Minimum timestamp of the original data.
        end (int):
            Maximum timestamp of the original data.

    Returns:
        tuple:
            number of true negative, false positive, false negative, true positive.
    """
    if data is not None:
        timestamps = data['timestamp']
        start = timestamps.min()
        end = timestamps.max()

    # Anything that is not a plain list is indexed by its ``timestamp`` column.
    if not isinstance(expected, list):
        expected = list(expected['timestamp'])

    if not isinstance(observed, list):
        observed = list(observed['timestamp'])

    return _weighted_segment(expected, observed, _point_partition, start, end)


def point_accuracy(expected, observed, data=None, start=None, end=None):
    """Compute an accuracy score between the ground truth and the detected anomalies.

    Delegates to ``_accuracy`` using ``point_confusion_matrix`` as the
    confusion matrix function.

    Args:
        expected (DataFrame or list of timestamps):
            Ground truth passed as a ``pandas.DataFrame`` or list containing
            one column: timestamp.
        observed (DataFrame or list of timestamps):
            Detected anomalies passed as a ``pandas.DataFrame`` or list containing
            one column: timestamp.
        data (DataFrame):
            Original data, passed as a ``pandas.DataFrame`` containing timestamp.
            Used to extract start and end.
        start (int):
            Minimum timestamp of the original data.
        end (int):
            Maximum timestamp of the original data.

    Returns:
        float:
            Accuracy score between the ground truth and detected anomalies.
    """
    return _accuracy(expected, observed, data, start, end, cm=point_confusion_matrix)


def point_precision(expected, observed, data=None, start=None, end=None):
    """Compute a precision score between the ground truth and the detected anomalies.

    Delegates to ``_precision`` using ``point_confusion_matrix`` as the
    confusion matrix function.

    Args:
        expected (DataFrame or list of timestamps):
            Ground truth passed as a ``pandas.DataFrame`` or list containing
            one column: timestamp.
        observed (DataFrame or list of timestamps):
            Detected anomalies passed as a ``pandas.DataFrame`` or list containing
            one column: timestamp.
        data (DataFrame):
            Original data, passed as a ``pandas.DataFrame`` containing timestamp.
            Used to extract start and end.
        start (int):
            Minimum timestamp of the original data.
        end (int):
            Maximum timestamp of the original data.

    Returns:
        float:
            Precision score between the ground truth and detected anomalies.
    """
    return _precision(expected, observed, data, start, end, cm=point_confusion_matrix)


def point_recall(expected, observed, data=None, start=None, end=None):
    """Compute a recall score between the ground truth and the detected anomalies.

    Delegates to ``_recall`` using ``point_confusion_matrix`` as the
    confusion matrix function.

    Args:
        expected (DataFrame or list of timestamps):
            Ground truth passed as a ``pandas.DataFrame`` or list containing
            one column: timestamp.
        observed (DataFrame or list of timestamps):
            Detected anomalies passed as a ``pandas.DataFrame`` or list containing
            one column: timestamp.
        data (DataFrame):
            Original data, passed as a ``pandas.DataFrame`` containing timestamp.
            Used to extract start and end.
        start (int):
            Minimum timestamp of the original data.
        end (int):
            Maximum timestamp of the original data.

    Returns:
        float:
            Recall score between the ground truth and detected anomalies.
    """
    return _recall(expected, observed, data, start, end, cm=point_confusion_matrix)


def point_f1_score(expected, observed, data=None, start=None, end=None):
    """Compute an F1 score between the ground truth and the detected anomalies.

    Delegates to ``_f1_score`` using ``point_confusion_matrix`` as the
    confusion matrix function.

    Args:
        expected (DataFrame or list of timestamps):
            Ground truth passed as a ``pandas.DataFrame`` or list containing
            one column: timestamp.
        observed (DataFrame or list of timestamps):
            Detected anomalies passed as a ``pandas.DataFrame`` or list containing
            one column: timestamp.
        data (DataFrame):
            Original data, passed as a ``pandas.DataFrame`` containing timestamp.
            Used to extract start and end.
        start (int):
            Minimum timestamp of the original data.
        end (int):
            Maximum timestamp of the original data.

    Returns:
        float:
            F1 score between the ground truth and detected anomalies.
    """
    return _f1_score(expected, observed, data, start, end, cm=point_confusion_matrix)