Source code for frlearn.statistics.dispersion_measures

"""Measures of dispersion of datasets"""

import numpy as np

__all__ = [
    'interquartile_range', 'maximum_absolute_value', 'standard_deviation', 'total_range',
]


[docs]def interquartile_range(X): """ Distance between the first and the third quartile; range of the central half of the data. Parameters ---------- X : np.array Dataset. Should be a two-dimensional array. Returns ------- a: np.array One-dimensional array that contains the interquartile range for each feature. """ quartiles = np.nanpercentile(X, [25, 75], axis=0) return quartiles[1, :] - quartiles[0, :]
[docs]def maximum_absolute_value(X): """ Maximum distance from 0. Parameters ---------- X : np.array Dataset. Should be a two-dimensional array. Returns ------- a: np.array One-dimensional array that contains the maximum absolute value for each feature. """ return np.nanmax(np.abs(X), axis=0)
[docs]def standard_deviation(X): """ Square root of the sum of the squared distances to the mean. Parameters ---------- X : np.array Dataset. Should be a two-dimensional array. Returns ------- a: np.array One-dimensional array that contains the standard deviation for each feature. """ return np.nanstd(X, axis=0)
[docs]def total_range(X): """ Distance between the smallest and largest value. Parameters ---------- X : np.array Dataset. Should be a two-dimensional array. Returns ------- a: np.array One-dimensional array that contains the ranges for each feature. Notes ----- This function is not called `range` so as not to overwrite the built-in function `range`. """ return np.nanmax(X, axis=0) - np.nanmin(X, axis=0)