Source code for frlearn.uncategorised.vector_size_measures

from dataclasses import dataclass

import numpy as np


@dataclass(frozen=True)
class MinkowskiSize:
    """
    Family of vector size measures of the form
    `(|x1|**p + |x2|**p + ... + |xm|**p)**(1/p)` (if `unrooted = False`), or
    `(|x1|**p + |x2|**p + ... + |xm|**p)` (if `unrooted = True`),
    for `0 < p < ∞`, and their limits in 0 and ∞.

    For `p = 0`, the rooted variant evaluates to ∞ if there is more than one
    non-zero coefficient, to 0 if all coefficients are zero, and to the
    absolute value of the only non-zero coefficient otherwise.
    The unrooted variant is equal to the number of non-zero coefficients.

    For `p = ∞`, the rooted variant is the maximum absolute value of all
    coefficients. The unrooted variant evaluates to ∞ if there is at least
    one coefficient with absolute value larger than 1, and to the number of
    coefficients with absolute value equal to 1 otherwise.

    Parameters
    ----------
    p: float
        Exponent to use. Must be in `[0, ∞]`.

    unrooted: bool = False
        Whether to omit the root `**(1/p)` from the formula.
        For `p = 0`, this gives Hamming size.
        For `p = 2`, this gives squared Euclidean size.

    scale_by_dimensionality: bool = False
        If `True`, values are scaled linearly such that the vector
        `[1, 1, ..., 1]` has size 1. This can be used to ensure that the
        range of dissimilarity values in the unit hypercube is `[0, 1]`,
        which can be useful when working with features scaled to `[0, 1]`.
        No scaling is applied for `p = ∞`, nor for rooted `p = 0`
        (where `[1, 1, ..., 1]` has size ∞ as soon as there is more than one
        coefficient, so no finite linear scale exists).

    Raises
    ------
    ValueError
        If `p` is negative or NaN.

    Notes
    -----
    The most used parameter combinations have their own name.

    * Hamming size is unrooted `p = 0`.
    * The Boscovich norm is `p = 1`.
      Also known as cityblock, Manhattan or Taxicab norm.
    * The Euclidean norm is rooted `p = 2`. Also known as Pythagorean norm.
    * Squared Euclidean size is unrooted `p = 2`.
    * The Chebyshev norm is rooted `p = ∞`.
      Also known as chessboard or maximum norm.
    """

    p: float
    unrooted: bool = False
    scale_by_dimensionality: bool = False

    def __post_init__(self):
        # Phrased as a negated `>=` so that NaN (for which every comparison
        # is False) is rejected together with negative values.
        if not self.p >= 0:
            raise ValueError('`p` must be in `[0, ∞]`')

    def __call__(self, u, axis=-1):
        """
        Calculate the size of the vectors in `u` along `axis`.

        Parameters
        ----------
        u: array-like
            Vector, or array of vectors, to measure.

        axis: int = -1
            Axis of `u` along which the coefficients of each vector lie.
            This axis is reduced away in the result.

        Returns
        -------
        result: scalar or ndarray
            Size of each vector, with the shape of `u` minus `axis`.
        """
        # Accept any array-like; the scaling step below needs `.shape`.
        u = np.asarray(u)
        magnitudes = np.abs(u)

        if self.p == 0:
            nonzero_count = np.count_nonzero(u, axis=axis)
            if self.unrooted:
                result = nonzero_count
            else:
                # With at most one non-zero coefficient, the sum of absolute
                # values is exactly that coefficient's magnitude (or 0).
                result = np.where(nonzero_count <= 1, np.sum(magnitudes, axis=axis), np.inf)
        elif self.p == 1:
            result = np.sum(magnitudes, axis=axis)
        elif self.p == np.inf:
            if self.unrooted:
                # Limit of |x|**p for p → ∞: 0 below 1, ∞ above 1, 1 otherwise.
                limits = np.where(magnitudes < 1, 0, np.where(magnitudes > 1, np.inf, 1))
                result = np.sum(limits, axis=axis)
            else:
                # Take magnitudes, like every other branch, so that negative
                # coefficients contribute their absolute value.
                result = np.max(magnitudes, axis=axis)
        else:
            result = np.sum(magnitudes ** self.p, axis=axis)
            if not self.unrooted:
                result = result ** (1 / self.p)

        if self.scale_by_dimensionality and self.p < np.inf:
            dimensionality = u.shape[axis]
            if self.unrooted:
                result = result / dimensionality
            elif self.p > 0:
                result = result / dimensionality ** (1 / self.p)
            # Rooted `p = 0` is deliberately left unscaled: `1 / p` is
            # undefined and no finite scale maps `[1, ..., 1]` to size 1.
        return result