"""Support vector data descriptors"""
from __future__ import annotations
from typing import Callable
from sklearn.svm import OneClassSVM
from frlearn.base import DataDescriptor
from frlearn.feature_preprocessors import IQRNormaliser
from frlearn.parametrisations import multiple
from frlearn.transformations import contract
[docs]class SVM(DataDescriptor):
"""
Wrapper for the Support Vector Machine (SVM) data descriptor [1]_ with gaussian kernel, implemented in scikit-learn.
Expresses the signed distance to the separating hyperplane, scaled to `[0, 1]`.
Parameters
----------
nu : float = 0.20
How many nearest neighbour distances / localised proximities to consider.
Corresponds to the scale at which proximity is evaluated.
Should be either a positive integer not larger than the target class size,
or a function that takes the size of the target class and returns such an integer.
c : float or (int -> float) = 0.25 * m
Kernel width.
Should be either a positive float
or a function that takes the dimensionality of the target class and returns such a float.
preprocessors : iterable = (IQRNormaliser(), )
Preprocessors to apply. The default interquartile range normaliser rescales all features
to ensure that they all have the same interquartile range.
sklearn_params
Additional keyword parameters will be passed on as-is to scikit-learn's OneClassSVM constructor.
Notes
-----
`nu` and `c` are the two principal hyperparameters that can be tuned to increase performance.
Its default values are based on the empirical evaluation in [2]_.
References
----------
.. [1] `Schölkopf B, Platt JC, Shawe-Taylor J, Smola AJ, Williamson RC (1999).
Estimating the support of a high-dimensional distribution.
MSR-TR-99-87, Microsoft Research.
<https://www.microsoft.com/en-us/research/publication/estimating-the-support-of-a-high-dimensional-distribution/>`_
.. [2] `Lenz OU, Peralta D, Cornelis C (2021).
Average Localised Proximity: A new data descriptor with good default one-class classification performance.
Pattern Recognition, vol 118, no 107991.
doi: 10.1016/j.patcog.2021.107991
<https://www.sciencedirect.com/science/article/abs/pii/S0031320321001783>`_
"""
def __init__(
self,
nu: float = 0.20,
c: float | Callable[[int], float] = multiple(0.25),
preprocessors=(IQRNormaliser(), ),
**sklearn_params,
):
super().__init__(preprocessors=preprocessors)
self.nu = nu
self.c = c
self.sklearn_params = sklearn_params
def _construct(self, X, ):
model = super()._construct(X)
model.nu = self.nu
model.c = self.c(X.shape[1]) if callable(self.c) else self.c
model.svm = OneClassSVM(nu=model.nu, gamma=1/model.c, **self.sklearn_params).fit(X)
return model
[docs] class Model(DataDescriptor.Model):
nu: float
c: float
svm: OneClassSVM
def _query(self, X):
signed_distance = self.svm.decision_function(X)
# scale signed distance from [-∞, ∞] to (0, 1)
score = contract(signed_distance)
return score