Source code for sekupy.preprocessing.slicers
import numpy as np
from sekupy.preprocessing.base import Transformer
import logging
logger = logging.getLogger(__name__)
[docs]
class FeatureSlicer(Transformer):
""" This transformer filters the dataset using features as specified on a dictionary
The dictionary indicates the feature attributes to be used as key and a list
with conditions to be selected:
selection_dict = {
'accuracy': ['I'],
'frame':[1,2,3]
}
This dictionary means that we will select all features with frame attribute
equal to 1 OR 2 OR 3 AND all samples with accuracy equal to 'I'.
"""
def __init__(self, **kwargs):
self._selection = dict()
for arg in kwargs:
self._selection[arg] = kwargs[arg]
Transformer.__init__(self, name='feature_slicer', **self._selection)
def _set_mapper(self, **kwargs):
for k, v in kwargs.items():
kwargs[k] = "+".join(str(v))
return Transformer._set_mapper(self, **kwargs)
[docs]
def transform(self, ds):
selection_dict = self._selection
selection_mask = np.ones(ds.shape[1], dtype=bool)
for key, values in selection_dict.items():
logger.info("Selected %s from %s attribute.", str(values), key)
ds_values = ds.fa[key].value
condition_mask = np.zeros_like(ds_values, dtype=bool)
for value in values:
if str(value)[0] == '!':
array_val = np.array(value[1:]).astype(ds_values.dtype)
condition_mask = np.logical_or(condition_mask, ds_values != array_val)
else:
condition_mask = np.logical_or(condition_mask, ds_values == value)
selection_mask = np.logical_and(selection_mask, condition_mask)
return Transformer.transform(self, ds[:, selection_mask])
[docs]
class SampleSlicer(Transformer):
"""
Selects only portions of the dataset based on a dictionary
The dictionary indicates the sample attributes to be used as key and a list
with conditions to be selected:
selection_dict = {
'frame': [1,2,3]
}
This dictionary means that we will select all samples with frame attribute
equal to 1 OR 2 OR 3 AND all samples with accuracy equal to 'I'.
"""
def __init__(self, **kwargs):
self._selection = dict()
for arg in kwargs:
self._selection[arg] = kwargs[arg]
Transformer.__init__(self, name='sample_slicer', **kwargs)
def _set_mapper(self, **kwargs):
for k, v in kwargs.items():
kwargs[k] = "+".join([str(vv) for vv in v])
return Transformer._set_mapper(self, **kwargs)
[docs]
def transform(self, ds):
selection_dict = self._selection
selection_mask = np.ones_like(ds.targets, dtype=bool)
for key, values in selection_dict.items():
logger.info("Selected %s from %s attribute.", str(values), key)
ds_values = ds.sa[key].value
condition_mask = np.zeros_like(ds_values, dtype=bool)
for value in values:
condition_mask = np.logical_or(condition_mask, ds_values == value)
selection_mask = np.logical_and(selection_mask, condition_mask)
return Transformer.transform(self, ds[selection_mask])
[docs]
class DatasetMasker(Transformer):
"""
"""
def __init__(self,
mask=None,
**kwargs):
self._mask = mask
Transformer.__init__(self, name='dataset_masker')
[docs]
def transform(self, ds, axis=0):
if self._mask is None:
self._mask = np.ones_like(ds.samples[:, 0])
if axis == 0:
ds = ds[self._mask]
else:
ds = ds[:, self._mask]
return Transformer.transform(self, ds)
[docs]
class SampleExpressionSlicer(Transformer):
def __init__(self, attr, compare_fx=np.greater, attr_transformer=None):
"""This object is used when we want to slice samples based
on some values and thresholds. For example if we want to
exclude samples from subjects with an age greater than the
average plus one standard deviation, or on some trials with
an amplitude smaller than a particular value.
Parameters
----------
attr : str
The sample attribute to use for slicing and calculating
values.
compare_fx : numpy function or lambda
This function must take the sample attribute
and a value/vector as input and return a vector of
boolean.
attr_transformer : funcion, optional
This function can be used to further process the attribute
for example the np.abs can be used, by default None
"""
self.attr = attr
self.compare_fx = compare_fx
self.attr_transformer = attr_transformer
[docs]
def transform(self, ds, value=lambda x: np.mean(x)+1.5*np.std(x)):
"""[summary]
Parameters
----------
ds : pymvpa dataset
The dataset to be used
value : int or fx, optional
The function used to generate a value to be compared
with the attribute using the compare_fx funtion,
by default lambdax:np.mean(x)+1.5*np.std(x)
Returns
-------
ds : pymvpa dataset
The sliced dataset
"""
from types import LambdaType
compare = self.compare_fx
attributes = ds.sa[self.attr].value.copy()
if self.attr_transformer is not None:
attributes = self.attr_transformer(attributes)
if isinstance(value, LambdaType):
value = value(attributes)
mask = compare(attributes, value)
ds_ = ds[mask]
return ds_
[docs]
class FeatureExpressionSlicer(Transformer):
def __init__(self, fx=np.greater):
# Update doc
"""This object is used when we want to slice samples based
on some values and thresholds. For example if we want to
exclude features with some common characteristics,
for example those with nans.
Parameters
----------
attr : str
The sample attribute to use for slicing and calculating
values.
compare_fx : numpy function or lambda
This function must take the sample attribute
and a value/vector as input and return a vector of
boolean.
attr_transformer : funcion, optional
This function can be used to further process the attribute
for example the np.abs can be used, by default None
"""
if fx is None:
def fx(x): np.logical_not(np.isnan(x).sum(0))
self._fx = fx
[docs]
def transform(self, ds):
"""[summary]
Parameters
----------
ds : pymvpa dataset
The dataset to be used
value : int or fx, optional
The function used to generate a value to be compared
with the attribute using the compare_fx funtion,
by default lambdax:np.mean(x)+1.5*np.std(x)
Returns
-------
ds : pymvpa dataset
The sliced dataset
"""
mask = self._fx(ds.samples)
ds_ = ds[:, mask]
return ds_