Source code for sekupy.preprocessing.slicers

import numpy as np
from sekupy.preprocessing.base import Transformer


import logging
logger = logging.getLogger(__name__)



[docs]
class FeatureSlicer(Transformer):
    """Select a subset of features using feature-attribute conditions.

    The conditions are passed as keyword arguments: each key is the name
    of a feature attribute (looked up in ``ds.fa``) and each value is a
    list of accepted values for that attribute, e.g.::

        FeatureSlicer(accuracy=['I'], frame=[1, 2, 3])

    keeps the features whose ``frame`` attribute is equal to 1 OR 2 OR 3
    AND whose ``accuracy`` attribute is equal to ``'I'``.

    A string value starting with ``'!'`` is a negation: ``'!I'`` matches
    every feature whose attribute differs from ``'I'``. Because the values
    listed for one attribute are OR-ed together, listing several negations
    for the same attribute matches every feature.
    """

    def __init__(self, **kwargs):
        # Private copy of the conditions; transform() reads it back.
        self._selection = dict(kwargs)
        Transformer.__init__(self, name='feature_slicer', **self._selection)

    def _set_mapper(self, **kwargs):
        """Collapse each list of values into one ``'+'``-separated string.

        The collapsed description is then handed to
        ``Transformer._set_mapper``.
        """
        # Join the individual values, not the characters of the list's
        # repr (``str([1, 2])`` would otherwise give ``"[+1+,+ +2+]"``).
        described = {
            key: "+".join(str(value) for value in values)
            for key, values in kwargs.items()
        }
        return Transformer._set_mapper(self, **described)

    def transform(self, ds):
        """Return ``ds`` restricted to the features matching the selection.

        Parameters
        ----------
        ds : dataset
            Must expose ``ds.shape``, feature attributes through
            ``ds.fa[key].value`` and support ``ds[:, mask]`` indexing.

        Returns
        -------
        The result of ``Transformer.transform`` applied to the sliced
        dataset.
        """
        selection_mask = np.ones(ds.shape[1], dtype=bool)

        for key, values in self._selection.items():
            logger.info("Selected %s from %s attribute.", str(values), key)

            ds_values = ds.fa[key].value
            condition_mask = np.zeros_like(ds_values, dtype=bool)

            # Values of the same attribute are alternatives (OR).
            for value in values:
                if isinstance(value, str) and value.startswith('!'):
                    # Negation: cast the text after '!' to the attribute
                    # dtype so the comparison is made on like types.
                    excluded = np.array(value[1:]).astype(ds_values.dtype)
                    matches = ds_values != excluded
                else:
                    matches = ds_values == value
                condition_mask = np.logical_or(condition_mask, matches)

            # Different attributes must all be satisfied (AND).
            selection_mask = np.logical_and(selection_mask, condition_mask)

        return Transformer.transform(self, ds[:, selection_mask])






[docs]
class SampleSlicer(Transformer):
    """Keep only the samples whose attributes match the given conditions.

    The conditions are passed as keyword arguments: each key is the name
    of a sample attribute (looked up in ``ds.sa``) and each value is a
    list of accepted values for that attribute, e.g.::

        SampleSlicer(frame=[1, 2, 3])

    keeps the samples whose ``frame`` attribute is equal to 1 OR 2 OR 3.
    When several attributes are given, a sample must satisfy all of them
    (AND across attributes, OR within the values of one attribute).
    """

    def __init__(self, **kwargs):
        # Private copy of the conditions; transform() reads it back.
        self._selection = dict(kwargs)
        Transformer.__init__(self, name='sample_slicer', **kwargs)

    def _set_mapper(self, **kwargs):
        """Collapse each list of values into one ``'+'``-separated string.

        The collapsed description is then handed to
        ``Transformer._set_mapper``.
        """
        described = {
            attribute: "+".join(map(str, accepted))
            for attribute, accepted in kwargs.items()
        }
        return Transformer._set_mapper(self, **described)

    def transform(self, ds):
        """Return ``ds`` restricted to the samples matching the selection.

        Parameters
        ----------
        ds : dataset
            Must expose ``ds.targets``, sample attributes through
            ``ds.sa[key].value`` and support ``ds[mask]`` indexing.

        Returns
        -------
        The result of ``Transformer.transform`` applied to the sliced
        dataset.
        """
        keep = np.ones_like(ds.targets, dtype=bool)

        for attribute, accepted in self._selection.items():
            logger.info("Selected %s from %s attribute.", str(accepted), attribute)

            attribute_values = ds.sa[attribute].value

            # A sample matches this attribute if it equals any accepted value.
            matches = np.zeros_like(attribute_values, dtype=bool)
            for candidate in accepted:
                matches = np.logical_or(matches, attribute_values == candidate)

            # ...and it is kept only if it matches every attribute.
            keep = np.logical_and(keep, matches)

        return Transformer.transform(self, ds[keep])





[docs]
class DatasetMasker(Transformer):
    """Slice a dataset with a mask supplied at construction time.

    Parameters
    ----------
    mask : array-like or None, optional
        Index applied to the first axis of the dataset when ``transform``
        is called with ``axis == 0`` and to the second axis otherwise.
        ``None`` (default) keeps every element along the chosen axis.
    **kwargs
        Accepted and ignored.
    """

    def __init__(self,
                 mask=None,
                 **kwargs):

        self._mask = mask
        Transformer.__init__(self, name='dataset_masker')

    def transform(self, ds, axis=0):
        """Return ``ds`` indexed by the stored mask along ``axis``.

        Parameters
        ----------
        ds : dataset
            Must expose ``ds.shape`` and support ``ds[mask]`` and
            ``ds[:, mask]`` indexing.
        axis : int, optional
            ``0`` masks the first axis; any other value masks the second
            axis. By default 0.

        Returns
        -------
        The result of ``Transformer.transform`` applied to the sliced
        dataset.
        """
        mask = self._mask
        if mask is None:
            # Build the keep-everything mask per call: it must be boolean
            # (an array of float/int ones is not a valid "keep all" index)
            # and sized for the axis being masked. It is deliberately not
            # stored on the instance so that a later dataset of a
            # different size gets its own default.
            length = ds.shape[0] if axis == 0 else ds.shape[1]
            mask = np.ones(length, dtype=bool)

        if axis == 0:
            ds = ds[mask]
        else:
            ds = ds[:, mask]

        return Transformer.transform(self, ds)





[docs]
class SampleExpressionSlicer(Transformer):
    # NOTE(review): unlike the other slicers in this module, this class
    # calls neither Transformer.__init__ nor Transformer.transform --
    # confirm whether that is intended.

    def __init__(self, attr, compare_fx=np.greater, attr_transformer=None):
        """This object is used when we want to slice samples based
        on some values and thresholds. For example if we want to
        exclude samples from subjects with an age greater than the
        average plus one standard deviation, or on some trials with
        an amplitude smaller than a particular value.

        Parameters
        ----------
        attr : str
            The sample attribute to use for slicing and calculating
            values.
        compare_fx : numpy function or lambda
            This function must take the sample attribute
            and a value/vector as input and return a vector of
            boolean. Samples for which it returns True are kept.
        attr_transformer : function, optional
            This function can be used to further process the attribute
            before the comparison, for example np.abs can be used,
            by default None
        """
        self.attr = attr
        self.compare_fx = compare_fx
        self.attr_transformer = attr_transformer

    def transform(self, ds, value=lambda x: np.mean(x)+1.5*np.std(x)):
        """Keep the samples for which ``compare_fx(attribute, value)`` holds.

        Parameters
        ----------
        ds : pymvpa dataset
            The dataset to be used. The attribute is read from
            ``ds.sa[attr].value``.
        value : scalar, array or callable, optional
            The value to be compared with the attribute using the
            compare_fx function. If callable, it is first called with the
            (optionally transformed) attribute values and its result is
            used as the value, by default
            lambda x: np.mean(x) + 1.5 * np.std(x)

        Returns
        -------
        ds : pymvpa dataset
            The sliced dataset
        """
        # Work on a copy so attr_transformer cannot modify the dataset's
        # own attribute array.
        attributes = ds.sa[self.attr].value.copy()

        if self.attr_transformer is not None:
            attributes = self.attr_transformer(attributes)

        # Accept any callable (lambda, def, functools.partial, builtin,
        # callable object), not only plain Python functions.
        if callable(value):
            value = value(attributes)

        mask = self.compare_fx(attributes, value)

        return ds[mask]





[docs]
class FeatureExpressionSlicer(Transformer):
    # NOTE(review): unlike the other slicers in this module, this class
    # calls neither Transformer.__init__ nor Transformer.transform --
    # confirm whether that is intended.

    def __init__(self, fx=None):
        """This object is used when we want to slice features based
        on an expression computed on the samples matrix. For example
        if we want to exclude features with some common
        characteristics, such as those containing nans.

        Parameters
        ----------
        fx : callable, optional
            Function called with ``ds.samples`` as its only argument; it
            must return an index (typically a boolean vector) that is
            used to select the features to keep. By default None, which
            keeps the features that contain no nan.
        """
        if fx is None:
            fx = self._features_without_nan

        self._fx = fx

    @staticmethod
    def _features_without_nan(samples):
        """Return a boolean vector, True for columns with no nan."""
        return np.logical_not(np.isnan(samples).any(axis=0))

    def transform(self, ds):
        """Keep the features selected by ``fx(ds.samples)``.

        Parameters
        ----------
        ds : pymvpa dataset
            The dataset to be used

        Returns
        -------
        ds : pymvpa dataset
            The sliced dataset
        """
        mask = self._fx(ds.samples)

        return ds[:, mask]