Source code for sekupy.preprocessing.normalizers

from sekupy.preprocessing.base import Transformer
from sekupy.preprocessing.slicers import SampleSlicer
from sekupy.dataset.zscore import ZScoreMapper
from sekupy.dataset.dataset import vstack

import numpy as np

import logging
logger = logging.getLogger(__name__)



[docs]
class FeatureZNormalizer(Transformer):
    """Z-score a dataset feature-wise by delegating to a ``ZScoreMapper``.

    Parameters
    ----------
    chunks_attr : optional
        Forwarded unchanged both to ``ZScoreMapper`` and to
        ``Transformer.__init__``.
    param_est : optional
        Forwarded unchanged to ``ZScoreMapper``.
    **kwargs
        Accepted for signature compatibility; not used by this class.
    """

    def __init__(self, chunks_attr=None, param_est=None, **kwargs):
        # The mapper is built before the base initialiser runs, as in the
        # original ordering.
        zscore_node = ZScoreMapper(chunks_attr=chunks_attr,
                                   param_est=param_est)
        self.node = zscore_node
        Transformer.__init__(
            self,
            name='feature_znormalizer',
            chunks_attr=chunks_attr,
        )

    def transform(self, ds):
        """Train the mapper on ``ds`` and return the forward-mapped dataset.

        The mapper is trained again on every call; its ``forward`` output is
        then handed to ``Transformer.transform``.
        """
        logger.info('Dataset preprocessing: Zscoring feature-wise...')
        mapper = self.node
        mapper.train(ds)
        zscored = mapper.forward(ds)
        return Transformer.transform(self, zscored)


    


[docs]
class SampleZNormalizer(Transformer):
    """Z-score each sample (row) of a dataset across its features.

    Parameters
    ----------
    name : str, optional
        Name passed to ``Transformer.__init__``
        (default ``'sample_znormalizer'``).
    **kwargs
        Accepted for signature compatibility; not used by this class.
    """

    def __init__(self, name='sample_znormalizer', **kwargs):
        Transformer.__init__(self, name=name)

    def transform(self, ds):
        """Return a copy of ``ds`` whose rows are mean-centred and scaled.

        Each row has its mean subtracted and is divided by its standard
        deviation (both computed along ``axis=1``). Entries that end up
        non-finite (NaN or +/-inf, e.g. from a zero standard deviation)
        are set to 0. The input dataset is not modified; the work is done
        on ``ds.copy()``.
        """
        logger.info('Dataset preprocessing: Zscoring sample-wise...')

        ds_ = ds.copy()

        ds_.samples -= np.mean(ds_, axis=1)[:, None]

        # A zero standard deviation is an expected case that is cleaned up
        # right below, so the divide/invalid warnings carry no information.
        with np.errstate(divide='ignore', invalid='ignore'):
            ds_.samples /= np.std(ds_, axis=1)[:, None]

        # Rounding in the mean subtraction can leave a tiny non-zero residual
        # on a constant row; residual / 0 is +/-inf rather than NaN, so every
        # non-finite value is zeroed, not only NaN.
        ds_.samples[~np.isfinite(ds_.samples)] = 0

        return Transformer.transform(self, ds_)





[docs]
class SampleSigmaNormalizer(Transformer):
    """Divide each sample (row) of a dataset by its own standard deviation.

    Parameters
    ----------
    name : str, optional
        Name passed to ``Transformer.__init__``
        (default ``'sample_sigma_normalizer'``).
    **kwargs
        Accepted for signature compatibility; not used by this class.
    """

    def __init__(self, name='sample_sigma_normalizer', **kwargs):
        Transformer.__init__(self, name=name)

    def transform(self, ds):
        """Return a copy of ``ds`` with every row scaled by its std.

        The standard deviation is computed along ``axis=1``. Entries that
        end up non-finite (NaN or +/-inf) are set to 0. The input dataset is
        not modified; the work is done on ``ds.copy()``.
        """
        logger.info('Dataset preprocessing: st. dev. normalization sample-wise...')

        ds_ = ds.copy()

        # A zero standard deviation is an expected case that is cleaned up
        # right below, so the divide/invalid warnings carry no information.
        with np.errstate(divide='ignore', invalid='ignore'):
            ds_.samples /= np.std(ds_, axis=1)[:, None]

        # A constant non-zero row gives x / 0 == +/-inf (only 0 / 0 is NaN),
        # so every non-finite value is zeroed, not only NaN.
        ds_.samples[~np.isfinite(ds_.samples)] = 0

        return Transformer.transform(self, ds_)





[docs]
class FeatureSigmaNormalizer(Transformer):
    """Scale each feature by its standard deviation, per attribute value.

    The dataset is split by the unique values of the sample attribute
    ``attr``; within each split every feature (column) is divided by its
    standard deviation, and the splits are stacked back together.

    Parameters
    ----------
    name : str, optional
        Name passed to ``Transformer.__init__``.
        NOTE(review): the default ``'sample_sigma_normalizer'`` is the same
        as ``SampleSigmaNormalizer``'s; it is kept unchanged for backward
        compatibility.
    attr : str, optional
        Sample attribute used to group the samples (default ``'targets'``).
    """

    # TODO: This is for a particular variable, not the join and so on
    def __init__(self, name='sample_sigma_normalizer', attr='targets'):
        self.attr = attr
        Transformer.__init__(self, name=name, attr=attr)

    def transform(self, ds):
        """Return a dataset normalized feature-wise within each group.

        The groups are visited in the order given by ``np.unique``, so the
        rows of the returned dataset are grouped by attribute value rather
        than kept in their input order. Non-finite results (NaN or +/-inf)
        are set to 0. The dataset attributes ``ds.a`` are copied onto the
        stacked result.
        """
        # The message does not depend on the group, so it is logged once.
        logger.info('Dataset preprocessing: st. dev. normalization feature-wise...')

        ds_merged = []
        for target in np.unique(ds.sa[self.attr].value):

            selection_dict = {self.attr: [target]}
            ds_target = SampleSlicer(**selection_dict).transform(ds)

            # A zero standard deviation is an expected case that is cleaned
            # up right below, so the warnings carry no information.
            with np.errstate(divide='ignore', invalid='ignore'):
                ds_target.samples /= np.std(ds_target, axis=0)

            # A constant non-zero feature gives x / 0 == +/-inf (only 0 / 0
            # is NaN), so every non-finite value is zeroed, not only NaN.
            ds_target.samples[~np.isfinite(ds_target.samples)] = 0
            ds_merged.append(ds_target)

        ds_merged = vstack(ds_merged)
        ds_merged.a.update(ds.a)

        return Transformer.transform(self, ds_merged)





[docs]
class FeatureAttrNormalizer(Transformer):
    """Scale each feature by its standard deviation, per attribute value.

    Parameters
    ----------
    name : str, optional
        Name passed to ``Transformer.__init__``
        (default ``'sample_target_normalizer'``).
    attr_dict : dict, optional
        Mapping whose first ``(key, value)`` item is stored as
        ``self.attr`` / ``self.value``; any further items are ignored.
        The default dict is only read, never mutated.
    """

    # TODO: This is for a particular variable, not the join and so on
    def __init__(self, name='sample_target_normalizer', attr_dict={'targets':'rest'}):
        self.attr, self.value = list(attr_dict.items())[0]
        Transformer.__init__(self, name=name)

    def transform(self, ds):
        """Return a dataset normalized feature-wise within each group.

        The dataset is split by the unique values of ``self.attr`` (in
        ``np.unique`` order); within each split every feature is divided by
        its standard deviation and non-finite results (NaN or +/-inf) are
        set to 0. The splits are stacked and ``ds.a`` is copied onto the
        result.
        """
        # The message does not depend on the group, so it is logged once.
        logger.info('Dataset preprocessing: st. dev. normalization feature-wise...')

        ds_merged = []

        # NOTE(review): this baseline slice (samples where self.attr equals
        # self.value) is computed but never used, so self.value currently has
        # no effect on the output. The call is kept in case
        # SampleSlicer.transform has side effects; confirm whether the
        # baseline's standard deviation was meant to be the divisor below.
        selection_dict = {self.attr: [self.value]}
        baseline_ds = SampleSlicer(**selection_dict).transform(ds)

        for target in np.unique(ds.sa[self.attr].value):

            selection_dict = {self.attr: [target]}
            ds_target = SampleSlicer(**selection_dict).transform(ds)

            # A zero standard deviation is an expected case that is cleaned
            # up right below, so the warnings carry no information.
            with np.errstate(divide='ignore', invalid='ignore'):
                ds_target.samples /= np.std(ds_target, axis=0)

            # A constant non-zero feature gives x / 0 == +/-inf (only 0 / 0
            # is NaN), so every non-finite value is zeroed, not only NaN.
            ds_target.samples[~np.isfinite(ds_target.samples)] = 0
            ds_merged.append(ds_target)

        ds_merged = vstack(ds_merged)
        ds_merged.a.update(ds.a)

        return Transformer.transform(self, ds_merged)






[docs]
class DatasetFxNormalizer(Transformer):
    # TODO: This can be more generic by using a lambda
    def __init__(self, name='ds_fx_normalizer', norm_fx=np.divide, ds_fx=np.std):
        """Normalize a dataset's samples by a statistic of those samples.

        ``ds_fx`` is called on the samples to obtain a reference value, and
        ``norm_fx`` is then called with the samples and that value.

        Parameters
        ----------
        name : str, optional
            Name passed to ``Transformer.__init__``
            (the default is 'ds_fx_normalizer').
        norm_fx : callable, optional
            Called as ``norm_fx(samples, ds_fx(samples))``; its result
            replaces the samples (the default is np.divide).
        ds_fx : callable, optional
            Called as ``ds_fx(samples)`` to compute the reference value
            (the default is np.std).

        """
        self._norm_fx = norm_fx
        self._ds_fx = ds_fx
        Transformer.__init__(self, name=name)

    def transform(self, ds):
        """Apply the normalization to ``ds`` and return it.

        The result is assigned back to ``ds.samples``; no copy of the
        dataset is made here.
        """
        normalizer = self._norm_fx
        statistic = self._ds_fx

        message = "Normalizing dataset with %s and %s" % (str(normalizer),
                                                          str(statistic))
        logger.info(message)

        reference = statistic(ds.samples)
        ds.samples = normalizer(ds.samples, reference)

        return Transformer.transform(self, ds)





[docs]
class SampleFxNormalizer(Transformer):
    def __init__(self, name='sample_fx_normalizer', fx=np.log):
        """Transform a dataset's samples with a single function ``fx``.

        Parameters
        ----------
        name : str, optional
            Name passed to ``Transformer.__init__``
            (the default is 'sample_fx_normalizer').
        fx : function, optional
            Called as ``fx(samples)``; its result replaces the samples
            (the default is np.log).

        """
        self._fx = fx
        Transformer.__init__(self, name=name)

    def transform(self, ds):
        """Apply ``fx`` to ``ds.samples`` and return ``ds``.

        The result is assigned back to ``ds.samples``; no copy of the
        dataset is made here.
        """
        func = self._fx

        message = "Normalizing dataset with %s" % (str(func))
        logger.info(message)

        transformed = func(ds.samples)
        ds.samples = transformed

        return Transformer.transform(self, ds)