Source code for sekupy.preprocessing.normalizers

from sekupy.preprocessing.base import Transformer
from sekupy.preprocessing.slicers import SampleSlicer
from sekupy.dataset.zscore import ZScoreMapper
from sekupy.dataset.dataset import vstack

import numpy as np

import logging
logger = logging.getLogger(__name__)


class FeatureZNormalizer(Transformer):
    """Feature-wise z-scoring step that delegates the work to ``ZScoreMapper``."""

    def __init__(self, chunks_attr=None, param_est=None, **kwargs):
        """Build the underlying mapper and register the transformer.

        Parameters
        ----------
        chunks_attr : optional
            Passed unchanged to both ``ZScoreMapper`` and
            ``Transformer.__init__`` (the default is None).
        param_est : optional
            Passed unchanged to ``ZScoreMapper`` (the default is None).
        **kwargs
            Accepted for call compatibility; not used by this class.
        """
        mapper_options = dict(chunks_attr=chunks_attr, param_est=param_est)
        self.node = ZScoreMapper(**mapper_options)
        Transformer.__init__(
            self,
            name='feature_znormalizer',
            chunks_attr=chunks_attr,
        )

    def transform(self, ds):
        """Train the mapper on ``ds`` and return the forwarded dataset.

        The mapper is re-trained on every call, using the very dataset that
        is then forwarded through it.
        """
        logger.info('Dataset preprocessing: Zscoring feature-wise...')
        mapper = self.node
        mapper.train(ds)
        zscored = mapper.forward(ds)
        return Transformer.transform(self, zscored)
class SampleZNormalizer(Transformer):
    """Sample-wise z-scoring: statistics are taken along axis 1."""

    def __init__(self, name='sample_znormalizer', **kwargs):
        """Register the transformer.

        Parameters
        ----------
        name : str, optional
            Name handed to ``Transformer.__init__``
            (the default is 'sample_znormalizer').
        **kwargs
            Accepted for call compatibility; not used by this class.
        """
        Transformer.__init__(self, name=name)

    def transform(self, ds):
        """Return a z-scored copy of ``ds``; the input is left untouched.

        The axis-1 mean is subtracted first, then the result is divided by
        its axis-1 standard deviation. NaN entries produced by the division
        are replaced with 0.
        """
        logger.info('Dataset preprocessing: Zscoring sample-wise...')

        normalized = ds.copy()

        # Centre first: the standard deviation is taken on the centred data.
        sample_mean = np.mean(normalized, axis=1)
        normalized.samples -= sample_mean[:, np.newaxis]

        sample_std = np.std(normalized, axis=1)
        normalized.samples /= sample_std[:, np.newaxis]

        nan_mask = np.isnan(normalized.samples)
        normalized.samples[nan_mask] = 0

        return Transformer.transform(self, normalized)
class SampleSigmaNormalizer(Transformer):
    """Sample-wise scaling by the standard deviation taken along axis 1."""

    def __init__(self, name='sample_sigma_normalizer', **kwargs):
        """Register the transformer.

        Parameters
        ----------
        name : str, optional
            Name handed to ``Transformer.__init__``
            (the default is 'sample_sigma_normalizer').
        **kwargs
            Accepted for call compatibility; not used by this class.
        """
        Transformer.__init__(self, name=name)

    def transform(self, ds):
        """Return a copy of ``ds`` divided by its axis-1 standard deviation.

        No mean is subtracted. Only NaN entries are reset to 0 afterwards;
        any other non-finite value is left as the division produced it.
        """
        logger.info('Dataset preprocessing: st. dev. normalization sample-wise...')

        scaled = ds.copy()

        sample_std = np.std(scaled, axis=1)
        scaled.samples /= sample_std[:, np.newaxis]

        nan_mask = np.isnan(scaled.samples)
        scaled.samples[nan_mask] = 0

        return Transformer.transform(self, scaled)
class FeatureSigmaNormalizer(Transformer):
    """Feature-wise standard-deviation scaling, done per attribute value."""

    # TODO: This is for a particular variable, not the join and so on
    def __init__(self, name='sample_sigma_normalizer', attr='targets'):
        """Store the grouping attribute and register the transformer.

        Parameters
        ----------
        name : str, optional
            Name handed to ``Transformer.__init__``
            (the default is 'sample_sigma_normalizer').
        attr : str, optional
            Sample attribute whose unique values define the groups
            (the default is 'targets').
        """
        self.attr = attr
        Transformer.__init__(self, name=name, attr=attr)

    def transform(self, ds):
        """Scale each group of samples by its own axis-0 standard deviation.

        One slice is taken per unique value of ``ds.sa[self.attr]``; each
        slice is divided by its axis-0 standard deviation, NaN entries are
        set to 0, and the slices are stacked back together in the order
        returned by ``np.unique``. Dataset attributes ``ds.a`` are copied
        onto the stacked result.
        """
        groups = []

        for value in np.unique(ds.sa[self.attr].value):
            slicer = SampleSlicer(**{self.attr: [value]})
            group = slicer.transform(ds)

            group.samples /= np.std(group, axis=0)
            logger.info('Dataset preprocessing: st. dev. normalization feature-wise...')

            nan_mask = np.isnan(group.samples)
            group.samples[nan_mask] = 0

            groups.append(group)

        stacked = vstack(groups)
        stacked.a.update(ds.a)

        return Transformer.transform(self, stacked)
class FeatureAttrNormalizer(Transformer):
    """Feature-wise scaling configured with a baseline attribute/value pair."""

    # TODO: This is for a particular variable, not the join and so on
    def __init__(self, name='sample_target_normalizer', attr_dict={'targets': 'rest'}):
        """Store the baseline selection and register the transformer.

        Parameters
        ----------
        name : str, optional
            Name handed to ``Transformer.__init__``
            (the default is 'sample_target_normalizer').
        attr_dict : dict, optional
            Mapping whose first item gives the sample attribute and the
            value selecting the baseline samples; further items are ignored
            (the default is {'targets': 'rest'}).
        """
        first_item = list(attr_dict.items())[0]
        self.attr, self.value = first_item
        Transformer.__init__(self, name=name)

    def transform(self, ds):
        """Scale each group of samples by its own axis-0 standard deviation.

        One slice is taken per unique value of ``ds.sa[self.attr]``; each
        slice is divided by its axis-0 standard deviation, NaN entries are
        set to 0, and the slices are stacked in the order returned by
        ``np.unique``. Dataset attributes ``ds.a`` are copied onto the result.
        """
        groups = []

        # NOTE(review): the baseline slice is computed but never read below,
        # so every group is scaled by its own standard deviation rather than
        # by anything derived from the baseline. Confirm the intended
        # normalization before relying on this class.
        baseline_slicer = SampleSlicer(**{self.attr: [self.value]})
        baseline_ds = baseline_slicer.transform(ds)

        for value in np.unique(ds.sa[self.attr].value):
            slicer = SampleSlicer(**{self.attr: [value]})
            group = slicer.transform(ds)

            group.samples /= np.std(group, axis=0)
            logger.info('Dataset preprocessing: st. dev. normalization feature-wise...')

            nan_mask = np.isnan(group.samples)
            group.samples[nan_mask] = 0

            groups.append(group)

        stacked = vstack(groups)
        stacked.a.update(ds.a)

        return Transformer.transform(self, stacked)
class DatasetFxNormalizer(Transformer):
    """Normalize all samples against one statistic of the same samples."""

    # TODO: This can be more generic by using a lambda
    def __init__(self, name='ds_fx_normalizer', norm_fx=np.divide, ds_fx=np.std):
        """Normalize the entire dataset as ``norm_fx(samples, ds_fx(samples))``.

        ``ds_fx`` is evaluated on the dataset samples, and ``norm_fx``
        combines the samples with that result.

        Parameters
        ----------
        name : str, optional
            Name handed to ``Transformer.__init__``
            (the default is 'ds_fx_normalizer').
        norm_fx : callable, optional
            Two-argument function called as ``norm_fx(samples, statistic)``
            (the default is ``np.divide``).
        ds_fx : callable, optional
            One-argument function called as ``ds_fx(samples)`` to produce
            the statistic (the default is ``np.std``).
        """
        self._ds_fx = ds_fx
        self._norm_fx = norm_fx
        Transformer.__init__(self, name=name)

    def transform(self, ds):
        """Replace ``ds.samples`` with the normalized values and return ``ds``.

        The input dataset is modified: its ``samples`` attribute is
        reassigned, no copy of ``ds`` is made.
        """
        # Lazy logger arguments: the message is only built if it is emitted.
        logger.info("Normalizing dataset with %s and %s",
                    self._norm_fx, self._ds_fx)
        statistic = self._ds_fx(ds.samples)
        ds.samples = self._norm_fx(ds.samples, statistic)
        return Transformer.transform(self, ds)
class SampleFxNormalizer(Transformer):
    """Apply a single function to all samples of a dataset."""

    def __init__(self, name='sample_fx_normalizer', fx=np.log):
        """Normalize the entire dataset by applying ``fx`` to its samples.

        Parameters
        ----------
        name : str, optional
            Name handed to ``Transformer.__init__``
            (the default is 'sample_fx_normalizer').
        fx : callable, optional
            One-argument function called as ``fx(samples)``
            (the default is ``np.log``).
        """
        self._fx = fx
        Transformer.__init__(self, name=name)

    def transform(self, ds):
        """Replace ``ds.samples`` with ``fx(ds.samples)`` and return ``ds``.

        The input dataset is modified: its ``samples`` attribute is
        reassigned, no copy of ``ds`` is made.
        """
        # Lazy logger arguments: the message is only built if it is emitted.
        logger.info("Normalizing dataset with %s", self._fx)
        ds.samples = self._fx(ds.samples)
        return Transformer.transform(self, ds)