Source code for sekupy.analysis.base

import logging
import os
import numpy as np
from sekupy.utils.files import make_dir
from sekupy.utils.time import get_time
from sekupy.utils import get_id
from sekupy.io.configuration import save_configuration
from sekupy.base import Node

logger = logging.getLogger(__name__)



[docs]
class Analyzer(Node):
    """Base class for neuroimaging analysis components.
    
    The Analyzer class extends Node to provide core functionality for
    neuroimaging analyses in the sekupy framework. It handles analysis
    configuration, execution, and result storage following BIDS conventions.
    
    Parameters
    ----------
    name : str, optional
        Name identifier for the analyzer, by default 'analyzer'
    **kwargs : dict
        Additional configuration parameters including:
        - id : str, optional
            Unique identifier for the analysis instance
        - num : int, optional
            Number identifier for the analysis, by default 1
    
    Attributes
    ----------
    id : str
        Unique identifier for the analysis instance
    num : int
        Number identifier for the analysis
    """
    
    def __init__(self, name='analyzer', **kwargs):
        """Initialize an Analyzer instance.
        
        Parameters
        ----------
        name : str, optional
            Name identifier for the analyzer, by default 'analyzer'
        **kwargs : dict
            Additional configuration parameters including:
            - id : str, optional
                Unique identifier for the analysis instance
            - num : int, optional
                Number identifier for the analysis, by default 1
        """

        self.id = get_id()
        if 'id' in kwargs.keys():
            self.id = kwargs['id']


        self.num = 1
        if 'num' in kwargs.keys():
            self.num = kwargs['num']
        
        
        Node.__init__(self, name=name, **kwargs)
        
        

[docs]
    def fit(self, ds, **kwargs):
        """Fit the analyzer to the provided dataset.
        
        This method stores information about the dataset and analysis
        configuration for later use in saving results.
        
        Parameters
        ----------
        ds : Dataset
            The dataset to analyze
        **kwargs : dict
            Additional parameters for the analysis
            
        Returns
        -------
        None
        """
        
        self._info = self._store_info(ds, **kwargs)

        return




[docs]
    def save(self, path=None, **kwargs):
        """Basic function for saving information about the analysis.
        Basically it should be overriden in subclasses.

        This implementation creates the folder in which results are
        stored, following BIDS specification.
        
        Parameters
        ----------
        path : str, optional
            The pathname where results are stored, if None is passed
            it creates the directory
             (the default is None, which [default_description])
        
        **kwargs : dict, optional
            Dictionary of keywords used for directory creation.
        
        Returns
        -------
        path : str
            The directory created or the path passed as parameter.
        """


        # TODO: Keep in mind other analyses
        if not hasattr(self, "scores"):
            logger.error("Please run fit() before saving results.")
            
            return None
        
        # Build path and make dir
        path_info = self._get_analysis_info()

        if path is not None:
            path_info['path'] = path

        if 'pipeline' in kwargs.keys():
            path_info['pipeline'] = kwargs.pop('pipeline')

        logger.debug(path_info)
        path = self._build_path(**path_info)

        make_dir(path)
        logger.info("Result directory is: %s" % path)
        
        prefix = self._get_prefix()

        # Filter some params ?
        # Do it in the subclasses ?
        # Add self._info ?
        logger.debug(kwargs)
        
        kwargs.update(self._info)
        save_configuration(path, kwargs)

        self._save_dataset_description(path)

        return path, prefix



    # Only on Analyzer
    def _build_path(self, **info):

        keys = ['pipeline', 'analysis']

        pipeline_directory = []
        for k in keys:
            if k in info.keys():
                value = info.pop(k)
                if isinstance(value, list):
                    value = "+".join([str(item) for item in value])
                value = value.replace("_", "+")
                pipeline_directory += ["%s-%s" % (k, value)]
        
        path = info.pop('path')
        id_ = info.pop('id')

        for k, v in info.items():
            v = str(v).replace("_", "+")
            pipeline_directory += ["%s-%s" % (k, str(v))]

        logger.info(pipeline_directory)

        pipeline_directory += ["%s-%s" % ('id', id_)]

        subjects = np.unique(self._info['sa'].subject)
        if len(subjects) != 1:
            subdir = 'group'
        else:
            subdir = str(subjects[0])

        
        result_path = os.path.join(path, 
                                   'derivatives',
                                   pipeline_directory[0],
                                   "_".join(pipeline_directory), 
                                   subdir)

        return result_path



    def _get_prefix(self):

        import numpy as np

        fname_list = np.unique(self._info['sa']['file'])

        prefix_list = os.path.basename(fname_list[0]).split("_")

        subjects = np.unique(self._info['sa'].subject)
        if len(subjects) != 1:
            prefix_list[0] = "group"
        
        if len(prefix_list) == 1:
            prefix_list = ["bids", ""]

        return "_".join(prefix_list[:-1])


    # Deprecated maybe
    def _get_fname_info(self):

        info = dict()
        logger.debug(self._info)
        info['path'] = self._info['a'].data_path
        
        info['task'] = self._info['a'].task
        
        info['analysis'] = self.name
        info['subjects'] = self._info['subjects']
        info['is_group'] = len(info['subjects']) != 1
               
        return info
            


    def _store_info(self, ds, **kwargs):

        import numpy as np

        info = dict()
        info['a'] = ds.a.copy()
        info['sa'] = ds.sa.copy()

        info.update({'ds.a.%s' % k: ds.a[k].value for k in ds.a.keys()})
        info.update({'ds.sa.%s' % k: np.unique(ds.sa[k].value) for k in ds.sa.keys()})

        info['targets'] = np.unique(ds.targets)
        info['summary'] = ds.summary()

        for k, v in kwargs.items():

            if isinstance(v, list):
                v = "+".join([str(it) for it in v])

            info[k] = str(v)
            if k == 'prepro':
                info[k] = [v.name]

        if 'subject' in ds.sa.keys():
            info['subjects'] = list(np.unique(ds.sa.subject))

        logger.debug(info)
        return info
    


    def _get_analysis_info(self):

        info = dict()
        info['analysis'] = self.name
        info['path'] = self._info['a'].data_path
        info['id'] = "%s+%04d" % (self.id, self.num)
        info['experiment'] = self._info['a'].experiment

        # Only for testing purposes
        self._test_id = info['id']

        return info

    
    
    def _get_info(self, attributes=['estimator', 'scoring', 
                                    'cv','permutation']):

        # TODO: It may crash whether used with connectivity
        # TODO: maybe it should be performed on subclasses
        
        import numpy as np
        
        info = dict()
        
        for k in attributes:
            info[k] = getattr(self, k)
        info['targets'] = self._info['targets']
        
        for k in self._info['sa'].keys():
            info[k] = np.unique(self._info['sa'][k].value)
        info['summary'] = self._info['summary']

        return info


    # TODO: Look if can be applied to connectivity
    def _get_permutation_indices(self, n_samples):
        """Permutes the indices of the dataset"""
        
        # TODO: Permute labels based on cv_attr
        from sklearn.utils import shuffle
        
        if self.permutation == 0:
            return [range(n_samples)]
        
        indices = [range(n_samples)]
        for r in range(self.permutation):
            idx = shuffle(indices[0], random_state=r)
            indices.append(idx)
        
        return indices


    def _get_test_id(self):
        if '_test_id' in self.__dict__.keys():
            return getattr(self, '_test_id')


    def _save_dataset_description(self, path):
        """This function saves a dataset_description.json
        for BIDS dataformat
        
        Parameters
        ----------
        path : str
            The path that will be used to save the file
        """

        info = self._get_analysis_info()

        keys = ['pipeline', 'analysis', 'id']

        description =  {
                        "Name": "sekupy - Pipelines for neuroimaging",
                        "BIDSVersion": "1.1.1",
                        "PipelineDescription": {
                            "Name": "_".join([info[k] for k in keys if k in info.keys()]),
                        },
                        "CodeURL": "https://github.com/robbisg/sekupy"
                    }

        dataset_desc = os.path.join(os.path.dirname(path), 
                                    "dataset_description.json")
        
        if not os.path.exists(dataset_desc):
            import json
            
            with open(dataset_desc, 'w') as fp:
                json.dump(description, fp)
        
        
    def _get_prepro_info(self, **kwargs):

        from sekupy.analysis.utils import get_params

        params_ = {}

        if 'prepro' in kwargs.keys():
    
            for keyword in ["sample_slicer", "target_transformer", "sample_transformer"]:
                if keyword in kwargs['prepro']:
                    params_ = get_params(kwargs, keyword)

                    if 'fx' in params_.keys() and keyword == 'target_transformer':
                        params_['target_transformer-fx'] = params_['fx'][0]

                    if keyword == "sample_slicer":
                        params_ = {k: "+".join([str(v) for v in value]) for k, value in params_.items()}
                    
                    if keyword == "sample_transformer":
                        params_ = {k: "+".join([str(v) for v in value]) for k, value in params_['attr'].items()}

        
        return params_