Source code for sekupy.analysis.base

import logging
import os
import numpy as np
from sekupy.utils.files import make_dir
from sekupy.utils.time import get_time
from sekupy.utils import get_id
from sekupy.io.configuration import save_configuration
from sekupy.base import Node

logger = logging.getLogger(__name__)


class Analyzer(Node):
    """Base class for neuroimaging analysis components.

    The Analyzer class extends Node to provide core functionality for
    neuroimaging analyses in the sekupy framework. It handles analysis
    configuration, execution, and result storage following BIDS
    conventions.

    Parameters
    ----------
    name : str, optional
        Name identifier for the analyzer, by default 'analyzer'
    **kwargs : dict
        Additional configuration parameters including:

        - id : str, optional
            Unique identifier for the analysis instance
        - num : int, optional
            Number identifier for the analysis, by default 1

    Attributes
    ----------
    id : str
        Unique identifier for the analysis instance
    num : int
        Number identifier for the analysis
    """

    def __init__(self, name='analyzer', **kwargs):
        """Initialize an Analyzer instance.

        Parameters
        ----------
        name : str, optional
            Name identifier for the analyzer, by default 'analyzer'
        **kwargs : dict
            Additional configuration parameters including:

            - id : str, optional
                Unique identifier for the analysis instance
            - num : int, optional
                Number identifier for the analysis, by default 1
        """
        # get_id() is always evaluated (as before), even when an explicit
        # 'id' is supplied and ends up overriding it.
        self.id = kwargs.get('id', get_id())
        self.num = kwargs.get('num', 1)

        # 'id' and 'num' are deliberately left in kwargs and forwarded.
        Node.__init__(self, name=name, **kwargs)

    def fit(self, ds, **kwargs):
        """Fit the analyzer to the provided dataset.

        This method stores information about the dataset and analysis
        configuration (in ``self._info``) for later use in saving results.

        Parameters
        ----------
        ds : Dataset
            The dataset to analyze
        **kwargs : dict
            Additional parameters for the analysis

        Returns
        -------
        None
        """
        self._info = self._store_info(ds, **kwargs)
        return

    def save(self, path=None, **kwargs):
        """Basic function for saving information about the analysis.

        It is meant to be overridden in subclasses. This implementation
        creates the folder in which results are stored, following the BIDS
        specification, then writes the analysis configuration and the
        ``dataset_description.json`` file.

        Parameters
        ----------
        path : str, optional
            The root pathname where results are stored. If None (the
            default), the ``data_path`` recorded from the dataset at fit
            time is used.
        **kwargs : dict, optional
            Dictionary of keywords used for directory creation. A
            ``pipeline`` entry, if present, is consumed to build the
            directory name; the remaining entries are merged with the
            stored fit information and saved as configuration.

        Returns
        -------
        (path, prefix) : tuple of str, or None
            The result directory that was created and the filename prefix
            to use for result files. ``None`` is returned (and an error is
            logged) when the instance has no ``scores`` attribute.
        """
        # TODO: Keep in mind other analyses
        if not hasattr(self, "scores"):
            logger.error("Please run fit() before saving results.")
            return None

        # Build path and make dir
        path_info = self._get_analysis_info()
        if path is not None:
            path_info['path'] = path

        if 'pipeline' in kwargs:
            path_info['pipeline'] = kwargs.pop('pipeline')

        logger.debug(path_info)

        path = self._build_path(**path_info)
        make_dir(path)
        logger.info("Result directory is: %s", path)

        prefix = self._get_prefix()

        # Filter some params ?
        # Do it in the subclasses ?
        # Add self._info ?
        logger.debug(kwargs)
        kwargs.update(self._info)

        save_configuration(path, kwargs)
        self._save_dataset_description(path)

        return path, prefix

    # Only on Analyzer
    def _build_path(self, **info):
        """Compose the result directory path from analysis descriptors.

        Parameters
        ----------
        **info : dict
            Must contain ``path`` (root directory) and ``id``. Optional
            ``pipeline`` and ``analysis`` entries come first in the
            directory name; any other entry is appended as ``key-value``.
            Underscores inside values are replaced by ``+`` because ``_``
            is used as the separator between ``key-value`` pairs.

        Returns
        -------
        result_path : str
            ``<path>/derivatives/<first pair>/<all pairs joined by _>/<subdir>``
            where ``subdir`` is the subject name, or ``'group'`` when the
            stored sample attributes hold more than one subject.
        """
        pipeline_directory = []
        for k in ('pipeline', 'analysis'):
            if k in info:
                value = info.pop(k)
                if isinstance(value, list):
                    value = "+".join([str(item) for item in value])
                # str() keeps this in line with the generic loop below and
                # avoids an AttributeError on non-string values.
                value = str(value).replace("_", "+")
                pipeline_directory += ["%s-%s" % (k, value)]

        path = info.pop('path')
        id_ = info.pop('id')

        for k, v in info.items():
            v = str(v).replace("_", "+")
            pipeline_directory += ["%s-%s" % (k, v)]

        logger.info(pipeline_directory)

        # The identifier always closes the directory name.
        pipeline_directory += ["%s-%s" % ('id', id_)]

        subjects = np.unique(self._info['sa'].subject)
        subdir = str(subjects[0]) if len(subjects) == 1 else 'group'

        result_path = os.path.join(path,
                                   'derivatives',
                                   pipeline_directory[0],
                                   "_".join(pipeline_directory),
                                   subdir)

        return result_path

    def _get_prefix(self):
        """Build the filename prefix for result files.

        The prefix is derived from the basename of the first (sorted,
        unique) entry of the stored ``file`` sample attribute: it is split
        on ``_`` and the last token is dropped. The first token is replaced
        by ``'group'`` when more than one subject is present, and the
        prefix falls back to ``'bids'`` when the basename has no ``_``.

        Returns
        -------
        prefix : str
        """
        fname_list = np.unique(self._info['sa']['file'])
        prefix_list = os.path.basename(fname_list[0]).split("_")

        subjects = np.unique(self._info['sa'].subject)
        if len(subjects) != 1:
            prefix_list[0] = "group"

        if len(prefix_list) == 1:
            # Joining all but the last element yields "bids".
            prefix_list = ["bids", ""]

        return "_".join(prefix_list[:-1])

    # Deprecated maybe
    def _get_fname_info(self):
        """Collect path, task, analysis name and subjects from fit info.

        Returns
        -------
        info : dict
            Keys: ``path``, ``task``, ``analysis``, ``subjects`` and
            ``is_group`` (True when the number of subjects is not one).
        """
        logger.debug(self._info)

        info = dict()
        info['path'] = self._info['a'].data_path
        info['task'] = self._info['a'].task
        info['analysis'] = self.name
        info['subjects'] = self._info['subjects']
        info['is_group'] = len(info['subjects']) != 1

        return info

    def _store_info(self, ds, **kwargs):
        """Gather dataset attributes and fit keywords into a dictionary.

        Parameters
        ----------
        ds : Dataset
            The dataset being fitted; its ``a``, ``sa``, ``targets`` and
            ``summary()`` members are read.
        **kwargs : dict
            Fit keywords. Each value is stored as a string (lists are
            joined with ``+``); the ``prepro`` entry is stored as a
            one-element list holding its ``name``.

        Returns
        -------
        info : dict
        """
        info = dict()
        info['a'] = ds.a.copy()
        info['sa'] = ds.sa.copy()

        info.update({'ds.a.%s' % k: ds.a[k].value
                     for k in ds.a.keys()})
        info.update({'ds.sa.%s' % k: np.unique(ds.sa[k].value)
                     for k in ds.sa.keys()})

        info['targets'] = np.unique(ds.targets)
        info['summary'] = ds.summary()

        for k, v in kwargs.items():
            if isinstance(v, list):
                v = "+".join([str(it) for it in v])

            info[k] = str(v)

            if k == 'prepro':
                # NOTE(review): if 'prepro' was passed as a list, `v` has
                # already been joined into a str above and `.name` raises
                # AttributeError -- confirm 'prepro' is always a single
                # object exposing `.name`.
                info[k] = [v.name]

        if 'subject' in ds.sa.keys():
            info['subjects'] = list(np.unique(ds.sa.subject))

        logger.debug(info)

        return info

    def _get_analysis_info(self):
        """Return the descriptors used to name the result directory.

        Returns
        -------
        info : dict
            Keys: ``analysis`` (this node's name), ``path`` (dataset
            ``data_path``), ``id`` (``"<id>+<num as 4 digits>"``) and
            ``experiment``.
        """
        info = dict()
        info['analysis'] = self.name
        info['path'] = self._info['a'].data_path
        info['id'] = "%s+%04d" % (self.id, self.num)
        info['experiment'] = self._info['a'].experiment

        # Only for testing purposes
        self._test_id = info['id']

        return info

    def _get_info(self, attributes=('estimator', 'scoring', 'cv',
                                    'permutation')):
        """Collect instance attributes and stored dataset information.

        Parameters
        ----------
        attributes : iterable of str, optional
            Names of the instance attributes to include.

        Returns
        -------
        info : dict
            The requested attributes, the stored ``targets``, the unique
            values of every stored sample attribute, and the ``summary``.
        """
        # TODO: It may crash whether used with connectivity
        # TODO: maybe it should be performed on subclasses
        info = dict()
        for k in attributes:
            info[k] = getattr(self, k)

        info['targets'] = self._info['targets']
        for k in self._info['sa'].keys():
            info[k] = np.unique(self._info['sa'][k].value)

        info['summary'] = self._info['summary']

        return info

    # TODO: Look if can be applied to connectivity
    def _get_permutation_indices(self, n_samples):
        """Permutes the indices of the dataset.

        Parameters
        ----------
        n_samples : int
            Number of samples to index.

        Returns
        -------
        indices : list
            The first element is the unpermuted ``range(n_samples)``; it is
            followed by ``self.permutation`` shuffled versions, the r-th one
            being generated with ``random_state=r``.
        """
        # TODO: Permute labels based on cv_attr
        from sklearn.utils import shuffle

        if self.permutation == 0:
            return [range(n_samples)]

        indices = [range(n_samples)]
        for r in range(self.permutation):
            idx = shuffle(indices[0], random_state=r)
            indices.append(idx)

        return indices

    def _get_test_id(self):
        """Return the id stored by `_get_analysis_info`, or None if unset."""
        if '_test_id' in self.__dict__:
            return getattr(self, '_test_id')
        return None

    def _save_dataset_description(self, path):
        """This function saves a dataset_description.json for BIDS dataformat

        The file is written in the parent directory of ``path`` and only
        if it does not exist yet.

        Parameters
        ----------
        path : str
            The path that will be used to save the file
        """
        info = self._get_analysis_info()
        keys = ['pipeline', 'analysis', 'id']

        pipeline_name = "_".join([info[k] for k in keys if k in info])
        description = {
            "Name": "sekupy - Pipelines for neuroimaging",
            "BIDSVersion": "1.1.1",
            "PipelineDescription": {
                "Name": pipeline_name,
            },
            "CodeURL": "https://github.com/robbisg/sekupy"
        }

        dataset_desc = os.path.join(os.path.dirname(path),
                                    "dataset_description.json")

        if not os.path.exists(dataset_desc):
            import json
            with open(dataset_desc, 'w') as fp:
                json.dump(description, fp)

    def _get_prepro_info(self, **kwargs):
        """Extract preprocessing parameters from a configuration.

        Parameters
        ----------
        **kwargs : dict
            Configuration keywords; only inspected when a ``prepro`` entry
            is present.

        Returns
        -------
        params_ : dict
            Parameters of the last matching step among ``sample_slicer``,
            ``target_transformer`` and ``sample_transformer`` (each match
            overwrites the previous one), or an empty dict.
        """
        from sekupy.analysis.utils import get_params

        params_ = {}
        if 'prepro' not in kwargs:
            return params_

        for keyword in ["sample_slicer",
                        "target_transformer",
                        "sample_transformer"]:
            if keyword not in kwargs['prepro']:
                continue

            params_ = get_params(kwargs, keyword)

            if 'fx' in params_ and keyword == 'target_transformer':
                params_['target_transformer-fx'] = params_['fx'][0]

            if keyword == "sample_slicer":
                params_ = {k: "+".join([str(v) for v in value])
                           for k, value in params_.items()}

            if keyword == "sample_transformer":
                params_ = {k: "+".join([str(v) for v in value])
                           for k, value in params_['attr'].items()}

        return params_