# Source code for sekupy.io
from sekupy.io.base import load_dataset
from sekupy.io.configuration import read_configuration
from sekupy.io.subjects import load_subjects
from sekupy.dataset.dataset import vstack
import os
import logging
logger = logging.getLogger(__name__)
# [docs]
def load_ds(conf_file, task,
            extra_sa=None,
            loader=load_dataset,
            prepro=None,
            n_subjects=None,
            selected_subjects=None,
            **kwargs):
    """Load one dataset per subject and stack them into a single dataset.

    The configuration is read from ``conf_file`` for the given ``task``,
    every subject returned by ``load_subjects`` is loaded with ``loader``,
    optionally preprocessed, and the per-subject datasets are stacked.

    Parameters
    ----------
    conf_file : str
        Path of the configuration file
        (see more in ```sekupy.io.configuration.read_configuration```)
    task : str
        name of the task that is used, this should be contained in the
        configuration file
    extra_sa : dictionary, optional
        set of extra sample attributes to be attached to the dataset,
        by default None.
        NOTE(review): this argument is currently replaced by the value
        returned by ``load_subjects``, so a caller-supplied dictionary is
        not used -- confirm whether the two should be merged.
    loader : function, optional
        The function used to load the data in a correct way, called as
        ``loader(data_path, subject, task, **conf)``,
        by default load_dataset
    prepro : ```sekupy.preprocessing.Pipeline``` object
        or list of ```sekupy.preprocessing.Transformer```, optional
        Preprocessing to be performed at dataset-level. Any object with a
        ``transform`` method is called directly; otherwise ``prepro`` is
        iterated and each element's ``transform`` is applied in order.
        By default None, meaning that no preprocessing is performed.
    n_subjects : int, optional
        number of subjects to be loaded, by default None
    selected_subjects : list of string, optional
        name of the subjects to be loaded, by default None
    **kwargs
        Extra options that override / extend the configuration read from
        ``conf_file``; they are also forwarded to ``loader``.

    Returns
    -------
    ```sekupy.dataset.base.Dataset```
        The loaded dataset
    """
    # TODO: conf file should include the full path
    conf = read_configuration(conf_file, task)
    conf.update(kwargs)
    logger.debug(conf)

    data_path = conf['data_path']
    # A one-character data path is replaced by the absolute path of the
    # directory that contains the configuration file.
    if len(data_path) == 1:
        data_path = os.path.abspath(os.path.join(conf_file, os.pardir))
        conf['data_path'] = data_path

    # Subject file should be included in configuration
    # TODO: Keep in mind BIDS
    subjects, extra_sa = load_subjects(conf, selected_subjects, n_subjects)
    logger.debug(subjects)
    logger.info('Merging %s subjects from %s', len(subjects), data_path)

    # Normalise ``prepro`` to a sequence of objects exposing ``transform``
    # so that the default (None) no longer fails and a list is accepted.
    if prepro is None:
        transformers = ()
    elif hasattr(prepro, 'transform'):
        transformers = (prepro,)
    else:
        transformers = tuple(prepro)

    ds_merged = []
    for i, subj in enumerate(subjects):
        # TODO: Keep in mind BIDS
        ds = loader(data_path, subj, task, **conf)
        for transformer in transformers:
            ds = transformer.transform(ds)

        # Attach the extra sample attributes: only entries holding exactly
        # one value per subject are used, and the i-th value is repeated
        # for every sample of the i-th subject.
        if extra_sa is not None:
            n_samples = ds.samples.shape[0]
            for k, v in extra_sa.items():
                if len(v) == len(subjects):
                    ds.sa[k] = [v[i]] * n_samples

        ds_merged.append(ds)

    ds_merged = vstack(ds_merged, a='all')

    if len(subjects) > 1:
        # Keep only the first stored value of each dataset attribute, except
        # for the listed ones (presumably a='all' stores one value per
        # stacked dataset -- TODO confirm against vstack).
        # The keys are snapshotted because entries are reassigned in the loop.
        for k in list(ds_merged.a.keys()):
            if k not in ('snr', 'states', 'time', 'mapper'):
                ds_merged.a[k] = ds_merged.a[k].value[0]

    ds_merged.a.update(conf)
    ds_merged.a['task'] = task

    # Expose the 'name' sample attribute under the key 'subject'.
    if 'name' in ds_merged.sa.keys():
        ds_merged.sa['subject'] = ds_merged.sa.pop('name')

    return ds_merged
# [docs]
def dataset_wizard(X, y=None, **kwargs):
    """Wrap an array into a ``Dataset`` filled with placeholder attributes.

    Parameters
    ----------
    X : object exposing ``shape``
        Data passed as-is to ``Dataset``; ``X.shape[0]`` sizes the sample
        attributes and ``X.shape[1]`` sizes the feature attributes.
    y : optional
        Value stored as the ``targets`` sample attribute, by default None
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ```sekupy.dataset.base.Dataset```
        Dataset built from ``X`` with constant ``subject``, ``file`` and
        ``matrix_values`` attributes and fixed dataset-level attributes.
    """
    from sekupy.dataset.collections import (
        SampleAttributesCollection,
        DatasetAttributesCollection,
        FeatureAttributesCollection,
    )
    from sekupy.dataset.base import Dataset
    import numpy as np

    # Sample-level attributes: one entry per row of X.
    n_samples = X.shape[0]
    sample_attrs = SampleAttributesCollection({
        'targets': y,
        'subject': np.ones(n_samples),
        'file': ["foo.mat"] * n_samples,
    })

    # Feature-level attributes: one entry per column of X.
    feature_attrs = FeatureAttributesCollection(
        {'matrix_values': np.ones(X.shape[1])}
    )

    # Dataset-level attributes are fixed placeholder values.
    dataset_attrs = DatasetAttributesCollection({
        'data_path': '/media/robbis/DATA/meg/hcp/',
        'experiment': 'hcp',
    })

    return Dataset(X, sa=sample_attrs, a=dataset_attrs, fa=feature_attrs)