Source code for sekupy.utils.dataset
import numpy as np
import os
import pandas as pd
[docs]
def ds_to_dataframe(ds, keys=None, melt=False):
    """Convert a dataset into a pandas DataFrame, one row per sample.

    Each row holds the requested sample attributes followed by one column
    per feature, named ``feature_0001``, ``feature_0002``, ...

    Parameters
    ----------
    ds : dataset
        Iterable dataset. Iterating it must yield single samples exposing
        ``.sa[key].value`` and ``.samples``; ``ds.sa.keys()`` is only used
        when ``keys`` is None.
    keys : list, optional
        Names of the sample attributes to include as columns (the default
        is None, which uses every attribute in ``ds.sa``).
    melt : bool, optional
        If True the feature columns are unpivoted with ``pandas.melt``,
        keeping ``keys`` as identifier columns and storing the feature
        values in a ``value`` column (the default is False, which returns
        the wide table).

    Returns
    -------
    pandas.DataFrame
        The wide table, or its melted (long) form when ``melt`` is True.
    """
    if keys is None:
        keys = list(ds.sa.keys())

    rows = []
    for sample in ds:
        # A fresh dict per sample, so no value can leak from a previous row.
        row = {k: sample.sa[k].value[0] for k in keys}

        # squeeze() turns a single-feature sample into a 0-d array, which
        # cannot be iterated; atleast_1d restores a length-1 vector and is
        # a no-op for samples with more than one feature.
        sample_data = np.atleast_1d(sample.samples.squeeze())
        row.update(
            {"feature_%04d" % (i + 1): v for i, v in enumerate(sample_data)}
        )
        rows.append(row)

    df = pd.DataFrame(rows)

    if melt:
        df = pd.melt(df, id_vars=keys, value_name='value')

    return df
[docs]
def get_ds_data(ds, target_attribute='targets'):
    """Return the data matrix and the labels of a dataset.

    Convenience accessor for feeding a pymvpa-style dataset to
    scikit-learn estimators.

    Parameters
    ----------
    ds : dataset ``sekupy.dataset.base.Dataset``
        The dataset in pymvpa format.
    target_attribute : str, optional
        Name of the sample attribute holding the labels
        (the default is 'targets').

    Returns
    -------
    X, y
        ``ds.samples`` and the ``value`` of the requested sample
        attribute, returned as a tuple.
    """
    X = ds.samples
    y = ds.sa[target_attribute].value
    return X, y
[docs]
def temporal_attribute_reshaping(attribute_list, time_attribute):
    """Collapse a per-timepoint attribute to one label per group of timepoints.

    The attribute is reshaped into rows of ``n_times`` entries, where
    ``n_times`` is the number of unique values in ``time_attribute``, and
    the most frequent value of each row is returned.

    Parameters
    ----------
    attribute_list : array-like
        Attribute values, one per sample. Its length must be a multiple of
        the number of unique time values, otherwise the reshape raises
        ``ValueError``. Assumes consecutive runs of ``n_times`` entries
        belong together -- confirm against the caller's sample ordering.
    time_attribute : array-like
        Time label of each sample; only its number of unique values is used.

    Returns
    -------
    numpy.ndarray
        One label per row: the most frequent value in that row. On a tie
        the first value in ``np.unique`` (sorted) order wins.
    """
    n_times = len(np.unique(time_attribute))

    # asarray lets plain Python sequences through; ndarrays are not copied.
    rows = np.asarray(attribute_list).reshape(-1, n_times)

    labels = []
    for row in rows:
        values, counts = np.unique(row, return_counts=True)
        labels.append(values[np.argmax(counts)])

    return np.array(labels)