Module minder_utils.visualisation.sleep_data
Expand source code
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from minder_utils.util import formatting_plots
from minder_utils.configurations import visual_config
sns.set()
class Visual_Sleep:
    '''
    Load a sleep dataset from CSV and plot sleep / awake durations by age group.

    Attributes set by ``__init__``
    ------------------------------
    config: the ``visual_config['sleep']`` settings
    style: str, plot style used by ``plt_func``
    data: the loaded table with the ``Sleep Time`` and grouped ``age`` columns added
    df: long-format table with columns user_id, Sleep Time, age, State, Duration (H)
    '''

    def __init__(self, path, style='age', filename='imperial_dementia_20211026'):
        '''
        Visualise the sleep data

        Parameters
        ----------
        path: str, path to the sleep data
        style: str, plot style
            - age: lineplot, hue = age
            - joint: lineplot, hue = age, style = Sleep Time
            - face: facegrid
            - re: relation plot
        filename: str, name of the CSV file without the ``.csv`` extension.
            Files whose name contains 'imperial' are read with ';' as delimiter
            and get their age computed from the ``birthdate`` column.
        '''
        self.config = visual_config['sleep']
        self.style = style
        is_imperial = 'imperial' in filename
        csv_path = os.path.join(path, filename + '.csv')
        if is_imperial:
            self.data = pd.read_csv(csv_path, delimiter=';')
        else:
            self.data = pd.read_csv(csv_path)

        # Divide the data by time: records starting between 10:00 and 21:00
        # are labelled 'Early', everything else stays 'Late'.
        self.data['start_date'] = pd.to_datetime(self.data['start_date'])
        self.data['Sleep Time'] = 'Late'
        index = pd.DatetimeIndex(self.data['start_date'])
        early_rows = index.indexer_between_time('10:00', '21:00')
        # Single positional assignment on the frame itself; the chained
        # ``frame[col].iloc[...] = value`` form may write to a temporary copy.
        self.data.iloc[early_rows, self.data.columns.get_loc('Sleep Time')] = 'Early'

        if is_imperial:
            self.data['age'] = 2021 - pd.to_datetime(self.data['birthdate']).dt.year
            # Copy so that later column assignments do not target a slice.
            self.data = self.data[self.data['age'] >= 60].copy()

        # Group by ages: (-inf, 50], (50, 60], (60, 70], (70, 80], (80, inf)
        age_labels = ['<=50', '50-60', '60-70', '70-80', '>80']
        age_edges = [float('-inf'), 50, 60, 70, 80, float('inf')]
        # astype(object) keeps plain string labels rather than a categorical,
        # so later groupby calls only see the groups actually present.
        self.data['age'] = pd.cut(self.data['age'], bins=age_edges, labels=age_labels).astype(object)

        # Rename every column whose name is contained in a configured stage
        # name to that stage name; other columns keep their name.
        new_cols = []
        for col in self.data.columns:
            for stage in self.config['stages']:
                if col in stage:
                    new_cols.append(stage)
                    # First match wins, so the number of columns is preserved.
                    break
            else:
                new_cols.append(col)
        self.data.columns = new_cols

        df = self.data[self.config['stages']].copy()
        for col in self.config['stages']:
            if col not in ['Sleep Time', 'age', 'user_id']:
                # Remove per-age-group outliers, then divide by 3600
                # (the duration columns are named '... (s)', the output '(H)').
                df = self.filter_df(df, col)
                df[col] = df[col] / 3600
        df['Sleep'] = df['light_duration (s)'] + df['deep_duration (s)'] + df['rem_duration (s)']
        df = df[['user_id', 'awake_duration (s)', 'Sleep Time', 'age', 'Sleep']]
        df = df.melt(id_vars=['user_id', 'Sleep Time', 'age'], var_name='State', value_name='Duration (H)')
        mapping = {
            'awake_duration (s)': 'Awake in bed',
            'Sleep': 'Sleep'
        }
        df['State'] = df['State'].map(mapping)
        self.df = df

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False)
    def lineplot(self):
        '''Draw ``self.df`` with ``sns.lineplot`` using the current style.'''
        self.plt_func(sns.lineplot)

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False)
    def violinplot(self):
        '''Draw ``self.df`` with ``sns.violinplot`` using the current style.'''
        self.plt_func(sns.violinplot)

    @formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False)
    def boxplot_separate(self):
        '''Draw ``self.df`` with ``sns.boxplot`` using the current style.'''
        self.plt_func(sns.boxplot)

    @formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False)
    def boxplot_joint(self):
        '''Draw ``self.df`` with ``sns.boxplot`` without splitting by age.'''
        previous_style = self.style
        # Any style other than age/joint/face/re sends plt_func to its
        # no-hue branch.
        self.style = 'no_hue'
        try:
            self.plt_func(sns.boxplot)
        finally:
            # Restore the configured style even if plotting raises.
            self.style = previous_style

    @staticmethod
    def filter_df(df, col, width=1.5):
        '''
        Remove outliers of ``col`` separately within every ``age`` group.

        Parameters
        ----------
        df: pandas DataFrame with an ``age`` column and the column ``col``
        col: str, name of the column to filter on
        width: float, multiple of the IQR kept on either side of the quartiles

        Returns
        -------
        DataFrame with the rows whose ``col`` value lies in
        [Q1 - width * IQR, Q3 + width * IQR] of its age group, concatenated
        group by group. An empty DataFrame yields an empty DataFrame.
        '''
        kept = []
        for age in df.age.unique():
            # Select the group once and reuse it for quantiles and the mask.
            group = df[df.age == age]
            values = group[col]
            q1 = values.quantile(0.25)
            q3 = values.quantile(0.75)
            iqr = q3 - q1
            in_range = (values >= q1 - width * iqr) & (values <= q3 + width * iqr)
            kept.append(group.loc[in_range])
        if not kept:
            # pd.concat raises on an empty list; keep the columns, no rows.
            return df.iloc[0:0]
        return pd.concat(kept)

    def plt_func(self, func, x_name='State', y_name='Duration (H)', hue_name='age'):
        '''
        Call the plotting function ``func`` on ``self.df`` according to ``self.style``.

        Parameters
        ----------
        func: callable taking seaborn-style keyword arguments (x, y, hue, data, ...)
        x_name: str, column for the x axis
        y_name: str, column for the y axis
        hue_name: str, column used for hue (or facet columns)
        '''
        if self.style == 'age':
            # Use only the last entries of the configured order, one per group present.
            n_groups = len(self.df[hue_name].unique())
            func(x=x_name, y=y_name, hue=hue_name, data=self.df,
                 hue_order=self.config['hue_order'][-n_groups:])
            return

        if self.style == 'joint':
            try:
                func(x=x_name, y=y_name, hue=hue_name, style='Sleep Time', data=self.df,
                     hue_order=self.config['hue_order'])
            except TypeError:
                # Retry without ``style`` when the call above raised TypeError.
                func(x=x_name, y=y_name, hue=hue_name, data=self.df,
                     hue_order=self.config['hue_order'])
            return

        if self.style == 'face':
            grid = sns.FacetGrid(self.df, col=hue_name, row='Sleep Time', col_order=self.config['hue_order'])
            grid.map(func, x_name, y_name)
            for axes in grid.axes.flat:
                axes.set_xticklabels(axes.get_xticklabels(), rotation=90)
            return

        if self.style == 're':
            # This style always draws a line relplot; ``func`` is not used.
            sns.relplot(
                data=self.df,
                x=x_name, y=y_name,
                hue="Sleep Time", col=hue_name,
                kind="line",
            )
            return

        func(x=x_name, y=y_name, data=self.df, hue_order=self.config['hue_order'])

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False)
    def visual_phy(self):
        '''
        Box plot of the configured ``phy_stages`` columns, each scaled by its
        maximum, grouped by age.
        '''
        df = self.data[self.config['phy_stages']].copy()
        for col in self.config['phy_stages']:
            if col != 'age':
                # Values may use ',' as decimal separator; convert to float.
                df[col] = df[col].apply(lambda x: float(str(x).replace(',', '.')))
                # filter_df returns the filtered frame, so it replaces df
                # (it cannot be assigned to a single column).
                df = self.filter_df(df, col)
                df[col] = df[col] / df[col].max()
        df = df.melt(id_vars='age', var_name='Other Data', value_name='Value')
        sns.boxplot(x='Other Data', y='Value', hue='age', hue_order=self.config['hue_order'], data=df)

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False,
                      title='Time of participants went to bed')
    def visualise_counts(self):
        '''
        Line plot of the share of records per start hour, one line per age group.
        '''
        df = self.data[['age', 'Sleep Time', 'start_date']].copy()
        df['start_date'] = pd.to_datetime(df['start_date'])
        df['Time'] = df['start_date'].dt.hour
        df['Percentage'] = 1
        df = df.groupby(by=['Sleep Time', 'age', 'Time'])['Percentage'].sum().reset_index()
        # Normalise the counts so that every age group sums to 1.
        df['Percentage'] = df['Percentage'] / df.groupby('age')['Percentage'].transform('sum')
        sns.lineplot(x='Time', y='Percentage', hue='age', linestyle='--', hue_order=self.config['hue_order'], data=df)
        plt.ylim(0, 0.35)
        ticks = ['{}.00'.format(str(hour).zfill(2)) for hour in range(24)]
        plt.xticks(range(24), ticks)
        plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15),
                   fancybox=True, shadow=True, ncol=5)
if __name__ == '__main__':
    # Script entry point: plot the dementia dataset, then compare it with an
    # age-matched sample of the withings dataset.
    # NOTE: the data directory is machine-specific.
    data_dir = '/Users/mozzie/Desktop/DATA.nosync/sleep_mat'
    vs = Visual_Sleep(data_dir, 'age')
    vs.visualise_counts()
    vs.boxplot_joint()
    # vs.boxplot_separate()
    vs_with = Visual_Sleep(data_dir, 'age', filename='withings_sleep_dataset')

    # Joint, Controlled age group
    # Work on a copy so the added column and relabelled ages do not leak
    # back into vs.df.
    minder = vs.df.copy()
    minder['Dataset'] = 'Dementia'
    withings = vs_with.df
    withings_ages = dict(withings.age.value_counts())
    minder_ages = dict(minder.age.value_counts())

    # Smallest withings/minder ratio over the age groups (capped at 100000):
    # sampling minder_count * ratio rows per group keeps the minder age
    # distribution without asking for more rows than any group has.
    min_times = min([100000] + [withings_ages[age] / minder_ages[age] for age in minder_ages])

    withings_df = []
    mappings = {}
    for age in minder_ages:
        num = minder_ages[age] * min_times
        # Label each group with its rounded share of the minder records;
        # formatting the whole number keeps '100%' intact.
        mappings[age] = '{} ({}%)'.format(age, round(minder_ages[age] / len(minder) * 100))
        # NOTE: sampling is unseeded, so the control group differs between runs.
        withings_df.append(withings[withings.age == age].sample(n=int(num)))
    withings = pd.concat(withings_df)
    withings['Dataset'] = 'Aged Matched Control Group'
    withings.age = withings.age.map(mappings)
    minder.age = minder.age.map(mappings)
    df = pd.concat([minder, withings])

    sns.boxplot(x='State', y='Duration (H)', hue='Dataset', data=df)
    plt.savefig('joint.png')
    plt.clf()

    # Same three age groups as before, but with labels taken from the computed
    # mapping so they always match the data.
    facet_order = [mappings[age] for age in ('60-70', '70-80', '>80') if age in mappings]
    g = sns.FacetGrid(df, col='age', col_order=facet_order)
    g.map(sns.boxplot, 'State', 'Duration (H)', 'Dataset')
    g.set(xlabel=None)
    plt.legend(loc='center left', bbox_to_anchor=(-1.4, -0.2),
               fancybox=True, shadow=True, ncol=5)
    plt.savefig('joint_face.png')
Classes
class Visual_Sleep (path, style='age', filename='imperial_dementia_20211026')
-
Visualise the sleep data.
Parameters
path
:str, path to the sleep data
style
:str, plot style
-
- age: lineplot, hue = age
- joint: lineplot, hue = age, style = Sleep Time
- face: facegrid
- re: relation plot
Expand source code
class Visual_Sleep:
    '''
    Load a sleep dataset from CSV and plot sleep / awake durations by age group.

    Attributes set by ``__init__``
    ------------------------------
    config: the ``visual_config['sleep']`` settings
    style: str, plot style used by ``plt_func``
    data: the loaded table with the ``Sleep Time`` and grouped ``age`` columns added
    df: long-format table with columns user_id, Sleep Time, age, State, Duration (H)
    '''

    def __init__(self, path, style='age', filename='imperial_dementia_20211026'):
        '''
        Visualise the sleep data

        Parameters
        ----------
        path: str, path to the sleep data
        style: str, plot style
            - age: lineplot, hue = age
            - joint: lineplot, hue = age, style = Sleep Time
            - face: facegrid
            - re: relation plot
        filename: str, name of the CSV file without the ``.csv`` extension.
            Files whose name contains 'imperial' are read with ';' as delimiter
            and get their age computed from the ``birthdate`` column.
        '''
        self.config = visual_config['sleep']
        self.style = style
        is_imperial = 'imperial' in filename
        csv_path = os.path.join(path, filename + '.csv')
        if is_imperial:
            self.data = pd.read_csv(csv_path, delimiter=';')
        else:
            self.data = pd.read_csv(csv_path)

        # Divide the data by time: records starting between 10:00 and 21:00
        # are labelled 'Early', everything else stays 'Late'.
        self.data['start_date'] = pd.to_datetime(self.data['start_date'])
        self.data['Sleep Time'] = 'Late'
        index = pd.DatetimeIndex(self.data['start_date'])
        early_rows = index.indexer_between_time('10:00', '21:00')
        # Single positional assignment on the frame itself; the chained
        # ``frame[col].iloc[...] = value`` form may write to a temporary copy.
        self.data.iloc[early_rows, self.data.columns.get_loc('Sleep Time')] = 'Early'

        if is_imperial:
            self.data['age'] = 2021 - pd.to_datetime(self.data['birthdate']).dt.year
            # Copy so that later column assignments do not target a slice.
            self.data = self.data[self.data['age'] >= 60].copy()

        # Group by ages: (-inf, 50], (50, 60], (60, 70], (70, 80], (80, inf)
        age_labels = ['<=50', '50-60', '60-70', '70-80', '>80']
        age_edges = [float('-inf'), 50, 60, 70, 80, float('inf')]
        # astype(object) keeps plain string labels rather than a categorical,
        # so later groupby calls only see the groups actually present.
        self.data['age'] = pd.cut(self.data['age'], bins=age_edges, labels=age_labels).astype(object)

        # Rename every column whose name is contained in a configured stage
        # name to that stage name; other columns keep their name.
        new_cols = []
        for col in self.data.columns:
            for stage in self.config['stages']:
                if col in stage:
                    new_cols.append(stage)
                    # First match wins, so the number of columns is preserved.
                    break
            else:
                new_cols.append(col)
        self.data.columns = new_cols

        df = self.data[self.config['stages']].copy()
        for col in self.config['stages']:
            if col not in ['Sleep Time', 'age', 'user_id']:
                # Remove per-age-group outliers, then divide by 3600
                # (the duration columns are named '... (s)', the output '(H)').
                df = self.filter_df(df, col)
                df[col] = df[col] / 3600
        df['Sleep'] = df['light_duration (s)'] + df['deep_duration (s)'] + df['rem_duration (s)']
        df = df[['user_id', 'awake_duration (s)', 'Sleep Time', 'age', 'Sleep']]
        df = df.melt(id_vars=['user_id', 'Sleep Time', 'age'], var_name='State', value_name='Duration (H)')
        mapping = {
            'awake_duration (s)': 'Awake in bed',
            'Sleep': 'Sleep'
        }
        df['State'] = df['State'].map(mapping)
        self.df = df

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False)
    def lineplot(self):
        '''Draw ``self.df`` with ``sns.lineplot`` using the current style.'''
        self.plt_func(sns.lineplot)

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False)
    def violinplot(self):
        '''Draw ``self.df`` with ``sns.violinplot`` using the current style.'''
        self.plt_func(sns.violinplot)

    @formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False)
    def boxplot_separate(self):
        '''Draw ``self.df`` with ``sns.boxplot`` using the current style.'''
        self.plt_func(sns.boxplot)

    @formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False)
    def boxplot_joint(self):
        '''Draw ``self.df`` with ``sns.boxplot`` without splitting by age.'''
        previous_style = self.style
        # Any style other than age/joint/face/re sends plt_func to its
        # no-hue branch.
        self.style = 'no_hue'
        try:
            self.plt_func(sns.boxplot)
        finally:
            # Restore the configured style even if plotting raises.
            self.style = previous_style

    @staticmethod
    def filter_df(df, col, width=1.5):
        '''
        Remove outliers of ``col`` separately within every ``age`` group.

        Parameters
        ----------
        df: pandas DataFrame with an ``age`` column and the column ``col``
        col: str, name of the column to filter on
        width: float, multiple of the IQR kept on either side of the quartiles

        Returns
        -------
        DataFrame with the rows whose ``col`` value lies in
        [Q1 - width * IQR, Q3 + width * IQR] of its age group, concatenated
        group by group. An empty DataFrame yields an empty DataFrame.
        '''
        kept = []
        for age in df.age.unique():
            # Select the group once and reuse it for quantiles and the mask.
            group = df[df.age == age]
            values = group[col]
            q1 = values.quantile(0.25)
            q3 = values.quantile(0.75)
            iqr = q3 - q1
            in_range = (values >= q1 - width * iqr) & (values <= q3 + width * iqr)
            kept.append(group.loc[in_range])
        if not kept:
            # pd.concat raises on an empty list; keep the columns, no rows.
            return df.iloc[0:0]
        return pd.concat(kept)

    def plt_func(self, func, x_name='State', y_name='Duration (H)', hue_name='age'):
        '''
        Call the plotting function ``func`` on ``self.df`` according to ``self.style``.

        Parameters
        ----------
        func: callable taking seaborn-style keyword arguments (x, y, hue, data, ...)
        x_name: str, column for the x axis
        y_name: str, column for the y axis
        hue_name: str, column used for hue (or facet columns)
        '''
        if self.style == 'age':
            # Use only the last entries of the configured order, one per group present.
            n_groups = len(self.df[hue_name].unique())
            func(x=x_name, y=y_name, hue=hue_name, data=self.df,
                 hue_order=self.config['hue_order'][-n_groups:])
            return

        if self.style == 'joint':
            try:
                func(x=x_name, y=y_name, hue=hue_name, style='Sleep Time', data=self.df,
                     hue_order=self.config['hue_order'])
            except TypeError:
                # Retry without ``style`` when the call above raised TypeError.
                func(x=x_name, y=y_name, hue=hue_name, data=self.df,
                     hue_order=self.config['hue_order'])
            return

        if self.style == 'face':
            grid = sns.FacetGrid(self.df, col=hue_name, row='Sleep Time', col_order=self.config['hue_order'])
            grid.map(func, x_name, y_name)
            for axes in grid.axes.flat:
                axes.set_xticklabels(axes.get_xticklabels(), rotation=90)
            return

        if self.style == 're':
            # This style always draws a line relplot; ``func`` is not used.
            sns.relplot(
                data=self.df,
                x=x_name, y=y_name,
                hue="Sleep Time", col=hue_name,
                kind="line",
            )
            return

        func(x=x_name, y=y_name, data=self.df, hue_order=self.config['hue_order'])

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False)
    def visual_phy(self):
        '''
        Box plot of the configured ``phy_stages`` columns, each scaled by its
        maximum, grouped by age.
        '''
        df = self.data[self.config['phy_stages']].copy()
        for col in self.config['phy_stages']:
            if col != 'age':
                # Values may use ',' as decimal separator; convert to float.
                df[col] = df[col].apply(lambda x: float(str(x).replace(',', '.')))
                # filter_df returns the filtered frame, so it replaces df
                # (it cannot be assigned to a single column).
                df = self.filter_df(df, col)
                df[col] = df[col] / df[col].max()
        df = df.melt(id_vars='age', var_name='Other Data', value_name='Value')
        sns.boxplot(x='Other Data', y='Value', hue='age', hue_order=self.config['hue_order'], data=df)

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False,
                      title='Time of participants went to bed')
    def visualise_counts(self):
        '''
        Line plot of the share of records per start hour, one line per age group.
        '''
        df = self.data[['age', 'Sleep Time', 'start_date']].copy()
        df['start_date'] = pd.to_datetime(df['start_date'])
        df['Time'] = df['start_date'].dt.hour
        df['Percentage'] = 1
        df = df.groupby(by=['Sleep Time', 'age', 'Time'])['Percentage'].sum().reset_index()
        # Normalise the counts so that every age group sums to 1.
        df['Percentage'] = df['Percentage'] / df.groupby('age')['Percentage'].transform('sum')
        sns.lineplot(x='Time', y='Percentage', hue='age', linestyle='--', hue_order=self.config['hue_order'], data=df)
        plt.ylim(0, 0.35)
        ticks = ['{}.00'.format(str(hour).zfill(2)) for hour in range(24)]
        plt.xticks(range(24), ticks)
        plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15),
                   fancybox=True, shadow=True, ncol=5)
Static methods
def filter_df(df, col, width=1.5)
-
Expand source code
@staticmethod def filter_df(df, col, width=1.5): # Computing IQR new_df = [] for age in df.age.unique(): Q1 = df[df.age == age][col].quantile(0.25) Q3 = df[df.age == age][col].quantile(0.75) IQR = Q3 - Q1 indices = (df[df.age == age][col] >= Q1 - width * IQR) & (df[df.age == age][col] <= Q3 + width * IQR) new_df.append(df[df.age == age].loc[indices]) # Filtering Values between Q1-1.5IQR and Q3+1.5IQR return pd.concat(new_df)
Methods
def boxplot_joint(self)
-
Expand source code
@formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False)
def boxplot_joint(self):
    '''Draw ``self.df`` with ``sns.boxplot`` without splitting by age.'''
    previous_style = self.style
    # Any style other than age/joint/face/re sends plt_func to its
    # no-hue branch.
    self.style = 'no_hue'
    try:
        self.plt_func(sns.boxplot)
    finally:
        # Restore the configured style even if plotting raises.
        self.style = previous_style
def boxplot_separate(self)
-
Expand source code
@formatting_plots(
    title='Duration of different states',
    save_path=visual_config['sleep']['save_path'],
    rotation=0,
    legend=False,
)
def boxplot_separate(self):
    '''Draw ``self.df`` with ``sns.boxplot`` using the current style.'''
    plot = sns.boxplot
    self.plt_func(plot)
def lineplot(self)
-
Expand source code
@formatting_plots(
    save_path=visual_config['sleep']['save_path'],
    rotation=90,
    legend=False,
)
def lineplot(self):
    '''Draw ``self.df`` with ``sns.lineplot`` using the current style.'''
    plot = sns.lineplot
    self.plt_func(plot)
def plt_func(self, func, x_name='State', y_name='Duration (H)', hue_name='age')
-
Expand source code
def plt_func(self, func, x_name='State', y_name='Duration (H)', hue_name='age'):
    '''
    Call the plotting function ``func`` on ``self.df`` according to ``self.style``.

    Parameters
    ----------
    func: callable taking seaborn-style keyword arguments (x, y, hue, data, ...)
    x_name: str, column for the x axis
    y_name: str, column for the y axis
    hue_name: str, column used for hue (or facet columns)
    '''
    if self.style == 'age':
        # Use only the last entries of the configured order, one per group present.
        n_groups = len(self.df[hue_name].unique())
        func(x=x_name, y=y_name, hue=hue_name, data=self.df,
             hue_order=self.config['hue_order'][-n_groups:])
        return

    if self.style == 'joint':
        try:
            func(x=x_name, y=y_name, hue=hue_name, style='Sleep Time', data=self.df,
                 hue_order=self.config['hue_order'])
        except TypeError:
            # Retry without ``style`` when the call above raised TypeError.
            func(x=x_name, y=y_name, hue=hue_name, data=self.df,
                 hue_order=self.config['hue_order'])
        return

    if self.style == 'face':
        grid = sns.FacetGrid(self.df, col=hue_name, row='Sleep Time', col_order=self.config['hue_order'])
        grid.map(func, x_name, y_name)
        for axes in grid.axes.flat:
            _ = axes.set_xticklabels(axes.get_xticklabels(), rotation=90)
        return

    if self.style == 're':
        # This style always draws a line relplot; ``func`` is not used.
        sns.relplot(
            data=self.df,
            x=x_name, y=y_name,
            hue="Sleep Time", col=hue_name,
            kind="line",
        )
        return

    func(x=x_name, y=y_name, data=self.df, hue_order=self.config['hue_order'])
def violinplot(self)
-
Expand source code
@formatting_plots(
    save_path=visual_config['sleep']['save_path'],
    rotation=90,
    legend=False,
)
def violinplot(self):
    '''Draw ``self.df`` with ``sns.violinplot`` using the current style.'''
    plot = sns.violinplot
    self.plt_func(plot)
def visual_phy(self)
-
Expand source code
@formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False)
def visual_phy(self):
    '''
    Box plot of the configured ``phy_stages`` columns, each scaled by its
    maximum, grouped by age.
    '''
    # Copy so the conversions below never write into a slice of self.data.
    df = self.data[self.config['phy_stages']].copy()
    for col in self.config['phy_stages']:
        if col != 'age':
            # Values may use ',' as decimal separator; convert to float.
            df[col] = df[col].apply(lambda x: float(str(x).replace(',', '.')))
            # filter_df returns the filtered frame, so it replaces df
            # (it cannot be assigned to a single column).
            df = self.filter_df(df, col)
            df[col] = df[col] / df[col].max()
    df = df.melt(id_vars='age', var_name='Other Data', value_name='Value')
    sns.boxplot(x='Other Data', y='Value', hue='age', hue_order=self.config['hue_order'], data=df)
def visualise_counts(self)
-
Expand source code
@formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False,
                  title='Time of participants went to bed')
def visualise_counts(self):
    '''
    Line plot of the share of records per start hour, one line per age group.
    '''
    # Copy so the added columns never write into a slice of self.data.
    df = self.data[['age', 'Sleep Time', 'start_date']].copy()
    df['start_date'] = pd.to_datetime(df['start_date'])
    df['Time'] = df['start_date'].dt.hour
    df['Percentage'] = 1
    df = df.groupby(by=['Sleep Time', 'age', 'Time'])['Percentage'].sum().reset_index()
    # Normalise the counts so that every age group sums to 1; a single
    # column assignment replaces the chained in-place division, which may
    # only modify a temporary copy.
    df['Percentage'] = df['Percentage'] / df.groupby('age')['Percentage'].transform('sum')
    sns.lineplot(x='Time', y='Percentage', hue='age', linestyle='--', hue_order=self.config['hue_order'], data=df)
    plt.ylim(0, 0.35)
    ticks = ['{}.00'.format(str(hour).zfill(2)) for hour in range(24)]
    plt.xticks(range(24), ticks)
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15),
               fancybox=True, shadow=True, ncol=5)