Module minder_utils.visualisation.sleep_data
Expand source code
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from minder_utils.util import formatting_plots
from minder_utils.configurations import visual_config
sns.set()
class Visual_Sleep:
    def __init__(self, path, style='age', filename='imperial_dementia_20211026'):
        '''
        Visualise the sleep data

        Reads ``<path>/<filename>.csv``, labels every record as 'Early' or
        'Late' from its ``start_date``, buckets ages into labelled groups,
        removes outliers per age group and stores the result in long format
        in ``self.df`` (columns: user_id, Sleep Time, age, State,
        Duration (H)). The wide, un-melted table is kept in ``self.data``.

        Parameters
        ----------
        path: str, path to the sleep data
        style: str, plot style
            - age: hue = age
            - joint: hue = age, style = Sleep Time
            - face: facegrid
            - re: relation plot
            - any other value: plot without hue
        filename: str, name of the csv file without the extension. Files
            whose name contains 'imperial' are read with ';' as delimiter and
            have their age derived from the ``birthdate`` column.
        '''
        self.config = visual_config['sleep']
        self.style = style

        is_imperial = 'imperial' in filename
        csv_path = os.path.join(path, filename + '.csv')
        if is_imperial:
            self.data = pd.read_csv(csv_path, delimiter=';')
        else:
            self.data = pd.read_csv(csv_path)

        # Divide the data by time: records starting between 10:00 and 21:00
        # are 'Early', everything else is 'Late'.
        self.data['start_date'] = pd.to_datetime(self.data['start_date'])
        self.data['Sleep Time'] = 'Late'
        early_rows = pd.DatetimeIndex(self.data['start_date']).indexer_between_time('10:00', '21:00')
        # Write through a single .iloc call on the frame; chained indexing
        # (frame[col].iloc[...] = ...) may modify a temporary copy instead.
        self.data.iloc[early_rows, self.data.columns.get_loc('Sleep Time')] = 'Early'

        if is_imperial:
            # NOTE: ages are computed relative to the hard-coded year 2021.
            self.data['age'] = 2021 - pd.to_datetime(self.data['birthdate']).dt.year
            # .copy() so later column assignments do not target a slice.
            self.data = self.data[self.data['age'] >= 60].copy()

        # Group by ages
        self.data['age'] = self._group_ages(self.data['age'])

        # A raw column whose name is contained in a configured stage name is
        # renamed to that stage name; the first matching stage wins so the
        # number of columns can never change.
        stages = self.config['stages']
        self.data.columns = [
            next((stage for stage in stages if col in stage), col)
            for col in self.data.columns
        ]

        df = self.data[stages].copy()
        for col in stages:
            if col not in ['Sleep Time', 'age', 'user_id']:
                df = self.filter_df(df, col)
                # seconds -> hours
                df[col] = df[col] / 3600

        df['Sleep'] = df['light_duration (s)'] + df['deep_duration (s)'] + df['rem_duration (s)']
        df = df[['user_id', 'awake_duration (s)', 'Sleep Time', 'age', 'Sleep']]
        df = df.melt(id_vars=['user_id', 'Sleep Time', 'age'], var_name='State', value_name='Duration (H)')
        mapping = {
            'awake_duration (s)': 'Awake in bed',
            'Sleep': 'Sleep'
        }
        df['State'] = df['State'].map(mapping)
        self.df = df

    @staticmethod
    def _group_ages(ages):
        '''
        Map numeric ages to the labels '<=50', '50-60', '60-70', '70-80', '>80'.

        Intervals are closed on the right (e.g. 60 falls into '50-60').
        Missing ages stay missing. Returns an object-dtype Series aligned
        with ``ages``.
        '''
        bins = [float('-inf'), 50, 60, 70, 80, float('inf')]
        labels = ['<=50', '50-60', '60-70', '70-80', '>80']
        # Plain labels rather than a categorical, so that unique() and
        # groupby() only ever see the groups actually present.
        return pd.cut(ages, bins=bins, labels=labels).astype(object)

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False)
    def lineplot(self):
        '''Draw ``self.df`` with seaborn's lineplot in the configured style.'''
        self.plt_func(sns.lineplot)

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False)
    def violinplot(self):
        '''Draw ``self.df`` with seaborn's violinplot in the configured style.'''
        self.plt_func(sns.violinplot)

    @formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False)
    def boxplot_separate(self):
        '''Draw ``self.df`` with seaborn's boxplot in the configured style.'''
        self.plt_func(sns.boxplot)

    @formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False)
    def boxplot_joint(self):
        '''Draw one boxplot per state without splitting by age group.'''
        # Temporarily switch to a style plt_func does not know, which makes
        # it fall through to the branch that plots without hue.
        style = self.style
        self.style = 'no_hue'
        self.plt_func(sns.boxplot)
        self.style = style

    @staticmethod
    def filter_df(df, col, width=1.5):
        '''
        Remove the outliers of ``col`` separately for every age group.

        Parameters
        ----------
        df: pd.DataFrame, must contain the columns 'age' and ``col``
        col: str, column the outlier test is applied to
        width: float, multiple of the inter-quartile range that is tolerated
            below Q1 and above Q3

        Returns
        -------
        pd.DataFrame with only the rows whose ``col`` value lies within
        [Q1 - width * IQR, Q3 + width * IQR] of its age group. Rows with a
        missing age or a missing ``col`` value are dropped.
        '''
        kept = []
        for age in df.age.unique():
            group = df[df.age == age]
            values = group[col]
            # Computing IQR
            q1 = values.quantile(0.25)
            q3 = values.quantile(0.75)
            iqr = q3 - q1
            # Filtering Values between Q1-1.5IQR and Q3+1.5IQR
            in_range = (values >= q1 - width * iqr) & (values <= q3 + width * iqr)
            kept.append(group.loc[in_range])
        if not kept:
            # Empty input: nothing to filter, and pd.concat rejects an empty list.
            return df.copy()
        return pd.concat(kept)

    def plt_func(self, func, x_name='State', y_name='Duration (H)', hue_name='age'):
        '''
        Draw ``self.df`` with the seaborn function ``func`` according to ``self.style``.

        Parameters
        ----------
        func: callable, seaborn plotting function (e.g. sns.boxplot)
        x_name: str, column shown on the x axis
        y_name: str, column shown on the y axis
        hue_name: str, column used for colouring / faceting
        '''
        if self.style == 'age':
            # Only keep as many trailing hue_order entries as there are
            # groups present in the data.
            length = len(self.df[hue_name].unique())
            func(x=x_name, y=y_name, hue=hue_name, data=self.df, hue_order=self.config['hue_order'][-length:])
        elif self.style == 'joint':
            try:
                func(x=x_name, y=y_name, hue=hue_name, style='Sleep Time', data=self.df,
                     hue_order=self.config['hue_order'])
            except TypeError:
                # Retry without the `style` keyword for plotting functions
                # that reject it.
                func(x=x_name, y=y_name, hue=hue_name, data=self.df,
                     hue_order=self.config['hue_order'])
        elif self.style == 'face':
            g = sns.FacetGrid(self.df, col=hue_name, row='Sleep Time', col_order=self.config['hue_order'])
            g.map(func, x_name, y_name)
            for axes in g.axes.flat:
                _ = axes.set_xticklabels(axes.get_xticklabels(), rotation=90)
        elif self.style == 're':
            sns.relplot(
                data=self.df,
                x=x_name, y=y_name,
                hue="Sleep Time", col=hue_name,
                kind="line",
            )
        else:
            func(x=x_name, y=y_name, data=self.df, hue_order=self.config['hue_order'])

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False)
    def visual_phy(self):
        '''
        Boxplot of the configured 'phy_stages' columns per age group.

        Every column except 'age' is parsed as a number (decimal commas are
        accepted), has its per-age-group outliers blanked out and is scaled
        by its maximum.
        '''
        df = self.data[self.config['phy_stages']].copy()
        for col in self.config['phy_stages']:
            if col not in ['age']:
                # Values may use a decimal comma, e.g. '12,5' -> 12.5
                df[col] = df[col].apply(lambda x: float(str(x).replace(',', '.')))
                # Assign only the filtered column; rows removed by the filter
                # become NaN through index alignment instead of being dropped
                # for every other column as well.
                df[col] = self.filter_df(df, col)[col]
                df[col] /= df[col].max()
        df = df.melt(id_vars='age', var_name='Other Data', value_name='Value')
        sns.boxplot(x='Other Data', y='Value', hue='age', hue_order=self.config['hue_order'], data=df)

    @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False,
                      title='Time of participants went to bed')
    def visualise_counts(self):
        '''
        Line plot of the share of records starting in each hour of the day,
        one line per age group.
        '''
        df = self.data[['age', 'Sleep Time', 'start_date']].copy()
        df['start_date'] = pd.to_datetime(df['start_date'])
        df['Time'] = df['start_date'].dt.hour
        df['Percentage'] = 1
        df = df.groupby(by=['Sleep Time', 'age', 'Time'])['Percentage'].sum().reset_index()
        # Normalise the counts so that every age group sums to 1.
        df['Percentage'] = df['Percentage'] / df.groupby('age')['Percentage'].transform('sum')
        sns.lineplot(x='Time', y='Percentage', hue='age', linestyle='--', hue_order=self.config['hue_order'], data=df)
        plt.ylim(0, 0.35)
        ticks = ['{:02d}.00'.format(hour) for hour in range(24)]
        plt.xticks(range(24), ticks)
        plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15),
               fancybox=True, shadow=True, ncol=5)
if __name__ == '__main__':
    # Compare the dementia cohort with an age-matched sample of the Withings
    # dataset. NOTE: the data directory below is machine-specific.
    vs = Visual_Sleep('/Users/mozzie/Desktop/DATA.nosync/sleep_mat', 'age')
    vs.visualise_counts()
    vs.boxplot_joint()
    # vs.boxplot_separate()
    vs_with = Visual_Sleep('/Users/mozzie/Desktop/DATA.nosync/sleep_mat', 'age', filename='withings_sleep_dataset')

    # Joint, Controlled age group
    minder = vs.df
    minder['Dataset'] = 'Dementia'
    withings = vs_with.df
    withings_ages = dict(withings.age.value_counts())
    minder_ages = dict(minder.age.value_counts())

    # Smallest control-to-dementia ratio over all age groups: the largest
    # factor by which every group can be scaled without running out of
    # control records.
    min_times = min(withings_ages[age] / minder_ages[age] for age in minder_ages)

    withings_df = []
    mappings = {}
    for age in minder_ages:
        num = minder_ages[age] * min_times
        # int(round(...)) gives a clean integer label regardless of whether
        # round() returns an int or a numpy float; slicing the string
        # instead would corrupt one- and three-digit percentages.
        percentage = int(round(minder_ages[age] / len(minder) * 100))
        mappings[age] = '{} ({}%)'.format(age, percentage)
        withings_df.append(withings[withings.age == age].sample(n=int(num)))
    withings = pd.concat(withings_df)
    withings['Dataset'] = 'Aged Matched Control Group'
    withings.age = withings.age.map(mappings)
    minder.age = minder.age.map(mappings)

    df = pd.concat([minder, withings])
    sns.boxplot(x='State', y='Duration (H)', hue='Dataset', data=df)
    plt.savefig('joint.png')
    plt.clf()

    # NOTE: the column labels are hard-coded and must match the percentages
    # computed above for the dataset in use.
    g = sns.FacetGrid(df, col='age', col_order=['60-70 (59%)', '70-80 (27%)', '>80 (14%)'])
    g.map(sns.boxplot, 'State', 'Duration (H)', 'Dataset')
    g.set(xlabel=None)
    plt.legend(loc='center left', bbox_to_anchor=(-1.4, -0.2),
               fancybox=True, shadow=True, ncol=5)
    plt.savefig('joint_face.png')
Classes
- class Visual_Sleep (path, style='age', filename='imperial_dementia_20211026')
- 
Visualise the sleep data.
Parameters
- path: str, path to the sleep data
- style: str, plot style
  - age: lineplot, hue = age
  - joint: lineplot, hue = age, style = Sleep Time
  - face: facegrid
  - re: relation plot
 
 Expand source codeclass Visual_Sleep: def __init__(self, path, style='age', filename='imperial_dementia_20211026'): ''' Visualise the sleep data Parameters ---------- path: str, path to the sleep data style: str, plot style - age: lineplot, hue = age - joint: lineplot, hue = age, style = Sleep Time - face: facegrid - re: relation plot ''' self.config = visual_config['sleep'] self.style = style if 'imperial' in filename: self.data = pd.read_csv(os.path.join(path, filename + '.csv'), delimiter=';') else: self.data = pd.read_csv(os.path.join(path, filename + '.csv')) # Divide the data by time self.data.start_date = pd.to_datetime(self.data.start_date) self.data['Sleep Time'] = 'Late' index = pd.DatetimeIndex(self.data.start_date) self.data['Sleep Time'].iloc[index.indexer_between_time('10:00', '21:00')] = 'Early' if 'imperial' in filename: self.data['age'] = 2021 - pd.to_datetime(self.data['birthdate']).dt.year self.data = self.data[self.data['age'] >= 60] # Group by ages self.data.age[self.data.age <= 50] = 0 self.data.age[(self.data.age > 50) & (self.data.age <= 60)] = 1 self.data.age[(self.data.age > 60) & (self.data.age <= 70)] = 2 self.data.age[(self.data.age > 70) & (self.data.age <= 80)] = 3 self.data.age[self.data.age > 80] = 4 mapping = { 0: '<=50', 1: '50-60', 2: '60-70', 3: '70-80', 4: '>80' } self.data.age = self.data.age.map(mapping) new_cols = [] for col in self.data.columns: append = True for ele in self.config['stages']: if col in ele: new_cols.append(ele) append = False if append: new_cols.append(col) self.data.columns = new_cols df = self.data[self.config['stages']] for col in self.config['stages']: if col not in ['Sleep Time', 'age', 'user_id']: df = self.filter_df(df, col) df[col] /= 3600 df['Sleep'] = df['light_duration (s)'] + df['deep_duration (s)'] + df['rem_duration (s)'] df = df[['user_id', 'awake_duration (s)', 'Sleep Time', 'age', 'Sleep']] df = df.melt(id_vars=['user_id', 'Sleep Time', 'age'], var_name='State', value_name='Duration (H)') 
mapping = { 'awake_duration (s)': 'Awake in bed', 'Sleep': 'Sleep' } df['State'] = df['State'].map(mapping) self.df = df @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False) def lineplot(self): self.plt_func(sns.lineplot) @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False) def violinplot(self): self.plt_func(sns.violinplot) @formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False) def boxplot_separate(self): self.plt_func(sns.boxplot) @formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False) def boxplot_joint(self): style = self.style self.style = 'no_hue' self.plt_func(sns.boxplot) self.style = style @staticmethod def filter_df(df, col, width=1.5): # Computing IQR new_df = [] for age in df.age.unique(): Q1 = df[df.age == age][col].quantile(0.25) Q3 = df[df.age == age][col].quantile(0.75) IQR = Q3 - Q1 indices = (df[df.age == age][col] >= Q1 - width * IQR) & (df[df.age == age][col] <= Q3 + width * IQR) new_df.append(df[df.age == age].loc[indices]) # Filtering Values between Q1-1.5IQR and Q3+1.5IQR return pd.concat(new_df) def plt_func(self, func, x_name='State', y_name='Duration (H)', hue_name='age'): if self.style == 'age': length = len(self.df[hue_name].unique()) func(x=x_name, y=y_name, hue=hue_name, data=self.df, hue_order=self.config['hue_order'][-length:]) elif self.style == 'joint': try: func(x=x_name, y=y_name, hue=hue_name, style='Sleep Time', data=self.df, hue_order=self.config['hue_order']) except TypeError: func(x=x_name, y=y_name, hue=hue_name, data=self.df, hue_order=self.config['hue_order']) elif self.style == 'face': g = sns.FacetGrid(self.df, col=hue_name, row='Sleep Time', col_order=self.config['hue_order']) g.map(func, x_name, y_name) for axes in g.axes.flat: _ = axes.set_xticklabels(axes.get_xticklabels(), rotation=90) elif 
self.style == 're': sns.relplot( data=self.df, x=x_name, y=y_name, hue="Sleep Time", col=hue_name, kind="line", ) else: func(x=x_name, y=y_name, data=self.df, hue_order=self.config['hue_order']) @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False) def visual_phy(self): df = self.data[self.config['phy_stages']] for col in self.config['phy_stages']: if col not in ['age']: df[col] = df[col].apply(lambda x: float('.'.join(str(x).split(',')))) df[col] = self.filter_df(df, col) df[col] /= df[col].max() df = df.melt(id_vars='age', var_name='Other Data', value_name='Value') sns.boxplot(x='Other Data', y='Value', hue='age', hue_order=self.config['hue_order'], data=df) @formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False, title='Time of participants went to bed') def visualise_counts(self): df = self.data[['age', 'Sleep Time', 'start_date']] df.start_date = pd.to_datetime(df.start_date) df['Time'] = df.start_date.dt.hour df['Percentage'] = 1 df = df.groupby(by=['Sleep Time', 'age', 'Time'])['Percentage'].sum().reset_index() for a in df.age.unique(): df['Percentage'][df.age == a] /= sum(df[df.age == a]['Percentage']) sns.lineplot(x='Time', y='Percentage', hue='age', linestyle='--', hue_order=self.config['hue_order'], data=df) plt.ylim(0, 0.35) ticks = [] for i in range(24): ticks.append('{}.00'.format(str(i).zfill(2))) plt.xticks(range(24), ticks) plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), fancybox=True, shadow=True, ncol=5)Static methods- def filter_df(df, col, width=1.5)
- 
Expand source code@staticmethod def filter_df(df, col, width=1.5): # Computing IQR new_df = [] for age in df.age.unique(): Q1 = df[df.age == age][col].quantile(0.25) Q3 = df[df.age == age][col].quantile(0.75) IQR = Q3 - Q1 indices = (df[df.age == age][col] >= Q1 - width * IQR) & (df[df.age == age][col] <= Q3 + width * IQR) new_df.append(df[df.age == age].loc[indices]) # Filtering Values between Q1-1.5IQR and Q3+1.5IQR return pd.concat(new_df)
 Methods- def boxplot_joint(self)
- 
Expand source code@formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False) def boxplot_joint(self): style = self.style self.style = 'no_hue' self.plt_func(sns.boxplot) self.style = style
- def boxplot_separate(self)
- 
Expand source code@formatting_plots(title='Duration of different states', save_path=visual_config['sleep']['save_path'], rotation=0, legend=False) def boxplot_separate(self): self.plt_func(sns.boxplot)
- def lineplot(self)
- 
Expand source code@formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False) def lineplot(self): self.plt_func(sns.lineplot)
- def plt_func(self, func, x_name='State', y_name='Duration (H)', hue_name='age')
- 
Expand source codedef plt_func(self, func, x_name='State', y_name='Duration (H)', hue_name='age'): if self.style == 'age': length = len(self.df[hue_name].unique()) func(x=x_name, y=y_name, hue=hue_name, data=self.df, hue_order=self.config['hue_order'][-length:]) elif self.style == 'joint': try: func(x=x_name, y=y_name, hue=hue_name, style='Sleep Time', data=self.df, hue_order=self.config['hue_order']) except TypeError: func(x=x_name, y=y_name, hue=hue_name, data=self.df, hue_order=self.config['hue_order']) elif self.style == 'face': g = sns.FacetGrid(self.df, col=hue_name, row='Sleep Time', col_order=self.config['hue_order']) g.map(func, x_name, y_name) for axes in g.axes.flat: _ = axes.set_xticklabels(axes.get_xticklabels(), rotation=90) elif self.style == 're': sns.relplot( data=self.df, x=x_name, y=y_name, hue="Sleep Time", col=hue_name, kind="line", ) else: func(x=x_name, y=y_name, data=self.df, hue_order=self.config['hue_order'])
- def violinplot(self)
- 
Expand source code@formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False) def violinplot(self): self.plt_func(sns.violinplot)
- def visual_phy(self)
- 
Expand source code@formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False) def visual_phy(self): df = self.data[self.config['phy_stages']] for col in self.config['phy_stages']: if col not in ['age']: df[col] = df[col].apply(lambda x: float('.'.join(str(x).split(',')))) df[col] = self.filter_df(df, col) df[col] /= df[col].max() df = df.melt(id_vars='age', var_name='Other Data', value_name='Value') sns.boxplot(x='Other Data', y='Value', hue='age', hue_order=self.config['hue_order'], data=df)
- def visualise_counts(self)
- 
Expand source code@formatting_plots(save_path=visual_config['sleep']['save_path'], rotation=90, legend=False, title='Time of participants went to bed') def visualise_counts(self): df = self.data[['age', 'Sleep Time', 'start_date']] df.start_date = pd.to_datetime(df.start_date) df['Time'] = df.start_date.dt.hour df['Percentage'] = 1 df = df.groupby(by=['Sleep Time', 'age', 'Time'])['Percentage'].sum().reset_index() for a in df.age.unique(): df['Percentage'][df.age == a] /= sum(df[df.age == a]['Percentage']) sns.lineplot(x='Time', y='Percentage', hue='age', linestyle='--', hue_order=self.config['hue_order'], data=df) plt.ylim(0, 0.35) ticks = [] for i in range(24): ticks.append('{}.00'.format(str(i).zfill(2))) plt.xticks(range(24), ticks) plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), fancybox=True, shadow=True, ncol=5)