Module minder_utils.visualisation.feature_engineering
Expand source code
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import pandas as pd
from minder_utils.formatting.label import label_by_week, label_dataframe
from minder_utils.feature_engineering import Feature_engineer
from minder_utils.feature_engineering.calculation import *
from minder_utils.util import formatting_plots
from minder_utils.formatting import Formatting
fe = Feature_engineer(Formatting())
sns.set()
att = 'bathroom_night'
figure_title = {
'bathroom_night': 'Bathroom activity during the night',
'bathroom_daytime': 'Bathroom activity during the day',
}
patient_id = ''
def process_dataframe(df, week_shift=0):
df = df[df.id == patient_id]
map_dict = {i: j - week_shift for j, i in enumerate(df.week.sort_values().unique())}
df.week = df.week.map(map_dict)
return df
def visualise_flags(df):
for v in [True, False]:
data = df[df.valid == v]
not_labelled = True
for week in data.week.unique():
if v is True:
plt.axvline(week, 0, 0.17, color='red', label='UTI' if not_labelled else None)
not_labelled = False
elif v is False:
plt.axvline(week, 0, 0.17, color='blue', label='not UTI' if not_labelled else None)
not_labelled = False
@formatting_plots(figure_title[att])
def visualise_weekly_data(df):
df = process_dataframe(df)
sns.violinplot(data=df, x='week', y='value')
visualise_flags(df)
return df
@formatting_plots('P value, ' + figure_title[att])
def visualise_weekly_statistical_analysis(df, results):
df = process_dataframe(df, 1)
visualise_flags(df)
data = results[patient_id]
df = {'week': [], 'p_value': []}
for idx, sta in enumerate(data):
df['week'].append(idx + 1)
df['p_value'].append(sta[1])
sns.lineplot(df['week'], df['p_value'])
@formatting_plots('Body temperature')
def visualise_body_temperature(df):
df = process_dataframe(df)
visualise_flags(df)
sns.lineplot(df.week, df.value)
def visualise_data_time_lineplot(time_array, values_array, name, fill_either_side_array=None, fig = None, ax = None):
'''
This function accepts a dataframe that has a ```'time'``` column and
and a ```'value'``` column.
'''
if ax is None:
fig, ax = plt.subplots(1,1,figsize = (10,6))
ax.plot(time_array, values_array)
if not fill_either_side_array is None:
ax.fill_between(time_array,
y1=values_array-fill_either_side_array,
y2=values_array+fill_either_side_array,
alpha = 0.3)
return fig, ax
def visualise_data_time_heatmap(data_plot, name, fig = None, ax = None):
'''
This function accepts a dataframe in which the columns are the days and
the rows are the aggregated times of the day.
'''
if ax is None:
fig, axes = plt.subplots(1,1,figsize = (10,6))
ax = sns.heatmap(data_plot.values, cmap = 'Blues', cbar_kws={'label': name})
ax.invert_yaxis()
x_tick_loc = np.arange(0, data_plot.shape[1], 90)
ax.set_xticks(x_tick_loc + 0.5)
ax.set_xticklabels(data_plot.columns.astype(str)[x_tick_loc].values)
y_tick_loc = np.arange(0, data_plot.shape[0], 3)
ax.set_yticks(y_tick_loc + 0.5)
ax.set_yticklabels([pd.to_datetime(time).strftime("%H:%M") for time in data_plot.index.values[y_tick_loc]], rotation = 0)
ax.set_xlabel('Day')
ax.set_ylabel('Time of Day')
return fig, ax
def visualise_activity_daily_data(fe):
'''
Arguments
---------
- fe: class:
The feature engineering class that produces the data.
'''
activity_daily = fe.activity_specific_agg(agg='daily', load_smaller_aggs = True)
activity_daily = label_dataframe(activity_daily, days_either_side=0)
activity_daily=activity_daily.rename(columns = {'valid':'UTI Label'})
activity_daily['Feature'] = activity_daily['location'].map(fe.info)
sns.set_theme('talk')
fig_list = []
axes_list = []
for feature in activity_daily['location'].unique():
data_plot = activity_daily[activity_daily['location'].isin([feature])]
fig, ax = plt.subplots(1,1,figsize = (8,6))
ax = sns.boxplot(data=data_plot, x='value', y = 'Feature', hue='UTI Label', ax=ax, **{'showfliers':False})
ax.set_ylabel(None)
ax.set_yticks([])
ax.set_title('{}'.format(fe.info[feature]))
ax.set_xlabel('Value')
fig_list.append(fig)
axes_list.append(ax)
return fig_list, axes_list
def visualise_activity_weekly_data(fe):
'''
Arguments
---------
- fe: class:
The feature engineering class that produces the data.
'''
activity_weekly = fe.activity_specific_agg(agg='weekly', load_smaller_aggs = True)
activity_weekly = label_by_week(activity_weekly)
activity_weekly=activity_weekly.rename(columns = {'valid':'UTI Label'})
activity_weekly['Feature'] = activity_weekly['location'].map(fe.info)
sns.set_theme('talk')
fig_list = []
axes_list = []
for feature in activity_weekly['location'].unique():
data_plot = activity_weekly[activity_weekly['location'].isin([feature])]
fig, ax = plt.subplots(1,1,figsize = (8,6))
ax = sns.boxplot(data=data_plot, x='value', y = 'Feature', hue='UTI Label', ax=ax, **{'showfliers':False})
ax.set_ylabel(None)
ax.set_yticks([])
ax.set_title('{}'.format(fe.info[feature]))
ax.set_xlabel('Value')
fig_list.append(fig)
axes_list.append(ax)
return fig_list, axes_list
def visualise_activity_evently_data(fe):
'''
Arguments
---------
- fe: class:
The feature engineering class that produces the data.
'''
activity_evently = fe.activity_specific_agg(agg='evently', load_smaller_aggs = True)
activity_evently = label_dataframe(activity_evently, days_either_side=0)
activity_evently=activity_evently.rename(columns = {'valid':'UTI Label'})
activity_evently['Feature'] = activity_evently['location'].map(fe.info)
sns.set_theme('talk')
fig_list = []
axes_list = []
for feature in activity_evently['location'].unique():
data_plot = activity_evently[activity_evently['location'].isin([feature])]
fig, ax = plt.subplots(1,1,figsize = (8,6))
ax = sns.boxplot(data=data_plot, x='value', y = 'Feature', hue='UTI Label', ax=ax, **{'showfliers':False})
ax.set_ylabel(None)
ax.set_yticks([])
ax.set_title('{}'.format(fe.info[feature]))
ax.set_xlabel('Value')
fig_list.append(fig)
axes_list.append(ax)
return fig_list, axes_list
if __name__ == '__main__':
results = weekly_compare(getattr(fe, att), kolmogorov_smirnov)
df = label_by_week(getattr(fe, att))
visualise_weekly_data(df)
visualise_weekly_statistical_analysis(df)
visualise_body_temperature(label_by_week(fe.body_temperature))
Functions
def process_dataframe(df, week_shift=0)
-
Expand source code
def process_dataframe(df, week_shift=0): df = df[df.id == patient_id] map_dict = {i: j - week_shift for j, i in enumerate(df.week.sort_values().unique())} df.week = df.week.map(map_dict) return df
def visualise_activity_daily_data(fe)
-
Arguments
- fe: class: The feature engineering class that produces the data.
Expand source code
def visualise_activity_daily_data(fe): ''' Arguments --------- - fe: class: The feature engineering class that produces the data. ''' activity_daily = fe.activity_specific_agg(agg='daily', load_smaller_aggs = True) activity_daily = label_dataframe(activity_daily, days_either_side=0) activity_daily=activity_daily.rename(columns = {'valid':'UTI Label'}) activity_daily['Feature'] = activity_daily['location'].map(fe.info) sns.set_theme('talk') fig_list = [] axes_list = [] for feature in activity_daily['location'].unique(): data_plot = activity_daily[activity_daily['location'].isin([feature])] fig, ax = plt.subplots(1,1,figsize = (8,6)) ax = sns.boxplot(data=data_plot, x='value', y = 'Feature', hue='UTI Label', ax=ax, **{'showfliers':False}) ax.set_ylabel(None) ax.set_yticks([]) ax.set_title('{}'.format(fe.info[feature])) ax.set_xlabel('Value') fig_list.append(fig) axes_list.append(ax) return fig_list, axes_list
def visualise_activity_evently_data(fe)
-
Arguments
- fe: class: The feature engineering class that produces the data.
Expand source code
def visualise_activity_evently_data(fe): ''' Arguments --------- - fe: class: The feature engineering class that produces the data. ''' activity_evently = fe.activity_specific_agg(agg='evently', load_smaller_aggs = True) activity_evently = label_dataframe(activity_evently, days_either_side=0) activity_evently=activity_evently.rename(columns = {'valid':'UTI Label'}) activity_evently['Feature'] = activity_evently['location'].map(fe.info) sns.set_theme('talk') fig_list = [] axes_list = [] for feature in activity_evently['location'].unique(): data_plot = activity_evently[activity_evently['location'].isin([feature])] fig, ax = plt.subplots(1,1,figsize = (8,6)) ax = sns.boxplot(data=data_plot, x='value', y = 'Feature', hue='UTI Label', ax=ax, **{'showfliers':False}) ax.set_ylabel(None) ax.set_yticks([]) ax.set_title('{}'.format(fe.info[feature])) ax.set_xlabel('Value') fig_list.append(fig) axes_list.append(ax) return fig_list, axes_list
def visualise_activity_weekly_data(fe)
-
Arguments
- fe: class: The feature engineering class that produces the data.
Expand source code
def visualise_activity_weekly_data(fe): ''' Arguments --------- - fe: class: The feature engineering class that produces the data. ''' activity_weekly = fe.activity_specific_agg(agg='weekly', load_smaller_aggs = True) activity_weekly = label_by_week(activity_weekly) activity_weekly=activity_weekly.rename(columns = {'valid':'UTI Label'}) activity_weekly['Feature'] = activity_weekly['location'].map(fe.info) sns.set_theme('talk') fig_list = [] axes_list = [] for feature in activity_weekly['location'].unique(): data_plot = activity_weekly[activity_weekly['location'].isin([feature])] fig, ax = plt.subplots(1,1,figsize = (8,6)) ax = sns.boxplot(data=data_plot, x='value', y = 'Feature', hue='UTI Label', ax=ax, **{'showfliers':False}) ax.set_ylabel(None) ax.set_yticks([]) ax.set_title('{}'.format(fe.info[feature])) ax.set_xlabel('Value') fig_list.append(fig) axes_list.append(ax) return fig_list, axes_list
def visualise_body_temperature(df)
-
Expand source code
@formatting_plots('Body temperature') def visualise_body_temperature(df): df = process_dataframe(df) visualise_flags(df) sns.lineplot(df.week, df.value)
def visualise_data_time_heatmap(data_plot, name, fig=None, ax=None)
-
This function accepts a dataframe in which the columns are the days and the rows are the aggregated times of the day.
Expand source code
def visualise_data_time_heatmap(data_plot, name, fig = None, ax = None): ''' This function accepts a dataframe in which the columns are the days and the rows are the aggregated times of the day. ''' if ax is None: fig, axes = plt.subplots(1,1,figsize = (10,6)) ax = sns.heatmap(data_plot.values, cmap = 'Blues', cbar_kws={'label': name}) ax.invert_yaxis() x_tick_loc = np.arange(0, data_plot.shape[1], 90) ax.set_xticks(x_tick_loc + 0.5) ax.set_xticklabels(data_plot.columns.astype(str)[x_tick_loc].values) y_tick_loc = np.arange(0, data_plot.shape[0], 3) ax.set_yticks(y_tick_loc + 0.5) ax.set_yticklabels([pd.to_datetime(time).strftime("%H:%M") for time in data_plot.index.values[y_tick_loc]], rotation = 0) ax.set_xlabel('Day') ax.set_ylabel('Time of Day') return fig, ax
def visualise_data_time_lineplot(time_array, values_array, name, fill_either_side_array=None, fig=None, ax=None)
-
This function accepts a dataframe that has a
'time'
column and and a'value'
column.Expand source code
def visualise_data_time_lineplot(time_array, values_array, name, fill_either_side_array=None, fig = None, ax = None): ''' This function accepts a dataframe that has a ```'time'``` column and and a ```'value'``` column. ''' if ax is None: fig, ax = plt.subplots(1,1,figsize = (10,6)) ax.plot(time_array, values_array) if not fill_either_side_array is None: ax.fill_between(time_array, y1=values_array-fill_either_side_array, y2=values_array+fill_either_side_array, alpha = 0.3) return fig, ax
def visualise_flags(df)
-
Expand source code
def visualise_flags(df): for v in [True, False]: data = df[df.valid == v] not_labelled = True for week in data.week.unique(): if v is True: plt.axvline(week, 0, 0.17, color='red', label='UTI' if not_labelled else None) not_labelled = False elif v is False: plt.axvline(week, 0, 0.17, color='blue', label='not UTI' if not_labelled else None) not_labelled = False
def visualise_weekly_data(df)
-
Expand source code
@formatting_plots(figure_title[att]) def visualise_weekly_data(df): df = process_dataframe(df) sns.violinplot(data=df, x='week', y='value') visualise_flags(df) return df
def visualise_weekly_statistical_analysis(df, results)
-
Expand source code
@formatting_plots('P value, ' + figure_title[att]) def visualise_weekly_statistical_analysis(df, results): df = process_dataframe(df, 1) visualise_flags(df) data = results[patient_id] df = {'week': [], 'p_value': []} for idx, sta in enumerate(data): df['week'].append(idx + 1) df['p_value'].append(sta[1]) sns.lineplot(df['week'], df['p_value'])