Module minder_utils.feature_engineering.engineer
Expand source code
import numpy as np
from minder_utils.util.decorators import load_save
from .adding_features import *
from .calculation import calculate_entropy, anomaly_detection_freq
from .TimeFunctions import single_location_delta, rp_single_location_delta
from .util import week_to_date
class Feature_engineer:
    '''
    Take the formatting as input, re-construct the data in weekly format

    1. needing to pee more often than usual during the night (nocturia)
        - Compare the frequency of bathroom (Night)
    2. needing to pee suddenly or more urgently than usual
        - Compare the time difference between the triggered last sensor and bathroom sensor
    3. needing to pee more often than usual
        - Compare the frequency of bathroom (Daytime)
    4. a high temperature, or feeling hot and shivery
        - Body temperature (High)
    5. a very low temperature below 36C
        - Body temperature (Low)

    Each feature is a property wrapped in ``load_save`` with the options found
    under the matching ``feature_config`` entry.

    To view the descriptions of each of the attributes, please use the property
    ```.info```

    Parameters
    ----------
    formatter:
        Object exposing ``activity_data``, ``sleep_data`` and
        ``physiological_data``.
    agg_method: str
        One of 'sum', 'median' or 'mean'; used by ``activity`` and
        ``activity_specific_agg`` to aggregate the feature values.
    '''

    # Aggregation methods implemented by ``activity`` and ``activity_specific_agg``.
    _AGG_METHODS = ('sum', 'median', 'mean')

    def __init__(self, formatter, agg_method = 'sum'):
        self.formatter = formatter
        self.agg_method = agg_method

    def _check_agg_method(self):
        '''
        Raise ``TypeError`` when ``agg_method`` is not implemented.

        Called before any feature is computed so that a bad setting fails
        immediately instead of after all the features have been built.
        '''
        if self.agg_method not in self._AGG_METHODS:
            raise TypeError('agg_method={} is not implemented'.format(self.agg_method))

    def _sorted_activity(self):
        '''
        Return the formatter's activity data sorted by ``time``.
        '''
        return self.formatter.activity_data.sort_values('time')

    @staticmethod
    def _moving_average_by_id(data, name):
        '''
        Apply ``get_moving_average`` to every ``id`` group of ``data``, using
        the window ``feature_config[name]['w']``.
        '''
        window = feature_config[name]['w']
        return data.groupby('id').apply(
            lambda group: get_moving_average(group, w=window, name=name))

    @staticmethod
    def _value_delta_by_id(data, name):
        '''
        Apply ``get_value_delta`` to every ``id`` group of ``data``.
        '''
        return data.groupby('id').apply(
            lambda group: get_value_delta(group, name=name))

    def _sleep_daily_agg(self, value_name, location_name, config_key):
        '''
        Run ``get_daily_agg`` on the sleep data for ``value_name``, labelled
        ``location_name``, with the aggregation ``feature_config[config_key]['agg_type']``.
        '''
        return get_daily_agg(self.formatter.sleep_data,
                             value_name=value_name,
                             location_name=location_name,
                             agg_func=feature_config[config_key]['agg_type'])

    @property
    def info(self):
        '''
        Descriptions of the attributes.
        '''
        return {
            'bathroom_night': 'Bathroom activity during the night',
            'bathroom_daytime': 'Bathroom activity during the day',
            # 'bathroom_urgent': 'TODO',
            'body_temperature': 'Mean of body temperature of the participant during the week',
            'entropy': 'Entropy of the activity',
            # 'raw_activity': 'Raw activity data (weekly)',
            'entropy_rate': 'Entropy rate of markov chain over the week',
            'entropy_rate_daily': 'Entropy rate of markov chain over the day',
            'bathroom_night_ma': 'Moving average of bathroom activity during the night',
            'bathroom_night_ma_delta': 'Delta in the moving average of bathroom activity during the night',
            'bathroom_daytime_ma': 'Moving average of bathroom activity during the day',
            'bathroom_daytime_ma_delta': 'Delta in the moving average of bathroom activity during the day',
            'bathroom_urgent_reverse_percentage': 'Reverse percentile of the time to the bathroom',
            'outlier_score_activity': 'Outlier score of the activity',
            'rp_location_time_delta': 'Reverse percentile of the time between activities',
            'snore_freq': 'Number of snoring activities detected',
            'state_awake_freq' : 'Number of awake sleep instances',
            'state_deep_freq' : 'Number of deep sleep instances',
            'state_light_freq' : 'Number of light sleep instances',
            'state_rem_freq' : 'Number of REM sleep instances',
            'sleep_hr_mean' : 'Mean of sleeping heart rate',
            'sleep_hr_std' : 'Standard Deviation of sleeping heart rate',
            'sleep_rr_mean' : 'Mean of sleeping respiratory rate',
            'sleep_rr_std' : 'Standard Deviation of sleeping respiratory rate',
        }

    @property
    def agg_info(self):
        '''
        These are the time aggregations of each of the datasets.
        '''
        return {'evently': ['rp_location_time_delta'],
                'daily': [
                    'bathroom_night',
                    'bathroom_daytime',
                    'outlier_score_activity',
                    'bathroom_night_ma',
                    'bathroom_night_ma_delta',
                    'bathroom_daytime_ma',
                    #'bathroom_urgent',
                    'bathroom_urgent_reverse_percentage',
                    'bathroom_daytime_ma_delta',
                    'entropy_rate_daily',
                    'snore_freq',
                    'state_awake_freq',
                    'state_deep_freq',
                    'state_rem_freq',
                    'state_light_freq',
                    'sleep_hr_mean',
                    'sleep_hr_std',
                    'sleep_rr_mean',
                    'sleep_rr_std'
                ],
                'weekly': ['body_temperature',
                           'entropy',
                           'entropy_rate']}

    @property
    @load_save(**feature_config['bathroom_night']['save'])
    def bathroom_night(self):
        '''
        Bathroom activity during the night (the nocturia time range).
        '''
        return get_bathroom_activity(self._sorted_activity(),
                                     feature_config['nocturia']['time_range'],
                                     'bathroom_night')

    @property
    @load_save(**feature_config['bathroom_night_ma']['save'])
    def bathroom_night_ma(self):
        '''
        Moving average of bathroom activity during the night, per ``id``.
        '''
        return self._moving_average_by_id(self.bathroom_night, 'bathroom_night_ma')

    @property
    @load_save(**feature_config['bathroom_night_ma_delta']['save'])
    def bathroom_night_ma_delta(self):
        '''
        Delta in the moving average of bathroom activity during the night, per ``id``.
        '''
        return self._value_delta_by_id(self.bathroom_night_ma, 'bathroom_night_ma_delta')

    @property
    @load_save(**feature_config['bathroom_daytime']['save'])
    def bathroom_daytime(self):
        '''
        Bathroom activity during the day (the nocturia time range, reversed).
        '''
        return get_bathroom_activity(self._sorted_activity(),
                                     feature_config['nocturia']['time_range'][::-1],
                                     'bathroom_daytime')

    @property
    @load_save(**feature_config['bathroom_daytime_ma']['save'])
    def bathroom_daytime_ma(self):
        '''
        Moving average of bathroom activity during the day, per ``id``.
        '''
        return self._moving_average_by_id(self.bathroom_daytime, 'bathroom_daytime_ma')

    @property
    @load_save(**feature_config['bathroom_daytime_ma_delta']['save'])
    def bathroom_daytime_ma_delta(self):
        '''
        Delta in the moving average of bathroom activity during the day, per ``id``.
        '''
        return self._value_delta_by_id(self.bathroom_daytime_ma, 'bathroom_daytime_ma_delta')

    @property
    @load_save(**feature_config['bedroom_activity']['save'])
    def bedroom_activity(self):
        '''
        Result of ``get_bedroom_activity`` on the time-sorted activity data,
        called with ``time_range=None``.
        '''
        return get_bedroom_activity(self._sorted_activity(), time_range=None, name='bedroom_activity')

    @property
    @load_save(**feature_config['bedroom_activity_ma']['save'])
    def bedroom_activity_ma(self):
        '''
        Moving average of ``bedroom_activity``, per ``id``.
        '''
        return self._moving_average_by_id(self.bedroom_activity, 'bedroom_activity_ma')

    @property
    @load_save(**feature_config['bedroom_activity_ma_delta']['save'])
    def bedroom_activity_ma_delta(self):
        '''
        Delta of ``bedroom_activity_ma``, per ``id``.
        '''
        return self._value_delta_by_id(self.bedroom_activity_ma, 'bedroom_activity_ma_delta')

    @property
    @load_save(**feature_config['bathroom_urgent']['save'])
    def bathroom_urgent(self):
        '''
        Result of ``get_bathroom_delta`` with ``single_location_delta`` on the
        time-sorted activity data.
        '''
        return get_bathroom_delta(self._sorted_activity(), single_location_delta, 'bathroom_urgent')

    @property
    @load_save(**feature_config['bathroom_urgent_reverse_percentage']['save'])
    def bathroom_urgent_reverse_percentage(self):
        '''
        Reverse percentile of the time to the bathroom.

        Every entry of the ``value`` column is reduced to its mean; elements
        equal to ``-1`` are treated as missing and ignored.
        '''
        data = get_bathroom_delta(self._sorted_activity(), rp_single_location_delta,
                                  'bathroom_urgent_reverse_percentage')

        def value_group_by(x):
            # Work on a float copy: the caller's array is left untouched and
            # integer input can hold the NaN markers.
            values = np.array(x, dtype=float)
            values[values == -1] = np.nan
            # Nothing left to average; return NaN directly rather than let
            # np.nanmean emit an "empty slice" warning for the same result.
            if np.isnan(values).all():
                return np.nan
            return np.nanmean(values)

        data['value'] = data['value'].apply(value_group_by)
        return data

    # NOTE(review): the four ``state_*_freq`` properties share the 'sleep_freq'
    # save options (and ``bed_in_freq``/``bed_out_freq`` share 'bed_freq');
    # confirm that ``load_save`` keeps their saved results apart.
    @property
    @load_save(**feature_config['sleep_freq']['save'])
    def state_awake_freq(self):
        '''
        Number of awake sleep instances.
        '''
        return self._sleep_daily_agg('state_AWAKE', 'state_awake_freq', 'sleep_freq')

    @property
    @load_save(**feature_config['sleep_freq']['save'])
    def state_light_freq(self):
        '''
        Number of light sleep instances.
        '''
        return self._sleep_daily_agg('state_LIGHT', 'state_light_freq', 'sleep_freq')

    @property
    @load_save(**feature_config['sleep_freq']['save'])
    def state_deep_freq(self):
        '''
        Number of deep sleep instances.
        '''
        return self._sleep_daily_agg('state_DEEP', 'state_deep_freq', 'sleep_freq')

    @property
    @load_save(**feature_config['sleep_freq']['save'])
    def state_rem_freq(self):
        '''
        Number of REM sleep instances.
        '''
        return self._sleep_daily_agg('state_REM', 'state_rem_freq', 'sleep_freq')

    @property
    @load_save(**feature_config['snore_freq']['save'])
    def snore_freq(self):
        '''
        Number of snoring activities detected.
        '''
        return self._sleep_daily_agg('snoring', 'snore_freq', 'snore_freq')

    @property
    @load_save(**feature_config['bed_freq']['save'])
    def bed_in_freq(self):
        '''
        ``get_daily_agg`` of ``value_bed_in`` from the sleep data.
        '''
        return self._sleep_daily_agg('value_bed_in', 'bed_in_freq', 'bed_freq')

    @property
    @load_save(**feature_config['bed_freq']['save'])
    def bed_out_freq(self):
        '''
        ``get_daily_agg`` of ``value_bed_out`` from the sleep data.
        '''
        return self._sleep_daily_agg('value_bed_out', 'bed_out_freq', 'bed_freq')

    @property
    @load_save(**feature_config['sleep_hr_mean']['save'])
    def sleep_hr_mean(self):
        '''
        Mean of sleeping heart rate.
        '''
        return self._sleep_daily_agg('heart_rate', 'sleep_hr_mean', 'sleep_hr_mean')

    @property
    @load_save(**feature_config['sleep_hr_std']['save'])
    def sleep_hr_std(self):
        '''
        Standard Deviation of sleeping heart rate.
        '''
        return self._sleep_daily_agg('heart_rate', 'sleep_hr_std', 'sleep_hr_std')

    @property
    @load_save(**feature_config['sleep_rr_mean']['save'])
    def sleep_rr_mean(self):
        '''
        Mean of sleeping respiratory rate.
        '''
        return self._sleep_daily_agg('respiratory_rate', 'sleep_rr_mean', 'sleep_rr_mean')

    @property
    @load_save(**feature_config['sleep_rr_std']['save'])
    def sleep_rr_std(self):
        '''
        Standard Deviation of sleeping respiratory rate.
        '''
        return self._sleep_daily_agg('respiratory_rate', 'sleep_rr_std', 'sleep_rr_std')

    @property
    @load_save(**feature_config['body_temperature']['save'])
    def body_temperature(self):
        '''
        Mean of body temperature of the participant during the week.
        '''
        return get_body_temperature(self.formatter.physiological_data)

    @property
    @load_save(**feature_config['entropy']['save'])
    def entropy(self):
        '''
        Entropy of the activity, over the sensors listed in the config.
        '''
        return calculate_entropy(self._sorted_activity(), feature_config['entropy']['sensors'])

    @property
    @load_save(**feature_config['entropy_rate']['save'])
    def entropy_rate(self):
        '''
        Entropy rate of markov chain over the week.
        '''
        return get_entropy_rate(df=self._sorted_activity(),
                                sensors=feature_config['entropy_rate']['sensors'],
                                name='entropy_rate',
                                week_or_day='week')

    @property
    @load_save(**feature_config['entropy_rate_daily']['save'])
    def entropy_rate_daily(self):
        '''
        Entropy rate of markov chain over the day.
        '''
        return get_entropy_rate(df=self._sorted_activity(),
                                sensors=feature_config['entropy_rate_daily']['sensors'],
                                name='entropy_rate_daily',
                                week_or_day='day')

    @property
    @load_save(**feature_config['raw_activity']['save'])
    def raw_activity(self):
        '''
        Result of ``get_weekly_activity_data`` on the (unsorted) activity data.
        '''
        return get_weekly_activity_data(self.formatter.activity_data)

    @property
    @load_save(**feature_config['activity']['save'])
    def activity(self):
        '''
        All configured features in one table: one row per ``id`` and ``week``,
        one column per feature (``location``), plus a ``time`` column derived
        from the week.

        The features come from ``feature_config['activity']['features']``, or
        from every key of ``info`` when that entry is ``None``. Values are
        aggregated per ``id``/``week``/``location`` with ``agg_method`` and
        missing cells are filled with 0.

        Raises
        ------
        TypeError
            If ``agg_method`` is not 'sum', 'median' or 'mean'.
        '''
        self._check_agg_method()

        features = feature_config['activity']['features']
        if features is None:
            features = self.info.keys()

        data = pd.concat([getattr(self, feat)[['id', 'week', 'location', 'value']]
                          for feat in features])
        grouped = data.groupby(['id', 'week', 'location'])['value']
        data = getattr(grouped, self.agg_method)().reset_index()
        data = data.pivot_table(index=['id', 'week'], columns='location',
                                values='value').reset_index().replace(np.nan, 0)
        data['time'] = week_to_date(data['week'])
        return data

    @property
    @load_save(**feature_config['outlier_score_activity']['save'])
    def outlier_score_activity(self):
        '''
        Outlier score of the activity.
        '''
        return get_outlier_freq(self._sorted_activity(), anomaly_detection_freq, 'outlier_score_activity')

    @property
    @load_save(**feature_config['rp_location_time_delta']['save'])
    def rp_location_time_delta(self):
        '''
        Reverse percentile of the time between activities.
        '''
        print('This might take a bit of time...')
        config = feature_config['rp_location_time_delta']
        return get_subject_rp_location_delta(data=self._sorted_activity(),
                                             columns = {'subject':'id', 'time':'time', 'location':'location'},
                                             baseline_length_days = config['baseline_length_days'],
                                             baseline_offset_days = config['baseline_offset_days'],
                                             all_loc_as_baseline = config['all_loc_as_baseline'],
                                             name='rp_location_time_delta')

    def activity_specific_agg(self, agg='daily', load_smaller_aggs = False):
        '''
        Collect the features listed in ``agg_info`` for one time aggregation.

        Parameters
        ----------
        agg: str
            'evently', 'daily' or 'weekly'.
        load_smaller_aggs: bool
            If True, 'weekly' also loads the 'daily' features and 'daily' also
            loads the 'evently' features.

        Returns
        -------
        The concatenated features with ``time`` converted to UTC datetimes.
        For 'daily' and 'weekly' the values are additionally aggregated with
        ``agg_method`` per ``id``, ``week``, ``location`` and time bin
        ('1d' or '1W').

        Raises
        ------
        TypeError
            If ``agg`` is not accepted, or (for 'daily'/'weekly') if
            ``agg_method`` is not 'sum', 'median' or 'mean'.
        '''
        accepted_agg = ['evently', 'daily', 'weekly']
        if agg not in accepted_agg:
            raise TypeError('Please use an agg from the list {}'.format(accepted_agg))
        # 'evently' data is returned without aggregation, so agg_method only
        # matters for the other two.
        if agg != 'evently':
            self._check_agg_method()

        features = list(self.agg_info[agg])
        if load_smaller_aggs:
            if agg == 'weekly':
                features.extend(self.agg_info['daily'])
                #features.extend(self.agg_info['evently'])
            elif agg == 'daily':
                features.extend(self.agg_info['evently'])

        if agg == 'weekly':
            columns = ['id', 'week', 'location', 'value']
        elif agg == 'daily':
            columns = ['id', 'week', 'time', 'location', 'value']
        else:
            # 'evently' keeps every column of the feature.
            columns = None

        data = []
        for feat in features:
            feat_data = getattr(self, feat)
            if columns is not None:
                feat_data = feat_data[columns]
            data.append(feat_data)
        data = pd.concat(data)

        if agg == 'weekly':
            data['time'] = week_to_date(data['week'])
        data['time'] = pd.to_datetime(data['time'], utc=True)

        if agg != 'evently':
            grouper = pd.Grouper(key = 'time', freq = '1d' if agg=='daily' else '1W', dropna = False)
            grouped = data.groupby(['id', 'week', 'location', grouper])['value']
            data = getattr(grouped, self.agg_method)().reset_index()
        return data

    @property
    @load_save(**feature_config['activity_daily']['save'])
    def activity_daily(self):
        '''
        ``activity_specific_agg`` for the 'daily' aggregation.
        '''
        return self.activity_specific_agg(agg='daily')

    @property
    @load_save(**feature_config['activity_evently']['save'])
    def activity_evently(self):
        '''
        ``activity_specific_agg`` for the 'evently' aggregation.
        '''
        return self.activity_specific_agg(agg='evently')

    @property
    @load_save(**feature_config['activity_weekly']['save'])
    def activity_weekly(self):
        '''
        ``activity_specific_agg`` for the 'weekly' aggregation.
        '''
        return self.activity_specific_agg(agg='weekly')
Classes
class Feature_engineer (formatter, agg_method='sum')
Take the formatting as input, re-construct the data in weekly format.
1. needing to pee more often than usual during the night (nocturia)
   - Compare the frequency of bathroom (Night)
2. needing to pee suddenly or more urgently than usual
   - Compare the time difference between the triggered last sensor and bathroom sensor
3. needing to pee more often than usual
   - Compare the frequency of bathroom (Daytime)
4. a high temperature, or feeling hot and shivery
   - Body temperature (High)
5. a very low temperature below 36C
   - Body temperature (Low)
To view the descriptions of each of the attributes, please use the property `.info`.
Expand source code
class Feature_engineer:
    '''
    Take the formatting as input, re-construct the data in weekly format

    1. needing to pee more often than usual during the night (nocturia)
        - Compare the frequency of bathroom (Night)
    2. needing to pee suddenly or more urgently than usual
        - Compare the time difference between the triggered last sensor and bathroom sensor
    3. needing to pee more often than usual
        - Compare the frequency of bathroom (Daytime)
    4. a high temperature, or feeling hot and shivery
        - Body temperature (High)
    5. a very low temperature below 36C
        - Body temperature (Low)

    Each feature is a property wrapped in ``load_save`` with the options found
    under the matching ``feature_config`` entry.

    To view the descriptions of each of the attributes, please use the property
    ```.info```

    Parameters
    ----------
    formatter:
        Object exposing ``activity_data``, ``sleep_data`` and
        ``physiological_data``.
    agg_method: str
        One of 'sum', 'median' or 'mean'; used by ``activity`` and
        ``activity_specific_agg`` to aggregate the feature values.
    '''

    # Aggregation methods implemented by ``activity`` and ``activity_specific_agg``.
    _AGG_METHODS = ('sum', 'median', 'mean')

    def __init__(self, formatter, agg_method = 'sum'):
        self.formatter = formatter
        self.agg_method = agg_method

    def _check_agg_method(self):
        '''
        Raise ``TypeError`` when ``agg_method`` is not implemented.

        Called before any feature is computed so that a bad setting fails
        immediately instead of after all the features have been built.
        '''
        if self.agg_method not in self._AGG_METHODS:
            raise TypeError('agg_method={} is not implemented'.format(self.agg_method))

    def _sorted_activity(self):
        '''
        Return the formatter's activity data sorted by ``time``.
        '''
        return self.formatter.activity_data.sort_values('time')

    @staticmethod
    def _moving_average_by_id(data, name):
        '''
        Apply ``get_moving_average`` to every ``id`` group of ``data``, using
        the window ``feature_config[name]['w']``.
        '''
        window = feature_config[name]['w']
        return data.groupby('id').apply(
            lambda group: get_moving_average(group, w=window, name=name))

    @staticmethod
    def _value_delta_by_id(data, name):
        '''
        Apply ``get_value_delta`` to every ``id`` group of ``data``.
        '''
        return data.groupby('id').apply(
            lambda group: get_value_delta(group, name=name))

    def _sleep_daily_agg(self, value_name, location_name, config_key):
        '''
        Run ``get_daily_agg`` on the sleep data for ``value_name``, labelled
        ``location_name``, with the aggregation ``feature_config[config_key]['agg_type']``.
        '''
        return get_daily_agg(self.formatter.sleep_data,
                             value_name=value_name,
                             location_name=location_name,
                             agg_func=feature_config[config_key]['agg_type'])

    @property
    def info(self):
        '''
        Descriptions of the attributes.
        '''
        return {
            'bathroom_night': 'Bathroom activity during the night',
            'bathroom_daytime': 'Bathroom activity during the day',
            # 'bathroom_urgent': 'TODO',
            'body_temperature': 'Mean of body temperature of the participant during the week',
            'entropy': 'Entropy of the activity',
            # 'raw_activity': 'Raw activity data (weekly)',
            'entropy_rate': 'Entropy rate of markov chain over the week',
            'entropy_rate_daily': 'Entropy rate of markov chain over the day',
            'bathroom_night_ma': 'Moving average of bathroom activity during the night',
            'bathroom_night_ma_delta': 'Delta in the moving average of bathroom activity during the night',
            'bathroom_daytime_ma': 'Moving average of bathroom activity during the day',
            'bathroom_daytime_ma_delta': 'Delta in the moving average of bathroom activity during the day',
            'bathroom_urgent_reverse_percentage': 'Reverse percentile of the time to the bathroom',
            'outlier_score_activity': 'Outlier score of the activity',
            'rp_location_time_delta': 'Reverse percentile of the time between activities',
            'snore_freq': 'Number of snoring activities detected',
            'state_awake_freq' : 'Number of awake sleep instances',
            'state_deep_freq' : 'Number of deep sleep instances',
            'state_light_freq' : 'Number of light sleep instances',
            'state_rem_freq' : 'Number of REM sleep instances',
            'sleep_hr_mean' : 'Mean of sleeping heart rate',
            'sleep_hr_std' : 'Standard Deviation of sleeping heart rate',
            'sleep_rr_mean' : 'Mean of sleeping respiratory rate',
            'sleep_rr_std' : 'Standard Deviation of sleeping respiratory rate',
        }

    @property
    def agg_info(self):
        '''
        These are the time aggregations of each of the datasets.
        '''
        return {'evently': ['rp_location_time_delta'],
                'daily': [
                    'bathroom_night',
                    'bathroom_daytime',
                    'outlier_score_activity',
                    'bathroom_night_ma',
                    'bathroom_night_ma_delta',
                    'bathroom_daytime_ma',
                    #'bathroom_urgent',
                    'bathroom_urgent_reverse_percentage',
                    'bathroom_daytime_ma_delta',
                    'entropy_rate_daily',
                    'snore_freq',
                    'state_awake_freq',
                    'state_deep_freq',
                    'state_rem_freq',
                    'state_light_freq',
                    'sleep_hr_mean',
                    'sleep_hr_std',
                    'sleep_rr_mean',
                    'sleep_rr_std'
                ],
                'weekly': ['body_temperature',
                           'entropy',
                           'entropy_rate']}

    @property
    @load_save(**feature_config['bathroom_night']['save'])
    def bathroom_night(self):
        '''
        Bathroom activity during the night (the nocturia time range).
        '''
        return get_bathroom_activity(self._sorted_activity(),
                                     feature_config['nocturia']['time_range'],
                                     'bathroom_night')

    @property
    @load_save(**feature_config['bathroom_night_ma']['save'])
    def bathroom_night_ma(self):
        '''
        Moving average of bathroom activity during the night, per ``id``.
        '''
        return self._moving_average_by_id(self.bathroom_night, 'bathroom_night_ma')

    @property
    @load_save(**feature_config['bathroom_night_ma_delta']['save'])
    def bathroom_night_ma_delta(self):
        '''
        Delta in the moving average of bathroom activity during the night, per ``id``.
        '''
        return self._value_delta_by_id(self.bathroom_night_ma, 'bathroom_night_ma_delta')

    @property
    @load_save(**feature_config['bathroom_daytime']['save'])
    def bathroom_daytime(self):
        '''
        Bathroom activity during the day (the nocturia time range, reversed).
        '''
        return get_bathroom_activity(self._sorted_activity(),
                                     feature_config['nocturia']['time_range'][::-1],
                                     'bathroom_daytime')

    @property
    @load_save(**feature_config['bathroom_daytime_ma']['save'])
    def bathroom_daytime_ma(self):
        '''
        Moving average of bathroom activity during the day, per ``id``.
        '''
        return self._moving_average_by_id(self.bathroom_daytime, 'bathroom_daytime_ma')

    @property
    @load_save(**feature_config['bathroom_daytime_ma_delta']['save'])
    def bathroom_daytime_ma_delta(self):
        '''
        Delta in the moving average of bathroom activity during the day, per ``id``.
        '''
        return self._value_delta_by_id(self.bathroom_daytime_ma, 'bathroom_daytime_ma_delta')

    @property
    @load_save(**feature_config['bedroom_activity']['save'])
    def bedroom_activity(self):
        '''
        Result of ``get_bedroom_activity`` on the time-sorted activity data,
        called with ``time_range=None``.
        '''
        return get_bedroom_activity(self._sorted_activity(), time_range=None, name='bedroom_activity')

    @property
    @load_save(**feature_config['bedroom_activity_ma']['save'])
    def bedroom_activity_ma(self):
        '''
        Moving average of ``bedroom_activity``, per ``id``.
        '''
        return self._moving_average_by_id(self.bedroom_activity, 'bedroom_activity_ma')

    @property
    @load_save(**feature_config['bedroom_activity_ma_delta']['save'])
    def bedroom_activity_ma_delta(self):
        '''
        Delta of ``bedroom_activity_ma``, per ``id``.
        '''
        return self._value_delta_by_id(self.bedroom_activity_ma, 'bedroom_activity_ma_delta')

    @property
    @load_save(**feature_config['bathroom_urgent']['save'])
    def bathroom_urgent(self):
        '''
        Result of ``get_bathroom_delta`` with ``single_location_delta`` on the
        time-sorted activity data.
        '''
        return get_bathroom_delta(self._sorted_activity(), single_location_delta, 'bathroom_urgent')

    @property
    @load_save(**feature_config['bathroom_urgent_reverse_percentage']['save'])
    def bathroom_urgent_reverse_percentage(self):
        '''
        Reverse percentile of the time to the bathroom.

        Every entry of the ``value`` column is reduced to its mean; elements
        equal to ``-1`` are treated as missing and ignored.
        '''
        data = get_bathroom_delta(self._sorted_activity(), rp_single_location_delta,
                                  'bathroom_urgent_reverse_percentage')

        def value_group_by(x):
            # Work on a float copy: the caller's array is left untouched and
            # integer input can hold the NaN markers.
            values = np.array(x, dtype=float)
            values[values == -1] = np.nan
            # Nothing left to average; return NaN directly rather than let
            # np.nanmean emit an "empty slice" warning for the same result.
            if np.isnan(values).all():
                return np.nan
            return np.nanmean(values)

        data['value'] = data['value'].apply(value_group_by)
        return data

    # NOTE(review): the four ``state_*_freq`` properties share the 'sleep_freq'
    # save options (and ``bed_in_freq``/``bed_out_freq`` share 'bed_freq');
    # confirm that ``load_save`` keeps their saved results apart.
    @property
    @load_save(**feature_config['sleep_freq']['save'])
    def state_awake_freq(self):
        '''
        Number of awake sleep instances.
        '''
        return self._sleep_daily_agg('state_AWAKE', 'state_awake_freq', 'sleep_freq')

    @property
    @load_save(**feature_config['sleep_freq']['save'])
    def state_light_freq(self):
        '''
        Number of light sleep instances.
        '''
        return self._sleep_daily_agg('state_LIGHT', 'state_light_freq', 'sleep_freq')

    @property
    @load_save(**feature_config['sleep_freq']['save'])
    def state_deep_freq(self):
        '''
        Number of deep sleep instances.
        '''
        return self._sleep_daily_agg('state_DEEP', 'state_deep_freq', 'sleep_freq')

    @property
    @load_save(**feature_config['sleep_freq']['save'])
    def state_rem_freq(self):
        '''
        Number of REM sleep instances.
        '''
        return self._sleep_daily_agg('state_REM', 'state_rem_freq', 'sleep_freq')

    @property
    @load_save(**feature_config['snore_freq']['save'])
    def snore_freq(self):
        '''
        Number of snoring activities detected.
        '''
        return self._sleep_daily_agg('snoring', 'snore_freq', 'snore_freq')

    @property
    @load_save(**feature_config['bed_freq']['save'])
    def bed_in_freq(self):
        '''
        ``get_daily_agg`` of ``value_bed_in`` from the sleep data.
        '''
        return self._sleep_daily_agg('value_bed_in', 'bed_in_freq', 'bed_freq')

    @property
    @load_save(**feature_config['bed_freq']['save'])
    def bed_out_freq(self):
        '''
        ``get_daily_agg`` of ``value_bed_out`` from the sleep data.
        '''
        return self._sleep_daily_agg('value_bed_out', 'bed_out_freq', 'bed_freq')

    @property
    @load_save(**feature_config['sleep_hr_mean']['save'])
    def sleep_hr_mean(self):
        '''
        Mean of sleeping heart rate.
        '''
        return self._sleep_daily_agg('heart_rate', 'sleep_hr_mean', 'sleep_hr_mean')

    @property
    @load_save(**feature_config['sleep_hr_std']['save'])
    def sleep_hr_std(self):
        '''
        Standard Deviation of sleeping heart rate.
        '''
        return self._sleep_daily_agg('heart_rate', 'sleep_hr_std', 'sleep_hr_std')

    @property
    @load_save(**feature_config['sleep_rr_mean']['save'])
    def sleep_rr_mean(self):
        '''
        Mean of sleeping respiratory rate.
        '''
        return self._sleep_daily_agg('respiratory_rate', 'sleep_rr_mean', 'sleep_rr_mean')

    @property
    @load_save(**feature_config['sleep_rr_std']['save'])
    def sleep_rr_std(self):
        '''
        Standard Deviation of sleeping respiratory rate.
        '''
        return self._sleep_daily_agg('respiratory_rate', 'sleep_rr_std', 'sleep_rr_std')

    @property
    @load_save(**feature_config['body_temperature']['save'])
    def body_temperature(self):
        '''
        Mean of body temperature of the participant during the week.
        '''
        return get_body_temperature(self.formatter.physiological_data)

    @property
    @load_save(**feature_config['entropy']['save'])
    def entropy(self):
        '''
        Entropy of the activity, over the sensors listed in the config.
        '''
        return calculate_entropy(self._sorted_activity(), feature_config['entropy']['sensors'])

    @property
    @load_save(**feature_config['entropy_rate']['save'])
    def entropy_rate(self):
        '''
        Entropy rate of markov chain over the week.
        '''
        return get_entropy_rate(df=self._sorted_activity(),
                                sensors=feature_config['entropy_rate']['sensors'],
                                name='entropy_rate',
                                week_or_day='week')

    @property
    @load_save(**feature_config['entropy_rate_daily']['save'])
    def entropy_rate_daily(self):
        '''
        Entropy rate of markov chain over the day.
        '''
        return get_entropy_rate(df=self._sorted_activity(),
                                sensors=feature_config['entropy_rate_daily']['sensors'],
                                name='entropy_rate_daily',
                                week_or_day='day')

    @property
    @load_save(**feature_config['raw_activity']['save'])
    def raw_activity(self):
        '''
        Result of ``get_weekly_activity_data`` on the (unsorted) activity data.
        '''
        return get_weekly_activity_data(self.formatter.activity_data)

    @property
    @load_save(**feature_config['activity']['save'])
    def activity(self):
        '''
        All configured features in one table: one row per ``id`` and ``week``,
        one column per feature (``location``), plus a ``time`` column derived
        from the week.

        The features come from ``feature_config['activity']['features']``, or
        from every key of ``info`` when that entry is ``None``. Values are
        aggregated per ``id``/``week``/``location`` with ``agg_method`` and
        missing cells are filled with 0.

        Raises
        ------
        TypeError
            If ``agg_method`` is not 'sum', 'median' or 'mean'.
        '''
        self._check_agg_method()

        features = feature_config['activity']['features']
        if features is None:
            features = self.info.keys()

        data = pd.concat([getattr(self, feat)[['id', 'week', 'location', 'value']]
                          for feat in features])
        grouped = data.groupby(['id', 'week', 'location'])['value']
        data = getattr(grouped, self.agg_method)().reset_index()
        data = data.pivot_table(index=['id', 'week'], columns='location',
                                values='value').reset_index().replace(np.nan, 0)
        data['time'] = week_to_date(data['week'])
        return data

    @property
    @load_save(**feature_config['outlier_score_activity']['save'])
    def outlier_score_activity(self):
        '''
        Outlier score of the activity.
        '''
        return get_outlier_freq(self._sorted_activity(), anomaly_detection_freq, 'outlier_score_activity')

    @property
    @load_save(**feature_config['rp_location_time_delta']['save'])
    def rp_location_time_delta(self):
        '''
        Reverse percentile of the time between activities.
        '''
        print('This might take a bit of time...')
        config = feature_config['rp_location_time_delta']
        return get_subject_rp_location_delta(data=self._sorted_activity(),
                                             columns = {'subject':'id', 'time':'time', 'location':'location'},
                                             baseline_length_days = config['baseline_length_days'],
                                             baseline_offset_days = config['baseline_offset_days'],
                                             all_loc_as_baseline = config['all_loc_as_baseline'],
                                             name='rp_location_time_delta')

    def activity_specific_agg(self, agg='daily', load_smaller_aggs = False):
        '''
        Collect the features listed in ``agg_info`` for one time aggregation.

        Parameters
        ----------
        agg: str
            'evently', 'daily' or 'weekly'.
        load_smaller_aggs: bool
            If True, 'weekly' also loads the 'daily' features and 'daily' also
            loads the 'evently' features.

        Returns
        -------
        The concatenated features with ``time`` converted to UTC datetimes.
        For 'daily' and 'weekly' the values are additionally aggregated with
        ``agg_method`` per ``id``, ``week``, ``location`` and time bin
        ('1d' or '1W').

        Raises
        ------
        TypeError
            If ``agg`` is not accepted, or (for 'daily'/'weekly') if
            ``agg_method`` is not 'sum', 'median' or 'mean'.
        '''
        accepted_agg = ['evently', 'daily', 'weekly']
        if agg not in accepted_agg:
            raise TypeError('Please use an agg from the list {}'.format(accepted_agg))
        # 'evently' data is returned without aggregation, so agg_method only
        # matters for the other two.
        if agg != 'evently':
            self._check_agg_method()

        features = list(self.agg_info[agg])
        if load_smaller_aggs:
            if agg == 'weekly':
                features.extend(self.agg_info['daily'])
                #features.extend(self.agg_info['evently'])
            elif agg == 'daily':
                features.extend(self.agg_info['evently'])

        if agg == 'weekly':
            columns = ['id', 'week', 'location', 'value']
        elif agg == 'daily':
            columns = ['id', 'week', 'time', 'location', 'value']
        else:
            # 'evently' keeps every column of the feature.
            columns = None

        data = []
        for feat in features:
            feat_data = getattr(self, feat)
            if columns is not None:
                feat_data = feat_data[columns]
            data.append(feat_data)
        data = pd.concat(data)

        if agg == 'weekly':
            data['time'] = week_to_date(data['week'])
        data['time'] = pd.to_datetime(data['time'], utc=True)

        if agg != 'evently':
            grouper = pd.Grouper(key = 'time', freq = '1d' if agg=='daily' else '1W', dropna = False)
            grouped = data.groupby(['id', 'week', 'location', grouper])['value']
            data = getattr(grouped, self.agg_method)().reset_index()
        return data

    @property
    @load_save(**feature_config['activity_daily']['save'])
    def activity_daily(self):
        '''
        ``activity_specific_agg`` for the 'daily' aggregation.
        '''
        return self.activity_specific_agg(agg='daily')

    @property
    @load_save(**feature_config['activity_evently']['save'])
    def activity_evently(self):
        '''
        ``activity_specific_agg`` for the 'evently' aggregation.
        '''
        return self.activity_specific_agg(agg='evently')

    @property
    @load_save(**feature_config['activity_weekly']['save'])
    def activity_weekly(self):
        '''
        ``activity_specific_agg`` for the 'weekly' aggregation.
        '''
        return self.activity_specific_agg(agg='weekly')
Instance variables
var activity
-
Expand source code
@property
@load_save(**feature_config['activity']['save'])
def activity(self):
    '''
    All configured features in one table: one row per ``id`` and ``week``,
    one column per feature (``location``), plus a ``time`` column derived
    from the week.

    The features come from ``feature_config['activity']['features']``, or
    from every key of ``info`` when that entry is ``None``. Values are
    aggregated per ``id``/``week``/``location`` with ``agg_method`` and
    missing cells are filled with 0.

    Raises
    ------
    TypeError
        If ``agg_method`` is not 'sum', 'median' or 'mean'.
    '''
    # Validate first so a bad agg_method fails before any feature is computed.
    if self.agg_method not in ('sum', 'median', 'mean'):
        raise TypeError('agg_method={} is not implemented'.format(self.agg_method))

    features = feature_config['activity']['features']
    if features is None:
        features = self.info.keys()

    data = pd.concat([getattr(self, feat)[['id', 'week', 'location', 'value']]
                      for feat in features])
    grouped = data.groupby(['id', 'week', 'location'])['value']
    # agg_method is one of the SeriesGroupBy reducers checked above.
    data = getattr(grouped, self.agg_method)().reset_index()
    data = data.pivot_table(index=['id', 'week'], columns='location',
                            values='value').reset_index().replace(np.nan, 0)
    data['time'] = week_to_date(data['week'])
    return data
var activity_daily
-
Expand source code
@property
@load_save(**feature_config['activity_daily']['save'])
def activity_daily(self):
    '''
    ``activity_specific_agg`` for the 'daily' aggregation.
    '''
    daily = self.activity_specific_agg(agg='daily')
    return daily
var activity_evently
-
Expand source code
@property
@load_save(**feature_config['activity_evently']['save'])
def activity_evently(self):
    '''
    ``activity_specific_agg`` for the 'evently' aggregation.
    '''
    evently = self.activity_specific_agg(agg='evently')
    return evently
var activity_weekly
-
Expand source code
@property
@load_save(**feature_config['activity_weekly']['save'])
def activity_weekly(self):
    '''
    ``activity_specific_agg`` for the 'weekly' aggregation.
    '''
    weekly = self.activity_specific_agg(agg='weekly')
    return weekly
var agg_info
-
These are the time aggregations of each of the datasets.
Expand source code
@property
def agg_info(self):
    '''
    Map each time aggregation ('evently', 'daily', 'weekly') to the names
    of the features available at that aggregation.
    '''
    event_level = ['rp_location_time_delta']
    day_level = [
        'bathroom_night',
        'bathroom_daytime',
        'outlier_score_activity',
        'bathroom_night_ma',
        'bathroom_night_ma_delta',
        'bathroom_daytime_ma',
        #'bathroom_urgent',
        'bathroom_urgent_reverse_percentage',
        'bathroom_daytime_ma_delta',
        'entropy_rate_daily',
        'snore_freq',
        'state_awake_freq',
        'state_deep_freq',
        'state_rem_freq',
        'state_light_freq',
        'sleep_hr_mean',
        'sleep_hr_std',
        'sleep_rr_mean',
        'sleep_rr_std',
    ]
    week_level = ['body_temperature', 'entropy', 'entropy_rate']
    return {'evently': event_level, 'daily': day_level, 'weekly': week_level}
var bathroom_daytime
-
Expand source code
@property
@load_save(**feature_config['bathroom_daytime']['save'])
def bathroom_daytime(self):
    '''
    Bathroom activity for the nocturia time range with its order reversed.
    '''
    activity = self.formatter.activity_data.sort_values('time')
    reversed_range = feature_config['nocturia']['time_range'][::-1]
    return get_bathroom_activity(activity, reversed_range, 'bathroom_daytime')
var bathroom_daytime_ma
-
Expand source code
@property
@load_save(**feature_config['bathroom_daytime_ma']['save'])
def bathroom_daytime_ma(self):
    '''
    ``get_moving_average`` of ``bathroom_daytime``, applied to each ``id``
    group with the window from ``feature_config['bathroom_daytime_ma']['w']``.
    '''
    def smooth(group):
        return get_moving_average(group,
                                  w=feature_config['bathroom_daytime_ma']['w'],
                                  name='bathroom_daytime_ma')

    by_id = self.bathroom_daytime.groupby(by='id')
    return by_id.apply(smooth)
var bathroom_daytime_ma_delta
-
Expand source code
@property
@load_save(**feature_config['bathroom_daytime_ma_delta']['save'])
def bathroom_daytime_ma_delta(self):
    '''
    ``get_value_delta`` of ``bathroom_daytime_ma``, applied to each ``id`` group.
    '''
    def delta(group):
        return get_value_delta(group, name='bathroom_daytime_ma_delta')

    by_id = self.bathroom_daytime_ma.groupby('id')
    return by_id.apply(delta)
var bathroom_night
-
Expand source code
@property
@load_save(**feature_config['bathroom_night']['save'])
def bathroom_night(self):
    '''
    Bathroom activity for the nocturia time range.
    '''
    activity = self.formatter.activity_data.sort_values('time')
    night_range = feature_config['nocturia']['time_range']
    return get_bathroom_activity(activity, night_range, 'bathroom_night')
var bathroom_night_ma
-
Expand source code
@property
@load_save(**feature_config['bathroom_night_ma']['save'])
def bathroom_night_ma(self):
    '''
    ``get_moving_average`` of ``bathroom_night``, applied to each ``id``
    group with the window from ``feature_config['bathroom_night_ma']['w']``.
    '''
    def smooth(group):
        return get_moving_average(group,
                                  w=feature_config['bathroom_night_ma']['w'],
                                  name='bathroom_night_ma')

    by_id = self.bathroom_night.groupby('id')
    return by_id.apply(smooth)
var bathroom_night_ma_delta
-
Expand source code
@property
@load_save(**feature_config['bathroom_night_ma_delta']['save'])
def bathroom_night_ma_delta(self):
    '''
    Delta in the moving average of bathroom activity during the night.

    ``get_value_delta`` is applied separately to each ``id`` group of
    ``self.bathroom_night_ma``.
    '''
    by_id = self.bathroom_night_ma.groupby('id')
    return by_id.apply(
        lambda group: get_value_delta(group, name='bathroom_night_ma_delta')
    )
var bathroom_urgent
-
Expand source code
@property
@load_save(**feature_config['bathroom_urgent']['save'])
def bathroom_urgent(self):
    '''
    Result of ``get_bathroom_delta`` on the time-sorted activity data,
    using ``single_location_delta`` as the delta function.
    '''
    activity = self.formatter.activity_data.sort_values('time')
    return get_bathroom_delta(activity, single_location_delta, 'bathroom_urgent')
var bathroom_urgent_reverse_percentage
-
Expand source code
@property
@load_save(**feature_config['bathroom_urgent_reverse_percentage']['save'])
def bathroom_urgent_reverse_percentage(self):
    '''
    Reverse percentile of the time to the bathroom.

    ``get_bathroom_delta`` is run with ``rp_single_location_delta``; each
    entry of the resulting ``value`` column is then reduced to the mean of
    its elements, ignoring elements equal to ``-1``.
    '''
    def mean_ignoring_minus_one(values):
        # Entries equal to -1 are overwritten with NaN (in place) so that
        # nanmean leaves them out. Presumably -1 marks "no value" -- confirm
        # against rp_single_location_delta.
        values[np.where(values == -1)] = np.nan
        return np.nanmean(values)

    activity = self.formatter.activity_data.sort_values('time')
    result = get_bathroom_delta(activity,
                                rp_single_location_delta,
                                'bathroom_urgent_reverse_percentage')
    result['value'] = result['value'].apply(mean_ignoring_minus_one)
    return result
var bed_in_freq
-
Expand source code
@property
@load_save(**feature_config['bed_freq']['save'])
def bed_in_freq(self):
    '''
    Result of ``get_daily_agg`` on the ``value_bed_in`` column of the sleep
    data, aggregated with the configured ``bed_freq`` ``agg_type``.

    NOTE(review): the ``bed_freq`` save settings are also used by
    ``bed_out_freq`` -- confirm ``load_save`` keeps their caches apart.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['bed_freq']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='value_bed_in',
        location_name='bed_in_freq',
        agg_func=agg_type,
    )
var bed_out_freq
-
Expand source code
@property
@load_save(**feature_config['bed_freq']['save'])
def bed_out_freq(self):
    '''
    Result of ``get_daily_agg`` on the ``value_bed_out`` column of the sleep
    data, aggregated with the configured ``bed_freq`` ``agg_type``.

    NOTE(review): the ``bed_freq`` save settings are also used by
    ``bed_in_freq`` -- confirm ``load_save`` keeps their caches apart.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['bed_freq']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='value_bed_out',
        location_name='bed_out_freq',
        agg_func=agg_type,
    )
var bedroom_activity
-
Expand source code
@property
@load_save(**feature_config['bedroom_activity']['save'])
def bedroom_activity(self):
    '''
    Result of ``get_bedroom_activity`` on the time-sorted activity data,
    called with no time range (``time_range=None``).
    '''
    activity = self.formatter.activity_data.sort_values('time')
    return get_bedroom_activity(activity, time_range=None, name='bedroom_activity')
var bedroom_activity_ma
-
Expand source code
@property
@load_save(**feature_config['bedroom_activity_ma']['save'])
def bedroom_activity_ma(self):
    '''
    ``get_moving_average`` applied separately to each ``id`` group of
    ``self.bedroom_activity``, with the window ``w`` taken from the config.
    '''
    by_id = self.bedroom_activity.groupby('id')
    return by_id.apply(
        lambda group: get_moving_average(
            group,
            w=feature_config['bedroom_activity_ma']['w'],
            name='bedroom_activity_ma',
        )
    )
var bedroom_activity_ma_delta
-
Expand source code
@property
@load_save(**feature_config['bedroom_activity_ma_delta']['save'])
def bedroom_activity_ma_delta(self):
    '''
    ``get_value_delta`` applied separately to each ``id`` group of
    ``self.bedroom_activity_ma``.
    '''
    by_id = self.bedroom_activity_ma.groupby('id')
    return by_id.apply(
        lambda group: get_value_delta(group, name='bedroom_activity_ma_delta')
    )
var body_temperature
-
Expand source code
@property
@load_save(**feature_config['body_temperature']['save'])
def body_temperature(self):
    '''
    Body temperature feature, computed by ``get_body_temperature`` from the
    formatter's physiological data.
    '''
    physiological = self.formatter.physiological_data
    return get_body_temperature(physiological)
var entropy
-
Expand source code
@property
@load_save(**feature_config['entropy']['save'])
def entropy(self):
    '''
    Entropy of the activity, computed by ``calculate_entropy`` on the
    time-sorted activity data for the configured ``sensors``.
    '''
    activity = self.formatter.activity_data.sort_values('time')
    sensors = feature_config['entropy']['sensors']
    return calculate_entropy(activity, sensors)
var entropy_rate
-
Expand source code
@property
@load_save(**feature_config['entropy_rate']['save'])
def entropy_rate(self):
    '''
    Entropy rate of markov chain over the week.

    Delegates to ``get_entropy_rate`` with ``week_or_day='week'`` on the
    time-sorted activity data and the configured ``sensors``.
    '''
    activity = self.formatter.activity_data.sort_values('time')
    sensors = feature_config['entropy_rate']['sensors']
    return get_entropy_rate(
        df=activity,
        sensors=sensors,
        name='entropy_rate',
        week_or_day='week',
    )
var entropy_rate_daily
-
Expand source code
@property
@load_save(**feature_config['entropy_rate_daily']['save'])
def entropy_rate_daily(self):
    '''
    Entropy rate of markov chain over the day.

    Delegates to ``get_entropy_rate`` with ``week_or_day='day'`` on the
    time-sorted activity data and the configured ``sensors``.
    '''
    activity = self.formatter.activity_data.sort_values('time')
    sensors = feature_config['entropy_rate_daily']['sensors']
    return get_entropy_rate(
        df=activity,
        sensors=sensors,
        name='entropy_rate_daily',
        week_or_day='day',
    )
var info
-
Descriptions of the attributes.
Expand source code
@property
def info(self):
    '''
    Descriptions of the attributes.

    Returns
    -------
    dict
        Maps each feature name to a one-line description string.
    '''
    # 'bathroom_urgent' and 'raw_activity' are currently not described here.
    return dict(
        bathroom_night='Bathroom activity during the night',
        bathroom_daytime='Bathroom activity during the day',
        body_temperature='Mean of body temperature of the participant during the week',
        entropy='Entropy of the activity',
        entropy_rate='Entropy rate of markov chain over the week',
        entropy_rate_daily='Entropy rate of markov chain over the day',
        bathroom_night_ma='Moving average of bathroom activity during the night',
        bathroom_night_ma_delta='Delta in the moving average of bathroom activity during the night',
        bathroom_daytime_ma='Moving average of bathroom activity during the day',
        bathroom_daytime_ma_delta='Delta in the moving average of bathroom activity during the day',
        bathroom_urgent_reverse_percentage='Reverse percentile of the time to the bathroom',
        outlier_score_activity='Outlier score of the activity',
        rp_location_time_delta='Reverse percentile of the time between activities',
        snore_freq='Number of snoring activities detected',
        state_awake_freq='Number of awake sleep instances',
        state_deep_freq='Number of deep sleep instances',
        state_light_freq='Number of light sleep instances',
        state_rem_freq='Number of REM sleep instances',
        sleep_hr_mean='Mean of sleeping heart rate',
        sleep_hr_std='Standard Deviation of sleeping heart rate',
        sleep_rr_mean='Mean of sleeping respiratory rate',
        sleep_rr_std='Standard Deviation of sleeping respiratory rate',
    )
var outlier_score_activity
-
Expand source code
@property
@load_save(**feature_config['outlier_score_activity']['save'])
def outlier_score_activity(self):
    '''
    Outlier score of the activity.

    Delegates to ``get_outlier_freq`` on the time-sorted activity data with
    ``anomaly_detection_freq`` as the scoring function.
    '''
    activity = self.formatter.activity_data.sort_values('time')
    return get_outlier_freq(activity, anomaly_detection_freq, 'outlier_score_activity')
var raw_activity
-
Expand source code
@property
@load_save(**feature_config['raw_activity']['save'])
def raw_activity(self):
    '''
    Result of ``get_weekly_activity_data`` on the formatter's activity data
    (passed as-is, without sorting).
    '''
    activity = self.formatter.activity_data
    return get_weekly_activity_data(activity)
var rp_location_time_delta
-
Expand source code
@property
@load_save(**feature_config['rp_location_time_delta']['save'])
def rp_location_time_delta(self):
    '''
    Reverse percentile of the time between activities.

    Prints a notice first, then delegates to
    ``get_subject_rp_location_delta`` with the baseline settings read from
    the ``rp_location_time_delta`` config section.
    '''
    print('This might take a bit of time...')

    activity = self.formatter.activity_data.sort_values('time')
    column_names = {'subject': 'id', 'time': 'time', 'location': 'location'}
    settings = feature_config['rp_location_time_delta']
    return get_subject_rp_location_delta(
        data=activity,
        columns=column_names,
        baseline_length_days=settings['baseline_length_days'],
        baseline_offset_days=settings['baseline_offset_days'],
        all_loc_as_baseline=settings['all_loc_as_baseline'],
        name='rp_location_time_delta',
    )
var sleep_hr_mean
-
Expand source code
@property
@load_save(**feature_config['sleep_hr_mean']['save'])
def sleep_hr_mean(self):
    '''
    Mean of sleeping heart rate.

    Delegates to ``get_daily_agg`` on the ``heart_rate`` column of the sleep
    data; the aggregation function is the configured ``agg_type``.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['sleep_hr_mean']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='heart_rate',
        location_name='sleep_hr_mean',
        agg_func=agg_type,
    )
var sleep_hr_std
-
Expand source code
@property
@load_save(**feature_config['sleep_hr_std']['save'])
def sleep_hr_std(self):
    '''
    Standard Deviation of sleeping heart rate.

    Delegates to ``get_daily_agg`` on the ``heart_rate`` column of the sleep
    data; the aggregation function is the configured ``agg_type``.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['sleep_hr_std']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='heart_rate',
        location_name='sleep_hr_std',
        agg_func=agg_type,
    )
var sleep_rr_mean
-
Expand source code
@property
@load_save(**feature_config['sleep_rr_mean']['save'])
def sleep_rr_mean(self):
    '''
    Mean of sleeping respiratory rate.

    Delegates to ``get_daily_agg`` on the ``respiratory_rate`` column of the
    sleep data; the aggregation function is the configured ``agg_type``.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['sleep_rr_mean']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='respiratory_rate',
        location_name='sleep_rr_mean',
        agg_func=agg_type,
    )
var sleep_rr_std
-
Expand source code
@property
@load_save(**feature_config['sleep_rr_std']['save'])
def sleep_rr_std(self):
    '''
    Standard Deviation of sleeping respiratory rate.

    Delegates to ``get_daily_agg`` on the ``respiratory_rate`` column of the
    sleep data; the aggregation function is the configured ``agg_type``.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['sleep_rr_std']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='respiratory_rate',
        location_name='sleep_rr_std',
        agg_func=agg_type,
    )
var snore_freq
-
Expand source code
@property
@load_save(**feature_config['snore_freq']['save'])
def snore_freq(self):
    '''
    Number of snoring activities detected.

    Delegates to ``get_daily_agg`` on the ``snoring`` column of the sleep
    data; the aggregation function is the configured ``agg_type``.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['snore_freq']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='snoring',
        location_name='snore_freq',
        agg_func=agg_type,
    )
var state_awake_freq
-
Expand source code
@property
@load_save(**feature_config['sleep_freq']['save'])
def state_awake_freq(self):
    '''
    Number of awake sleep instances.

    Delegates to ``get_daily_agg`` on the ``state_AWAKE`` column of the
    sleep data, with the configured ``sleep_freq`` ``agg_type``.

    NOTE(review): the ``sleep_freq`` save settings are shared by all four
    ``state_*_freq`` properties -- confirm ``load_save`` keeps their caches
    apart.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['sleep_freq']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='state_AWAKE',
        location_name='state_awake_freq',
        agg_func=agg_type,
    )
var state_deep_freq
-
Expand source code
@property
@load_save(**feature_config['sleep_freq']['save'])
def state_deep_freq(self):
    '''
    Number of deep sleep instances.

    Delegates to ``get_daily_agg`` on the ``state_DEEP`` column of the
    sleep data, with the configured ``sleep_freq`` ``agg_type``.

    NOTE(review): the ``sleep_freq`` save settings are shared by all four
    ``state_*_freq`` properties -- confirm ``load_save`` keeps their caches
    apart.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['sleep_freq']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='state_DEEP',
        location_name='state_deep_freq',
        agg_func=agg_type,
    )
var state_light_freq
-
Expand source code
@property
@load_save(**feature_config['sleep_freq']['save'])
def state_light_freq(self):
    '''
    Number of light sleep instances.

    Delegates to ``get_daily_agg`` on the ``state_LIGHT`` column of the
    sleep data, with the configured ``sleep_freq`` ``agg_type``.

    NOTE(review): the ``sleep_freq`` save settings are shared by all four
    ``state_*_freq`` properties -- confirm ``load_save`` keeps their caches
    apart.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['sleep_freq']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='state_LIGHT',
        location_name='state_light_freq',
        agg_func=agg_type,
    )
var state_rem_freq
-
Expand source code
@property
@load_save(**feature_config['sleep_freq']['save'])
def state_rem_freq(self):
    '''
    Number of REM sleep instances.

    Delegates to ``get_daily_agg`` on the ``state_REM`` column of the
    sleep data, with the configured ``sleep_freq`` ``agg_type``.

    NOTE(review): the ``sleep_freq`` save settings are shared by all four
    ``state_*_freq`` properties -- confirm ``load_save`` keeps their caches
    apart.
    '''
    sleep = self.formatter.sleep_data
    agg_type = feature_config['sleep_freq']['agg_type']
    return get_daily_agg(
        sleep,
        value_name='state_REM',
        location_name='state_rem_freq',
        agg_func=agg_type,
    )
Methods
def activity_specific_agg(self, agg='daily', load_smaller_aggs=False)
-
Expand source code
def activity_specific_agg(self, agg='daily', load_smaller_aggs=False):
    '''
    Collect every feature of one time aggregation into a single dataframe.

    Parameters
    ----------
    agg : str
        One of ``'evently'``, ``'daily'`` or ``'weekly'``. Selects the
        feature names listed under that key in ``self.agg_info``.
    load_smaller_aggs : bool
        If True, ``'weekly'`` also loads the ``'daily'`` features and
        ``'daily'`` also loads the ``'evently'`` features.

    Returns
    -------
    pandas.DataFrame
        The concatenated feature data with a UTC ``time`` column. For
        ``'daily'`` and ``'weekly'`` the ``value`` column is aggregated with
        ``self.agg_method`` per ``id``, ``week``, ``location`` and per day
        (daily) or week (weekly) of ``time``. For ``'evently'`` the rows are
        returned without aggregation.

    Raises
    ------
    TypeError
        If ``agg`` is not an accepted aggregation, or if an aggregation is
        requested and ``self.agg_method`` is not ``'sum'``, ``'median'`` or
        ``'mean'``.
    '''
    accepted_agg = ['evently', 'daily', 'weekly']
    if agg not in accepted_agg:
        raise TypeError('Please use an agg from the list {}'.format(accepted_agg))

    # Reject an unsupported agg_method before the (potentially slow) feature
    # loading. 'evently' never aggregates, so it is not checked there.
    if agg != 'evently' and self.agg_method not in ('sum', 'median', 'mean'):
        raise TypeError('agg_method={} is not implemented'.format(self.agg_method))

    # Work on a copy so the list held by agg_info is never extended in place.
    features = list(self.agg_info[agg])
    if load_smaller_aggs:
        if agg == 'weekly':
            features.extend(self.agg_info['daily'])
        elif agg == 'daily':
            features.extend(self.agg_info['evently'])

    # Columns kept from every feature; 'evently' keeps the frame untouched.
    columns_by_agg = {
        'weekly': ['id', 'week', 'location', 'value'],
        'daily': ['id', 'week', 'time', 'location', 'value'],
    }
    keep_columns = columns_by_agg.get(agg)

    frames = []
    for feat in features:
        feat_data = getattr(self, feat)
        if keep_columns is not None:
            feat_data = feat_data[keep_columns]
        frames.append(feat_data)
    data = pd.concat(frames)

    if agg == 'weekly':
        # Weekly frames carry no 'time' column; rebuild it from 'week'.
        data['time'] = week_to_date(data['week'])
    data['time'] = pd.to_datetime(data['time'], utc=True)

    if agg != 'evently':
        freq = '1D' if agg == 'daily' else '1W'
        grouper = pd.Grouper(key='time', freq=freq, dropna=False)
        grouped = data.groupby(['id', 'week', 'location', grouper])['value']
        # agg_method was validated above, so it names sum/median/mean.
        data = grouped.agg(self.agg_method).reset_index()

    return data