Module minder_utils.formatting.format_tihm
Expand source code
import pandas as pd
import os
from minder_utils.formatting.map_utils import map_random_ids
from minder_utils.util.decorators import load_save
from minder_utils.configurations import config, tihm_data_path
@load_save(os.path.join('./data', 'pkl', 'raw_data'), 'TIHM')
def format_tihm_data():
'''
This function will change the TIHM data to the same format as DRI
Args:
path_to_tihm: string, Path to TIHM folder
Returns:
df: Dataframe, same as pir sensors in DRI data
'''
with open(tihm_data_path, 'r') as file_read:
path_to_tihm = file_read.read()
def add_location(df, location_name):
df['location'] = location_name
df = df[['subject', 'datetimeObserved', 'location', 'valueQuantity']]
df.columns = config['physiological']['columns']
return df
sensors = {'Kettle': 'kettle', 'Bedroom': 'bedroom1', 'Kitchen': 'kitchen',
'Bathroom': 'bathroom1', 'Hallway': 'hallway',
'Fridge Door': 'fridge door', 'Front Door': 'front door',
'Lounge': 'lounge', 'Back Door': 'back door', 'Toaster': 'toaster',
'Microwave': 'microwave', 'Study': 'study',
'Dining Room': 'dining room', 'Living Room': 'living room'}
data = pd.read_csv(os.path.join(path_to_tihm, 'Observations.csv'))
# deleting rows that have times and dates before the year 2000
data = data[(pd.to_datetime(data.datetimeObserved) > pd.to_datetime('2000-01-01 00:00:00'))]
#data.subject = map_random_ids(data.subject, True)
activity_data = data[data.location.isin(list(sensors.keys()))] \
[['subject', 'datetimeObserved', 'location', 'valueQuantity']]
body_temperature = data[data.device == 27991004] \
[['subject', 'datetimeObserved', 'valueQuantity']]
blood_pressure = data[data.device == 70665002] \
[['subject', 'datetimeObserved', 'valueQuantity']]
scale = data[data.device == 19892000] \
[['subject', 'datetimeObserved', 'valueQuantity']]
activity_data.valueQuantity = 1
activity_data.columns = config['activity']['columns']
body_temperature = add_location(body_temperature, 'body_temperature')
blood_pressure = add_location(blood_pressure, 'blood_pressure')
scale = add_location(scale, 'scale')
physiological = pd.concat([body_temperature, blood_pressure, scale])
activity_data.location = activity_data.location.map(sensors)
activity_data.time = pd.to_datetime(activity_data.time, utc=True)
physiological.time = pd.to_datetime(physiological.time, utc=True)
data = {'activity': activity_data,
'physiological': physiological}
return data
Functions
def format_tihm_data()
-
This function will change the TIHM data to the same format as DRI
Args
path_to_tihm
- string, Path to TIHM folder
Returns
df
- Dataframe, same as pir sensors in DRI data
Expand source code
@load_save(os.path.join('./data', 'pkl', 'raw_data'), 'TIHM') def format_tihm_data(): ''' This function will change the TIHM data to the same format as DRI Args: path_to_tihm: string, Path to TIHM folder Returns: df: Dataframe, same as pir sensors in DRI data ''' with open(tihm_data_path, 'r') as file_read: path_to_tihm = file_read.read() def add_location(df, location_name): df['location'] = location_name df = df[['subject', 'datetimeObserved', 'location', 'valueQuantity']] df.columns = config['physiological']['columns'] return df sensors = {'Kettle': 'kettle', 'Bedroom': 'bedroom1', 'Kitchen': 'kitchen', 'Bathroom': 'bathroom1', 'Hallway': 'hallway', 'Fridge Door': 'fridge door', 'Front Door': 'front door', 'Lounge': 'lounge', 'Back Door': 'back door', 'Toaster': 'toaster', 'Microwave': 'microwave', 'Study': 'study', 'Dining Room': 'dining room', 'Living Room': 'living room'} data = pd.read_csv(os.path.join(path_to_tihm, 'Observations.csv')) # deleting rows that have times and dates before the year 2000 data = data[(pd.to_datetime(data.datetimeObserved) > pd.to_datetime('2000-01-01 00:00:00'))] #data.subject = map_random_ids(data.subject, True) activity_data = data[data.location.isin(list(sensors.keys()))] \ [['subject', 'datetimeObserved', 'location', 'valueQuantity']] body_temperature = data[data.device == 27991004] \ [['subject', 'datetimeObserved', 'valueQuantity']] blood_pressure = data[data.device == 70665002] \ [['subject', 'datetimeObserved', 'valueQuantity']] scale = data[data.device == 19892000] \ [['subject', 'datetimeObserved', 'valueQuantity']] activity_data.valueQuantity = 1 activity_data.columns = config['activity']['columns'] body_temperature = add_location(body_temperature, 'body_temperature') blood_pressure = add_location(blood_pressure, 'blood_pressure') scale = add_location(scale, 'scale') physiological = pd.concat([body_temperature, blood_pressure, scale]) activity_data.location = activity_data.location.map(sensors) activity_data.time = pd.to_datetime(activity_data.time, utc=True) physiological.time = pd.to_datetime(physiological.time, utc=True) data = {'activity': activity_data, 'physiological': physiological} return data