Module minder_utils.formatting.formatting
Expand source code
import pandas as pd
import os
import time
from minder_utils.configurations import config
from .format_util import iter_dir
from minder_utils.download.download import Downloader
from minder_utils.util.decorators import load_save
from minder_utils.formatting.format_tihm import format_tihm_data
import numpy as np
from minder_utils.util.util import reformat_path
from .label import label_dataframe
class Formatting:
    """
    Process the raw csv exports into tidy dataframes of the form:

        patient id, location/device type, time, value

    The processed data is exposed through the ``physiological_data``,
    ``activity_data``, ``environmental_data`` and ``sleep_data`` properties,
    each cached on disk via the ``load_save`` decorator.
    """

    def __init__(self, path=os.path.join('./data', 'raw_data'), add_tihm=None):
        """
        :param path: directory holding the raw csv files.
        :param add_tihm: optional bool; when not None it overrides the
            per-datatype ``add_tihm`` flag from the configuration.
        """
        self.path = reformat_path(path)
        self.add_tihm = add_tihm
        self.activity_nice_locations = config['activity_nice_locations']

        # Lookup csv files required for formatting; download any that are
        # missing. Use the normalized self.path consistently (the original
        # mixed the raw `path` and `self.path`).
        categories_check = ['device_types', 'homes', 'patients']
        if not np.all([os.path.exists(os.path.join(self.path, category + '.csv'))
                       for category in categories_check]):
            print('Downloading required files for formatting')
            dl = Downloader()
            dl.export(categories=categories_check,
                      reload=True, since=None, until=None, save_path=self.path, append=False)
            print('Required files downloaded')

        # Map device id -> human-readable device type.
        self.device_type = \
            pd.read_csv(os.path.join(self.path, 'device_types.csv'))[['id', 'type']].set_index('id').to_dict()['type']
        self.config = config

    @property
    @load_save(**config['physiological']['save'])
    def physiological_data(self):
        """Labelled physiological readings, optionally merged with TIHM data."""
        add_tihm = config['physiological']['add_tihm'] if self.add_tihm is None else self.add_tihm
        if add_tihm:
            data = self.process_data('physiological')
            tihm_data = format_tihm_data()
            return label_dataframe(pd.concat([data, tihm_data['physiological']]))
        return label_dataframe(self.process_data('physiological').drop_duplicates())

    @property
    @load_save(**config['activity']['save'])
    def activity_data(self):
        """Labelled activity events sorted by time, optionally merged with TIHM data."""
        add_tihm = config['activity']['add_tihm'] if self.add_tihm is None else self.add_tihm
        if add_tihm:
            data = self.process_data('activity')
            tihm_data = format_tihm_data()
            return label_dataframe(pd.concat([data, tihm_data['activity']]).drop_duplicates().sort_values('time'))
        return label_dataframe(self.process_data('activity')).sort_values('time')

    @property
    @load_save(**config['environmental']['save'])
    def environmental_data(self):
        """Labelled environmental readings."""
        return label_dataframe(self.process_data('environmental'))

    @property
    @load_save(**config['sleep']['save'])
    def sleep_data(self):
        """Labelled sleep records, sorted by time with a fresh index."""
        return label_dataframe(self.process_data('sleep')).sort_values('time').reset_index(drop=True)

    def process_data(self, datatype):
        """
        Iterate over the csv files in ``self.path`` and process every file
        whose name is listed in ``config[datatype]['type']``.

        :param datatype: one of 'physiological', 'activity',
            'environmental', 'sleep'.
        :return: dataframe with the columns from ``config[datatype]['columns']``.
        """
        assert datatype in ['physiological', 'activity', 'environmental', 'sleep'], 'not a valid type'
        process_func = getattr(self, 'process_{}_data'.format(datatype))
        dataframe = pd.DataFrame(columns=self.config[datatype]['columns'])
        for name in iter_dir(self.path):
            start_time = time.time()
            print('Processing: {} -------> {}'.format(datatype, name).ljust(80, ' '), end='')
            if name in self.config[datatype]['type']:
                dataframe = process_func(name, dataframe)
            end_time = time.time()
            print('Finished in {:.2f} seconds'.format(end_time - start_time))
        return dataframe

    def process_sleep_data(self, name, df):
        """
        Process one sleep csv file and append it to ``df``.

        Categorical columns (from ``config['sleep']['categorical_columns']``)
        are one-hot encoded and melted to long form; numeric value columns
        (from ``config['sleep']['value_columns']``) are melted directly.

        :param name: csv file name (without extension) to load.
        :param df: dataframe to append the processed data to.
        :return: ``df`` with the processed records appended.
        """
        col_filter = ['patient_id', 'start_date']
        categorical_columns = self.config['sleep']['categorical_columns']
        value_columns = self.config['sleep']['value_columns']
        data_adding = pd.read_csv(os.path.join(self.path, name + '.csv'))

        # Keep only the configured categorical columns present in this file.
        categorical_columns = [column for column in categorical_columns if column in list(data_adding.columns)]
        if len(categorical_columns) != 0:
            data_cat = data_adding[col_filter + categorical_columns].copy()
            # One-hot encode the categorical columns, then melt to
            # (patient_id, start_date, location, value) long form.
            data_cat = pd.melt(data_cat.merge(
                pd.get_dummies(data_cat[categorical_columns]),
                left_index=True, right_index=True
            ).drop(categorical_columns,
                   axis=1),
                id_vars=col_filter,
                var_name='location',
                value_name='value')
            # Drop the zero entries produced by one-hot encoding and NaNs.
            data_cat = data_cat[data_cat.value != 0]
            data_cat = data_cat[data_cat['value'].notna()]
            data_cat.value = data_cat.value.astype(float)
        else:
            data_cat = None

        # Keep only the configured numeric value columns present in this file.
        value_columns = [column for column in value_columns if column in list(data_adding.columns)]
        if len(value_columns) != 0:
            data_val = data_adding[col_filter + value_columns].copy()
            data_val = pd.melt(data_val,
                               id_vars=col_filter,
                               var_name='location',
                               value_name='value')
            data_val = data_val[data_val['value'].notna()]
            data_val.value = data_val.value.astype(float)
        else:
            data_val = None

        if (data_val is None) and (data_cat is None):
            # Nothing usable in this file; leave df unchanged.
            return df
        data_out = pd.concat([data_cat, data_val])
        data_out.columns = self.config['sleep']['columns']
        data_out.time = pd.to_datetime(data_out.time, utc=True)
        return pd.concat([df, data_out])

    def process_physiological_data(self, name, df):
        """
        Process one physiological csv file and append it to ``df``.

        NOTE: the data is averaged by date, patient id and device type.

        :param name: csv file name (without extension) to load; the data must
            contain ['patient_id', 'start_date', 'device_type', 'value', 'unit'].
        :param df: dataframe to append the processed data to.
        :return: ``df`` with the processed records appended.
        """
        col_filter = ['patient_id', 'start_date', 'device_type', 'value']
        data = pd.read_csv(os.path.join(self.path, name + '.csv'))
        # Drop repeated header rows left by appended csv exports.
        data = data[data.start_date != 'start_date']
        data.loc[:, 'device_type'] = data.device_type.map(self.device_type)
        try:
            # Use a type-specific pre-processor when one is defined
            # (e.g. process_raw_blood_pressure).
            data = getattr(self, 'process_' + name)(data)[col_filter]
        except AttributeError:
            # No dedicated pre-processor: tag the device type with the file
            # name minus its first four characters (the 'raw_' prefix).
            data.device_type += '->' + name[4:]
        data.start_date = pd.to_datetime(data.start_date).dt.date
        data = data[col_filter]
        data.value = data.value.astype(float)
        data = data.groupby(['patient_id', 'start_date', 'device_type']).mean().reset_index()
        data.columns = self.config['physiological']['columns']
        data.location = data.location.apply(lambda x: x.split('->')[-1])
        data.time = pd.to_datetime(data.time, utc=True)
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
        return pd.concat([df, data])

    def process_activity_data(self, name, df):
        """
        Process one activity csv file and append it to ``df``.

        NOTE:
            Door -> the values will be set to one (either open or close)
            appliance -> the values will be set to 1, the location_name is
                derived from the value, e.g. iron-use -> location: iron, value: 1

        :param name: csv file name (without extension) to load; the data must
            contain ['patient_id', 'start_date', 'location_name', 'value'].
        :param df: dataframe to append the processed data to.
        :return: ``df`` with the processed records appended.
        """
        col_filter = ['patient_id', 'start_date', 'location_name', 'value']
        data = pd.read_csv(os.path.join(self.path, name + '.csv'))
        # Drop repeated header rows left by appended csv exports.
        data = data[data.start_date != 'start_date']
        data = data[data['location_name'] != 'location_name']
        data = getattr(self, 'process_' + name)(data)[col_filter]
        data.columns = self.config['activity']['columns']
        data.time = pd.to_datetime(data.time, utc=True)
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
        return pd.concat([df, data])

    def process_environmental_data(self, name, df):
        """
        Process one environmental csv file and append it to ``df``.

        NOTE: the data is averaged by date, patient id and location.

        :param name: csv file name (without extension) to load; the data must
            contain ['patient_id', 'start_date', 'location_name', 'device_type',
            'value', 'unit'].
        :param df: dataframe to append the processed data to.
        :return: ``df`` with the processed records appended.
        """
        col_filter = ['patient_id', 'start_date', 'location_name', 'value']
        data = pd.read_csv(os.path.join(self.path, name + '.csv'))
        # Drop repeated header rows left by appended csv exports.
        data = data[data.start_date != 'start_date']
        data.start_date = pd.to_datetime(data.start_date).dt.date
        data = data[col_filter]
        # data.loc[:, 'device_type'] = data.device_type.map(self.device_type)
        data.value = data.value.astype(float)
        data = data.groupby(['patient_id', 'start_date', 'location_name']).mean().reset_index()
        data.columns = self.config['environmental']['columns']
        data.time = pd.to_datetime(data.time, utc=True)
        data['type'] = name
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
        return pd.concat([df, data])

    @staticmethod
    def process_raw_door_sensor(data):
        """Door events carry no magnitude: force value to 1 (open or close)."""
        data['value'] = 1
        return data

    @staticmethod
    def process_raw_activity_pir(data):
        """PIR events carry no magnitude: force value to 1."""
        data['value'] = 1
        return data

    @staticmethod
    def process_raw_appliance_use(data):
        """Derive location from the value (e.g. 'iron-use' -> 'iron'), set value to 1."""
        data.location_name = data.value.apply(lambda x: x.split('-')[0])
        data.value = 1
        return data

    @staticmethod
    def process_raw_blood_pressure(data):
        """
        Split blood-pressure rows into separate systolic and diastolic records.

        The device type is tagged with '->systolic' / '->diastolic' so the
        two series stay distinguishable after the later groupby.
        """
        col_filter = ['patient_id', 'start_date', 'device_type', 'value', 'unit']
        blood_pressure = []
        for value in ['systolic_value', 'diastolic_value']:
            tmp_filter = col_filter.copy()
            tmp_filter[3] = value
            # Copy the slice: the original mutated a view of `data`, which
            # triggers SettingWithCopyWarning and can leak changes back.
            tmp_data = data[tmp_filter].copy()
            tmp_data.device_type += '->' + value.split('_')[0]
            tmp_data['value'] = tmp_data[value]
            blood_pressure.append(tmp_data[col_filter])
        return pd.concat(blood_pressure)

    def process_observation_notes(self):
        """
        TODO
            need to be processed by NLP maybe
        :return:
        """
        return False

    def process_procedure(self):
        """
        TODO
            need to be processed by NLP maybe
        :return:
        """
        return False

    def process_raw_behavioural(self):
        """
        TODO
            need to be processed by NLP maybe
        :return:
        """
        return False

    def process_issue(self):
        """
        TODO
        :return:
        """
        return False

    def process_encounter(self):
        """
        TODO
        :return:
        """
        return False

    def process_homes(self):
        """
        TODO
        Data:
            home id - patient id
        :return:
        """
        return False
# Smoke-test entry point: build a formatter against a local raw-data folder.
if __name__ == '__main__':
    formatter = Formatting('./raw_data/')
Classes
class Formatting (path='./data\\raw_data', add_tihm=None)
-
Process the data to the following dataframe:
Patient id, device type, time, value
Expand source code
class Formatting: """ Process the data to the following dataframe: Patient id, device type, time, value """ def __init__(self, path=os.path.join('./data', 'raw_data'), add_tihm=None): self.path = reformat_path(path) self.add_tihm = add_tihm self.activity_nice_locations = config['activity_nice_locations'] categories_check = ['device_types', 'homes', 'patients'] if not np.all([os.path.exists(os.path.join(path, category + '.csv')) for category in categories_check]): print('Downloading required files for formatting') dl = Downloader() dl.export(categories=['device_types', 'homes', 'patients'], reload=True, since=None, until=None, save_path=path, append=False) print('Required files downloaded') self.device_type = \ pd.read_csv(os.path.join(self.path, 'device_types.csv'))[['id', 'type']].set_index('id').to_dict()['type'] self.config = config @property @load_save(**config['physiological']['save']) def physiological_data(self): add_tihm = config['physiological']['add_tihm'] if self.add_tihm is None else self.add_tihm if add_tihm: data = self.process_data('physiological') tihm_data = format_tihm_data() return label_dataframe(pd.concat([data, tihm_data['physiological']])) return label_dataframe(self.process_data('physiological').drop_duplicates()) @property @load_save(**config['activity']['save']) def activity_data(self): add_tihm = config['activity']['add_tihm'] if self.add_tihm is None else self.add_tihm if add_tihm: data = self.process_data('activity') tihm_data = format_tihm_data() return label_dataframe(pd.concat([data, tihm_data['activity']]).drop_duplicates().sort_values('time')) return label_dataframe(self.process_data('activity')).sort_values('time') @property @load_save(**config['environmental']['save']) def environmental_data(self): return label_dataframe(self.process_data('environmental')) @property @load_save(**config['sleep']['save']) def sleep_data(self): return label_dataframe(self.process_data('sleep')).sort_values('time').reset_index(drop=True) def 
process_data(self, datatype): assert datatype in ['physiological', 'activity', 'environmental', 'sleep'], 'not a valid type' process_func = getattr(self, 'process_{}_data'.format(datatype)) dataframe = pd.DataFrame(columns=self.config[datatype]['columns']) for name in iter_dir(self.path): start_time = time.time() print('Processing: {} -------> {}'.format(datatype, name).ljust(80, ' '), end='') if name in self.config[datatype]['type']: dataframe = process_func(name, dataframe) end_time = time.time() print('Finished in {:.2f} seconds'.format(end_time - start_time)) return dataframe def process_sleep_data(self, name, df): ''' This function will process the sleep data. ''' col_filter = ['patient_id', 'start_date'] categorical_columns = self.config['sleep']['categorical_columns'] value_columns = self.config['sleep']['value_columns'] data_adding = pd.read_csv(os.path.join(self.path, name + '.csv')) categorical_columns = [column for column in categorical_columns if column in list(data_adding.columns)] if len(categorical_columns) != 0: data_cat = data_adding[col_filter+categorical_columns].copy() data_cat = pd.melt(data_cat.merge( pd.get_dummies(data_cat[categorical_columns]), left_index=True, right_index=True ).drop(categorical_columns, axis=1), id_vars=col_filter, var_name='location', value_name='value') data_cat = data_cat[data_cat.value != 0] data_cat = data_cat[data_cat['value'].notna()] data_cat.value = data_cat.value.astype(float) else: data_cat = None value_columns = [column for column in value_columns if column in list(data_adding.columns)] if len(value_columns) != 0: data_val = data_adding[col_filter+value_columns].copy() data_val = pd.melt(data_val, id_vars=col_filter, var_name='location', value_name='value') data_val = data_val[data_val['value'].notna()] data_val.value = data_val.value.astype(float) else: data_val = None if (data_val is None) and (data_cat is None): return df data_out = pd.concat([data_cat, data_val]) data_out.columns = 
self.config['sleep']['columns'] data_out.time = pd.to_datetime(data_out.time, utc=True) return pd.concat([df, data_out]) def process_physiological_data(self, name, df): """ process the physiological data, the data will be append to the self.physiological_data NOTE: the data will be averaged by date and patient id :param name: string, file name to load the data. data: the data must contains ['patient_id', 'start_date', 'device_type', 'value', 'unit'] :param df: dataframe, dataframe to append the data :return: append to the self.physiological_data """ col_filter = ['patient_id', 'start_date', 'device_type', 'value'] data = pd.read_csv(os.path.join(self.path, name + '.csv')) data = data[data.start_date != 'start_date'] data.loc[:, 'device_type'] = data.device_type.map(self.device_type) try: data = getattr(self, 'process_' + name)(data)[col_filter] except AttributeError: data.device_type += '->' + name[4:] data.start_date = pd.to_datetime(data.start_date).dt.date data = data[col_filter] data.value = data.value.astype(float) data = data.groupby(['patient_id', 'start_date', 'device_type']).mean().reset_index() data.columns = self.config['physiological']['columns'] data.location = data.location.apply(lambda x: x.split('->')[-1]) data.time = pd.to_datetime(data.time, utc=True) return df.append(data) def process_activity_data(self, name, df): """ process the activity data, the data will be append to the self.activity_data NOTE: Door -> the values will be set to one (either open or close) application -> the values will be set to 1, the location_name will be set based on the the names of the values, e.g. 
iron-use -> location_name: iron, value: 1 :param name: the file name to load the data the data must contains ['patient_id', 'start_date', 'location_name', 'value'] :param df: dataframe, dataframe to append the data :return: append to the self.activity_data """ col_filter = ['patient_id', 'start_date', 'location_name', 'value'] data = pd.read_csv(os.path.join(self.path, name + '.csv')) data = data[data.start_date != 'start_date'] data = data[data['location_name'] != 'location_name'] data = getattr(self, 'process_' + name)(data)[col_filter] data.columns = self.config['activity']['columns'] data.time = pd.to_datetime(data.time, utc=True) return df.append(data) def process_environmental_data(self, name, df): """ process the environmental data, the data will be append to the self.environmental_data NOTE: the data will be averaged by date and patient id :param name: file name to load the data. data: the data must contains ['patient_id', 'start_date', 'location_name', 'device_type', 'value', 'unit'] :param df: dataframe, dataframe to append the data :return: append to the self.environmental_data """ col_filter = ['patient_id', 'start_date', 'location_name', 'value'] data = pd.read_csv(os.path.join(self.path, name + '.csv')) data = data[data.start_date != 'start_date'] data.start_date = pd.to_datetime(data.start_date).dt.date data = data[col_filter] # data.loc[:, 'device_type'] = data.device_type.map(self.device_type) data.value = data.value.astype(float) data = data.groupby(['patient_id', 'start_date', 'location_name']).mean().reset_index() data.columns = self.config['environmental']['columns'] data.time = pd.to_datetime(data.time, utc=True) data['type'] = name return df.append(data) @staticmethod def process_raw_door_sensor(data): data['value'] = 1 return data @staticmethod def process_raw_activity_pir(data): data['value'] = 1 return data @staticmethod def process_raw_appliance_use(data): data.location_name = data.value.apply(lambda x: x.split('-')[0]) data.value = 1 
return data @staticmethod def process_raw_blood_pressure(data): col_filter = ['patient_id', 'start_date', 'device_type', 'value', 'unit'] blood_pressure = [] for value in ['systolic_value', 'diastolic_value']: tmp_filter = col_filter.copy() tmp_filter[3] = value tmp_data = data[tmp_filter] tmp_data.device_type += '->' + value.split('_')[0] tmp_data['value'] = tmp_data[value] blood_pressure.append(tmp_data[col_filter]) return pd.concat(blood_pressure) def process_observation_notes(self): """ TODO need to be process by NLP maybe :return: """ return False def process_procedure(self): """ TODO need to be process by NLP maybe :return: """ return False def process_raw_behavioural(self): """ TODO need to be process by NLP maybe :return: """ return False def process_issue(self): """ TODO :return: """ return False def process_encounter(self): """ TODO :return: """ return False def process_homes(self): """ TODO Data: homd id - patient id :return: """ return False
Static methods
def process_raw_activity_pir(data)
-
Expand source code
@staticmethod def process_raw_activity_pir(data): data['value'] = 1 return data
def process_raw_appliance_use(data)
-
Expand source code
@staticmethod def process_raw_appliance_use(data): data.location_name = data.value.apply(lambda x: x.split('-')[0]) data.value = 1 return data
def process_raw_blood_pressure(data)
-
Expand source code
@staticmethod def process_raw_blood_pressure(data): col_filter = ['patient_id', 'start_date', 'device_type', 'value', 'unit'] blood_pressure = [] for value in ['systolic_value', 'diastolic_value']: tmp_filter = col_filter.copy() tmp_filter[3] = value tmp_data = data[tmp_filter] tmp_data.device_type += '->' + value.split('_')[0] tmp_data['value'] = tmp_data[value] blood_pressure.append(tmp_data[col_filter]) return pd.concat(blood_pressure)
def process_raw_door_sensor(data)
-
Expand source code
@staticmethod def process_raw_door_sensor(data): data['value'] = 1 return data
Instance variables
var activity_data
-
Expand source code
@property @load_save(**config['activity']['save']) def activity_data(self): add_tihm = config['activity']['add_tihm'] if self.add_tihm is None else self.add_tihm if add_tihm: data = self.process_data('activity') tihm_data = format_tihm_data() return label_dataframe(pd.concat([data, tihm_data['activity']]).drop_duplicates().sort_values('time')) return label_dataframe(self.process_data('activity')).sort_values('time')
var environmental_data
-
Expand source code
@property @load_save(**config['environmental']['save']) def environmental_data(self): return label_dataframe(self.process_data('environmental'))
var physiological_data
-
Expand source code
@property @load_save(**config['physiological']['save']) def physiological_data(self): add_tihm = config['physiological']['add_tihm'] if self.add_tihm is None else self.add_tihm if add_tihm: data = self.process_data('physiological') tihm_data = format_tihm_data() return label_dataframe(pd.concat([data, tihm_data['physiological']])) return label_dataframe(self.process_data('physiological').drop_duplicates())
var sleep_data
-
Expand source code
@property @load_save(**config['sleep']['save']) def sleep_data(self): return label_dataframe(self.process_data('sleep')).sort_values('time').reset_index(drop=True)
Methods
def process_activity_data(self, name, df)
-
process the activity data, the data will be appended to the self.activity_data
Note
Door -> the values will be set to one (either open or close) application -> the values will be set to 1, the location_name will be set based on the the names of the values, e.g. iron-use -> location_name: iron, value: 1 :param name: the file name to load the data the data must contains ['patient_id', 'start_date', 'location_name', 'value'] :param df: dataframe, dataframe to append the data :return: append to the self.activity_data
Expand source code
def process_activity_data(self, name, df): """ process the activity data, the data will be append to the self.activity_data NOTE: Door -> the values will be set to one (either open or close) application -> the values will be set to 1, the location_name will be set based on the the names of the values, e.g. iron-use -> location_name: iron, value: 1 :param name: the file name to load the data the data must contains ['patient_id', 'start_date', 'location_name', 'value'] :param df: dataframe, dataframe to append the data :return: append to the self.activity_data """ col_filter = ['patient_id', 'start_date', 'location_name', 'value'] data = pd.read_csv(os.path.join(self.path, name + '.csv')) data = data[data.start_date != 'start_date'] data = data[data['location_name'] != 'location_name'] data = getattr(self, 'process_' + name)(data)[col_filter] data.columns = self.config['activity']['columns'] data.time = pd.to_datetime(data.time, utc=True) return df.append(data)
def process_data(self, datatype)
-
Expand source code
def process_data(self, datatype): assert datatype in ['physiological', 'activity', 'environmental', 'sleep'], 'not a valid type' process_func = getattr(self, 'process_{}_data'.format(datatype)) dataframe = pd.DataFrame(columns=self.config[datatype]['columns']) for name in iter_dir(self.path): start_time = time.time() print('Processing: {} -------> {}'.format(datatype, name).ljust(80, ' '), end='') if name in self.config[datatype]['type']: dataframe = process_func(name, dataframe) end_time = time.time() print('Finished in {:.2f} seconds'.format(end_time - start_time)) return dataframe
def process_encounter(self)
-
TODO :return:
Expand source code
def process_encounter(self): """ TODO :return: """ return False
def process_environmental_data(self, name, df)
-
process the environmental data, the data will be appended to the self.environmental_data
Note
the data will be averaged by date and patient id :param name: file name to load the data. data: the data must contains ['patient_id', 'start_date', 'location_name', 'device_type', 'value', 'unit'] :param df: dataframe, dataframe to append the data :return: append to the self.environmental_data
Expand source code
def process_environmental_data(self, name, df): """ process the environmental data, the data will be append to the self.environmental_data NOTE: the data will be averaged by date and patient id :param name: file name to load the data. data: the data must contains ['patient_id', 'start_date', 'location_name', 'device_type', 'value', 'unit'] :param df: dataframe, dataframe to append the data :return: append to the self.environmental_data """ col_filter = ['patient_id', 'start_date', 'location_name', 'value'] data = pd.read_csv(os.path.join(self.path, name + '.csv')) data = data[data.start_date != 'start_date'] data.start_date = pd.to_datetime(data.start_date).dt.date data = data[col_filter] # data.loc[:, 'device_type'] = data.device_type.map(self.device_type) data.value = data.value.astype(float) data = data.groupby(['patient_id', 'start_date', 'location_name']).mean().reset_index() data.columns = self.config['environmental']['columns'] data.time = pd.to_datetime(data.time, utc=True) data['type'] = name return df.append(data)
def process_homes(self)
-
TODO Data: home id - patient id :return:
Expand source code
def process_homes(self): """ TODO Data: home id - patient id :return: """ return False
def process_issue(self)
-
TODO :return:
Expand source code
def process_issue(self): """ TODO :return: """ return False
def process_observation_notes(self)
-
TODO need to be process by NLP maybe :return:
Expand source code
def process_observation_notes(self): """ TODO need to be process by NLP maybe :return: """ return False
def process_physiological_data(self, name, df)
-
process the physiological data, the data will be appended to the self.physiological_data
Note
the data will be averaged by date and patient id
:param name: string, file name to load the data. data: the data must contains ['patient_id', 'start_date', 'device_type', 'value', 'unit'] :param df: dataframe, dataframe to append the data :return: append to the self.physiological_data
Expand source code
def process_physiological_data(self, name, df): """ process the physiological data, the data will be append to the self.physiological_data NOTE: the data will be averaged by date and patient id :param name: string, file name to load the data. data: the data must contains ['patient_id', 'start_date', 'device_type', 'value', 'unit'] :param df: dataframe, dataframe to append the data :return: append to the self.physiological_data """ col_filter = ['patient_id', 'start_date', 'device_type', 'value'] data = pd.read_csv(os.path.join(self.path, name + '.csv')) data = data[data.start_date != 'start_date'] data.loc[:, 'device_type'] = data.device_type.map(self.device_type) try: data = getattr(self, 'process_' + name)(data)[col_filter] except AttributeError: data.device_type += '->' + name[4:] data.start_date = pd.to_datetime(data.start_date).dt.date data = data[col_filter] data.value = data.value.astype(float) data = data.groupby(['patient_id', 'start_date', 'device_type']).mean().reset_index() data.columns = self.config['physiological']['columns'] data.location = data.location.apply(lambda x: x.split('->')[-1]) data.time = pd.to_datetime(data.time, utc=True) return df.append(data)
def process_procedure(self)
-
TODO need to be process by NLP maybe :return:
Expand source code
def process_procedure(self): """ TODO need to be process by NLP maybe :return: """ return False
def process_raw_behavioural(self)
-
TODO need to be process by NLP maybe :return:
Expand source code
def process_raw_behavioural(self): """ TODO need to be process by NLP maybe :return: """ return False
def process_sleep_data(self, name, df)
-
This function will process the sleep data.
Expand source code
def process_sleep_data(self, name, df): ''' This function will process the sleep data. ''' col_filter = ['patient_id', 'start_date'] categorical_columns = self.config['sleep']['categorical_columns'] value_columns = self.config['sleep']['value_columns'] data_adding = pd.read_csv(os.path.join(self.path, name + '.csv')) categorical_columns = [column for column in categorical_columns if column in list(data_adding.columns)] if len(categorical_columns) != 0: data_cat = data_adding[col_filter+categorical_columns].copy() data_cat = pd.melt(data_cat.merge( pd.get_dummies(data_cat[categorical_columns]), left_index=True, right_index=True ).drop(categorical_columns, axis=1), id_vars=col_filter, var_name='location', value_name='value') data_cat = data_cat[data_cat.value != 0] data_cat = data_cat[data_cat['value'].notna()] data_cat.value = data_cat.value.astype(float) else: data_cat = None value_columns = [column for column in value_columns if column in list(data_adding.columns)] if len(value_columns) != 0: data_val = data_adding[col_filter+value_columns].copy() data_val = pd.melt(data_val, id_vars=col_filter, var_name='location', value_name='value') data_val = data_val[data_val['value'].notna()] data_val.value = data_val.value.astype(float) else: data_val = None if (data_val is None) and (data_cat is None): return df data_out = pd.concat([data_cat, data_val]) data_out.columns = self.config['sleep']['columns'] data_out.time = pd.to_datetime(data_out.time, utc=True) return pd.concat([df, data_out])