Module minder_utils.evaluate.evaluate_models

from minder_utils.evaluate.eval_utils import split_by_ids, get_scores
from minder_utils.formatting.format_util import format_mean_std
from minder_utils.models.classifiers.classifiers import Classifiers as keras_clf
from minder_utils.models.utils.util import train_test_scale
from minder_utils.feature_engineering.util import compute_week_number
import pandas as pd
import numpy as np


def evaluate(model, X, y, p_ids, num_runs=10, valid_only=True, return_raw=False, scale_data=False):
    '''
    Evaluate the performance of a single model over repeated train/test splits grouped by id.

    Parameters
    ----------
    model: classifier exposing reset, fit, predict and a model_type attribute
    X: feature matrix
    y: labels
    p_ids: participant ids used by split_by_ids to form the train/test split
    num_runs: number of repeated splits; the run index seeds each split
    valid_only: if True, only validated samples are used for the split
    return_raw: if True, return the individual scores of every run instead of mean ± std summaries
    scale_data: if True, scale the train and test data with train_test_scale

    Returns
    -------
    DataFrame with columns ['model', 'sensitivity', 'specificity', 'acc', 'f1'].
    '''
    raw_results, results, sen, spe, accs, f1s = [], [], [], [], [], []
    header = ['model', 'sensitivity', 'specificity', 'acc', 'f1']
    for run in range(num_runs):
        X_train, y_train, X_test, y_test = split_by_ids(X, y, p_ids, seed=run, cat=valid_only, valid_only=valid_only)

        if scale_data:
            X_train, X_test = train_test_scale(X_train, X_test)

        model.reset()
        model.fit(X_train, y_train)
        sensitivity, specificity, acc, f1 = get_scores(y_test, model.predict(X_test))
        if sensitivity is not None and str(sensitivity) != 'nan':
            sen.append(sensitivity)
            spe.append(specificity)
            accs.append(acc)
            f1s.append(f1)
            if return_raw:
                raw_results.append([model.model_type, sensitivity, specificity, acc, f1])
    row = [model.model_type, format_mean_std(sen), format_mean_std(spe), format_mean_std(accs), format_mean_std(f1s)]
    results.append(row)

    if return_raw:
        return pd.DataFrame(raw_results, columns=header)
    df_results = pd.DataFrame(results, columns=header)
    return df_results


def evaluate_features(X, y, p_ids, num_runs=10, valid_only=True, return_raw=False, verbose=True, scale_data=False):
    '''
    Evaluate a feature matrix on every baseline model provided by Classifiers.

    Parameters
    ----------
    X: feature matrix
    y: labels
    p_ids: participant ids used by split_by_ids to form the train/test splits
    num_runs: number of repeated splits per model
    valid_only: if True, only validated samples are used for the splits
    return_raw: if True, return the individual scores of every run instead of mean ± std summaries
    verbose: if True, print the name of each model as it is evaluated
    scale_data: if True, scale the train and test data with train_test_scale

    Returns
    -------
    DataFrame containing the performance of every baseline model.
    '''
    results = []
    for model_type in keras_clf(boosting=False).get_info():
        if verbose:
            print('Evaluating ', model_type)
        clf = keras_clf(model_type, boosting=False)
        results.append(evaluate(clf, X, y, p_ids, valid_only=valid_only, num_runs=num_runs, return_raw=return_raw,
                                scale_data=scale_data))
    return pd.concat(results)


def evaluate_features_loo(X, y, p_ids, num_runs=10, nice_names_X_columns=None, scale_data=True):
    '''
    Compute metrics relative to the full feature set when each feature of X is left out in turn.

    evaluate_features is first run on all features to obtain per-model mean scores; each column of X
    is then removed in turn and the resulting scores are divided by those means.  Returns a
    long-format DataFrame with columns ['model', 'column_out', 'metric', 'value'].
    '''

    results_all = evaluate_features(X=X, y=y, p_ids=p_ids, num_runs=num_runs, return_raw=True, verbose=False,
                                    scale_data=scale_data)
    results_all_mean = results_all.groupby('model').mean()

    dividing_values = results_all_mean.to_dict('index')

    relative_result_list = []

    def relative_group_by(x):
        # Divide each group's scores by the mean scores obtained with the full feature set.
        model_name = x['model'].iloc[0]
        divide_vector = np.asarray(list(dividing_values[model_name].values()))
        return x[['sensitivity', 'specificity', 'acc', 'f1']] / divide_vector

    for col_index_out in range(X.shape[1]):
        X_to_test = np.delete(X, obj=col_index_out, axis=1)

        results_to_test = evaluate_features(X=X_to_test, y=y, p_ids=p_ids, num_runs=num_runs, return_raw=True,
                                            verbose=False, scale_data=scale_data)

        results_to_test['column_out'] = col_index_out if nice_names_X_columns is None else nice_names_X_columns[
            col_index_out]

        results_to_test[['sensitivity', 'specificity', 'acc', 'f1']] = results_to_test.groupby(by=['model']).apply(
            relative_group_by)

        relative_result_list.append(results_to_test)

    relative_result_all = pd.concat(relative_result_list)
    relative_result_all_melt = relative_result_all.melt(id_vars=['model', 'column_out'], var_name='metric',
                                                        value_name='value')

    return relative_result_all_melt


def evaluate_split(model, X_train, y_train, X_test, y_test, num_runs=10, return_raw=False,
                   scale_data=False):
    '''
    Evaluate a single model on a fixed, user-provided train/test split, refitting it num_runs
    times.  Returns a DataFrame in the same format as evaluate.
    '''
    raw_results, results, sen, spe, accs, f1s = [], [], [], [], [], []
    header = ['model', 'sensitivity', 'specificity', 'acc', 'f1']
    for run in range(num_runs):
        if scale_data:
            X_train, X_test = train_test_scale(X_train, X_test)
        model.reset()
        model.fit(X_train, y_train)
        sensitivity, specificity, acc, f1 = get_scores(y_test, model.predict(X_test))
        if sensitivity is not None and str(sensitivity) != 'nan':
            sen.append(sensitivity)
            spe.append(specificity)
            accs.append(acc)
            f1s.append(f1)
            if return_raw:
                raw_results.append([model.model_type, sensitivity, specificity, acc, f1])
    row = [model.model_type, format_mean_std(sen), format_mean_std(spe), format_mean_std(accs), format_mean_std(f1s)]
    results.append(row)

    if return_raw:
        return pd.DataFrame(raw_results, columns=header)
    df_results = pd.DataFrame(results, columns=header)
    return df_results


def evaluate_sequentially(X, y, dates, num_runs=10, valid_only=True, return_raw=False, verbose=True, scale_data=False,
                          validate_next=True):
    '''
    Evaluate the baseline models by simulating a real-world scenario: the data is grouped into
    weeks, the models are trained on all weeks seen so far and tested on the following week
    (or on all remaining weeks if validate_next is False).

    Parameters
    ----------
    X: array indexed by date, where X[i] contains the samples collected on dates[i]
    y: array indexed by date, where y[i] contains the corresponding labels
    dates: date of each entry in X and y
    num_runs: number of times each model is re-fitted on every weekly split
    valid_only: if True, restrict the training data to samples labelled -1 or 1
    return_raw: if True, return the individual scores of every run instead of mean ± std summaries
    verbose: if True, print the name of each model as it is evaluated
    scale_data: if True, scale the train and test data with train_test_scale
    validate_next: if True, test on the week immediately following the training weeks;
        otherwise test on all remaining weeks

    Returns
    -------
    DataFrame of results with a week column indicating the training round.
    '''
    def filter_samples(data, label):
        indices = np.isin(label, [-1, 1])
        return data[indices], label[indices]

    dates = pd.DataFrame(dates, columns=['date'])
    dates = dates.reset_index().sort_values('date')
    dates['week'] = compute_week_number(dates['date'])
    dates['train'] = False

    results = []
    week_counter = 1
    for dates_idx, train_dates in enumerate(dates['week'].unique()):
        if dates_idx == len(dates['week'].unique()) - 1:
            break
        dates.loc[dates.week == train_dates, 'train'] = True

        # Training data
        train_idx = dates[dates.train].index
        X_train, y_train = np.concatenate(X[train_idx]), np.concatenate(y[train_idx])

        if valid_only:
            X_train, y_train = filter_samples(X_train, y_train)
        y_train[y_train > 0] = 1
        y_train[y_train < 0] = -1

        # Test data
        if validate_next:
            test_idx = dates[dates.week == dates['week'].unique()[dates_idx + 1]].index
        else:
            test_idx = dates[~dates.train].index
        X_test, y_test = filter_samples(np.concatenate(X[test_idx]), np.concatenate(y[test_idx]))
        y_test[y_test > 0] = 1
        y_test[y_test < 0] = -1

        for model_type in keras_clf(boosting=False).get_info():
            if verbose:
                print('Evaluating ', model_type)
            clf = keras_clf(model_type, boosting=False)
            tmp_res = evaluate_split(clf, X_train, y_train, X_test, y_test, num_runs=num_runs,
                                     return_raw=return_raw, scale_data=scale_data)
            tmp_res['week'] = week_counter
            results.append(tmp_res)
        week_counter += 1
    return pd.concat(results)

Functions

def evaluate(model, X, y, p_ids, num_runs=10, valid_only=True, return_raw=False, scale_data=False)

Evaluate the performance of a single model over repeated train/test splits grouped by id.

Parameters

model
  Classifier exposing reset, fit, predict and a model_type attribute.
X
  Feature matrix.
y
  Labels.
p_ids
  Participant ids used by split_by_ids to form the train/test split.
num_runs
  Number of repeated splits; the run index seeds each split.
valid_only
  If True, only validated samples are used for the split.
return_raw
  If True, return the individual scores of every run instead of mean ± std summaries.
scale_data
  If True, scale the train and test data with train_test_scale.

Returns

DataFrame with columns ['model', 'sensitivity', 'specificity', 'acc', 'f1'].
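
Example usage (a minimal sketch, not part of the module): this calls evaluate with one of the
built-in baseline classifiers, mirroring the construction used inside evaluate_features. The toy
data below (shapes, participant ids and the -1/1 label convention) is an assumption for
illustration only.

import numpy as np
from minder_utils.evaluate.evaluate_models import evaluate
from minder_utils.models.classifiers.classifiers import Classifiers

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 8))            # toy feature matrix
y = rng.choice([-1, 1], size=200)        # toy labels
p_ids = rng.integers(0, 20, size=200)    # toy participant ids used for the split

# Pick the first available baseline model; any object exposing reset(),
# fit(), predict() and a model_type attribute can be passed instead.
model_type = list(Classifiers(boosting=False).get_info())[0]
clf = Classifiers(model_type, boosting=False)

summary = evaluate(clf, X, y, p_ids, num_runs=5, scale_data=True)
print(summary)  # one row of mean ± std scores for this model
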
def evaluate_features(X, y, p_ids, num_runs=10, valid_only=True, return_raw=False, verbose=True, scale_data=False)

Evaluate a feature matrix on every baseline model provided by Classifiers.

Parameters

X
  Feature matrix.
y
  Labels.
p_ids
  Participant ids used by split_by_ids to form the train/test splits.
num_runs
  Number of repeated splits per model.
valid_only
  If True, only validated samples are used for the splits.
return_raw
  If True, return the individual scores of every run instead of mean ± std summaries.
verbose
  If True, print the name of each model as it is evaluated.
scale_data
  If True, scale the train and test data with train_test_scale.

Returns

DataFrame containing the performance of every baseline model.

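Example usage (a minimal sketch, not part of the module; the toy arrays and the -1/1 label
convention are assumptions):

import numpy as np
from minder_utils.evaluate.evaluate_models import evaluate_features

rng = np.random.default_rng(42)
X = rng.normal(size=(300, 10))
y = rng.choice([-1, 1], size=300)
p_ids = rng.integers(0, 30, size=300)

# Mean ± std summary, one row per baseline model.
summary = evaluate_features(X, y, p_ids, num_runs=5, scale_data=True)

# Per-run scores instead, e.g. for plotting score distributions.
raw = evaluate_features(X, y, p_ids, num_runs=5, return_raw=True, verbose=False)
print(summary)
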
def evaluate_features_loo(X, y, p_ids, num_runs=10, nice_names_X_columns=None, scale_data=True)

Compute metrics relative to the full feature set when each feature of X is left out in turn: evaluate_features is first run on all features to obtain per-model mean scores, then each column of X is removed and the resulting scores are divided by those means. Returns a long-format DataFrame with columns ['model', 'column_out', 'metric', 'value'].

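Example usage (a minimal sketch, not part of the module; the toy data and the feature names
passed as nice_names_X_columns are assumptions):

import numpy as np
from minder_utils.evaluate.evaluate_models import evaluate_features_loo

rng = np.random.default_rng(1)
X = rng.normal(size=(300, 4))
y = rng.choice([-1, 1], size=300)
p_ids = rng.integers(0, 30, size=300)
feature_names = ['steps', 'sleep', 'bathroom visits', 'kitchen visits']  # hypothetical labels

loo = evaluate_features_loo(X, y, p_ids, num_runs=5, nice_names_X_columns=feature_names)

# Values below 1 mean a metric dropped when that feature was removed,
# i.e. the feature was contributing to performance.
print(loo.groupby(['column_out', 'metric'])['value'].mean().unstack())
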
def evaluate_sequentially(X, y, dates, num_runs=10, valid_only=True, return_raw=False, verbose=True, scale_data=False, validate_next=True)

Evaluate the baseline models by simulating a real-world scenario: the data is grouped into weeks, the models are trained on all weeks seen so far and tested on the following week (or on all remaining weeks if validate_next is False).

Parameters

X
  Array indexed by date, where X[i] contains the samples collected on dates[i].
y
  Array indexed by date, where y[i] contains the corresponding labels.
dates
  Date of each entry in X and y.
num_runs
  Number of times each model is re-fitted on every weekly split.
valid_only
  If True, restrict the training data to samples labelled -1 or 1.
return_raw
  If True, return the individual scores of every run instead of mean ± std summaries.
verbose
  If True, print the name of each model as it is evaluated.
scale_data
  If True, scale the train and test data with train_test_scale.
validate_next
  If True, test on the week immediately following the training weeks; otherwise test on all remaining weeks.

Returns

DataFrame of results with a week column indicating the training round.

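Example usage (a minimal sketch, not part of the module). It shows the per-date layout the
function expects: X[i] and y[i] hold the samples collected on dates[i], so that whole weeks can
be concatenated. The shapes, sample counts and -1/1 labels are assumptions for illustration.

import numpy as np
import pandas as pd
from minder_utils.evaluate.evaluate_models import evaluate_sequentially

rng = np.random.default_rng(7)
dates = pd.date_range('2021-01-04', periods=28, freq='D')     # four weeks of daily data
X = np.empty(len(dates), dtype=object)
y = np.empty(len(dates), dtype=object)
for i in range(len(dates)):
    n = rng.integers(5, 15)                                   # samples collected that day
    X[i] = rng.normal(size=(n, 6))
    y[i] = rng.choice([-1, 1], size=n)

# Train on all weeks seen so far, test on the following week.
weekly_results = evaluate_sequentially(X, y, dates.values, num_runs=3,
                                       verbose=False, scale_data=True)
print(weekly_results)
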
def evaluate_split(model, X_train, y_train, X_test, y_test, num_runs=10, return_raw=False, scale_data=False)

Evaluate a single model on a fixed, user-provided train/test split, refitting it num_runs times. Returns a DataFrame in the same format as evaluate.
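
Example usage (a minimal sketch, not part of the module). The wrapper class and the scikit-learn
estimator are illustrative assumptions: evaluate_split only relies on reset(), fit(), predict()
and the model_type attribute, as the source above shows.

import numpy as np
from sklearn.linear_model import LogisticRegression
from minder_utils.evaluate.evaluate_models import evaluate_split


class SklearnWrapper:
    model_type = 'logistic regression (wrapper)'

    def reset(self):
        # Re-create the estimator so every run starts from scratch.
        self.clf = LogisticRegression(max_iter=1000)

    def fit(self, X, y):
        self.clf.fit(X, y)

    def predict(self, X):
        return self.clf.predict(X)


rng = np.random.default_rng(3)
X_train, y_train = rng.normal(size=(200, 8)), rng.choice([-1, 1], size=200)
X_test, y_test = rng.normal(size=(80, 8)), rng.choice([-1, 1], size=80)

print(evaluate_split(SklearnWrapper(), X_train, y_train, X_test, y_test, num_runs=3))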