Module minder_utils.models.utils

Expand source code
from .early_stopping import EarlyStopping
from .util import *
from .feature_selector import *
from .feature_extractor import *


__all__ = ['EarlyStopping', 'get_device', 'Feature_selector', 'Feature_extractor']

Sub-modules

minder_utils.models.utils.early_stopping
minder_utils.models.utils.feature_extractor
minder_utils.models.utils.feature_selector
minder_utils.models.utils.util

Functions

def get_device()
Expand source code
def get_device():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print("Running on:", device)
    return device
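
Example (an illustrative sketch; the Linear layer and random batch are placeholders, not part of this module):

device = get_device()                      # prints "Running on: cuda" or "Running on: cpu"
model = torch.nn.Linear(4, 2).to(device)
batch = torch.randn(8, 4).to(device)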

Classes

class EarlyStopping (patience=20, verbose=False, delta=0, path='./ckpt', save_model=False, trace_func=<built-in function print>, **kwargs)

Early stops the training if validation loss doesn't improve after a given patience.

Args

patience : int
How long to wait after the last time the validation loss improved. Default: 20
verbose : bool
If True, prints a message for each validation loss improvement. Default: False
delta : float
Minimum change in the monitored quantity to qualify as an improvement. Default: 0
path : str
Directory the checkpoint is saved to. Default: './ckpt'
save_model : bool
If True, saves a checkpoint whenever the validation loss improves. Default: False
trace_func : function
Trace print function. Default: print
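
Example usage in a training loop (a minimal sketch; the model and validation loss below are placeholders, and EarlyStopping is assumed to be imported from minder_utils.models.utils):

import torch.nn as nn

model = nn.Linear(4, 2)                                   # placeholder model
early_stopping = EarlyStopping(patience=5, save_model=False)

for epoch in range(100):
    # ... run one training epoch here ...
    val_loss = 1.0 / (epoch + 1)                          # stand-in for a real validation loss
    early_stopping(val_loss, model, save_name='model.pt')
    if early_stopping.early_stop:                         # set once patience runs out
        break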
Expand source code
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""

    def __init__(self, patience=20, verbose=False, delta=0, path='./ckpt', save_model=False,
                 trace_func=print, **kwargs):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 20
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Directory the checkpoint is saved to.
                            Default: './ckpt'
            save_model (bool): If True, saves a checkpoint whenever the validation loss improves.
                            Default: False
            trace_func (function): Trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
        self.save_model = save_model

    def __call__(self, val_loss, model, save_name):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            if self.save_model:
                self.save_checkpoint(val_loss, model, save_name)
        elif score < self.best_score + self.delta:
            self.counter += 1
            # self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
                self.trace_func('Training is stopped due to early stopping')
        else:
            self.best_score = score
            if self.save_model:
                self.save_checkpoint(val_loss, model, save_name)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, save_name):
        '''Saves the model when the validation loss decreases.'''
        if self.verbose:
            self.trace_func(
                f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        save_mkdir(self.path)
        torch.save(model.state_dict(), os.path.join(self.path, save_name))
        self.val_loss_min = val_loss

Methods

def save_checkpoint(self, val_loss, model, save_name)

Saves the model when the validation loss decreases.

Expand source code
def save_checkpoint(self, val_loss, model, save_name):
    '''Saves the model when the validation loss decreases.'''
    if self.verbose:
        self.trace_func(
            f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
    save_mkdir(self.path)
    torch.save(model.state_dict(), os.path.join(self.path, save_name))
    self.val_loss_min = val_loss
class Feature_extractor

Abstract base class for feature extractors: subclasses implement step(), while training (fit), loader construction and feature extraction (transform) are provided by this base class.
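
A minimal subclassing sketch (hypothetical: MyExtractor and its layer sizes are illustrative, and it assumes a 'myextractor' entry exists in feature_extractor_config with the 'early_stop', 'loader', 'optimiser', 'train' and 'test' sections used by fit and transform):

import torch
import torch.nn as nn

class MyExtractor(Feature_extractor):
    def __init__(self):
        super().__init__()                                # reads feature_extractor_config['myextractor']
        self.model = nn.Sequential(nn.Linear(10, 8), nn.ReLU(), nn.Linear(8, 4))

    def step(self, data):
        # Toy objective, purely to illustrate the contract: step() receives one
        # batch from the loader and must return a scalar loss tensor.
        x = data if isinstance(data, torch.Tensor) else self.which_data(data)
        x = x.float().to(self.device)
        return self.model(x).pow(2).mean()

# extractor = MyExtractor().fit(train_data)              # DataLoader, numpy array or (data, label) pair
# features = extractor.transform(test_data)              # returns a numpy array of features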

Expand source code
class Feature_extractor(ABC, nn.Module):
    def __init__(self):
        super(Feature_extractor, self).__init__()
        self.early_stop = EarlyStopping(**self.config['early_stop'])
        self.device = get_device()

    @property
    def config(self) -> dict:
        return feature_extractor_config[self.__class__.__name__.lower()]

    @abstractmethod
    def step(self, data):
        pass

    def get_info(self, config=None, indent=0):
        if config is None:
            config = self.config
        for key, value in config.items():
            if isinstance(value, dict):
                print(' ' * indent + str(key))
                self.get_info(value, indent + 1)
            else:
                print(' ' * indent + str(key).ljust(10, ' '), str(value))

    def create_loader(self, data, training=True):
        if isinstance(data, torch.utils.data.DataLoader):
            return data
        elif not isinstance(data, (np.ndarray, list, tuple)):
            raise TypeError('the input must be a dataloader / numpy array, or a list/tuple '
                            'containing the data and label')
        if training:
            return self._custom_loader(data)
        else:
            return create_unlabelled_loader(data, batch_size=1, shuffle=False, augmentation=False)

    def _custom_loader(self, data):
        return create_unlabelled_loader(data, **self.config['loader'])

    def fit(self, train_loader, save_name=None):
        if save_name is None:
            save_name = self.__class__.__name__
        if not self.config['train']['retrain']:
            if self.load_pre_trained_weights(save_name):
                return self

        train_loader = self.create_loader(train_loader, training=True)
        self.model = self.model.to(self.device)

        optimizer = torch.optim.Adam(self.model.parameters(), **self.config['optimiser'])
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0,
                                                               last_epoch=-1)
        for epoch_counter in range(self.config['train']['epochs']):
            for data in train_loader:
                optimizer.zero_grad()
                loss = self.step(data).to(self.device)
                loss.backward()
                if self.config['train']['verbose']:
                    print('Epoch {}/{}, Loss: '.format(epoch_counter,
                                                       self.config['train']['epochs']), loss.item(), end='\n')
                optimizer.step()
                scheduler.step()
                self.early_stop(loss.item(), self.model, save_name)
                if self.early_stop.early_stop and self.config['early_stop']['enable']:
                    break
            if self.early_stop.early_stop and self.config['early_stop']['enable']:
                break
        return self

    def load_pre_trained_weights(self, save_name):
        try:
            checkpoints_folder = os.path.join(self.config['early_stop']['path'], save_name)
            state_dict = torch.load(checkpoints_folder)
            self.model.load_state_dict(state_dict)
            print("Loaded pre-trained model with success.")
            return True
        except FileNotFoundError:
            print("Pre-trained weights not found. Training from scratch.")
            return False

    @staticmethod
    def which_data(data):
        return data[0]

    def transform(self, test_loader):
        """
        :param test_loader: validation data only
        :return: extracted features as a numpy array
        """
        test_loader = self.create_loader(test_loader, training=False)
        # validation steps
        with torch.no_grad():
            self.model.eval()
            features = []
            for data in test_loader:
                if not isinstance(data, torch.Tensor):
                    data = self.which_data(data)
                feat = self.model(data)
                if not isinstance(feat, torch.Tensor):
                    feat = feat[0]
                features.append(feat.numpy())

        if self.config['test']['save']:
            save_mkdir(self.config['test']['save_path'])
            np.save(os.path.join(self.config['test']['save_path'], self.__class__.__name__.lower() + '.npy'),
                    np.concatenate(features))
            print('Test data has been transformed and saved to ',
                  os.path.join(self.config['test']['save_path'], self.__class__.__name__.lower() + '.npy'))

        return np.concatenate(features)

Ancestors

  • abc.ABC
  • torch.nn.modules.module.Module

Subclasses

Class variables

var dump_patches : bool
var training : bool

Static methods

def which_data(data)
Expand source code
@staticmethod
def which_data(data):
    return data[0]

Instance variables

var config : dict
Expand source code
@property
def config(self) -> dict:
    return feature_extractor_config[self.__class__.__name__.lower()]

Methods

def create_loader(self, data, training=True)
Expand source code
def create_loader(self, data, training=True):
    if isinstance(data, torch.utils.data.DataLoader):
        return data
    elif not isinstance(data, (np.ndarray, list, tuple)):
        raise TypeError('the input must be a dataloader / numpy array, or a list/tuple '
                        'containing the data and label')
    if training:
        return self._custom_loader(data)
    else:
        return create_unlabelled_loader(data, batch_size=1, shuffle=False, augmentation=False)
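
Illustrative call patterns (extractor, existing_dataloader, features_array and labels are placeholders):

loader = extractor.create_loader(existing_dataloader)              # a DataLoader is returned unchanged
loader = extractor.create_loader(features_array)                   # a numpy array is wrapped using the configured loader settings
loader = extractor.create_loader((features_array, labels))         # a (data, label) tuple is also accepted
loader = extractor.create_loader(features_array, training=False)   # evaluation loader: batch_size=1, no shuffling or augmentation
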
def fit(self, train_loader, save_name=None)
Expand source code
def fit(self, train_loader, save_name=None):
    if save_name is None:
        save_name = self.__class__.__name__
    if not self.config['train']['retrain']:
        if self.load_pre_trained_weights(save_name):
            return self

    train_loader = self.create_loader(train_loader, training=True)
    self.model = self.model.to(self.device)

    optimizer = torch.optim.Adam(self.model.parameters(), **self.config['optimiser'])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0,
                                                           last_epoch=-1)
    for epoch_counter in range(self.config['train']['epochs']):
        for data in train_loader:
            optimizer.zero_grad()
            loss = self.step(data).to(self.device)
            loss.backward()
            if self.config['train']['verbose']:
                print('Epoch {}/{}, Loss: '.format(epoch_counter,
                                                   self.config['train']['epochs']), loss.item(), end='\n')
            optimizer.step()
            scheduler.step()
            self.early_stop(loss.item(), self.model, save_name)
            if self.early_stop.early_stop and self.config['early_stop']['enable']:
                break
        if self.early_stop.early_stop and self.config['early_stop']['enable']:
            break
    return self
def forward(self, *input: Any) -> None

Defines the computation performed at every call.

Should be overridden by all subclasses.

Note

Although the recipe for forward pass needs to be defined within this function, one should call the :class:Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

Expand source code
def _forward_unimplemented(self, *input: Any) -> None:
    r"""Defines the computation performed at every call.

    Should be overridden by all subclasses.

    .. note::
        Although the recipe for forward pass needs to be defined within
        this function, one should call the :class:`Module` instance afterwards
        instead of this since the former takes care of running the
        registered hooks while the latter silently ignores them.
    """
    raise NotImplementedError
def get_info(self, config=None, indent=0)
Expand source code
def get_info(self, config=None, indent=0):
    if config is None:
        config = self.config
    for key, value in config.items():
        if isinstance(value, dict):
            print(' ' * indent + str(key))
            self.get_info(value, indent + 1)
        else:
            print(' ' * indent + str(key).ljust(10, ' '), str(value))
def load_pre_trained_weights(self, save_name)
Expand source code
def load_pre_trained_weights(self, save_name):
    try:
        checkpoints_folder = os.path.join(self.config['early_stop']['path'], save_name)
        state_dict = torch.load(checkpoints_folder)
        self.model.load_state_dict(state_dict)
        print("Loaded pre-trained model with success.")
        return True
    except FileNotFoundError:
        print("Pre-trained weights not found. Training from scratch.")
        return False
def step(self, data)
Expand source code
@abstractmethod
def step(self, data):
    pass
def transform(self, test_loader)

:param test_loader: validation data only
:return: extracted features as a numpy array

Expand source code
def transform(self, test_loader):
    """
    :param test_loader: validation data only
    :return: extracted features as a numpy array
    """
    test_loader = self.create_loader(test_loader, training=False)
    # validation steps
    with torch.no_grad():
        self.model.eval()
        features = []
        for data in test_loader:
            if not isinstance(data, torch.Tensor):
                data = self.which_data(data)
            feat = self.model(data)
            if not isinstance(feat, torch.Tensor):
                feat = feat[0]
            features.append(feat.numpy())

    if self.config['test']['save']:
        save_mkdir(self.config['test']['save_path'])
        np.save(os.path.join(self.config['test']['save_path'], self.__class__.__name__.lower() + '.npy'),
                np.concatenate(features))
        print('Test data has been transformed and saved to ',
              os.path.join(self.config['test']['save_path'], self.__class__.__name__.lower() + '.npy'))

    return np.concatenate(features)
class Feature_selector (model)

Abstract base class for feature selectors: subclasses define the methods mapping and implement fit() and transform().
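
A minimal subclassing sketch (hypothetical: UnivariateSelector is illustrative, and scikit-learn's SelectKBest stands in for a real selection method):

from sklearn.feature_selection import SelectKBest, f_classif

class UnivariateSelector(Feature_selector):
    @property
    def methods(self):
        # maps the keys accepted by the constructor to human-readable names
        return {'kbest': 'univariate selection (SelectKBest)'}

    def kbest(self):
        return SelectKBest(score_func=f_classif, k=5)

    def fit(self, X, y):
        self.model.fit(X, y)
        return self

    def transform(self, X):
        return self.model.transform(X)

# selector = UnivariateSelector('kbest')                  # picks the 'kbest' method defined above
# X_reduced = selector.fit(X_train, y_train).transform(X_test)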

Expand source code
class Feature_selector(ABC):
    def __init__(self, model):
        self.name = self.methods[model]
        self.model = getattr(self, model)()

    @property
    def config(self) -> dict:
        return feature_selector_config[self.__class__.__name__.lower()]

    @property
    @abstractmethod
    def methods(self):
        pass

    def reset_model(self, model_name):
        self.name = self.methods[model_name]
        self.model = getattr(self, model_name)()

    def get_info(self, verbose=False):
        if verbose:
            print('Available methods:')
            for idx, key in enumerate(self.methods):
                print(str(idx).ljust(10, ' '), key.ljust(10, ' '), self.methods[key].ljust(10, ' '))
        return self.methods

    @abstractmethod
    def fit(self, X, y):
        pass

    @abstractmethod
    def transform(self, X):
        pass

Ancestors

  • abc.ABC

Subclasses

Instance variables

var config : dict
Expand source code
@property
def config(self) -> dict:
    return feature_selector_config[self.__class__.__name__.lower()]
var methods
Expand source code
@property
@abstractmethod
def methods(self):
    pass

Methods

def fit(self, X, y)
Expand source code
@abstractmethod
def fit(self, X, y):
    pass
def get_info(self, verbose=False)
Expand source code
def get_info(self, verbose=False):
    if verbose:
        print('Available methods:')
        for idx, key in enumerate(self.methods):
            print(str(idx).ljust(10, ' '), key.ljust(10, ' '), self.methods[key].ljust(10, ' '))
    return self.methods
def reset_model(self, model_name)
Expand source code
def reset_model(self, model_name):
    self.name = self.methods[model_name]
    self.model = getattr(self, model_name)()
def transform(self, X)
Expand source code
@abstractmethod
def transform(self, X):
    pass