Module minder_utils.models.feature_selectors

Expand source code
from .supervised.wrapper import *
from .supervised.filter import *
from .supervised.intrinsic import *
from .unsupervised.filter import *

__all__ = ['Supervised_Filter', 'Intrinsic_Selector', 'Wrapper_Selector', 'Unsupervised_Filter']
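The exported classes can be imported directly from the package, e.g. (a quick sketch based on `__all__` above):

from minder_utils.models.feature_selectors import (
    Supervised_Filter, Intrinsic_Selector, Wrapper_Selector, Unsupervised_Filter
)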

Sub-modules

minder_utils.models.feature_selectors.supervised
minder_utils.models.feature_selectors.unsupervised

Classes

class Intrinsic_Selector (classifier, model_name, num_features, freeze_classifier=False, temperature=5)

This class provides a set of supervised feature selection methods. In particular, it contains intrinsic methods, which perform automatic feature selection DURING TRAINING.

Currently, it contains:
- linear feature selector
- discrete linear feature selector

Example
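A minimal usage sketch (the toy data, the nn.Linear classifier, and the hyperparameters below are illustrative, not part of the library):

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from minder_utils.models.feature_selectors import Intrinsic_Selector

# toy data: 100 samples, 20 features, binary labels
X = torch.randn(100, 20)
y = torch.randint(0, 2, (100,))
loader = DataLoader(TensorDataset(X, y), batch_size=16)

# any torch classifier mapping num_features inputs to class logits works here
classifier = nn.Linear(20, 2)

selector = Intrinsic_Selector(classifier, model_name='linear', num_features=20)
selector.fit(loader, num_epoch=10)
selector.test(loader)                          # prints accuracy
importance = selector.get_importance(loader)   # mean per-feature importance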

Expand source code
class Intrinsic_Selector(Feature_selector):
    '''
    This class provides a set of supervised feature selection methods.
    Particularly, it contains a set of intrinsic methods, which perform automatic feature selection
    DURING TRAINING.

    Currently, it contains:
        - linear feature selector
        - discrete linear feature selector
    '''

    def __init__(self, classifier, model_name, num_features, freeze_classifier=False, temperature=5):
        self.classifier = classifier
        self.num_features = num_features
        super().__init__(model_name)
        self.name = self.methods[model_name]
        self.early_stop = EarlyStopping(**self.config['early_stop'])
        self.freeze_classifier = freeze_classifier
        self.discrete = 'discrete' in model_name
        self.temperature = temperature

    def reset_model(self, model_name, discrete=True):
        self.discrete = discrete
        self.name = self.methods[model_name]
        self.model = getattr(self, model_name)()

    @property
    def methods(self):
        return {
            'linear': 'linear feature selector',
            'discrete_linear': 'discrete linear feature selector'
        }

    def linear(self):
        return nn.Linear(self.num_features, self.num_features, bias=False)

    def discrete_linear(self):
        return nn.ModuleList([
            nn.Linear(self.num_features, self.num_features, bias=False),
            nn.Linear(self.num_features, self.num_features, bias=False)])

    def fit(self, dataloader, num_epoch=50):
        parameters = self.model.parameters() if self.freeze_classifier \
            else list(self.model.parameters()) + list(self.classifier.parameters())
        optimiser = optim.Adam(parameters, lr=0.001)
        criterion = nn.CrossEntropyLoss()
        for e in range(num_epoch):
            if self.early_stop.early_stop:
                break
            for X, y in dataloader:
                optimiser.zero_grad()
                if self.discrete:
                    # two linear heads produce per-feature logits; Gumbel-softmax samples a hard 0/1 mask
                    features_importance = torch.stack([self.model[0](X), self.model[1](X)], dim=-1)
                    features_importance = F.gumbel_softmax(features_importance, tau=self.temperature, hard=True, dim=-1)[:, :, 1]
                else:
                    features_importance = self.model(X)
                    features_importance = F.softmax(features_importance, dim=1)
                X = X * features_importance
                outputs = self.classifier(X)
                # the sum over the importance mask acts as a sparsity penalty
                loss = criterion(outputs, y) + torch.sum(features_importance)
                loss.backward()
                optimiser.step()
                print('Epoch: %d / %5d,  Loss: %.3f' %
                      (e + 1, num_epoch, loss.item()))
                self.early_stop(loss.item(), self.model, self.__class__.__name__)
                if self.early_stop.early_stop and self.config['early_stop']['enable']:
                    break
            if self.early_stop.early_stop and self.config['early_stop']['enable']:
                break
        return self

    def test(self, dataloader):
        correct = 0
        total = 0
        with torch.no_grad():
            for X, y in dataloader:
                if self.discrete:
                    features_importance = torch.stack([self.model[0](X), self.model[1](X)], dim=-1)
                    features_importance = F.softmax(features_importance / self.temperature, dim=-1)[:, :, 1]
                else:
                    features_importance = self.model(X)
                    features_importance = F.softmax(features_importance, dim=1)
                X *= features_importance
                outputs = self.classifier(X)
                _, predicted = torch.max(outputs.data, 1)
                total += y.size(0)
                correct += (predicted == y).sum().item()

        print('Accuracy: %d %%' % (100 * correct / total))
        return 100 * correct / total

    def transform(self, X):
        pass

    def __name__(self):
        return 'Supervised Intrinsic Selector', self.name

    def get_importance(self, dataloader, normalise=True):
        importance = []
        with torch.no_grad():
            for X, y in dataloader:
                if self.discrete:
                    features_importance = torch.stack([self.model[0](X), self.model[1](X)], dim=-1)
                    importance.extend(list(F.softmax(features_importance / self.temperature, dim=-1).detach().numpy()[:, :, 1]))
                else:
                    features_importance = self.model(X)
                    importance.extend(list(F.softmax(features_importance, dim=1).detach().numpy()))
        importance = np.array(importance)
        if normalise:
            # per-sample min-max: shift the minimum to ~0 (1e-5 avoids exact zeros), then scale the maximum to 1
            importance -= np.min(importance, axis=1, keepdims=True) - 1e-5
            importance /= np.max(importance, axis=1, keepdims=True)
        importance = np.mean(importance, axis=0)
        return importance

Ancestors

Feature_selector

Instance variables

var methods

Methods

def discrete_linear(self)

def fit(self, dataloader, num_epoch=50)

def get_importance(self, dataloader, normalise=True)

def linear(self)

def reset_model(self, model_name, discrete=True)

def test(self, dataloader)

def transform(self, X)
class Supervised_Filter (model_name='chi', proportion=90)

This class provides a set of supervised feature selection methods. In particular, it contains filter methods, which perform feature selection SEPARATELY from the classifier.

Currently, it contains:
- chi-squared stats
- ANOVA F-value
- mutual information

Example

from minder_utils.models.feature_selectors.supervised.filter import Supervised_Filter

selector = Supervised_Filter(model_name='chi')
# show the available methods:
selector.get_info(verbose=True)

# train the selector on data X and labels y
selector.fit(X, y)

# do the selection
X = selector.transform(X)

Select a proportion of features

Args

model_name
method used to calculate the score for feature selection
proportion
percentage of features to keep
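A runnable sketch with synthetic data (illustrative values; the chi-squared score requires non-negative features):

import numpy as np
from minder_utils.models.feature_selectors import Supervised_Filter

X = np.abs(np.random.randn(200, 10))  # chi2 needs non-negative features
y = np.random.randint(0, 2, 200)

selector = Supervised_Filter(model_name='chi', proportion=50)
selector.fit(X, y)
X_selected = selector.transform(X)   # keeps the top 50% of features: shape (200, 5)
scores = selector.get_importance()   # per-feature chi-squared scores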
Expand source code
class Supervised_Filter(Feature_selector):
    '''
    This class provides a set of supervised feature selection methods.
    Particularly, it contains a set of filter methods, which perform feature selection SEPARATELY from the classifier.

    Currently, it contains:
        - chi-squared stats
        - ANOVA F-value
        - mutual information

    ```Example```
    ```
    from minder_utils.models.feature_selectors.supervised.filter import Supervised_Filter

    selector = Supervised_Filter(model_name='chi')
    # show the available methods:
    selector.get_info(verbose=True)

    # train the selector on data X and labels y
    selector.fit(X, y)

    # do the selection
    X = selector.transform(X)
    ```
    '''
    def __init__(self, model_name='chi', proportion=90):
        '''
        Select a proportion of features
        Args:
            model_name: method used to calculate the score for feature selection
            proportion: percentage of features to keep
        '''
        super().__init__(model_name)
        self.selector = SelectPercentile(self.model, percentile=proportion)
        self.proportion = proportion

    def reset_model(self, model_name, proportion=None):
        proportion = self.proportion if proportion is None else proportion
        self.name = self.methods[model_name]
        self.model = getattr(self, model_name)()
        self.selector = SelectPercentile(self.model, percentile=proportion)

    @property
    def methods(self):
        return {
            'chi': 'chi-squared stats',
            'f_class': 'ANOVA F-value',
            'mi': 'mutual information',
        }

    @staticmethod
    def chi():
        return chi2

    @staticmethod
    def f_class():
        return f_classif

    @staticmethod
    def mi():
        return mutual_info_classif

    def fit(self, X, y):
        # collapse one-hot labels to class indices, then binarise to {0, 1}
        if y.ndim > 1:
            y = np.argmax(y, axis=1)
        y[y < 0] = 0
        y[y > 0] = 1
        return self.selector.fit(X, y.astype(float))

    def transform(self, X):
        return self.selector.transform(X)

    def __name__(self):
        return 'Supervised Filter', self.name

    def get_importance(self):
        return self.selector.scores_

Ancestors

Feature_selector

Static methods

def chi()

def f_class()

def mi()

Instance variables

var methods

Methods

def fit(self, X, y)

def get_importance(self)

def reset_model(self, model_name, proportion=None)

def transform(self, X)
class Unsupervised_Filter (model_name='vt')

This class provides a set of unsupervised feature selection methods.

Currently, it contains:
- VarianceThreshold

Example

from minder_utils.models.feature_selectors.unsupervised.filter import Unsupervised_Filter

selector = Unsupervised_Filter(model_name='vt')
# show the available methods:
selector.get_info(verbose=True)

# train the selector. Note: X is the data; y is ignored and may be None
selector.fit(X, y)

# do the selection
X = selector.transform(X)
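A concrete sketch with synthetic data (illustrative): the default VarianceThreshold removes zero-variance features, so a constant column is dropped.

import numpy as np
from minder_utils.models.feature_selectors import Unsupervised_Filter

X = np.random.randn(100, 4)
X[:, 2] = 1.0  # constant column has zero variance

selector = Unsupervised_Filter(model_name='vt')
selector.fit(X)                    # y is not needed
X_reduced = selector.transform(X)  # shape (100, 3); the constant column is gone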
Expand source code
class Unsupervised_Filter(Feature_selector):
    '''
    This class provides a set of unsupervised feature selection methods.

    Currently, it contains:
        - VarianceThreshold

    ```Example```
    ```
    from minder_utils.models.feature_selectors.unsupervised.filter import Unsupervised_Filter

    selector = Unsupervised_Filter(model_name='vt')
    # show the available methods:
    selector.get_info(verbose=True)

    # train the selector. Note: X is the data; y is ignored and may be None
    selector.fit(X, y)

    # do the selection
    X = selector.transform(X)
    ```
    '''
    def __init__(self, model_name='vt'):
        super().__init__(model_name)

    @property
    def methods(self):
        return {
            'vt': 'VarianceThreshold',
        }

    @staticmethod
    def vt():
        return VarianceThreshold()

    def __name__(self):
        return 'Unsupervised Filter', self.name

    def fit(self, X, y=None):
        return self.model.fit(X)

    def transform(self, X):
        return self.model.transform(X)

Ancestors

Feature_selector

Static methods

def vt()

Instance variables

var methods

Methods

def fit(self, X, y=None)

def transform(self, X)
class Wrapper_Selector (estimator, model_name='rfe', num_features=10)

This class provides a set of supervised feature selection methods. In particular, it contains wrapper methods, which select features by repeatedly fitting the supplied estimator.

Currently, it contains:
- RFE: recursive feature elimination
- RFECV: recursive feature elimination with cross-validation

Example

from minder_utils.models.feature_selectors.supervised.wrapper import Wrapper_Selector
from sklearn.svm import SVC

selector = Wrapper_Selector(SVC(kernel='linear'), model_name='rfe')
# show the available methods:
selector.get_info(verbose=True)

# train the selector
selector.fit(X, y)

# do the selection
X = selector.transform(X)

Parameters

estimator : sklearn estimator
model_name : 'rfe' or 'rfecv'
num_features : int / float, number / percentage of features to be selected
 
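A minimal sketch with synthetic data (illustrative; any sklearn estimator exposing coef_ or feature_importances_ works with RFE):

import numpy as np
from sklearn.svm import SVC
from minder_utils.models.feature_selectors import Wrapper_Selector

X = np.random.randn(100, 20)
y = np.random.randint(0, 2, 100)

selector = Wrapper_Selector(SVC(kernel='linear'), model_name='rfe', num_features=5)
selector.fit(X, y)
X_selected = selector.transform(X)   # shape (100, 5)
mask = selector.mask_of_features()   # boolean mask over the original 20 features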
Expand source code
class Wrapper_Selector(Feature_selector):
    '''
    This class provides a set of supervised feature selection methods.
    Particularly, it contains a set of wrapper methods, which select features by repeatedly fitting the supplied estimator.

    Currently, it contains:
        - RFE: recursive feature elimination
        - RFECV: recursive feature elimination with cross-validation

    ```Example```
    ```
    from minder_utils.models.feature_selectors.supervised.wrapper import Wrapper_Selector
    from sklearn.svm import SVC

    selector = Wrapper_Selector(SVC(kernel='linear'), model_name='rfe')
    # show the available methods:
    selector.get_info(verbose=True)

    # train the selector
    selector.fit(X, y)

    # do the selection
    X = selector.transform(X)
    ```
    '''

    def __init__(self, estimator, model_name='rfe', num_features=10):
        '''
        Parameters
        ----------
        estimator: sklearn estimator
        model_name: 'rfe' or 'rfecv'
        num_features: int / float, number / percentage of features to be selected
        '''
        self.estimator = estimator
        self.num_features = num_features
        super().__init__(model_name)

    def reset_model(self, model_name, num_features=None):
        self.num_features = self.num_features if num_features is None else num_features
        self.name = self.methods[model_name]
        self.model = getattr(self, model_name)()

    @property
    def methods(self):
        return {
            'rfe': 'Recursive feature elimination',
            'rfecv': 'Recursive feature elimination with cross-validation',
        }

    def rfe(self):
        return RFE(self.estimator, n_features_to_select=self.num_features)

    def rfecv(self):
        return RFECV(self.estimator, min_features_to_select=self.num_features, cv=5)

    def fit(self, X, y):
        if y.ndim > 1:
            y = np.argmax(y, axis=1)
        return self.model.fit(X, y)

    def transform(self, X):
        return self.model.transform(X)

    def mask_of_features(self):
        return self.model.support_

    def __name__(self):
        return 'Supervised Wrapper', self.name

Ancestors

Feature_selector

Instance variables

var methods

Methods

def fit(self, X, y)

def mask_of_features(self)

def reset_model(self, model_name, num_features=None)

def rfe(self)

def rfecv(self)

def transform(self, X)