Module minder_utils.models.feature_selectors
Expand source code
from .supervised.wrapper import *
from .supervised.filter import *
from .supervised.intrinsic import *
from .unsupervised.filter import *
__all__ = ['Supervised_Filter', 'Intrinsic_Selector', 'Wrapper_Selector', 'Unsupervised_Filter']
Sub-modules
minder_utils.models.feature_selectors.supervised
minder_utils.models.feature_selectors.unsupervised
Classes
class Intrinsic_Selector (classifier, model_name, num_features, freeze_classifier=False, temperature=5)
-
This class provides a set of supervised feature selection methods. Particularly, it contains a set of intrinsic methods, which perform automatic feature selection DURING TRAINING.
Currently, it contains:
- Linear feature selector
- Discrete linear feature selector
Example
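The Example block in the source docstring is empty, so the following is only a hypothetical sketch: the toy data, the nn.Linear classifier, and all variable names are placeholders, not part of the library.

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from minder_utils.models.feature_selectors import Intrinsic_Selector

# toy data: 128 samples, 20 features, binary labels
X = torch.randn(128, 20)
y = torch.randint(0, 2, (128,))
loader = DataLoader(TensorDataset(X, y), batch_size=32)

# any torch classifier over the same number of features will do
clf = nn.Linear(20, 2)

selector = Intrinsic_Selector(clf, 'linear', num_features=20)
selector.fit(loader, num_epoch=10)            # selection is learned during training
importance = selector.get_importance(loader)  # averaged per-feature scores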
Expand source code
class Intrinsic_Selector(Feature_selector):
    '''
    This class provides a set of supervised feature selection methods.
    Particularly, it contains a set of intrinsic methods, which perform
    automatic feature selection DURING TRAINING.

    Currently, it contains:
        - Linear feature selector
        - Discrete linear feature selector

    ```Example```
    ```
    ```
    '''

    def __init__(self, classifier, model_name, num_features, freeze_classifier=False, temperature=5):
        self.classifier = classifier
        self.num_features = num_features
        super().__init__(model_name)
        self.name = self.methods[model_name]
        self.early_stop = EarlyStopping(**self.config['early_stop'])
        self.freeze_classifier = freeze_classifier
        self.discrete = 'discrete' in model_name
        self.temperature = temperature

    def reset_model(self, model_name, discrete=True):
        self.discrete = discrete
        self.name = self.methods[model_name]
        self.model = getattr(self, model_name)()

    @property
    def methods(self):
        return {
            'linear': 'linear feature selector',
            'discrete_linear': 'discrete linear feature selector'
        }

    def linear(self):
        return nn.Linear(self.num_features, self.num_features, bias=False)

    def discrete_linear(self):
        # two heads produce per-feature logits for {drop, keep}
        return nn.ModuleList([
            nn.Linear(self.num_features, self.num_features, bias=False),
            nn.Linear(self.num_features, self.num_features, bias=False)])

    def fit(self, dataloader, num_epoch=50):
        # optionally train the classifier jointly with the selector
        parameters = self.model.parameters() if self.freeze_classifier \
            else list(self.model.parameters()) + list(self.classifier.parameters())
        optimiser = optim.Adam(parameters, lr=0.001)
        criterion = nn.CrossEntropyLoss()
        for e in range(num_epoch):
            if self.early_stop.early_stop:
                break
            for X, y in dataloader:
                optimiser.zero_grad()
                if self.discrete:
                    # sample a differentiable 0/1 keep-mask via Gumbel-softmax
                    features_importance = torch.stack([self.model[0](X), self.model[1](X)], dim=-1)
                    features_importance = F.gumbel_softmax(
                        features_importance, tau=self.temperature, hard=True, dim=-1)[:, :, 1]
                else:
                    features_importance = self.model(X)
                    features_importance = F.softmax(features_importance, dim=1)
                X = X * features_importance
                outputs = self.classifier(X)
                # cross-entropy plus a sparsity penalty on the importances
                loss = criterion(outputs, y) + torch.sum(features_importance)
                loss.backward()
                optimiser.step()
                print('Epoch: %d / %5d, Loss: %.3f' % (e + 1, num_epoch, loss.item()), end='\n')
                self.early_stop(loss.item(), self.model, self.__class__.__name__)
                if self.early_stop.early_stop and self.config['early_stop']['enable']:
                    break
            if self.early_stop.early_stop and self.config['early_stop']['enable']:
                break
        return self

    def test(self, dataloader):
        correct = 0
        total = 0
        with torch.no_grad():
            for X, y in dataloader:
                if self.discrete:
                    features_importance = torch.stack([self.model[0](X), self.model[1](X)], dim=-1)
                    features_importance = F.softmax(features_importance / self.temperature, dim=-1)[:, :, 1]
                else:
                    features_importance = self.model(X)
                    features_importance = F.softmax(features_importance, dim=1)
                X *= features_importance
                outputs = self.classifier(X)
                _, predicted = torch.max(outputs.data, 1)
                total += y.size(0)
                correct += (predicted == y).sum().item()
        print('Accuracy: %d %%' % (100 * correct / total))
        return 100 * correct / total

    def transform(self, X):
        pass

    def __name__(self):
        return 'Supervised Intrinsic Selector', self.name

    def get_importance(self, dataloader, normalise=True):
        importance = []
        with torch.no_grad():
            for X, y in dataloader:
                if self.discrete:
                    features_importance = torch.stack([self.model[0](X), self.model[1](X)], dim=-1)
                    importance.extend(list(F.softmax(
                        features_importance / self.temperature, dim=-1).detach().numpy()[:, :, 1]))
                else:
                    features_importance = self.model(X)
                    importance.extend(list(F.softmax(features_importance, dim=1).detach().numpy()))
        importance = np.array(importance)
        if normalise:
            # per-sample min-max scaling; 1e-5 keeps the minimum strictly positive
            importance -= np.min(importance, axis=1, keepdims=True) - 1e-5
            importance /= np.max(importance, axis=1, keepdims=True)
        importance = np.mean(importance, axis=0)
        return importance
Ancestors
- Feature_selector
- abc.ABC
Instance variables
var methods
-
Expand source code
@property
def methods(self):
    return {
        'linear': 'linear feature selector',
        'discrete_linear': 'discrete linear feature selector'
    }
Methods
def discrete_linear(self)
-
Expand source code
def discrete_linear(self):
    # two heads produce per-feature logits for {drop, keep}
    return nn.ModuleList([
        nn.Linear(self.num_features, self.num_features, bias=False),
        nn.Linear(self.num_features, self.num_features, bias=False)])
def fit(self, dataloader, num_epoch=50)
-
Expand source code
def fit(self, dataloader, num_epoch=50):
    # optionally train the classifier jointly with the selector
    parameters = self.model.parameters() if self.freeze_classifier \
        else list(self.model.parameters()) + list(self.classifier.parameters())
    optimiser = optim.Adam(parameters, lr=0.001)
    criterion = nn.CrossEntropyLoss()
    for e in range(num_epoch):
        if self.early_stop.early_stop:
            break
        for X, y in dataloader:
            optimiser.zero_grad()
            if self.discrete:
                # sample a differentiable 0/1 keep-mask via Gumbel-softmax
                features_importance = torch.stack([self.model[0](X), self.model[1](X)], dim=-1)
                features_importance = F.gumbel_softmax(
                    features_importance, tau=self.temperature, hard=True, dim=-1)[:, :, 1]
            else:
                features_importance = self.model(X)
                features_importance = F.softmax(features_importance, dim=1)
            X = X * features_importance
            outputs = self.classifier(X)
            # cross-entropy plus a sparsity penalty on the importances
            loss = criterion(outputs, y) + torch.sum(features_importance)
            loss.backward()
            optimiser.step()
            print('Epoch: %d / %5d, Loss: %.3f' % (e + 1, num_epoch, loss.item()), end='\n')
            self.early_stop(loss.item(), self.model, self.__class__.__name__)
            if self.early_stop.early_stop and self.config['early_stop']['enable']:
                break
        if self.early_stop.early_stop and self.config['early_stop']['enable']:
            break
    return self
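For intuition, a standalone sketch of the discrete branch above: stacking two linear heads yields per-feature logits for {drop, keep}, and F.gumbel_softmax with hard=True samples a hard 0/1 mask whose gradient flows through the soft sample. The shapes below are illustrative only.

import torch
import torch.nn.functional as F

# (batch, features, {drop, keep}) logits, as produced by the two heads
logits = torch.randn(4, 10, 2)
mask = F.gumbel_softmax(logits, tau=5, hard=True, dim=-1)[:, :, 1]
print(mask)  # one 0/1 keep-decision per feature, differentiable in the backward pass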
def get_importance(self, dataloader, normalise=True)
-
Expand source code
def get_importance(self, dataloader, normalise=True):
    importance = []
    with torch.no_grad():
        for X, y in dataloader:
            if self.discrete:
                features_importance = torch.stack([self.model[0](X), self.model[1](X)], dim=-1)
                importance.extend(list(F.softmax(
                    features_importance / self.temperature, dim=-1).detach().numpy()[:, :, 1]))
            else:
                features_importance = self.model(X)
                importance.extend(list(F.softmax(features_importance, dim=1).detach().numpy()))
    importance = np.array(importance)
    if normalise:
        # per-sample min-max scaling; 1e-5 keeps the minimum strictly positive
        importance -= np.min(importance, axis=1, keepdims=True) - 1e-5
        importance /= np.max(importance, axis=1, keepdims=True)
    importance = np.mean(importance, axis=0)
    return importance
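A tiny numeric sketch of the normalisation step above: each row (one sample's scores) is shifted so its minimum sits just above zero (the 1e-5 offset), scaled by its maximum, and the rows are then averaged into one score per feature. The numbers are made up.

import numpy as np

importance = np.array([[0.2, 0.5, 0.3],
                       [0.1, 0.6, 0.3]])
importance -= np.min(importance, axis=1, keepdims=True) - 1e-5  # per-row shift
importance /= np.max(importance, axis=1, keepdims=True)         # per-row scale
print(np.mean(importance, axis=0))                              # one score per feature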
def linear(self)
-
Expand source code
def linear(self):
    return nn.Linear(self.num_features, self.num_features, bias=False)
def reset_model(self, model_name, discrete=True)
-
Expand source code
def reset_model(self, model_name, discrete=True):
    self.discrete = discrete
    self.name = self.methods[model_name]
    self.model = getattr(self, model_name)()
def test(self, dataloader)
-
Expand source code
def test(self, dataloader):
    correct = 0
    total = 0
    with torch.no_grad():
        for X, y in dataloader:
            if self.discrete:
                features_importance = torch.stack([self.model[0](X), self.model[1](X)], dim=-1)
                features_importance = F.softmax(features_importance / self.temperature, dim=-1)[:, :, 1]
            else:
                features_importance = self.model(X)
                features_importance = F.softmax(features_importance, dim=1)
            X *= features_importance
            outputs = self.classifier(X)
            _, predicted = torch.max(outputs.data, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
    print('Accuracy: %d %%' % (100 * correct / total))
    return 100 * correct / total
def transform(self, X)
-
Expand source code
def transform(self, X):
    pass
class Supervised_Filter (model_name='chi', proportion=90)
-
This class provides a set of supervised feature selection methods. Particularly, it contains a set of filter methods, which are performed SEPARATELY from the classifier.
Currently, it contains:
- chi-squared stats
- ANOVA F-value
- mutual information
Example
from minder_utils.models.feature_selectors.supervised.filter import Supervised_Filter
selector = Supervised_Filter(model_name='chi')
# show the available methods:
selector.get_info(verbose=True)
# train the selector. Note that y is required for supervised filters
selector.fit(X, y)
# do the selection
X = selector.transform(X)
Select a proportion of features
Args
model_name
- method to calculate the score for feature selection
proportion
- percentage of features to keep
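A runnable sketch under stated assumptions: sklearn's iris data stands in for real features, and the selector is constructed exactly as the signature above describes.

from sklearn.datasets import load_iris
from minder_utils.models.feature_selectors import Supervised_Filter

X, y = load_iris(return_X_y=True)

# keep the top 50% of features ranked by ANOVA F-value
selector = Supervised_Filter(model_name='f_class', proportion=50)
selector.fit(X, y)                      # labels are binarised internally
X_reduced = selector.transform(X)
print(X.shape, '->', X_reduced.shape)   # (150, 4) -> (150, 2)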
Expand source code
class Supervised_Filter(Feature_selector):
    '''
    This class provides a set of supervised feature selection methods.
    Particularly, it contains a set of filter methods, which are performed
    SEPARATELY from the classifier.

    Currently, it contains:
        - chi-squared stats
        - ANOVA F-value
        - mutual information

    ```Example```
    ```
    from minder_utils.models.feature_selectors.supervised.filter import Supervised_Filter

    selector = Supervised_Filter(model_name='chi')

    # show the available methods:
    selector.get_info(verbose=True)

    # train the selector
    selector.fit(X, y)

    # do the selection
    X = selector.transform(X)
    ```
    '''

    def __init__(self, model_name='chi', proportion=90):
        '''
        Select a proportion of features
        Args:
            model_name: method to calculate the score for feature selection
            proportion: percentage of features to keep
        '''
        super().__init__(model_name)
        self.selector = SelectPercentile(self.model, percentile=proportion)
        self.proportion = proportion

    def reset_model(self, model_name, proportion=None):
        proportion = self.proportion if proportion is None else proportion
        self.name = self.methods[model_name]
        self.model = getattr(self, model_name)()
        self.selector = SelectPercentile(self.model, percentile=proportion)

    @property
    def methods(self):
        return {
            'chi': 'chi-squared stats',
            'f_class': 'ANOVA F-value',
            'mi': 'mutual information',
        }

    @staticmethod
    def chi():
        return chi2

    @staticmethod
    def f_class():
        return f_classif

    @staticmethod
    def mi():
        return mutual_info_classif

    def fit(self, X, y):
        # collapse one-hot labels, then binarise the target
        if y.ndim > 1:
            y = np.argmax(y, axis=1)
        y[y < 0] = 0
        y[y > 0] = 1
        return self.selector.fit(X, y.astype(float))

    def transform(self, X):
        return self.selector.transform(X)

    def __name__(self):
        return 'Supervised Filter', self.name

    def get_importance(self):
        return self.selector.scores_
Ancestors
- Feature_selector
- abc.ABC
Static methods
def chi()
-
Expand source code
@staticmethod
def chi():
    return chi2
def f_class()
-
Expand source code
@staticmethod
def f_class():
    return f_classif
def mi()
-
Expand source code
@staticmethod
def mi():
    return mutual_info_classif
Instance variables
var methods
-
Expand source code
@property
def methods(self):
    return {
        'chi': 'chi-squared stats',
        'f_class': 'ANOVA F-value',
        'mi': 'mutual information',
    }
Methods
def fit(self, X, y)
-
Expand source code
def fit(self, X, y):
    # collapse one-hot labels, then binarise the target
    if y.ndim > 1:
        y = np.argmax(y, axis=1)
    y[y < 0] = 0
    y[y > 0] = 1
    return self.selector.fit(X, y.astype(float))
def get_importance(self)
-
Expand source code
def get_importance(self):
    return self.selector.scores_
def reset_model(self, model_name, proportion=None)
-
Expand source code
def reset_model(self, model_name, proportion=None):
    proportion = self.proportion if proportion is None else proportion
    self.name = self.methods[model_name]
    self.model = getattr(self, model_name)()
    self.selector = SelectPercentile(self.model, percentile=proportion)
def transform(self, X)
-
Expand source code
def transform(self, X):
    return self.selector.transform(X)
class Unsupervised_Filter (model_name='vt')
-
This class provides a set of unsupervised feature selection methods.
Currently, it contains:
- VarianceThreshold
Example
from minder_utils.models.feature_selectors.unsupervised.filter import Unsupervised_Filter
selector = Unsupervised_Filter(model_name='vt')
# show the available methods:
selector.get_info(verbose=True)
# train the selector. Note that X is the data; y is None and will not be used
selector.fit(X, y)
# do the selection
X = selector.transform(X)
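For intuition, a tiny numeric sketch of the default 'vt' method: VarianceThreshold with its default threshold of 0 simply drops constant columns. The array below is made up.

import numpy as np
from minder_utils.models.feature_selectors import Unsupervised_Filter

X = np.array([[0., 1., 2.],
              [0., 3., 2.],
              [0., 5., 2.]])        # columns 0 and 2 are constant
selector = Unsupervised_Filter(model_name='vt')
selector.fit(X)                     # y is ignored
print(selector.transform(X))        # only the middle column survives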
Expand source code
class Unsupervised_Filter(Feature_selector):
    '''
    This class provides a set of unsupervised feature selection methods.

    Currently, it contains:
        - VarianceThreshold

    ```Example```
    ```
    from minder_utils.models.feature_selectors.unsupervised.filter import Unsupervised_Filter

    selector = Unsupervised_Filter(model_name='vt')

    # show the available methods:
    selector.get_info(verbose=True)

    # train the selector. Note that X is the data; y is None and will not be used
    selector.fit(X, y)

    # do the selection
    X = selector.transform(X)
    ```
    '''

    def __init__(self, model_name='vt'):
        super().__init__(model_name)

    @property
    def methods(self):
        return {
            'vt': 'VarianceThreshold',
        }

    @staticmethod
    def vt():
        return VarianceThreshold()

    def __name__(self):
        return 'Unsupervised Filter', self.name

    def fit(self, X, y=None):
        return self.model.fit(X)

    def transform(self, X):
        return self.model.transform(X)
Ancestors
- Feature_selector
- abc.ABC
Static methods
def vt()
-
Expand source code
@staticmethod
def vt():
    return VarianceThreshold()
Instance variables
var methods
-
Expand source code
@property
def methods(self):
    return {
        'vt': 'VarianceThreshold',
    }
Methods
def fit(self, X, y=None)
-
Expand source code
def fit(self, X, y=None):
    return self.model.fit(X)
def transform(self, X)
-
Expand source code
def transform(self, X):
    return self.model.transform(X)
class Wrapper_Selector (estimator, model_name='rfe', num_features=10)
-
This class provides a set of supervised feature selection methods. Particularly, it contains a set of wrapper methods, which select features by repeatedly fitting the supplied estimator.
Currently, it contains:
- RFE: recursive feature elimination
- RFECV: recursive feature elimination with cross-validation
Example
from minder_utils.models.feature_selectors.supervised.wrapper import Wrapper_Selector
from sklearn.svm import SVC
selector = Wrapper_Selector(SVC(kernel='linear'), model_name='rfe')
# show the available methods:
selector.get_info(verbose=True)
# train the selector
selector.fit(X, y)
# do the selection
X = selector.transform(X)
Parameters
estimator
: sklearn estimator
model_name
: 'rfe' or 'rfecv'
num_features
: int / float, number / percentage of features to be selected
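A runnable sketch under stated assumptions: iris data again stands in for real features, and mask_of_features (documented below) exposes which columns RFE kept.

from sklearn.datasets import load_iris
from sklearn.svm import SVC
from minder_utils.models.feature_selectors import Wrapper_Selector

X, y = load_iris(return_X_y=True)
selector = Wrapper_Selector(SVC(kernel='linear'), model_name='rfe', num_features=2)
selector.fit(X, y)
print(selector.mask_of_features())  # boolean mask over the original columns
X_reduced = selector.transform(X)   # shape (150, 2)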
Expand source code
class Wrapper_Selector(Feature_selector):
    '''
    This class provides a set of supervised feature selection methods.
    Particularly, it contains a set of wrapper methods, which select
    features by repeatedly fitting the supplied estimator.

    Currently, it contains:
        - RFE: recursive feature elimination
        - RFECV: recursive feature elimination with cross-validation

    ```Example```
    ```
    from minder_utils.models.feature_selectors.supervised.wrapper import Wrapper_Selector
    from sklearn.svm import SVC

    selector = Wrapper_Selector(SVC(kernel='linear'), model_name='rfe')

    # show the available methods:
    selector.get_info(verbose=True)

    # train the selector
    selector.fit(X, y)

    # do the selection
    X = selector.transform(X)
    ```
    '''

    def __init__(self, estimator, model_name='rfe', num_features=10):
        '''
        Parameters
        ----------
        estimator: sklearn estimator
        model_name: 'rfe' or 'rfecv'
        num_features: int / float, number / percentage of features to be selected
        '''
        self.estimator = estimator
        self.num_features = num_features
        super().__init__(model_name)

    def reset_model(self, model_name, num_features=None):
        self.num_features = self.num_features if num_features is None else num_features
        self.name = self.methods[model_name]
        self.model = getattr(self, model_name)()

    @property
    def methods(self):
        return {
            'rfe': 'Recursive feature elimination',
            'rfecv': 'Recursive feature elimination with cross-validation',
        }

    def rfe(self):
        return RFE(self.estimator, n_features_to_select=self.num_features)

    def rfecv(self):
        return RFECV(self.estimator, min_features_to_select=self.num_features, cv=5)

    def fit(self, X, y):
        # collapse one-hot labels before fitting
        if y.ndim > 1:
            y = np.argmax(y, axis=1)
        return self.model.fit(X, y)

    def transform(self, X):
        return self.model.transform(X)

    def mask_of_features(self):
        return self.model.support_

    def __name__(self):
        return 'Supervised Wrapper', self.name
Ancestors
- Feature_selector
- abc.ABC
Instance variables
var methods
-
Expand source code
@property
def methods(self):
    return {
        'rfe': 'Recursive feature elimination',
        'rfecv': 'Recursive feature elimination with cross-validation',
    }
Methods
def fit(self, X, y)
-
Expand source code
def fit(self, X, y):
    # collapse one-hot labels before fitting
    if y.ndim > 1:
        y = np.argmax(y, axis=1)
    return self.model.fit(X, y)
def mask_of_features(self)
-
Expand source code
def mask_of_features(self):
    return self.model.support_
def reset_model(self, model_name, num_features=None)
-
Expand source code
def reset_model(self, model_name, num_features=None):
    self.num_features = self.num_features if num_features is None else num_features
    self.name = self.methods[model_name]
    self.model = getattr(self, model_name)()
def rfe(self)
-
Expand source code
def rfe(self):
    return RFE(self.estimator, n_features_to_select=self.num_features)
def rfecv(self)
-
Expand source code
def rfecv(self):
    return RFECV(self.estimator, min_features_to_select=self.num_features, cv=5)
def transform(self, X)
-
Expand source code
def transform(self, X):
    return self.model.transform(X)