Module minder_utils.models.feature_selectors.supervised.filter
Expand source code
from sklearn.feature_selection import SelectPercentile, chi2, f_classif, mutual_info_classif
from minder_utils.models.utils import Feature_selector
import numpy as np
class Supervised_Filter(Feature_selector):
    '''
    Supervised, filter-style feature selection.

    Filter methods score each feature against the labels SEPARATELY from any
    classifier, then keep the best-scoring percentage of the features.
    Currently, it contains:
        - chi-squared stats   (``model_name='chi'``)
        - ANOVA F-value       (``model_name='f_class'``)
        - mutual information  (``model_name='mi'``)

    ```Example```
    ```
    from minder_utils.models.feature_selectors.supervised.filter import Supervised_Filter
    selector = Supervised_Filter(model_name='chi')
    # show the available methods:
    selector.get_info(verbose=True)
    # train the selector. X is the data, y holds the labels
    selector.fit(X, y)
    # do the selection
    X = selector.transform(X)
    ```
    '''

    def __init__(self, model_name='chi', proportion=90):
        '''
        Select a proportion of features

        Args:
            model_name: key of ``methods`` naming the score function used for
                feature selection ('chi', 'f_class' or 'mi')
            proportion: percentage of features to keep
        '''
        # NOTE(review): ``self.model`` is presumably set by the base-class
        # constructor from ``model_name`` -- confirm against Feature_selector.
        super().__init__(model_name)
        self.selector = SelectPercentile(self.model, percentile=proportion)
        self.proportion = proportion

    def reset_model(self, model_name, proportion=None):
        '''
        Switch to another scoring method and rebuild the selector.

        Args:
            model_name: key of ``methods``; also the name of the static method
                that returns the score function
            proportion: percentage of features to keep; falls back to the
                value given at construction when None
        '''
        proportion = self.proportion if proportion is None else proportion
        self.name = self.methods[model_name]
        self.model = getattr(self, model_name)()
        self.selector = SelectPercentile(self.model, percentile=proportion)

    @property
    def methods(self):
        '''Mapping from method key to its human-readable description.'''
        return {
            'chi': 'chi-squared stats',
            'f_class': 'ANOVA F-value',
            'mi': 'mutual information',
        }

    @staticmethod
    def chi():
        '''Return the chi-squared score function.'''
        return chi2

    @staticmethod
    def f_class():
        '''Return the ANOVA F-value score function.'''
        return f_classif

    @staticmethod
    def mi():
        '''Return the mutual information score function.'''
        return mutual_info_classif

    def fit(self, X, y):
        '''
        Fit the selector on ``X`` with binarised labels.

        Args:
            X: feature data passed straight to the underlying selector
            y: labels; if it has more than one dimension, the argmax over
                axis 1 is used as the label

        Returns:
            whatever the underlying selector's ``fit`` returns
        '''
        # np.array copies, so the caller's labels are never modified by the
        # in-place binarisation below (it also lets plain sequences through).
        labels = np.array(y)
        if labels.ndim > 1:
            labels = np.argmax(labels, axis=1)
        # Collapse the labels to two classes: positive -> 1, everything else -> 0.
        labels[labels < 0] = 0
        labels[labels > 0] = 1
        return self.selector.fit(X, labels.astype(float))

    def transform(self, X):
        '''Reduce ``X`` to the selected features using the fitted selector.'''
        return self.selector.transform(X)

    def __name__(self):
        '''Return the tuple ``('Supervised Filter', <description of the current method>)``.'''
        return 'Supervised Filter', self.name

    def get_importance(self):
        '''Return the per-feature scores (``scores_``) of the fitted selector.'''
        return self.selector.scores_
Classes
class Supervised_Filter (model_name='chi', proportion=90)
-
This class provides a set of supervised feature selection methods. In particular, it contains a set of filter methods, which run separately from the classifier.
Currently, it contains: - chi-squared stats - ANOVA F-value - mutual information
Example
from minder_utils.models.feature_selectors.supervised.filter import Supervised_Filter
selector = Supervised_Filter(model_name='chi')
# show the available methods:
selector.get_info(verbose=True)
# train the selector. X is the data, y holds the labels
selector.fit(X, y)
# do the selection
X = selector.transform(X)
Select a proportion of features
Args
model_name
- method to calculate the score for feature selection
proportion
- percentage of features to keep
Expand source code
class Supervised_Filter(Feature_selector):
    '''
    Supervised, filter-style feature selection.

    Filter methods score each feature against the labels SEPARATELY from any
    classifier, then keep the best-scoring percentage of the features.
    Currently, it contains:
        - chi-squared stats   (``model_name='chi'``)
        - ANOVA F-value       (``model_name='f_class'``)
        - mutual information  (``model_name='mi'``)

    ```Example```
    ```
    from minder_utils.models.feature_selectors.supervised.filter import Supervised_Filter
    selector = Supervised_Filter(model_name='chi')
    # show the available methods:
    selector.get_info(verbose=True)
    # train the selector. X is the data, y holds the labels
    selector.fit(X, y)
    # do the selection
    X = selector.transform(X)
    ```
    '''

    def __init__(self, model_name='chi', proportion=90):
        '''
        Select a proportion of features

        Args:
            model_name: key of ``methods`` naming the score function used for
                feature selection ('chi', 'f_class' or 'mi')
            proportion: percentage of features to keep
        '''
        # NOTE(review): ``self.model`` is presumably set by the base-class
        # constructor from ``model_name`` -- confirm against Feature_selector.
        super().__init__(model_name)
        self.selector = SelectPercentile(self.model, percentile=proportion)
        self.proportion = proportion

    def reset_model(self, model_name, proportion=None):
        '''
        Switch to another scoring method and rebuild the selector.

        Args:
            model_name: key of ``methods``; also the name of the static method
                that returns the score function
            proportion: percentage of features to keep; falls back to the
                value given at construction when None
        '''
        proportion = self.proportion if proportion is None else proportion
        self.name = self.methods[model_name]
        self.model = getattr(self, model_name)()
        self.selector = SelectPercentile(self.model, percentile=proportion)

    @property
    def methods(self):
        '''Mapping from method key to its human-readable description.'''
        return {
            'chi': 'chi-squared stats',
            'f_class': 'ANOVA F-value',
            'mi': 'mutual information',
        }

    @staticmethod
    def chi():
        '''Return the chi-squared score function.'''
        return chi2

    @staticmethod
    def f_class():
        '''Return the ANOVA F-value score function.'''
        return f_classif

    @staticmethod
    def mi():
        '''Return the mutual information score function.'''
        return mutual_info_classif

    def fit(self, X, y):
        '''
        Fit the selector on ``X`` with binarised labels.

        Args:
            X: feature data passed straight to the underlying selector
            y: labels; if it has more than one dimension, the argmax over
                axis 1 is used as the label

        Returns:
            whatever the underlying selector's ``fit`` returns
        '''
        # np.array copies, so the caller's labels are never modified by the
        # in-place binarisation below (it also lets plain sequences through).
        labels = np.array(y)
        if labels.ndim > 1:
            labels = np.argmax(labels, axis=1)
        # Collapse the labels to two classes: positive -> 1, everything else -> 0.
        labels[labels < 0] = 0
        labels[labels > 0] = 1
        return self.selector.fit(X, labels.astype(float))

    def transform(self, X):
        '''Reduce ``X`` to the selected features using the fitted selector.'''
        return self.selector.transform(X)

    def __name__(self):
        '''Return the tuple ``('Supervised Filter', <description of the current method>)``.'''
        return 'Supervised Filter', self.name

    def get_importance(self):
        '''Return the per-feature scores (``scores_``) of the fitted selector.'''
        return self.selector.scores_
Ancestors
- Feature_selector
- abc.ABC
Static methods
def chi()
-
Expand source code
@staticmethod
def chi():
    '''Return the chi-squared score function (``chi2``).'''
    score_func = chi2
    return score_func
def f_class()
-
Expand source code
@staticmethod
def f_class():
    '''Return the ANOVA F-value score function (``f_classif``).'''
    score_func = f_classif
    return score_func
def mi()
-
Expand source code
@staticmethod
def mi():
    '''Return the mutual information score function (``mutual_info_classif``).'''
    score_func = mutual_info_classif
    return score_func
Instance variables
var methods
-
Expand source code
@property
def methods(self):
    '''Mapping from method key to its human-readable description.'''
    descriptions = dict(
        chi='chi-squared stats',
        f_class='ANOVA F-value',
        mi='mutual information',
    )
    return descriptions
Methods
def fit(self, X, y)
-
Expand source code
def fit(self, X, y):
    '''
    Fit the selector on ``X`` with binarised labels.

    Args:
        X: feature data passed straight to the underlying selector
        y: labels; if it has more than one dimension, the argmax over
            axis 1 is used as the label

    Returns:
        whatever the underlying selector's ``fit`` returns
    '''
    # np.array copies, so the caller's labels are never modified by the
    # in-place binarisation below (it also lets plain sequences through).
    labels = np.array(y)
    if labels.ndim > 1:
        labels = np.argmax(labels, axis=1)
    # Collapse the labels to two classes: positive -> 1, everything else -> 0.
    labels[labels < 0] = 0
    labels[labels > 0] = 1
    return self.selector.fit(X, labels.astype(float))
def get_importance(self)
-
Expand source code
def get_importance(self):
    '''Return the per-feature scores (``scores_``) held by the selector.'''
    fitted_selector = self.selector
    return fitted_selector.scores_
def reset_model(self, model_name, proportion=None)
-
Expand source code
def reset_model(self, model_name, proportion=None):
    '''
    Switch to another scoring method and rebuild the selector.

    Args:
        model_name: key of ``methods``; also the name of the static method
            that returns the score function
        proportion: percentage of features to keep; falls back to
            ``self.proportion`` when None
    '''
    if proportion is None:
        proportion = self.proportion
    self.name = self.methods[model_name]
    score_factory = getattr(self, model_name)
    self.model = score_factory()
    self.selector = SelectPercentile(self.model, percentile=proportion)
def transform(self, X)
-
Expand source code
def transform(self, X):
    '''Reduce ``X`` to the selected features using the underlying selector.'''
    reduced = self.selector.transform(X)
    return reduced