Module minder_utils.models.feature_selectors.supervised.filter

Expand source code
from sklearn.feature_selection import SelectPercentile, chi2, f_classif, mutual_info_classif
from minder_utils.models.utils import Feature_selector
import numpy as np


class Supervised_Filter(Feature_selector):
    '''
    Supervised, filter-style feature selection.

    Filter methods score every feature against the labels SEPARATELY from the
    classifier. The best-scoring percentage of features is then kept through
    scikit-learn's ``SelectPercentile``.

    Currently, it contains (keys accepted as ``model_name``):
        - ``'chi'``: chi-squared stats
        - ``'f_class'``: ANOVA F-value
        - ``'mi'``: mutual information

    ```Example```
    ```
    from minder_utils.models.feature_selectors.supervised.filter import Supervised_Filter

    selector = Supervised_Filter(model_name='chi')
    # show the available methods:
    selector.get_info(verbose=True)

    # train the selector. X is the data, y holds the class labels
    selector.fit(X, y)

    # do the selection
    X = selector.transform(X)
    ```
    '''
    def __init__(self, model_name='chi', proportion=90):
        '''
        Select a proportion of features
        Args:
            model_name: key of the method used to calculate the score for
                feature selection (one of the keys of ``methods``)
            proportion: percentage of features to keep
        '''
        # NOTE(review): ``self.model`` is read right after this call, so the base
        # class is expected to set it from ``model_name`` - confirm in Feature_selector.
        super().__init__(model_name)
        self.selector = SelectPercentile(self.model, percentile=proportion)
        self.proportion = proportion

    def reset_model(self, model_name, proportion=None):
        '''
        Switch to another scoring method and rebuild the selector.

        Args:
            model_name: key of the new scoring method (one of the keys of ``methods``)
            proportion: percentage of features to keep; when None, the value
                given to the constructor is used
        '''
        proportion = self.proportion if proportion is None else proportion
        self.name = self.methods[model_name]
        # Each scoring method is exposed as a same-named static method that
        # returns the scikit-learn scoring function.
        self.model = getattr(self, model_name)()
        self.selector = SelectPercentile(self.model, percentile=proportion)

    @property
    def methods(self):
        '''Map each supported ``model_name`` key to its description.'''
        return {
            'chi': 'chi-squared stats',
            'f_class': 'ANOVA F-value',
            'mi': 'mutual information',
        }

    @staticmethod
    def chi():
        '''Return the chi-squared scoring function.'''
        return chi2

    @staticmethod
    def f_class():
        '''Return the ANOVA F-value scoring function.'''
        return f_classif

    @staticmethod
    def mi():
        '''Return the mutual information scoring function.'''
        return mutual_info_classif

    def fit(self, X, y):
        '''
        Fit the selector on ``X`` with binarised labels.

        Args:
            X: data handed to the underlying selector unchanged
            y: labels. If it has more than one dimension it is reduced with
                argmax over axis 1. Afterwards values below 0 become 0 and
                values above 0 become 1. The caller's object is not modified.

        Returns:
            Whatever ``self.selector.fit`` returns.
        '''
        # np.array copies, so the binarisation below never writes into the
        # caller's label array; it also lets plain lists be passed.
        y = np.array(y)
        if y.ndim > 1:
            y = np.argmax(y, axis=1)
        y[y < 0] = 0
        y[y > 0] = 1
        return self.selector.fit(X, y.astype(float))

    def transform(self, X):
        '''Reduce ``X`` to the features chosen by the fitted selector.'''
        return self.selector.transform(X)

    def __name__(self):
        '''Return the tuple ``('Supervised Filter', <description of the current method>)``.'''
        return 'Supervised Filter', self.name

    def get_importance(self):
        '''Return the ``scores_`` attribute of the underlying selector.'''
        return self.selector.scores_

Classes

class Supervised_Filter (model_name='chi', proportion=90)

This class provides a set of supervised feature selection methods. Specifically, it contains a set of filter methods, which are run SEPARATELY from the classifier.

Currently, it contains: - chi-squared stats - ANOVA F-value - mutual information

Example

from minder_utils.models.feature_selectors.supervised.filter import Supervised_Filter

selector = Supervised_Filter(model_name='chi')
# show the available methods:
selector.get_info(verbose=True)

# train the selector. X is the data, y holds the class labels
selector.fit(X, y)

# do the selection
X = selector.transform(X)

Select a proportion of features

Args

model_name
method to calculate the score for feature selection
proportion
percentage of features to keep
Expand source code
class Supervised_Filter(Feature_selector):
    '''
    Supervised, filter-style feature selection.

    Filter methods score every feature against the labels SEPARATELY from the
    classifier. The best-scoring percentage of features is then kept through
    scikit-learn's ``SelectPercentile``.

    Currently, it contains (keys accepted as ``model_name``):
        - ``'chi'``: chi-squared stats
        - ``'f_class'``: ANOVA F-value
        - ``'mi'``: mutual information

    ```Example```
    ```
    from minder_utils.models.feature_selectors.supervised.filter import Supervised_Filter

    selector = Supervised_Filter(model_name='chi')
    # show the available methods:
    selector.get_info(verbose=True)

    # train the selector. X is the data, y holds the class labels
    selector.fit(X, y)

    # do the selection
    X = selector.transform(X)
    ```
    '''
    def __init__(self, model_name='chi', proportion=90):
        '''
        Select a proportion of features
        Args:
            model_name: key of the method used to calculate the score for
                feature selection (one of the keys of ``methods``)
            proportion: percentage of features to keep
        '''
        # NOTE(review): ``self.model`` is read right after this call, so the base
        # class is expected to set it from ``model_name`` - confirm in Feature_selector.
        super().__init__(model_name)
        self.selector = SelectPercentile(self.model, percentile=proportion)
        self.proportion = proportion

    def reset_model(self, model_name, proportion=None):
        '''
        Switch to another scoring method and rebuild the selector.

        Args:
            model_name: key of the new scoring method (one of the keys of ``methods``)
            proportion: percentage of features to keep; when None, the value
                given to the constructor is used
        '''
        proportion = self.proportion if proportion is None else proportion
        self.name = self.methods[model_name]
        # Each scoring method is exposed as a same-named static method that
        # returns the scikit-learn scoring function.
        self.model = getattr(self, model_name)()
        self.selector = SelectPercentile(self.model, percentile=proportion)

    @property
    def methods(self):
        '''Map each supported ``model_name`` key to its description.'''
        return {
            'chi': 'chi-squared stats',
            'f_class': 'ANOVA F-value',
            'mi': 'mutual information',
        }

    @staticmethod
    def chi():
        '''Return the chi-squared scoring function.'''
        return chi2

    @staticmethod
    def f_class():
        '''Return the ANOVA F-value scoring function.'''
        return f_classif

    @staticmethod
    def mi():
        '''Return the mutual information scoring function.'''
        return mutual_info_classif

    def fit(self, X, y):
        '''
        Fit the selector on ``X`` with binarised labels.

        Args:
            X: data handed to the underlying selector unchanged
            y: labels. If it has more than one dimension it is reduced with
                argmax over axis 1. Afterwards values below 0 become 0 and
                values above 0 become 1. The caller's object is not modified.

        Returns:
            Whatever ``self.selector.fit`` returns.
        '''
        # np.array copies, so the binarisation below never writes into the
        # caller's label array; it also lets plain lists be passed.
        y = np.array(y)
        if y.ndim > 1:
            y = np.argmax(y, axis=1)
        y[y < 0] = 0
        y[y > 0] = 1
        return self.selector.fit(X, y.astype(float))

    def transform(self, X):
        '''Reduce ``X`` to the features chosen by the fitted selector.'''
        return self.selector.transform(X)

    def __name__(self):
        '''Return the tuple ``('Supervised Filter', <description of the current method>)``.'''
        return 'Supervised Filter', self.name

    def get_importance(self):
        '''Return the ``scores_`` attribute of the underlying selector.'''
        return self.selector.scores_

Ancestors

Static methods

def chi()
Expand source code
@staticmethod
def chi():
    '''Return the chi-squared scoring function (``chi2``) without calling it.'''
    score_func = chi2
    return score_func
def f_class()
Expand source code
@staticmethod
def f_class():
    '''Return the ANOVA F-value scoring function (``f_classif``) without calling it.'''
    score_func = f_classif
    return score_func
def mi()
Expand source code
@staticmethod
def mi():
    '''Return the mutual information scoring function (``mutual_info_classif``) without calling it.'''
    score_func = mutual_info_classif
    return score_func

Instance variables

var methods
Expand source code
@property
def methods(self):
    '''Map each supported method key to its human-readable description.'''
    descriptions = dict(
        chi='chi-squared stats',
        f_class='ANOVA F-value',
        mi='mutual information',
    )
    return descriptions

Methods

def fit(self, X, y)
Expand source code
def fit(self, X, y):
    '''
    Fit the selector on ``X`` with binarised labels.

    Args:
        X: data handed to the underlying selector unchanged
        y: labels. If it has more than one dimension it is reduced with
            argmax over axis 1. Afterwards values below 0 become 0 and
            values above 0 become 1. The caller's object is not modified.

    Returns:
        Whatever ``self.selector.fit`` returns.
    '''
    # np.array copies, so the binarisation below never writes into the
    # caller's label array; it also lets plain lists be passed.
    y = np.array(y)
    if y.ndim > 1:
        y = np.argmax(y, axis=1)
    y[y < 0] = 0
    y[y > 0] = 1
    return self.selector.fit(X, y.astype(float))
def get_importance(self)
Expand source code
def get_importance(self):
    '''Return the ``scores_`` attribute of the underlying selector.'''
    underlying = self.selector
    return underlying.scores_
def reset_model(self, model_name, proportion=None)
Expand source code
def reset_model(self, model_name, proportion=None):
    '''
    Switch to another scoring method and rebuild the selector.

    Args:
        model_name: key of the new scoring method; it is looked up in
            ``self.methods`` and as a same-named attribute on ``self``
        proportion: percentage of features to keep; when None,
            ``self.proportion`` is used instead
    '''
    if proportion is None:
        proportion = self.proportion
    self.name = self.methods[model_name]
    # The attribute named after the method is a factory returning the scoring function.
    score_func_factory = getattr(self, model_name)
    self.model = score_func_factory()
    self.selector = SelectPercentile(self.model, percentile=proportion)
def transform(self, X)
Expand source code
def transform(self, X):
    '''Pass ``X`` through the underlying selector's ``transform`` and return the result.'''
    reduced = self.selector.transform(X)
    return reduced