Module minder_utils.dataloader.simclr_loader
Expand source code
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import torch
import torchvision.transforms as transforms
from torch.nn.functional import normalize
class DataTransform(object):
    """Callable wrapper that applies the same stochastic transform twice.

    Produces two independently augmented views of one sample, the pair
    consumed by contrastive objectives such as SimCLR.
    """

    def __init__(self, transform):
        # transform: any callable (e.g. a torchvision Compose) applied per sample
        self.transform = transform

    def __call__(self, sample):
        # Two separate invocations so random augmentations differ between views.
        return self.transform(sample), self.transform(sample)
def augmentation_transformers():
    """Return the default augmentation pipeline for SimCLR training.

    Composes a random resized crop to 8x14 with a random horizontal flip.
    NOTE(review): the 8x14 target presumably matches the activity-map
    input shape used by this package -- confirm before reusing elsewhere.
    """
    augmentations = [
        transforms.RandomResizedCrop([8, 14]),
        transforms.RandomHorizontalFlip(),
    ]
    return transforms.Compose(augmentations)
class CustomTensorDataset(Dataset):
    """TensorDataset with support of transforms and optional per-sample
    L2 normalisation.

    Parameters
    ----------
    tensors: sequence of tensors sharing the same first dimension;
        tensors[0] holds the data, tensors[1] the labels.
    transform: optional callable applied to each (possibly normalised) sample.
    normalise_data: whether to L2-normalise each sample before the transform.
    normalise_rows: number of rows the sample is viewed as before
        column-wise normalisation (default 24 -- previously a hard-coded
        magic constant, presumably hours per day; confirm for other data).
        Samples that cannot be viewed as (normalise_rows, -1) fall back
        to flat normalisation over all elements.
    """

    def __init__(self, tensors, transform=None, normalise_data=True, normalise_rows=24):
        # All tensors must index the same number of samples along dim 0.
        assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors)
        self.tensors = tensors
        self.transform = transform
        self.normalise_data = normalise_data
        self.normalise_rows = normalise_rows

    def __getitem__(self, index):
        x = self.tensors[0][index]
        if self.normalise_data:
            try:
                # Normalise column-wise over a (normalise_rows, -1) view,
                # then restore the sample's original shape.
                x = normalize(x.view(self.normalise_rows, -1), dim=0).view(x.size())
            except RuntimeError:
                # Sample not viewable as (normalise_rows, -1): normalise flat.
                x = normalize(x.view(-1, ), dim=0).view(x.size())
        if self.transform:
            x = self.transform(x)
        y = self.tensors[1][index]
        return x, y

    def __len__(self):
        return self.tensors[0].size(0)
def create_labelled_loader(X, y, batch_size=10, normalise_data=True, shuffle=True, seed=0, split=True, augmentation=False):
    '''
    Create a dataloader for labelled data.
    Parameters
    ----------
    X: numpy array, data
    y: numpy array, label
    batch_size
    normalise_data
    shuffle
    seed
    split: if True, return a stratified 67/33 train/test pair of loaders
    augmentation: augment data or not
    Returns torch dataloader (or a (train, test) pair when split is True)
    -------
    '''
    transform = DataTransform(augmentation_transformers()) if augmentation else None

    def _to_loader(data, labels):
        # Wrap one (data, labels) pair into a shuffled/batched DataLoader.
        dataset = CustomTensorDataset([torch.Tensor(data), torch.tensor(labels)],
                                      transform, normalise_data)
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    if not split:
        return _to_loader(X, y)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=seed, stratify=y)
    return _to_loader(X_train, y_train), _to_loader(X_test, y_test)
def create_unlabelled_loader(X, batch_size=10, shuffle=True, augmentation=False,
                             normalise_data=True):
    '''
    Create a dataloader for unlabelled data, note this function will label every datapoint
    with one.
    Parameters
    ----------
    X: unlabelled data
    batch_size
    shuffle
    augmentation
    normalise_data: normalise the data or not
    Returns torch dataloader
    -------
    '''
    transform = DataTransform(augmentation_transformers()) if augmentation else None
    # Every datapoint receives a dummy label of 1.0.
    dummy_labels = torch.ones(X.shape[0])
    dataset = CustomTensorDataset([torch.Tensor(X), dummy_labels], transform,
                                  normalise_data=normalise_data)
    # drop_last=True: discard the final partial batch so every batch has
    # exactly batch_size samples.
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=True)
- def augmentation_transformers()
- 
Expand source codedef augmentation_transformers(): return transforms.Compose([transforms.RandomResizedCrop([8, 14]), transforms.RandomHorizontalFlip()])
- def create_labelled_loader(X, y, batch_size=10, normalise_data=True, shuffle=True, seed=0, split=True, augmentation=False)
- 
Create a dataloader for labelled data Parameters 
 - X: numpy array, data
- y: numpy array, label
- batch_size
- normalise_data
- shuffle
- seed
- split
- augmentation: augment data or not
 Returns torch dataloaderExpand source codedef create_labelled_loader(X, y, batch_size=10, normalise_data=True, shuffle=True, seed=0, split=True, augmentation=False): ''' Create a dataloader for labelled data Parameters ---------- X: numpy array, data y: numpy array, label batch_size normalise_data shuffle seed split augmentation: augment data or not Returns torch dataloader ------- ''' transformers = DataTransform(augmentation_transformers()) if augmentation else None if split: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=seed, stratify=y) train_dataset = CustomTensorDataset([torch.Tensor(X_train), torch.tensor(y_train)], transformers, normalise_data) train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle) test_dataset = CustomTensorDataset([torch.Tensor(X_test), torch.tensor(y_test)], transformers, normalise_data) test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=shuffle) return train_dataloader, test_dataloader else: train_dataset = CustomTensorDataset([torch.Tensor(X), torch.tensor(y)], transformers, normalise_data) train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle) return train_dataloader
- def create_unlabelled_loader(X, batch_size=10, shuffle=True, augmentation=False, normalise_data=True)
- 
Create a dataloader for unlabelled data, note this function will label every datapoint with one. Parameters 
 - X: unlabelled data
- batch_size
- shuffle
- augmentation
- normalise_data: normalise the data or not
 Returns torch dataloaderExpand source codedef create_unlabelled_loader(X, batch_size=10, shuffle=True, augmentation=False, normalise_data=True): ''' Create a dataloader for unlabelled data, note this function will label every datapoint with one. Parameters ---------- X: unlabelled data batch_size shuffle augmentation normalise_data: normalise the data or not Returns torch dataloader ------- ''' transformers = DataTransform(augmentation_transformers()) if augmentation else None train_dataset = CustomTensorDataset([torch.Tensor(X), torch.ones(X.shape[0])], transformers, normalise_data=normalise_data) train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle, drop_last=True) return train_dataloader
Classes
- class CustomTensorDataset (tensors, transform=None, normalise_data=True)
- 
TensorDataset with support of transforms. Expand source codeclass CustomTensorDataset(Dataset): """TensorDataset with support of transforms. """ def __init__(self, tensors, transform=None, normalise_data=True): assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors) self.tensors = tensors self.transform = transform self.normalise_data = normalise_data def __getitem__(self, index): x = self.tensors[0][index] if self.normalise_data: try: x = normalize(x.view(24, -1), dim=0).view(x.size()) except RuntimeError: x = normalize(x.view(-1, ), dim=0).view(x.size()) if self.transform: x = self.transform(x) y = self.tensors[1][index] return x, y def __len__(self): return self.tensors[0].size(0)Ancestors- torch.utils.data.dataset.Dataset
- typing.Generic
 Class variables- var functions : Dict[str, Callable]
 
- class DataTransform (transform)
- 
Expand source codeclass DataTransform(object): def __init__(self, transform): self.transform = transform def __call__(self, sample): xi = self.transform(sample) xj = self.transform(sample) return xi, xj