Module minder_utils.dataloader.simclr_loader
Expand source code
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import torch
import torchvision.transforms as transforms
from torch.nn.functional import normalize
class DataTransform(object):
    """Wrap a transform so each call produces two augmented views of a sample.

    This is the SimCLR-style positive-pair generator: the same stochastic
    transform is applied twice, independently, to one input.
    """

    def __init__(self, transform):
        self.transform = transform

    def __call__(self, sample):
        # Two separate applications so each view draws fresh random parameters.
        return self.transform(sample), self.transform(sample)
def augmentation_transformers():
    """Build the augmentation pipeline used for contrastive training.

    Returns a torchvision ``Compose`` of a random resized crop to 8x14
    (matching the activity-matrix input shape, presumably -- TODO confirm)
    followed by a random horizontal flip.
    """
    augmentations = [
        transforms.RandomResizedCrop([8, 14]),
        transforms.RandomHorizontalFlip(),
    ]
    return transforms.Compose(augmentations)
class CustomTensorDataset(Dataset):
    """TensorDataset with support of transforms.

    Parameters
    ----------
    tensors: sequence of tensors; ``tensors[0]`` holds the samples and
        ``tensors[1]`` the labels. All entries must share the same size
        along dimension 0.
    transform: optional callable applied to each (possibly normalised) sample.
    normalise_data: if True, L2-normalise each sample before the transform.
    """

    def __init__(self, tensors, transform=None, normalise_data=True):
        num_samples = tensors[0].size(0)
        assert all(t.size(0) == num_samples for t in tensors)
        self.tensors = tensors
        self.transform = transform
        self.normalise_data = normalise_data

    def __getitem__(self, index):
        sample = self.tensors[0][index]
        if self.normalise_data:
            original_shape = sample.size()
            try:
                # Normalise column-wise over a (24, -1) view; the 24 is
                # presumably hours-of-day in the activity data -- TODO confirm.
                sample = normalize(sample.view(24, -1), dim=0).view(original_shape)
            except RuntimeError:
                # Sample size is not divisible by 24: fall back to
                # normalising the flattened sample as one vector.
                sample = normalize(sample.view(-1, ), dim=0).view(original_shape)
        if self.transform:
            sample = self.transform(sample)
        label = self.tensors[1][index]
        return sample, label

    def __len__(self):
        return self.tensors[0].size(0)
def create_labelled_loader(X, y, batch_size=10, normalise_data=True, shuffle=True, seed=0, split=True, augmentation=False):
    '''
    Create a dataloader for labelled data.

    Parameters
    ----------
    X: numpy array, data
    y: numpy array, label
    batch_size: samples per batch
    normalise_data: L2-normalise each sample if True
    shuffle: shuffle the data each epoch
    seed: random state for the stratified train/test split
    split: if True, return (train_loader, test_loader); otherwise one loader
    augmentation: augment data or not (two augmented views per sample)

    Returns torch dataloader(s)
    -------
    '''
    transformers = DataTransform(augmentation_transformers()) if augmentation else None

    def build_loader(features, labels):
        # Shared dataset/loader construction for both branches below.
        dataset = CustomTensorDataset([torch.Tensor(features), torch.tensor(labels)],
                                      transformers, normalise_data)
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    if not split:
        return build_loader(X, y)
    # Stratified split keeps the class balance of y in both partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=seed, stratify=y)
    return build_loader(X_train, y_train), build_loader(X_test, y_test)
def create_unlabelled_loader(X, batch_size=10, shuffle=True, augmentation=False,
                             normalise_data=True):
    '''
    Create a dataloader for unlabelled data, note this function will label
    every datapoint with one.

    Parameters
    ----------
    X: unlabelled data
    batch_size: samples per batch
    shuffle: shuffle the data each epoch
    augmentation: augment data or not (two augmented views per sample)
    normalise_data: normalise the data or not

    Returns torch dataloader
    -------
    '''
    transformers = None
    if augmentation:
        transformers = DataTransform(augmentation_transformers())
    # Dummy label of 1 for every sample so the dataset keeps the (x, y) shape.
    dummy_labels = torch.ones(X.shape[0])
    dataset = CustomTensorDataset([torch.Tensor(X), dummy_labels], transformers,
                                  normalise_data=normalise_data)
    # drop_last=True discards the final partial batch -- presumably so
    # batch-dependent (contrastive) losses always see full batches; confirm.
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                      drop_last=True)
Functions
def augmentation_transformers()
-
Expand source code
def augmentation_transformers(): return transforms.Compose([transforms.RandomResizedCrop([8, 14]), transforms.RandomHorizontalFlip()])
def create_labelled_loader(X, y, batch_size=10, normalise_data=True, shuffle=True, seed=0, split=True, augmentation=False)
-
Create a dataloader for labelled data Parameters
X
:numpy array, data
y
:numpy array, label
batch_size
normalise_data
shuffle
seed
split
augmentation
:augment data or not
Returns torch dataloader
Expand source code
def create_labelled_loader(X, y, batch_size=10, normalise_data=True, shuffle=True, seed=0, split=True, augmentation=False): ''' Create a dataloader for labelled data Parameters ---------- X: numpy array, data y: numpy array, label batch_size normalise_data shuffle seed split augmentation: augment data or not Returns torch dataloader ------- ''' transformers = DataTransform(augmentation_transformers()) if augmentation else None if split: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=seed, stratify=y) train_dataset = CustomTensorDataset([torch.Tensor(X_train), torch.tensor(y_train)], transformers, normalise_data) train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle) test_dataset = CustomTensorDataset([torch.Tensor(X_test), torch.tensor(y_test)], transformers, normalise_data) test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=shuffle) return train_dataloader, test_dataloader else: train_dataset = CustomTensorDataset([torch.Tensor(X), torch.tensor(y)], transformers, normalise_data) train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle) return train_dataloader
def create_unlabelled_loader(X, batch_size=10, shuffle=True, augmentation=False, normalise_data=True)
-
Create a dataloader for unlabelled data, note this function will label every datapoint with one. Parameters
X
:unlabelled data
batch_size
shuffle
augmentation
normalise_data
:normalise the data or not
Returns torch dataloader
Expand source code
def create_unlabelled_loader(X, batch_size=10, shuffle=True, augmentation=False, normalise_data=True): ''' Create a dataloader for unlabelled data, note this function will label every datapoint with one. Parameters ---------- X: unlabelled data batch_size shuffle augmentation normalise_data: normalise the data or not Returns torch dataloader ------- ''' transformers = DataTransform(augmentation_transformers()) if augmentation else None train_dataset = CustomTensorDataset([torch.Tensor(X), torch.ones(X.shape[0])], transformers, normalise_data=normalise_data) train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle, drop_last=True) return train_dataloader
Classes
class CustomTensorDataset (tensors, transform=None, normalise_data=True)
-
TensorDataset with support of transforms.
Expand source code
class CustomTensorDataset(Dataset): """TensorDataset with support of transforms. """ def __init__(self, tensors, transform=None, normalise_data=True): assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors) self.tensors = tensors self.transform = transform self.normalise_data = normalise_data def __getitem__(self, index): x = self.tensors[0][index] if self.normalise_data: try: x = normalize(x.view(24, -1), dim=0).view(x.size()) except RuntimeError: x = normalize(x.view(-1, ), dim=0).view(x.size()) if self.transform: x = self.transform(x) y = self.tensors[1][index] return x, y def __len__(self): return self.tensors[0].size(0)
Ancestors
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
class DataTransform (transform)
-
Expand source code
class DataTransform(object): def __init__(self, transform): self.transform = transform def __call__(self, sample): xi = self.transform(sample) xj = self.transform(sample) return xi, xj