Module minder_utils.formatting.format_util
Expand source code
import numpy as np
import os
from sklearn.preprocessing import Normalizer
def iter_dir(directory, endwith='.csv', split=True):
    """
    List the entries of a directory whose names end with a given suffix.

    :param directory: path to the folder to scan
    :param endwith: suffix a name must end with to be included
    :param split: if True, keep only the part of each name before its
        first '.'; if False, keep the full name
    :return: list of matching names, in the order ``os.listdir`` yields them
    """
    matches = (entry for entry in os.listdir(directory) if entry.endswith(endwith))
    if split:
        # Everything from the first dot onwards is dropped, not just the suffix.
        return [entry.split('.')[0] for entry in matches]
    return list(matches)
def y_to_categorical(y, smooth=False, valid_only=False):
    """
    Convert signed labels into a two-column array.

    Column 1 is filled for entries with ``y > 0``; column 0 is filled for
    all other entries.

    :param y: array of labels; its first dimension gives the number of rows
    :param smooth: if False, the filled cell is set to 1. If True, it is set
        to the label itself (positive entries) or to its absolute value
        (remaining entries), so a label of 0 yields an all-zero row
    :param valid_only: if True, only entries equal to -1 or 1 are kept and
        the boolean mask selecting them is returned as well
    :return: ``labels`` of shape (n, 2), or ``(labels, mask)`` when
        ``valid_only`` is True
    """
    y = np.asarray(y)
    if valid_only:
        mask = np.isin(y, [-1, 1])
        y = y[mask]

    flat = y.reshape(-1)
    positives = flat > 0
    others = ~positives
    labels = np.zeros((y.shape[0], 2))

    if smooth:
        labels[positives, 1] = flat[positives]
        # Use the same mask on both sides: selecting the values with `y < 0`
        # skips zeros and no longer matches the `~positives` rows.
        labels[others, 0] = np.abs(flat[others])
    else:
        labels[positives, 1] = 1
        labels[others, 0] = 1

    if valid_only:
        return labels, mask
    return labels
def normalise(X, technique='l2'):
    """
    Normalise an array with the chosen technique and return it.

    'z-score' and 'max-min' process each slice ``X[:, i]`` independently.
    'l1', 'l2' and 'max' pass each slice ``X[:, i, :]`` through sklearn's
    ``Normalizer``, so they need a 3-D array (the original author's note
    gives the shape as (N, 24, F): N samples, F features).

    :param X: numpy array to normalise. Floating-point arrays are modified
        in place; integer and boolean arrays are first copied to float so
        that the normalised values are not truncated
    :param technique: one of 'z-score', 'max-min', 'l1', 'l2', 'max' or
        None (None returns X untouched)
    :return: the normalised array
    """
    assert technique in ['z-score', 'max-min', 'l2', 'l1', 'max', None], 'not implemented ...'
    if technique is None:
        return X

    # Writing fractional results back into an integer/bool array would
    # silently truncate them, so work on a float copy in that case.
    if X.dtype.kind in 'iub':
        X = X.astype(float)

    if technique == 'z-score':
        for i in range(X.shape[1]):
            data = X[:, i]
            std = np.std(data)
            if std == 0:
                # Constant slice: avoid dividing by zero.
                std = 1
            X[:, i] = (data - np.mean(data)) / std
    elif technique == 'max-min':
        for i in range(X.shape[1]):
            data = X[:, i]
            low, high = np.min(data), np.max(data)
            # Constant slice: avoid dividing by zero.
            span = 1 if high == low else high - low
            X[:, i] = (data - low) / span
    elif technique in ['l1', 'l2', 'max']:
        for i in range(X.shape[1]):
            X[:, i, :] = Normalizer(technique).fit_transform(X[:, i, :])
    return X
def _truncate_number(value, width=6):
    """Shorten str(value) to about `width` characters without changing its magnitude."""
    text = str(value)
    mantissa, marker, exponent = text.partition('e')
    if marker:
        # Scientific notation: shorten the mantissa only, keep the exponent.
        return mantissa[:width] + marker + exponent
    # Never cut into the digits before the decimal point.
    integer_part = text.split('.')[0]
    return text[:max(width, len(integer_part))]


def format_mean_std(values):
    """
    Format the mean and standard deviation of `values` as "mean +/- std".

    Each number is truncated (not rounded) to about six characters. The
    exponent of numbers printed in scientific notation and the digits before
    the decimal point are always kept, so the shortened text never differs
    from the real value by orders of magnitude.

    :param values: sequence or array of numbers accepted by np.mean / np.std
    :return: string of the form "<mean> +/- <std>"
    """
    return _truncate_number(np.mean(values)) + " +/- " + _truncate_number(np.std(values))
def flatten(x, last_axis=False):
    """
    Collapse the trailing dimensions of an array into one.

    :param x: array to reshape
    :param last_axis: if False, keep the first dimension and merge the rest,
        giving shape (x.shape[0], -1); if True, keep the first two dimensions
        and merge the rest, giving shape (x.shape[0], x.shape[1], -1)
    :return: the reshaped array
    """
    rows = x.shape[0]
    target_shape = (rows, x.shape[1], -1) if last_axis else (rows, -1)
    return x.reshape(target_shape)
def l2_norm(x, epsilon=1e-10):
    """
    Scale `x` by the square root of its total sum of squares.

    :param x: array to scale; the sum of squares is taken over all elements
    :param epsilon: lower bound for the sum of squares, which keeps the
        divisor non-zero when `x` is all zeros
    :return: `x` divided by sqrt(max(sum(x ** 2), epsilon))
    """
    squared_sum = np.sum(x ** 2)
    bounded = max(squared_sum, epsilon)
    return x / np.sqrt(bounded)
Functions
def flatten(x, last_axis=False)
-
Expand source code
def flatten(x, last_axis=False):
    """
    Collapse the trailing dimensions of an array into one.

    :param x: array to reshape
    :param last_axis: if False, keep the first dimension and merge the rest,
        giving shape (x.shape[0], -1); if True, keep the first two dimensions
        and merge the rest, giving shape (x.shape[0], x.shape[1], -1)
    :return: the reshaped array
    """
    rows = x.shape[0]
    target_shape = (rows, x.shape[1], -1) if last_axis else (rows, -1)
    return x.reshape(target_shape)
def format_mean_std(values)
-
Expand source code
def _truncate_number(value, width=6):
    """Shorten str(value) to about `width` characters without changing its magnitude."""
    text = str(value)
    mantissa, marker, exponent = text.partition('e')
    if marker:
        # Scientific notation: shorten the mantissa only, keep the exponent.
        return mantissa[:width] + marker + exponent
    # Never cut into the digits before the decimal point.
    integer_part = text.split('.')[0]
    return text[:max(width, len(integer_part))]


def format_mean_std(values):
    """
    Format the mean and standard deviation of `values` as "mean +/- std".

    Each number is truncated (not rounded) to about six characters. The
    exponent of numbers printed in scientific notation and the digits before
    the decimal point are always kept, so the shortened text never differs
    from the real value by orders of magnitude.

    :param values: sequence or array of numbers accepted by np.mean / np.std
    :return: string of the form "<mean> +/- <std>"
    """
    return _truncate_number(np.mean(values)) + " +/- " + _truncate_number(np.std(values))
def iter_dir(directory, endwith='.csv', split=True)
-
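List the files in a given directory whose names end with a given suffix (CSV by default). :param directory: path to the folder :param endwith: file-name suffix to match :param split: if True, return only the part of each name before its first '.' :return: list of the matching file names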
Expand source code
def iter_dir(directory, endwith='.csv', split=True):
    """
    List the entries of a directory whose names end with a given suffix.

    :param directory: path to the folder to scan
    :param endwith: suffix a name must end with to be included
    :param split: if True, keep only the part of each name before its
        first '.'; if False, keep the full name
    :return: list of matching names, in the order ``os.listdir`` yields them
    """
    matches = (entry for entry in os.listdir(directory) if entry.endswith(endwith))
    if split:
        # Everything from the first dot onwards is dropped, not just the suffix.
        return [entry.split('.')[0] for entry in matches]
    return list(matches)
def l2_norm(x, epsilon=1e-10)
-
Expand source code
def l2_norm(x, epsilon=1e-10):
    """
    Scale `x` by the square root of its total sum of squares.

    :param x: array to scale; the sum of squares is taken over all elements
    :param epsilon: lower bound for the sum of squares, which keeps the
        divisor non-zero when `x` is all zeros
    :return: `x` divided by sqrt(max(sum(x ** 2), epsilon))
    """
    squared_sum = np.sum(x ** 2)
    bounded = max(squared_sum, epsilon)
    return x / np.sqrt(bounded)
def normalise(X, technique='l2')
-
Expand source code
def normalise(X, technique='l2'):
    """
    Normalise an array with the chosen technique and return it.

    'z-score' and 'max-min' process each slice ``X[:, i]`` independently.
    'l1', 'l2' and 'max' pass each slice ``X[:, i, :]`` through sklearn's
    ``Normalizer``, so they need a 3-D array (the original author's note
    gives the shape as (N, 24, F): N samples, F features).

    :param X: numpy array to normalise. Floating-point arrays are modified
        in place; integer and boolean arrays are first copied to float so
        that the normalised values are not truncated
    :param technique: one of 'z-score', 'max-min', 'l1', 'l2', 'max' or
        None (None returns X untouched)
    :return: the normalised array
    """
    assert technique in ['z-score', 'max-min', 'l2', 'l1', 'max', None], 'not implemented ...'
    if technique is None:
        return X

    # Writing fractional results back into an integer/bool array would
    # silently truncate them, so work on a float copy in that case.
    if X.dtype.kind in 'iub':
        X = X.astype(float)

    if technique == 'z-score':
        for i in range(X.shape[1]):
            data = X[:, i]
            std = np.std(data)
            if std == 0:
                # Constant slice: avoid dividing by zero.
                std = 1
            X[:, i] = (data - np.mean(data)) / std
    elif technique == 'max-min':
        for i in range(X.shape[1]):
            data = X[:, i]
            low, high = np.min(data), np.max(data)
            # Constant slice: avoid dividing by zero.
            span = 1 if high == low else high - low
            X[:, i] = (data - low) / span
    elif technique in ['l1', 'l2', 'max']:
        for i in range(X.shape[1]):
            X[:, i, :] = Normalizer(technique).fit_transform(X[:, i, :])
    return X
def y_to_categorical(y, smooth=False, valid_only=False)
-
Expand source code
def y_to_categorical(y, smooth=False, valid_only=False):
    """
    Convert signed labels into a two-column array.

    Column 1 is filled for entries with ``y > 0``; column 0 is filled for
    all other entries.

    :param y: array of labels; its first dimension gives the number of rows
    :param smooth: if False, the filled cell is set to 1. If True, it is set
        to the label itself (positive entries) or to its absolute value
        (remaining entries), so a label of 0 yields an all-zero row
    :param valid_only: if True, only entries equal to -1 or 1 are kept and
        the boolean mask selecting them is returned as well
    :return: ``labels`` of shape (n, 2), or ``(labels, mask)`` when
        ``valid_only`` is True
    """
    y = np.asarray(y)
    if valid_only:
        mask = np.isin(y, [-1, 1])
        y = y[mask]

    flat = y.reshape(-1)
    positives = flat > 0
    others = ~positives
    labels = np.zeros((y.shape[0], 2))

    if smooth:
        labels[positives, 1] = flat[positives]
        # Use the same mask on both sides: selecting the values with `y < 0`
        # skips zeros and no longer matches the `~positives` rows.
        labels[others, 0] = np.abs(flat[others])
    else:
        labels[positives, 1] = 1
        labels[others, 0] = 1

    if valid_only:
        return labels, mask
    return labels