def do_standardize(Z, axis=0, center=True, scale=True):
    '''
    Scale (divide by the standard deviation) and/or center (subtract
    the mean) an array along a given axis.

    Z: array-like input. It is never modified; a new array is returned.
    axis: the axis along which the std / mean is aggregated, i.e. the
        axis that is collapsed when computing the statistics.
        For a 2D array (matrix):
          - axis = 0 aggregates over the rows, so every column of the
            output has mean 0 and std 1 (column standardization);
          - axis = 1 aggregates over the columns, so every row of the
            output has mean 0 and std 1 (row standardization).
    center: whether or not to subtract the mean.
    scale: whether or not to divide by the std.

    Slices whose std is exactly zero (constant values along `axis`)
    cannot be rescaled to unit std. They are divided by 1 instead of 0,
    so they stay finite and become all zeros after centering rather
    than NaN.
    '''
    # Accept lists / tuples as well as arrays, for every flag combination.
    Z = np.asanyarray(Z)
    if scale:
        std = np.std(Z, axis=axis, keepdims=True)
        # Guard against division by zero for constant slices.
        safe_std = np.where(std == 0, 1.0, std)
        Znew = Z / safe_std
    else:
        # Copy so that the caller's array is never aliased by the result.
        Znew = Z.copy()
    if center:
        # The mean is taken after scaling, matching the scaled values.
        Znew = Znew - np.mean(Znew, axis=axis, keepdims=True)
    return Znew
def get_principal_components(X):
    '''
    Center X along axis 0, rescale it, and return its thin SVD.

    The centered matrix is divided by the square root of its total
    number of entries before the decomposition.

    Returns the tuple (U, S, V), where U and S come straight from
    np.linalg.svd and V is the transpose of the returned Vt.
    '''
    # Mean-center only; no division by the standard deviation.
    centered = do_standardize(X, scale=False)
    # In-place rescaling by sqrt(total number of entries).
    n_entries = np.prod(centered.shape)
    centered /= np.sqrt(n_entries)
    left_vecs, sing_vals, right_vecs_t = np.linalg.svd(
        centered, full_matrices=False
    )
    # Original author's notes on how to use the outputs:
    #   loadings = left_vecs @ np.diag(sing_vals)
    #   factors  = right_vecs_t.T
    return left_vecs, sing_vals, right_vecs_t.T
def compute_cos(xmat):
    '''
    Return the squared entries of xmat, each divided by the sum of
    squares taken along axis 1 (for a matrix: its row's sum of squares).
    The output has the same shape as xmat.
    '''
    squared = xmat * xmat
    row_totals = squared.sum(axis=1, keepdims=True)
    return squared / row_totals
def compute_contribution(xmat):
    '''
    Return the squared entries of xmat, each divided by the sum of
    squares taken along axis 0 (for a matrix: its column's sum of
    squares). The output has the same shape as xmat.
    '''
    squared = xmat * xmat
    column_totals = squared.sum(axis=0, keepdims=True)
    return squared / column_totals