Basic comparison of ridge regression methods
import numpy as np
import scipy
from scipy import linalg as sc_linalg
from scipy import sparse as sc_sparse
import scipy.sparse.linalg    # ensure sc_sparse.linalg is loaded; scipy does not always import submodules eagerly
from sklearn.linear_model import Ridge
import glmnet_python
from glmnet import glmnet
from glmnetPrint import glmnetPrint
from glmnetCoef import glmnetCoef
from glmnetPredict import glmnetPredict
import matplotlib.pyplot as plt
import sys
sys.path.append("../../utils/")
import mpl_stylesheet
mpl_stylesheet.banskt_presentation(fontfamily = 'latex-clearsans', fontsize = 18, colors = 'banskt', dpi = 72)
def standardize(X):
    # center each column, then scale to unit L2 norm (not unit standard deviation)
    Xnorm = X - np.mean(X, axis = 0)
    Xstd = Xnorm / np.sqrt((Xnorm * Xnorm).sum(axis = 0))
    return Xstd
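# Quick sanity check: after standardize(), each column has zero mean and unit L2 norm.
_Xchk = standardize(np.random.normal(0, 1, 120).reshape(30, 4))
assert np.allclose(np.mean(_Xchk, axis = 0), 0.0)
assert np.allclose(np.sum(np.square(_Xchk), axis = 0), 1.0)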
def ridge_data(nsample, nvar, errsigma):
    X = np.random.normal(0, 1, nsample * nvar).reshape(nsample, nvar)
    X = standardize(X)
    btrue = np.random.normal(0, 1, nvar)
    y = np.dot(X, btrue) + np.random.normal(0, errsigma, nsample)
    # center and scale y; note this rescales the effective noise variance as well
    y = y - np.mean(y)
    y = y / np.std(y)
    return X, y, btrue
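# Example draw (a quick sketch): X has unit-norm columns and y is standardized,
# so errsigma alone controls the signal-to-noise ratio.
_X, _y, _b = ridge_data(50, 100, 1.0)
print(_X.shape, _y.shape, _b.shape, np.std(_y))   # (50, 100) (50,) (100,) 1.0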
def rsquare(ytrue, ypred):
sst = np.sum(np.square(ytrue - np.mean(ytrue)))
sse = np.sum(np.square(ytrue - ypred))
rsq = 1 - (sse / sst)
return rsq
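# Sanity check of rsquare() against scikit-learn's r2_score
# (scikit-learn is already a dependency of this notebook, so this adds nothing new).
from sklearn.metrics import r2_score
_yt = np.random.normal(0, 1, 20)
_yp = _yt + np.random.normal(0, 0.1, 20)
assert np.isclose(rsquare(_yt, _yp), r2_score(_yt, _yp))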
def logpdf_multivariate_gauss(x, mu, cov):
    '''
    Calculate the log multivariate normal density (pdf)
    Keyword arguments:
        x   = numpy array, a "d x 1" sample (column) vector
        mu  = numpy array, a "d x 1" mean (column) vector
        cov = numpy array, a "d x d" covariance matrix
    '''
    assert(mu.shape[0] > mu.shape[1]), 'mu must be a column vector'
    assert(x.shape[0] > x.shape[1]), 'x must be a column vector'
    assert(cov.shape[0] == cov.shape[1]), 'covariance matrix must be square'
    assert(mu.shape[0] == cov.shape[0]), 'cov and mu must have the same dimension'
    assert(mu.shape[0] == x.shape[0]), 'mu and x must have the same dimension'
    d = x.shape[0]    # dimension of x; do not rely on any global sample size
    part1 = - 0.5 * d * np.log(2. * np.pi) - 0.5 * np.linalg.slogdet(cov)[1]
    xlm = x - mu
    part2 = - 0.5 * np.dot(xlm.T, np.dot(np.linalg.inv(cov), xlm))
    return float(part1 + part2)
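# Cross-check against scipy.stats (a sketch; scipy.stats is assumed available):
from scipy.stats import multivariate_normal
_d = 5
_xs = np.random.normal(0, 1, _d).reshape(-1, 1)
_cov = 2.0 * np.eye(_d)
_ref = multivariate_normal.logpdf(_xs.reshape(-1), mean = np.zeros(_d), cov = _cov)
assert np.isclose(logpdf_multivariate_gauss(_xs, np.zeros((_d, 1)), _cov), _ref)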
def ridge_em(X, Y, s2, sb2, niter = 10):
    '''
    EM algorithm for ridge regression with prior b ~ N(0, sb2 * I)
    and residual variance s2; both variances are updated in the M-step.
    '''
    XTX = np.dot(X.T, X)
    XTY = np.dot(X.T, Y)
    YTY = np.dot(Y.T, Y)
    nsample = X.shape[0]
    nvar = X.shape[1]
    loglik = np.zeros(niter)
    i = 0
    while i < niter:
        V = XTX + np.eye(nvar) * (s2 / sb2)
        Vinv = sc_linalg.cho_solve(sc_linalg.cho_factor(V, lower=True), np.eye(nvar))
        # marginal log-likelihood of Y under the current variances
        SigmaY = sb2 * np.dot(X, X.T) + np.eye(nsample) * s2
        loglik[i] = logpdf_multivariate_gauss(Y.reshape(-1, 1), np.zeros((nsample, 1)), SigmaY)
        # E-step: posterior mean and variance of b
        Sigmab = s2 * Vinv # posterior variance of b
        mub = np.dot(Vinv, XTY) # posterior mean of b
        b2m = np.einsum('i,j->ij', mub, mub) + Sigmab # E[b b']
        # M-step: update the residual and prior variances
        s2 = (YTY + np.dot(XTX, b2m).trace() - 2 * np.dot(XTY, mub)) / nsample
        sb2 = np.sum(np.square(mub) + np.diag(Sigmab)) / nvar
        i += 1
    return s2, sb2, loglik, mub.reshape(-1), Sigmab
def ridge_ols(X, Y, lmbda):
XTX = np.dot(X.T, X)
XTY = np.dot(X.T, Y)
nvar = X.shape[1]
V = XTX + np.eye(nvar) * lmbda
Vinv = sc_linalg.cho_solve(sc_linalg.cho_factor(V, lower=True), np.eye(nvar))
bhat = np.dot(Vinv, XTY)
return bhat
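# Quick consistency sketch: near convergence, the EM posterior mean should match
# the closed-form ridge estimate evaluated at lambda = s2 / sb2.
_X, _y, _ = ridge_data(40, 10, 1.0)
_s2, _sb2, _, _mub, _ = ridge_em(_X, _y, 1.0, 1.0, 500)
print(np.max(np.abs(_mub - ridge_ols(_X, _y, _s2 / _sb2))))   # should be close to zero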
def svd2XTX(svd):
U = svd[0]
S = svd[1]
Vh = svd[2]
nmax = max(S.shape[0], Vh.shape[0])
Sdiag = np.zeros((nmax, nmax))
Sdiag[np.diag_indices(S.shape[0])] = np.square(S)
return np.dot(Vh.T, np.dot(Sdiag, Vh))
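# Sanity check: svd2XTX() should reconstruct X'X from the SVD factors.
_X = np.random.normal(0, 1, 60).reshape(12, 5)
assert np.allclose(svd2XTX(sc_linalg.svd(_X)), np.dot(_X.T, _X))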
def c_func(nsample, s2, ElogW):
val = - 0.5 * nsample * np.log(2. * np.pi * s2)
val += - 0.5 * np.sum(ElogW)
return val
def h1_func(X, Y, s2, mu, Wbar):
val = - (0.5 / s2) * (np.sum(np.square(Y - np.dot(X, mu))) \
+ np.sum(np.square(mu) / Wbar))
return val
def h2_func(svd, Sigma, Wbar):
XTX = svd2XTX(svd)
(sign, logdet) = np.linalg.slogdet(Sigma)
val = - 0.5 * np.trace(np.dot(XTX + np.diag(1 / Wbar), Sigma)) + 0.5 * logdet
return val
def ebmr_initialize(X, Y):
    nvar = X.shape[1]    # derive the dimension locally instead of relying on a global
    svd = sc_linalg.svd(X)
    XTY = np.dot(X.T, Y)
    mu = np.zeros(nvar)
    Sigma = np.zeros((nvar, nvar))
    return svd, XTY, mu, Sigma
def update_Sigma(svd, Wbar, nvar):
XTX = svd2XTX(svd)
Sigma = sc_linalg.cho_solve(sc_linalg.cho_factor(XTX + np.diag(1 / Wbar), lower=True), np.eye(nvar))
return Sigma
def update_mu(Sigma, XTY):
return np.dot(Sigma, XTY)
def update_s2(X, Y, mu, Wbar, nsample):
A = np.sum(np.square(Y - np.dot(X, mu)))
s2 = (A + np.sum(np.square(mu) / Wbar)) / nsample
return s2
def update_wg_ridge(mu, Sigma, s2, nvar):
    # E[b_j^2] under the posterior; the posterior covariance of b is s2 * Sigma
    bj2 = np.square(mu) + np.diag(Sigma) * s2
    # ridge prior: a single shared weight w = mean(E[b^2]) / s2, repeated for all variables
    W = np.repeat(np.sum(bj2) / s2 / nvar, nvar)
    KLW = 0.    # no KL contribution for a point-mass estimate of w
    return W, KLW
def update_elbo(X, Y, s2, mu, Sigma, Wbar, KLw, svd, nsample, nvar):
ElogW = np.log(Wbar)
elbo = c_func(nsample, s2, ElogW) \
+ h1_func(X, Y, s2, mu, Wbar) \
+ h2_func(svd, Sigma, Wbar) \
+ KLw
return elbo
def ebmr(X, Y, niter = 10, tol = 1e-4):
nvar = X.shape[1]
nsample = X.shape[0]
svdX, XTY, mu, Sigma = ebmr_initialize(X, Y)
s2 = np.var(Y)
Wbar = np.ones(nvar)
elbo = -np.inf
i = 0
while i < niter:
        Sigma = update_Sigma(svdX, Wbar, nvar)
mu = update_mu(Sigma, XTY)
s2 = update_s2(X, Y, mu, Wbar, nsample)
Wbar, KLw = update_wg_ridge(mu, Sigma, s2, nvar)
elbo_new = update_elbo(X, Y, s2, mu, Sigma, Wbar, KLw, svdX, nsample, nvar)
if elbo_new - elbo < tol: break
elbo = elbo_new
i += 1
return s2, mu, Sigma, Wbar
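# Minimal usage sketch for ebmr() on simulated data; mu is the posterior mean of b.
_X, _y, _ = ridge_data(40, 60, 1.0)
_s2, _mu, _Sigma, _Wbar = ebmr(_X, _y, niter = 200)
print(f'EBMR: s2 = {_s2:.4f}, training R^2 = {rsquare(_y, np.dot(_X, _mu)):.4f}')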
nsample = 50
nvar = 100
nsim = 20
errsigmas = np.logspace(-0.1, 1, 5)
r2 = [None for i in errsigmas]
for i, sd in enumerate(errsigmas):
    # with btrue ~ N(0, 1), the penalty implied by the prior is lambda = s2 / sb2 = errsigma^2
    lmbda = np.square(sd)
r2[i] = dict()
r2[i]['ridge_mle'] = list()
r2[i]['ridge_em'] = list()
r2[i]['ebmr'] = list()
r2[i]['sklearn'] = list()
r2[i]['sp_lsqr'] = list()
r2[i]['glmnet'] = list()
for isim in range(nsim):
X, y, btrue = ridge_data(nsample, nvar, sd)
        # closed-form ridge solution (stored under 'ridge_mle' for the plots below)
        b_ridge_ols = ridge_ols(X, y, lmbda)
        y_ridge_ols = np.dot(X, b_ridge_ols)
        r2[i]['ridge_mle'].append(rsquare(y, y_ridge_ols))
# Ridge EM
_, _, _, b_ridge_em, _ = ridge_em(X, y, 1, 1, 500)
r2[i]['ridge_em'].append(rsquare(y, np.dot(X, b_ridge_em)))
# EBMR
_, b_ebmr, _, _ = ebmr(X, y, 1000)
y_ebmr = np.dot(X, b_ebmr)
r2[i]['ebmr'].append(rsquare(y, y_ebmr))
        # scikit-learn Ridge ('normalize' was deprecated and later removed, so it is omitted)
        clf = Ridge(alpha = lmbda, fit_intercept = False, solver = 'sparse_cg')
clf.fit(X, y)
b_sklearn = clf.coef_
y_sklearn = np.dot(X, b_sklearn)
r2[i]['sklearn'].append(rsquare(y, y_sklearn))
        # sparse LSQR: damped least squares solves the same problem as ridge with damp = sqrt(lambda)
        b_sp_lsqr = sc_sparse.linalg.lsqr(X, y, damp = np.sqrt(lmbda))[0]
        y_sp_lsqr = np.dot(X, b_sp_lsqr)
        r2[i]['sp_lsqr'].append(rsquare(y, y_sp_lsqr))
        # glmnet minimizes (1/2n)||y - Xb||^2 + (lambda/2)||b||^2 when alpha = 0,
        # so its penalty must be scaled down by the number of samples to match ridge_ols
        lmbda_glmnet = lmbda / X.shape[0]
        # glmnet expects a path of penalties; pad the sequence with a second value
        # and read off the coefficients at the target penalty with glmnetCoef
        fit = glmnet(x = X.copy(), y = y.copy(), family = 'gaussian', alpha = 0.0,
                     intr = False, standardize = False,
                     lambdau = np.array([lmbda_glmnet, 1.0]))
        b_glmnet = glmnetCoef(fit, s = np.array([lmbda_glmnet]), exact = False)[1:].reshape(-1)
y_glmnet = np.dot(X, b_glmnet)
r2[i]['glmnet'].append(rsquare(y, y_glmnet))
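# Optional summary (a quick sketch): median R^2 per method at each noise level.
for k, sd in enumerate(errsigmas):
    meds = ', '.join([f'{m} = {np.median(v):.3f}' for m, v in r2[k].items()])
    print(f'sigma = {sd:.2f}: {meds}')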
fig = plt.figure(figsize = (16,6))
ax1 = fig.add_subplot(111)
colors = {'ridge_em': '#2D69C4',
'ebmr': '#CC2529',
'sklearn': '#93AA00',
'sp_lsqr': '#535154',
'glmnet': '#6B4C9A',
'ridge_mle': '#FFB300'}
facecolors = {'ridge_em': '#719ad8',
'ebmr': '#f2888b',
'sklearn': '#c4d64f',
'sp_lsqr': '#a6a3a7',
'glmnet': '#a98fd2',
'ridge_mle': '#fbd67e'}
barwidth = 0.1
nsigma = len(errsigmas)
xpos = [(k+1)*2 for k in range(nsigma)]
plot_methods = ['ridge_mle', 'sklearn', 'sp_lsqr', 'ridge_em', 'ebmr', 'glmnet']
bxplt = [None for x in plot_methods]
for i, method in enumerate(plot_methods):
pdata = [r2[k][method] for k in range(nsigma)]
xloc = [x + (i * barwidth) + (i * barwidth / 3) for x in xpos]
medianprops = dict(linewidth=2, color = colors[method])
whiskerprops = dict(linewidth=2, color = facecolors[method])
boxprops = dict(linewidth=2, color = colors[method], facecolor = facecolors[method])
bxplt[i] = ax1.boxplot(pdata, positions = xloc,
showfliers = False, showcaps = False, widths=barwidth,
patch_artist=True, notch = False,
boxprops = boxprops,
medianprops = medianprops, whiskerprops = whiskerprops,
)
leghandles = [x["boxes"][0] for x in bxplt]
ax1.legend(leghandles, plot_methods, loc='lower left', handlelength = 1.2, labelspacing = 0.2,)
ax1.set_xticks(xpos)
ax1.set_xticklabels([f'{x:.2f}' for x in errsigmas])
ax1.set_xlim(min(xpos) - 1, max(xpos) + 1)
ax1.set_xlabel(r'Error $\sigma$')
ax1.set_ylabel(r'$R^2$')
ax1.set_title(r'n = 50, p = 100, fixed $\lambda$ estimated from prior')
#plt.savefig('compare_ridge_methods.png', bbox_inches='tight', facecolor='white', transparent=True)
plt.tight_layout()
plt.show()
def jitter(arr):
    # add a small positive random offset so overlapping points remain visible
    arr = np.asarray(arr)
    stdev = 0.1 * (arr.max() - arr.min())
    return arr + np.abs(np.random.randn(arr.shape[0]) * stdev)
fig = plt.figure(figsize = (8, 8))
ax1 = fig.add_subplot(111)
nshow = 2
for i, method in enumerate(plot_methods):
ydata = r2[nshow][method]
    if method in ('sklearn', 'sp_lsqr', 'glmnet'):
ydata = jitter(ydata)
ax1.scatter(r2[nshow]['ridge_mle'], ydata, color = colors[method])
ax1.set_title(rf'Comparison of $R^2$ ($\sigma$ = {errsigmas[nshow]:.2f})', pad = 20)
ax1.set_xlabel('ridge_mle')
ax1.set_ylabel('All ridge regression methods')
#ax1.set_xticks([0.03, 0.04, 0.05])
ax1.set_xlim([0.25, 0.45])
ax1.set_ylim([0, 1.05])
ax1.plot([0,1],[0,1], ls = 'dashed', color = 'gray')
#plt.savefig('compare_ridge_methods_scatter.png', bbox_inches='tight', facecolor='white', transparent=True)
plt.show()