Multiple regression (two predictors) with product of normals
- About
- Toy model
- Variational approximation with fixed $\sigma$, $\sigma_b$ and $\sigma_w$
- VEB optimization of all parameters
- Variational approximation with fixed $\sigma$
- Variational approximation with fixed $\sigma_b$ and $\sigma_w$
- Initialization with very high $\sigma$ leads to well-behaved optimization
About
I am trying to understand the problem in the VEB implementation of multiple regression with product of normals. Here, I will numerically check the underlying distributions for a special case with only two predictors. For the single predictor ($p=1$) analysis, check here.
import numpy as np
import pandas as pd
from scipy import linalg as sc_linalg
from scipy import special as sc_special
import matplotlib.pyplot as plt
import mpl_stylesheet
import mpl_utils
from matplotlib import cm
from matplotlib import ticker as plticker
from mpl_toolkits.axes_grid1 import make_axes_locatable
mpl_stylesheet.banskt_presentation(fontfamily = 'latex-clearsans', fontsize = 18, colors = 'banskt', dpi = 72)
Toy model
I defined a toy model with two predictors and 50 samples. I sampled the predictors from the standard normal distribution $\mathcal{N}(0, 1)$. The model used is described in the writeup.
I chose $\sigma = 15.0$, $\sigma_b = 2.0$ and $\sigma_w = 4.0$. Let the true values for $b$ and $w$ be denoted by $\hat{b}$ and $\hat{w}$. I calculated the log posterior of $bw$ as the sum of the log prior and the log likelihood, and normalized it numerically on a discrete grid of $bw$.
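For reference, here is the model as I reconstruct it from the simulation code below (the writeup remains the authoritative description):
$$
y = X\beta + \epsilon, \qquad \beta_j = b_j w_j, \qquad b_j \sim \mathcal{N}(0, \sigma_b^2), \quad w_j \sim \mathcal{N}(0, \sigma_w^2), \quad \epsilon \sim \mathcal{N}(0, \sigma^2 I_n).
$$
The implied prior on each effect size $\beta_j = b_j w_j$ is the product-of-normals density
$$
p(\beta_j) = \frac{1}{\pi \sigma_b \sigma_w} K_0\!\left(\frac{|\beta_j|}{\sigma_b \sigma_w}\right),
$$
where $K_0$ is the modified Bessel function of the second kind; this is what `prod_norm_prior_pdf` evaluates using `scipy.special.kn`.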
def variance_explained(y, ypred):
ss_err = np.sum(np.square(y - ypred))
ss_tot = np.sum(np.square(y - np.mean(y)))
r2 = 1 - (ss_err / ss_tot)
return r2
def prod_norm_prior_pdf(z, s1, s2):
x = np.abs(z) / s1 / s2
prob = sc_special.kn(0, x) / (np.pi * s1 * s2)
return prob
def prod_norm_prior_gridpdf(b1, b2, s1, s2):
n1 = b1.shape[0]
n2 = b2.shape[0]
prob = np.zeros((n2, n1))
pdf2 = prod_norm_prior_pdf(b2, s1, s2)
for i, _b1 in enumerate(b1):
pdf1 = prod_norm_prior_pdf(_b1, s1, s2)
prob[:, i] = pdf1 * pdf2
return prob
def normal_logdensity_onesample(z, m, s2):
logdensity = - 0.5 * np.log(2 * np.pi * s2) \
- 0.5 * (z - m) * (z - m) / s2
return logdensity
def normal_logdensity(y, mean, sigma2):
n = y.shape[0]
logdensity = - 0.5 * n * np.log(2 * np.pi * sigma2) \
- 0.5 * np.sum(np.square(y - mean)) / sigma2
return logdensity
def data_log_likelihood(X, y, b1, b2, sigma):
n1 = b1.shape[0]
n2 = b2.shape[0]
ll = np.zeros((n2, n1))
for i, _b1 in enumerate(b1):
for j, _b2 in enumerate(b2):
mean = np.dot(X, np.array([_b1, _b2]))
ll[j, i] = normal_logdensity(y, mean, sigma * sigma)
return ll
def normalize_by_volume(x, y, z):
'''
y is the row, x is the column of z
'''
grid_area = np.einsum('i, j -> ij', np.diff(y), np.diff(x))
volume = np.sum(np.multiply(z[1:, 1:], grid_area))
return z / volume
def normalize_logdensity_by_volume(x, y, lnz):
z = np.exp(lnz)
grid_area = np.einsum('i, j -> ij', np.diff(y), np.diff(x))
V = np.sum(np.multiply(z[1:, 1:], grid_area))
return lnz - np.log(V)
# Use Green's theorem to compute the area
# enclosed by a given contour.
def area_greens(vs):
xvs = vs[:, 0]
yvs = vs[:, 1]
area = 0.5 * np.sum(xvs[:-1] * np.diff(yvs) - yvs[:-1] * np.diff(xvs))
return np.abs(area)
def plot_contours(ax, X, Y, Z, beta, norm, cstep = 10,
xlabel = "", ylabel = "", zlabel = "",
nlevels = 5, nstd = 0, showContourArea=False,
showbeta=False, showZmax=False, showColbar=False):
zmin = np.min(Z) - nstd * np.std(Z)
zmax = np.max(Z) + nstd * np.std(Z)
ind = np.unravel_index(np.argmax(Z, axis=None), Z.shape)
levels = np.linspace(zmin, zmax, 200)
clevels = np.linspace(zmin, zmax, nlevels)
cmap = cm.YlOrRd
if norm:
cset1 = ax.contourf(X, Y, Z, levels, norm = norm,
cmap=cm.get_cmap(cmap, len(levels) - 1))
else:
cset1 = ax.contourf(X, Y, Z, levels,
cmap=cm.get_cmap(cmap, len(levels) - 1))
cset2 = ax.contour(X, Y, Z, clevels, colors='k')
if showContourArea:
for i, contour in enumerate(cset2.collections):
cpaths = contour.get_paths()
if len(cpaths) > 0:
vs = contour.get_paths()[0].vertices
# Compute area enclosed by vertices.
a = area_greens(vs)
print ("r = " + str(clevels[i]) + ": a =" + str(a))
ax.clabel(cset2, inline=1, fontsize=10)
for c in cset2.collections:
c.set_linestyle('solid')
ax.set_aspect("equal")
if showbeta:
ax.scatter(beta[0], beta[1], color = 'blue', s = 100)
if showZmax:
ax.scatter(X[ind[1]], Y[ind[0]], color = 'k', s = 100)
if xlabel:
ax.set_xlabel(xlabel)
if ylabel:
ax.set_ylabel(ylabel)
if showColbar:
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.2)
cbar = plt.colorbar(cset1, cax=cax)
ytickpos = np.arange(int(zmin / cstep) * cstep, zmax, cstep)
cbar.set_ticks(ytickpos)
if zlabel:
cax.set_ylabel(zlabel)
return cset1, cset2, clevels
def simulate_data(n, p, s, sb, sw, seed = 200):
np.random.seed(seed)
b = np.random.normal(0, sb, p)
w = np.random.normal(0, sw, p)
bw = np.multiply(b, w)
X = np.zeros((n, p))
for i in range(p):
X[:, i] = np.random.normal(0, 1, n)
err = np.random.normal(0, s, n)
y = np.dot(X, bw) + err
return X, y, b, w, bw
def print_matrix(matrix, fmt):
print('\n'.join(['\t'.join([fmt.format(cell) for cell in row]) for row in matrix]))
nsample = 50
npred = 2
sigbtrue = 2.0
sigwtrue = 4.0
sigtrue = 15.0
X, y, btrue, wtrue, bwtrue = simulate_data(nsample, npred, sigtrue, sigbtrue, sigwtrue, seed = 300)
ypredtrue = np.dot(X, bwtrue)
r2 = variance_explained(y, ypredtrue)
bwprior = np.multiply(np.random.normal(0, sigbtrue, 1000),
np.random.normal(0, sigwtrue, 1000))
print(f"Fraction of variance explained by X: {r2:.2f}")
bwvals = np.linspace(-40, 40, 1000)
bwvals1 = np.linspace(13, 23, 100)
bwvals2 = np.linspace(-3, 7, 80)
numerical_prior_1d = prod_norm_prior_pdf(bwvals, sigbtrue, sigwtrue)
numerical_prior = prod_norm_prior_gridpdf(bwvals1, bwvals2, sigbtrue, sigwtrue)
numerical_logll = data_log_likelihood(X, y, bwvals1, bwvals2, sigtrue)
numerical_logll = normalize_logdensity_by_volume(bwvals1, bwvals2, numerical_logll)
numerical_logpost = numerical_logll + np.log(numerical_prior)
numerical_logpost = normalize_logdensity_by_volume(bwvals1, bwvals2, numerical_logpost)
fig = plt.figure(figsize = (12, 12))
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)
ax1.scatter(ypredtrue, y)
mpl_utils.plot_diag(ax1)
ax1.text(0.02, 0.94, f"bw = ({bwtrue[0]:.2f}, {bwtrue[1]:.2f})", transform = ax1.transAxes)
#ax1.set_title("y = Xbw + e", pad = 20.0)
ax1.set_xlabel("Xbw")
ax1.set_ylabel("y")
ax2.hist(bwprior, bins = 50, density = True, alpha = 0.4, label="Simulation")
ax2.plot(bwvals, numerical_prior_1d, label = "Analytical")
ax2.legend()
ax2.set_title("Prior distribution", pad = 20.0)
ax2.set_xlabel("bw")
ax2.set_ylabel("Density")
zdata = np.exp(numerical_logll)
norm = cm.colors.Normalize(vmin=np.min(zdata), vmax=np.max(zdata))
cs1, cs2, zlevels = \
plot_contours(ax3, bwvals1, bwvals2, zdata,
bwtrue, norm = norm, cstep = 10, nlevels = 5,
showbeta = True, showZmax = False,
xlabel = r"$b_1 w_1$",
ylabel = r"$b_2 w_2$",
zlabel = "Likelihood",
)
ax3.set_title("Likelihood", pad = 20.0)
zdata = np.exp(numerical_logpost)
norm = cm.colors.Normalize(vmin=np.min(zdata), vmax=np.max(zdata))
cs1, cs2, zlevels = \
plot_contours(ax4, bwvals1, bwvals2, zdata,
bwtrue, norm = norm, cstep = 10, nlevels = 5,
showbeta = True, showZmax = False,
xlabel = r"$b_1 w_1$",
ylabel = r"$b_2 w_2$",
zlabel = "Posterior",
)
ax4.set_title("Posterior", pad = 20.0)
plt.tight_layout()
plt.show()
def get_elbo(X, XTX, XTy, yTy, s2, sb2, sw2, mb, mw, covb, covw):
n, p = X.shape
elbo = cfunc(n, p, yTy, s2, sb2, sw2, covb, covw) \
- hfunc(mb, covb, sb2) \
- efunc(XTX, XTy, mb, mw, covb, covw, s2, sw2)
return elbo
def get_elbo_grid(X, y, s, sb, sw, mb, mw, covb, covw, bwvals1, bwvals2, debug = False):
k1 = bwvals1.shape[0]
k2 = bwvals2.shape[0]
elbo_grid = np.zeros((k2, k1))
s2 = s * s
sb2 = sb * sb
sw2 = sw * sw
XTX = np.dot(X.T, X)
XTy = np.dot(X.T, y)
yTy = np.dot(y.T, y)
for i in range(k1):
for j in range(k2):
_mb = np.array([bwvals1[i] / mw[0], bwvals2[j] / mw[1]])
if debug: print(np.multiply(_mb, mw))
elbo_grid[j, i] = get_elbo(X, XTX, XTy, yTy, s2, sb2, sw2, _mb, mw, covb, covw)
return elbo_grid
def cfunc(n, p, yTy, s2, sb2, sw2, covb, covw):
sign, det_covb = np.linalg.slogdet(covb)
sign, det_covw = np.linalg.slogdet(covw)
val = 0.5 * p
val += - 0.5 * p * np.log(sb2) - 0.5 * p * np.log(sw2)
val += - 0.5 * n * np.log(2.0 * np.pi * s2)
val += 0.5 * det_covb + 0.5 * det_covw
val += 0.5 * yTy / s2
return val
def hfunc(m, cov, s2):
h = np.sum(np.square(m)) + cov.trace()
return 0.5 * h / s2
def efunc(XTX, XTy, mb, mw, covb, covw, sigma2, sigmaw2):
p = mb.shape[0]
mmTb = np.einsum('i,j->ij', mb, mb)
EBTXTXB = np.multiply(XTX, mmTb + covb)
Rb = (EBTXTXB / sigma2) + (np.eye(p) / sigmaw2)
vb = np.einsum('i,i->i', mb, XTy) / sigma2
t1 = 0.5 * np.linalg.multi_dot([mw.T, Rb, mw])
t2 = np.dot(mw.T, vb)
t3 = 0.5 * np.dot(Rb, covw).trace()
return t1 - t2 + t3
def qposterior_hist2d(mb, mw, covb, covw, bwvals1, bwvals2, n=1e8, density=True):
'''
Draw n samples of b and w from the variational posterior and form
bw = b * w elementwise.
For the 2d histogram,
bwvals1 := bin edges on the x-axis
bwvals2 := bin edges on the y-axis
Returns
bw_counts := bin counts (or density) on a grid with bwvals1 on the x-axis (columns)
and bwvals2 on the y-axis (rows)
'''
bvals = np.random.multivariate_normal(mb, covb, int(n))
wvals = np.random.multivariate_normal(mw, covw, int(n))
bw_samples = np.multiply(bvals, wvals)
xbins = np.digitize(bw_samples[:, 0], bwvals1)
ybins = np.digitize(bw_samples[:, 1], bwvals2)
bwbins = np.array([xbins, ybins]).T
k1 = bwvals1.shape[0]
k2 = bwvals2.shape[0]
bw_counts = np.zeros((k2, k1))
for i in range(1, k1):
for j in range(1, k2):
bw_counts[j, i] = np.sum((bwbins[:, None] == np.array([i, j])).all(-1))
if density:
bw_counts = normalize_by_volume(bwvals1, bwvals2, bw_counts)
'''
An alternative using np.histogram2d is sketched after this function;
note that its output needs a transpose to match the layout used here.
'''
return bw_counts
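The np.histogram2d route mentioned in the docstring would look roughly like the following sketch (a minimal, untested alternative; note that np.histogram2d treats the grids as bin edges, returns an array of shape (len(bwvals1) - 1, len(bwvals2) - 1) with the x variable on the first axis, and therefore needs a transpose to match the rows = bwvals2, columns = bwvals1 convention used above):
def qposterior_hist2d_v2(mb, mw, covb, covw, bwvals1, bwvals2, n=1e6, density=True):
    # Sample from q(b) and q(w) and form bw = b * w elementwise.
    bvals = np.random.multivariate_normal(mb, covb, int(n))
    wvals = np.random.multivariate_normal(mw, covw, int(n))
    bw_samples = np.multiply(bvals, wvals)
    # Bin the samples; histogram2d puts the x variable on the first axis.
    counts, _, _ = np.histogram2d(bw_samples[:, 0], bw_samples[:, 1],
                                  bins=[bwvals1, bwvals2], density=density)
    return counts.T  # rows index bwvals2, columns index bwvals1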
def veb_ridge_step(XTX, XTy, sigma2, sigmab2, mw, covw):
p = XTy.shape[0]
# E[W X'X W] for W = diag(w), from the first and second moments of q(w)
mmT = np.einsum('i,j->ij', mw, mw)
EWTXTXW = np.multiply(XTX, mmT + covw)
covbinv = (EWTXTXW / sigma2) + (np.eye(p) / sigmab2)
covb = np.linalg.inv(covbinv)
mb = np.linalg.multi_dot([covb, np.diag(mw), XTy]) / sigma2
return mb, covb
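As I read the code, veb_ridge_step implements the closed-form coordinate update for $q(b)$ given the moments of $q(w)$ (the same function is reused for $q(w)$ by swapping the arguments):
$$
S_b^{-1} = \frac{1}{\sigma^2}\, (X^\top X) \circ (m_w m_w^\top + S_w) + \frac{1}{\sigma_b^2} I_p, \qquad m_b = \frac{1}{\sigma^2}\, S_b\, \mathrm{diag}(m_w)\, X^\top y,
$$
where $\circ$ is the elementwise (Hadamard) product and $(X^\top X) \circ (m_w m_w^\top + S_w) = \mathbb{E}_q[W X^\top X W]$ for $W = \mathrm{diag}(w)$.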
def get_sigma_updates(X, y, XTX, XTy, yTy, mb, mw, covb, covw):
n, p = X.shape
mmTw = np.einsum('i,j->ij', mw, mw)
mmTb = np.einsum('i,j->ij', mb, mb)
EWTXTXW = np.multiply(XTX, mmTw + covw)
mbmwXTy = np.dot(mb, np.einsum('i,i->i', mw, XTy))
trace_cov = np.dot(EWTXTXW, mmTb + covb).trace()
sigma2 = (yTy - 2 * mbmwXTy + trace_cov) / n
sigmab2 = np.sum(np.square(mb) + np.diag(covb)) / p
sigmaw2 = np.sum(np.square(mw) + np.diag(covw)) / p
return sigma2, sigmab2, sigmaw2
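Similarly, get_sigma_updates computes (again, as I read the code)
$$
\sigma^2 = \frac{1}{n}\left( y^\top y - 2\, m_b^\top \mathrm{diag}(m_w)\, X^\top y + \mathrm{tr}\!\left[ \mathbb{E}_q[W X^\top X W]\, (m_b m_b^\top + S_b) \right] \right), \qquad \sigma_b^2 = \frac{1}{p} \sum_{j} \left( m_{b,j}^2 + (S_b)_{jj} \right),
$$
with the analogous expression for $\sigma_w^2$.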
def veb_iridge(X, y,
tol = 1e-8, max_iter = 10000,
init_sigma = 1.0, init_sigmab = 1.0, init_sigmaw = 1.0,
init_mb = 1.0, init_mw = 1.0,
init_sb = 1.0, init_sw = 1.0,
use_convergence = True,
update_sigma = True,
update_sigmab = True,
update_sigmaw = True,
debug = True
):
n, p = X.shape
XTX = np.dot(X.T, X)
XTy = np.dot(X.T, y)
yTy = np.dot(y.T, y)
elbo_path = np.zeros(max_iter + 1)
# Initialize hyperparameters
sigma = init_sigma
sigmab = init_sigmab
sigmaw = init_sigmaw
sigma2 = sigma * sigma
sigmab2 = sigmab * sigmab
sigmaw2 = sigmaw * sigmaw
# Initialize variational parameters
covb = np.eye(p) * init_sb * init_sb
covw = np.eye(p) * init_sw * init_sw
mb = np.repeat(init_mb, p)
mw = np.repeat(init_mw, p)
niter = 0
elbo_path[0] = -np.inf
for itn in range(1, max_iter + 1):
'''
Update
'''
mb, covb = veb_ridge_step(XTX, XTy, sigma2, sigmab2, mw, covw)
mw, covw = veb_ridge_step(XTX, XTy, sigma2, sigmaw2, mb, covb)
if update_sigma or update_sigmab or update_sigmaw:
_sigma2, _sigmab2, _sigmaw2 = get_sigma_updates(X, y, XTX, XTy, yTy, mb, mw, covb, covw)
if update_sigma: sigma2 = _sigma2
if update_sigmab: sigmab2 = _sigmab2
if update_sigmaw: sigmaw2 = _sigmaw2
sigma = np.sqrt(sigma2)
sigmab = np.sqrt(sigmab2)
sigmaw = np.sqrt(sigmaw2)
if debug:
print(f"Iteration {itn}")
print(sigma2, sigmab2, sigmaw2)
'''
Convergence
'''
niter += 1
elbo_path[itn] = get_elbo(X, XTX, XTy, yTy, sigma2, sigmab2, sigmaw2, mb, mw, covb, covw)
if use_convergence:
if elbo_path[itn] - elbo_path[itn - 1] < tol: break
return mb, mw, covb, covw, niter, elbo_path[:niter + 1], sigma, sigmab, sigmaw
def veb_show_result(X, y,
vebres, ax1, ax2, ax3, ax4,
bwvals1, bwvals2,
true_ypred, true_posterior,
use_logscale_elbo = True):
mb, mw, covb, covw, niter, elbo_path, sigma, sigmab, sigmaw = vebres
bw = np.multiply(mb, mw)
ypred = np.dot(X, bw)
veb_posterior = qposterior_hist2d(mb, mw, covb, covw, bwvals1, bwvals2, n = 1e6)
# Report the hyperparameter updates implied by the final variational
# solution (this overwrites the values returned by veb_iridge).
_sigma2, _sigmab2, _sigmaw2 = \
get_sigma_updates(X, y, np.dot(X.T, X), np.dot(X.T, y), np.dot(y.T, y),
mb, mw, covb, covw)
sigma = np.sqrt(_sigma2)
sigmab = np.sqrt(_sigmab2)
sigmaw = np.sqrt(_sigmaw2)
print(f"Optimized in {niter} iterations. Final ELBO: {elbo_path[-1]:.3f}")
print("Optimum parameters:")
print(f"mb = {mb[0]:g}, {mb[1]:g}")
print(f"mw = {mw[0]:g}, {mw[1]:g}")
print(f"sb")
print_matrix(covb, "{:g}")
print("")
print(f"sw")
print_matrix(covw, "{:g}")
print("")
max_elbo = np.max(elbo_path)
# Distance of each iteration's ELBO from the best ELBO; replace exact
# zeros with the smallest nonzero distance so the log scale is defined.
elbo_diff = max_elbo - elbo_path[1:]
min_diff = np.min(elbo_diff[elbo_diff != 0])
elbo_diff[elbo_diff == 0] = min_diff
ax1.scatter(np.arange(niter - 1), elbo_diff[:-1])
ax1.plot(np.arange(niter - 1), elbo_diff[:-1])
if use_logscale_elbo:
ax1.set_yscale('log')
ax1.set_xlabel('Iteration')
ax1.set_ylabel('Distance to "best" ELBO')
ax2.scatter(ypred, true_ypred)
mpl_utils.plot_diag(ax2)
ax2.text(0.02, 0.94, f"bw = ({bw[0]:.2f}, {bw[1]:.2f})", transform = ax2.transAxes)
ax2.set_xlabel(r'VEB $y_{\mathrm{pred}}$')
ax2.set_ylabel(r'True $y_{\mathrm{pred}}$')
zdata = true_posterior
norm = cm.colors.Normalize(vmin=np.min(zdata), vmax=np.max(zdata))
cs1, cs2, zlevels = \
plot_contours(ax4, bwvals1, bwvals2, zdata,
bwtrue, norm = norm, cstep = 10, nlevels = 5,
showbeta = True, showZmax = False,
xlabel = r"$b_1 w_1$",
ylabel = r"$b_2 w_2$",
zlabel = "Posterior",
)
ax4.set_title("True Posterior", pad = 20.0)
zdata = veb_posterior
cs1, cs2, zlevels = \
plot_contours(ax3, bwvals1, bwvals2, zdata,
bwtrue, norm = norm, cstep = 10, nlevels = 5,
showbeta = True, showZmax = False,
xlabel = r"$b_1 w_1$",
ylabel = r"$b_2 w_2$",
zlabel = "Posterior",
)
ax3.set_title("Variational Posterior", pad = 20.0)
return sigma, sigmab, sigmaw
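Variational approximation with fixed $\sigma$, $\sigma_b$ and $\sigma_w$
As a first check, I fix all three hyperparameters at their true values (update_sigma = update_sigmab = update_sigmaw = False) and optimize only the variational parameters.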
veb1_res = veb_iridge(X, y, init_sigma = sigtrue, init_sigmab = sigbtrue, init_sigmaw = sigwtrue,
update_sigma = False, update_sigmab = False, update_sigmaw = False, debug = False)
fig = plt.figure(figsize=(12, 12))
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)
veb1_sigma, veb1_sigmab, veb1_sigmaw = \
veb_show_result(X, y,
veb1_res, ax1, ax2, ax3, ax4,
bwvals1, bwvals2,
ypredtrue, np.exp(numerical_logpost)
)
plt.tight_layout()
plt.show()
VEB optimization of all parameters
The problem starts when I try to obtain both the variational parameters $m_b$, $S_b$, $m_w$ and $S_w$ and the hyperparameters $\sigma$, $\sigma_b$ and $\sigma_w$ using the update scheme described in the writeup. I switched off the convergence criterion to check how the ELBO evolves over the iterations.
I found that the convergence depends on the initialization of $\sigma$, $\sigma_b$ and $\sigma_w$. Initializing at the true values of the parameters leads to "unnatural" ELBO updates, that is, the ELBO is not monotonically increasing. (I had observed similar ELBO updates in the trend filtering example.)
veb2_res = veb_iridge(X, y, init_sigma = sigtrue, init_sigmab = sigbtrue, init_sigmaw = sigwtrue,
debug = False,
use_convergence = False, max_iter = 100)
fig = plt.figure(figsize=(12, 12))
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)
veb2_sigma, veb2_sigmab, veb2_sigmaw = \
veb_show_result(X, y,
veb2_res, ax1, ax2, ax3, ax4,
bwvals1, bwvals2,
ypredtrue, np.exp(numerical_logpost),
use_logscale_elbo = True
)
plt.tight_layout()
plt.show()
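Variational approximation with fixed $\sigma$
Here, I fix $\sigma$ at its true value (update_sigma = False) and update $\sigma_b$ and $\sigma_w$.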
veb3_res = veb_iridge(X, y, init_sigma = sigtrue, init_sigmab = sigbtrue, init_sigmaw = sigwtrue,
debug = False, update_sigma = False,
use_convergence = False, max_iter = 100)
fig = plt.figure(figsize=(12, 12))
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)
veb3_sigma, veb3_sigmab, veb3_sigmaw = \
veb_show_result(X, y,
veb3_res, ax1, ax2, ax3, ax4,
bwvals1, bwvals2,
ypredtrue, np.exp(numerical_logpost),
use_logscale_elbo = True
)
plt.tight_layout()
plt.show()
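Variational approximation with fixed $\sigma_b$ and $\sigma_w$
Here, I fix $\sigma_b$ and $\sigma_w$ at their true values and update only $\sigma$, running for 1000 iterations without the convergence criterion.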
veb4_res = veb_iridge(X, y, init_sigma = sigtrue, init_sigmab = sigbtrue, init_sigmaw = sigwtrue,
debug = False, update_sigma = True, update_sigmab = False, update_sigmaw = False,
use_convergence = False, max_iter = 1000)
fig = plt.figure(figsize=(12, 12))
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)
veb4_sigma, veb4_sigmab, veb4_sigmaw = \
veb_show_result(X, y,
veb4_res, ax1, ax2, ax3, ax4,
bwvals1, bwvals2,
ypredtrue, np.exp(numerical_logpost),
use_logscale_elbo = True
)
plt.tight_layout()
plt.show()
Initialization with very high $\sigma$ leads to well-behaved optimization
If I initialize $\sigma = 100.0$ and fix the values of $\sigma_b = 2.0$ and $\sigma_w = 4.0$, then the ELBO is well-behaved (always increasing). The optimized values are the same as in the previous run, where I initialized with $\sigma = 15.0$ and allowed updates for 1000 iterations (without stopping the iterations with any convergence criterion).
veb5_res = veb_iridge(X, y, init_sigma = 100.0, init_sigmab = sigbtrue, init_sigmaw = sigwtrue,
debug = False, update_sigma = True, update_sigmab = False, update_sigmaw = False,
use_convergence = False, max_iter = 250)
fig = plt.figure(figsize=(12, 12))
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)
veb5_sigma, veb5_sigmab, veb5_sigmaw = \
veb_show_result(X, y,
veb5_res, ax1, ax2, ax3, ax4,
bwvals1, bwvals2,
ypredtrue, np.exp(numerical_logpost),
use_logscale_elbo = True
)
plt.tight_layout()
plt.show()
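As a quick numerical check of the claim above (a sketch; the tolerance is an arbitrary choice on my part), I can compare the optimized effect sizes $bw = m_b \circ m_w$ from the two runs:
# Compare bw = mb * mw between the sigma = 15 run (veb4) and the sigma = 100 run (veb5).
bw_veb4 = np.multiply(veb4_res[0], veb4_res[1])
bw_veb5 = np.multiply(veb5_res[0], veb5_res[1])
print(np.allclose(bw_veb4, bw_veb5, atol=1e-3))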
Finally, how do the estimated $\sigma$, $\sigma_b$ and $\sigma_w$ compare across the different settings?
data = [[sigtrue, sigbtrue, sigwtrue],
[veb1_sigma, veb1_sigmab, veb1_sigmaw],
[veb2_sigma, veb2_sigmab, veb2_sigmaw],
[veb3_sigma, veb3_sigmab, veb3_sigmaw],
[veb4_sigma, veb4_sigmab, veb4_sigmaw],
[veb5_sigma, veb5_sigmab, veb5_sigmaw]
]
colnames = ['s', 'sb', 'sw']
rownames = ['True', 'VEB-fix', 'VEB-full', 'VEB-fix-s', 'VEB-fix-sb-sw', 'VEB-high-s']
df = pd.DataFrame.from_records(data, columns = colnames, index = rownames)
df.style.format("{:.3f}")