import matplotlib.gridspec as gridspec
fig = plt.figure(figsize = (22, 22))
gs = fig.add_gridspec(2, 2, height_ratios=(1, 1), width_ratios = (2,1), wspace=0, hspace=0)
ax1 = fig.add_subplot(gs[0,0])
ax2 = fig.add_subplot(gs[1,0])
gs_rcol = gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec=gs[0:2,1], height_ratios = (2, 1))
axr1 = fig.add_subplot(gs_rcol[0, 0])
axr2 = fig.add_subplot(gs_rcol[1, 0])
# PC1 vs PC2
xvals = - loadings[:, top_factor2]
yvals = - loadings[:, top_factor3]
# Combine outliers in x-axis and y-axis
# outlier_idx_x = np.where(iqr_outlier(xvals, axis = 0, bar = 5.0))[0]
# outlier_idx_y = np.where(iqr_outlier(yvals, axis = 0, bar = 5.0))[0]
outlier_idx_x = np.argsort(contribution_pheno[:, top_factor2])[::-1][:20]
outlier_idx_y = np.argsort(contribution_pheno[:, top_factor3])[::-1][:40]
outlier_idx = np.union1d(outlier_idx_x, outlier_idx_y)
x_center = np.mean(ax1.get_xlim())
common_idx = np.setdiff1d(np.arange(xvals.shape[0]), outlier_idx)
txt_list = []
text_idx_list = []
for i in outlier_idx:
if i != tidx:
txt = trait_df_noRx.loc[trait_indices[i]]['short_description'].strip()
txt_list.append(txt)
text_idx_list.append(i)
# Mark Type 2 diabetes
txt_list.append("Type 2 Diabetes")
text_idx_list.append(tidx)
scatter_plot = ax1.scatter(xvals[outlier_idx], yvals[outlier_idx], alpha = 0.7, s = 100, color = nygc_colors['orange'])
ax1.scatter(xvals[tidx], yvals[tidx], s = 150, color = nygc_colors['blue'])
ax1.scatter(xvals[common_idx], yvals[common_idx], alpha = 0.1, s = 100, color = nygc_colors['khaki'])
# # Mark using textalloc package
if len(text_idx_list) > 0:
txt_idx = np.array(text_idx_list)
textalloc.allocate_text(fig, ax1, xvals[txt_idx], yvals[txt_idx], txt_list,
# x_scatter = xvals, y_scatter = yvals,
scatter_plot = scatter_plot,
textsize = 10, textcolor = 'black', linecolor = nygc_colors['gray'])
# # Mark using adjustText package
# # https://github.com/Phlya/adjustText
# annots = []
# for i, txt in zip(text_idx_list, txt_list):
# if xvals[i] > x_center:
# annots += [ax1.annotate(txt, (xvals[i], yvals[i]), fontsize = 6, ha = 'right')]
# else:
# annots += [ax1.annotate(txt, (xvals[i], yvals[i]), fontsize = 6)]
# adjust_text(annots, arrowprops=dict(arrowstyle='-', color = nygc_colors['gray']))
# ax1.set_xticks([-0.010, -0.005, 0.0, 0.005, 0.010])
for side, border in ax1.spines.items():
if side == 'left':
border.set_bounds(-0.06, 0.04)
elif side == 'bottom':
border.set_bounds(-0.05, 0.03)
else:
border.set_visible(False)
ax1.set_xlabel(f"Factor {top_factor2 + 1}")
ax1.set_ylabel(f"Factor {top_factor3 + 1}")
# PC22 vs PC19 on ax2
xvals = loadings[:, top_factor1]
yvals = - loadings[:, top_factor4]
# Combine outliers in x-axis and y-axis
# outlier_idx_x = np.where(iqr_outlier(xvals, axis = 0, bar = 5.0))[0]
# outlier_idx_y = np.where(iqr_outlier(yvals, axis = 0, bar = 5.0))[0]
outlier_idx_x = np.argsort(contribution_pheno[:, top_factor1])[::-1][:20]
outlier_idx_y = np.argsort(contribution_pheno[:, top_factor4])[::-1][:20]
outlier_idx = np.union1d(outlier_idx_x, outlier_idx_y)
x_center = np.mean(ax2.get_xlim())
common_idx = np.setdiff1d(np.arange(xvals.shape[0]), outlier_idx)
txt_list = []
text_idx_list = []
for i in outlier_idx:
if i != tidx:
txt = trait_df_noRx.loc[trait_indices[i]]['short_description'].strip()
txt_list.append(txt)
text_idx_list.append(i)
# Mark Type 2 diabetes
txt_list.append("Type 2 Diabetes")
text_idx_list.append(tidx)
scatter_plot = ax2.scatter(xvals[outlier_idx], yvals[outlier_idx], alpha = 0.7, s = 100, color = nygc_colors['orange'])
ax2.scatter(xvals[tidx], yvals[tidx], s = 150, color = nygc_colors['blue'])
ax2.scatter(xvals[common_idx], yvals[common_idx], alpha = 0.1, s = 100, color = nygc_colors['khaki'])
# # Mark using textalloc package
if len(text_idx_list) > 0:
txt_idx = np.array(text_idx_list)
textalloc.allocate_text(fig, ax2, xvals[txt_idx], yvals[txt_idx], txt_list,
# x_scatter = xvals, y_scatter = yvals,
scatter_plot = scatter_plot,
textsize = 10, textcolor = 'black', linecolor = nygc_colors['gray'])
# ax1.set_xticks([-0.010, -0.005, 0.0, 0.005, 0.010])
for side, border in ax2.spines.items():
if side == 'left':
border.set_bounds(-0.006, 0.01)
elif side == 'bottom':
border.set_bounds(-0.01, 0.01)
else:
border.set_visible(False)
ax2.set_xlabel(f"Factor {top_factor1 + 1}")
ax2.set_ylabel(f"Factor {top_factor4 + 1}")
# Contribution of variants
xvals = top_variant_score
yvals = np.arange(n_plot_variants)[::-1]
axr1.barh(yvals, xvals, align = 'center', height = 0.7, color = nygc_colors['orange'], edgecolor = 'None')
axr1.set_yticks(yvals)
axr1.set_yticklabels(top_variant_names)
for side in ['top', 'right', 'left']:
axr1.spines[side].set_visible(False)
axr1.tick_params(left=False)
axr1.set_xlabel(f"Contribution of variants to F{top_factor1 + 1}")
axr1.set_xticks([0, 0.001, 0.002])
# ax1.set_title(method_names[method])
# Enrichment
xvals = -np.log10(cts_df["Coefficient_P_value"].to_numpy())
yvals = np.arange(n_plot_tissues)[::-1]
axr2.barh(yvals, xvals, align = 'center', height = 0.7, color = nygc_colors['lightblue'], edgecolor = 'None')
axr2.set_yticks(yvals)
axr2.set_yticklabels(cts_df["Name"])
for side in ['top', 'right', 'left']:
axr2.spines[side].set_visible(False)
axr2.tick_params(left=False)
axr2.set_xlabel(r"$-\log_{10}(P)$")
# Panel labels
ax1.text(-0.25, 0.97, "(a)", transform=ax1.transAxes, fontweight='bold')
ax2.text(-0.25, 0.97, "(b)", transform=ax2.transAxes, fontweight='bold')
axr1.text(-1.10, 0.97, "(c)", transform=axr1.transAxes, fontweight='bold')
axr2.text(-1.10, 0.97, "(d)", transform=axr2.transAxes, fontweight='bold')
gs.tight_layout(fig)
plt.savefig('../plots/colormann-manuscript/panukb_t2d_rpca.pdf', bbox_inches='tight')
plt.show()