Poster for ProbGen24, Vienna

Author

Saikat Banerjee

Published

April 8, 2024

Abstract
High-quality plots used in the poster for ProbGen24, using the NYGC color palette.

Setup

Code
import numpy as np
import pandas as pd
import pickle
import sys
import os
import dsc
from dsc.query_engine import Query_Processor as dscQP
from dsc import dsc_io

import matplotlib
import matplotlib.pyplot as plt
from pymir import mpl_stylesheet
from pymir import mpl_utils
Code
import matplotlib.font_manager as mpl_fm
# Register the Futura font file with matplotlib's font manager so that the
# family can be selected by name through rcParams further down.
font_path = '/gpfs/commons/home/sbanerjee/nygc/Futura'
mpl_fm.fontManager.addfont(font_path + '/FuturaStd-Book.otf') # Loads "Futura Std"

# First stylesheet call; the same stylesheet is applied again below with the
# poster settings. NOTE(review): presumably the later call supersedes this one -- confirm.
mpl_stylesheet.banskt_presentation(splinecolor = 'black', dpi = 300)
# futura_book = FontProperties(fname='/gpfs/commons/home/sbanerjee/nygc/Futura/FuturaStd-Book.otf')

# NYGC Color Palette: color name -> hex color string.
nygc_colors = {
    'brown': '#7F0814',
    'darkred': '#d42e12',
    'orange': '#F37239',
    'darkyellow': '#F79320',
    'yellow': '#FFE438',
    'darkblue': '#003059',
    'blue': '#266DB6',
    'lightblue': '#A3D5ED',
    'darkgreen': '#006838',
    'green': '#0A8A42',
    'lightgreen': '#74B74A',
    'yellowgreen': '#BAD75F',
    'darkgray': '#1A1A1A',
    'gray': '#666666',
    'lightgray': '#CCCCCC',
    'khaki': '#ADA194',
    'darkkhaki': '#5E514D',
}

# Style sheet for NYGC poster: larger font, and the palette's dark gray is
# passed for both the spine color and the stylesheet's `black` argument.
mpl_stylesheet.banskt_presentation(dpi = 300, fontsize = 28, 
    splinecolor = nygc_colors['darkgray'], black = nygc_colors['darkgray'])
# Use the font family registered above for all text.
plt.rcParams['font.family'] = 'Futura Std'

Simulation Plots

Code
# Directory holding the output of the DSC run.
dsc_output = "/gpfs/commons/groups/knowles_lab/sbanerjee/low_rank_matrix_approximation_numerical_experiments/blockdiag"
# Name of the run = last path component of the output directory.
dsc_fname  = os.path.basename(os.path.normpath(dsc_output))
# Path of the DSC database file. NOTE(review): `db` is not referenced again in
# this part of the notebook -- confirm whether it is still needed.
db = os.path.join(dsc_output, dsc_fname + ".db")
# Pickled DataFrame of the collected results, named after the run.
dscoutpkl = os.path.join("/gpfs/commons/home/sbanerjee/work/npd/lrma-dsc/dsc/results", dsc_fname + "_dscout.pkl")
dscout    = pd.read_pickle(dscoutpkl)
# Bare expression: displays the DataFrame as the notebook cell output.
dscout
DSC simulate simulate.n simulate.p simulate.k simulate.h2 simulate.h2_shared_frac simulate.aq lowrankfit matfactor score.L_rmse score.F_rmse score.Z_rmse score.L_psnr score.F_psnr score.Z_psnr score.adj_MI
0 1 blockdiag 200.0 2000.0 10.0 0.2 0.5 0.6 identical truncated_svd 0.244982 0.414399 0.004264 28.429593 26.081163 23.105261 0.018100
1 2 blockdiag 200.0 2000.0 10.0 0.2 0.5 0.6 identical truncated_svd 0.264152 0.417129 0.004318 28.688465 25.536584 23.284510 0.015638
2 3 blockdiag 200.0 2000.0 10.0 0.2 0.5 0.6 identical truncated_svd 0.252526 0.419340 0.004285 28.831802 25.198688 23.760390 0.654857
3 4 blockdiag 200.0 2000.0 10.0 0.2 0.5 0.6 identical truncated_svd 0.285154 0.409911 0.004718 28.881899 25.741441 24.626305 0.561981
4 5 blockdiag 200.0 2000.0 10.0 0.2 0.5 0.6 identical truncated_svd 0.292962 0.420819 0.004262 27.246965 25.527629 23.594094 0.024386
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
895 8 blockdiag_aq 200.0 2000.0 10.0 0.2 0.5 0.8 identical factorgo 0.597619 0.720961 0.002017 20.790216 21.536737 30.477673 0.010297
896 9 blockdiag_aq 200.0 2000.0 10.0 0.2 0.5 0.4 identical factorgo 0.201241 0.400942 0.002174 30.378105 26.809769 29.938074 -0.009727
897 9 blockdiag_aq 200.0 2000.0 10.0 0.2 0.5 0.8 identical factorgo 0.509286 0.669955 0.002038 21.469111 20.631187 28.405981 0.003175
898 10 blockdiag_aq 200.0 2000.0 10.0 0.2 0.5 0.4 identical factorgo 0.270847 0.429577 0.002189 28.086705 25.500348 30.518931 0.022548
899 10 blockdiag_aq 200.0 2000.0 10.0 0.2 0.5 0.8 identical factorgo 0.626669 0.756680 0.002087 19.320562 20.277492 30.701310 -0.004610

900 rows × 17 columns

Code
def stratify_dfcol(df, colname, value):
    """Return the rows of `df` whose entry in column `colname` equals `value`."""
    is_match = df[colname] == value
    return df.loc[is_match]

def stratify_dfcols(df, condition_list):
    """
    Keep only the rows of `df` that satisfy every (column, value) equality
    in `condition_list`. The conditions are applied one after another.
    """
    selected = df
    for condition in condition_list:
        colname, value = condition
        selected = stratify_dfcol(selected, colname, value)
    return selected

def stratify_dfcols_in_list(df, colname, values):
    """Return the rows of `df` whose entry in column `colname` is one of `values`."""
    keep = df[colname].isin(values)
    return df.loc[keep]

# Methods to compare. Each entry maps a method key to the
# [lowrankfit, matfactor] pair used to select its rows in the DSC results.
methods = {
    "rpca" : ["rpca", "truncated_svd"],
    "nnm"  : ["nnm", "truncated_svd"],
    "nnm_sparse" : ["nnm_sparse", "truncated_svd"],
    "truncated_svd" : ["identical", "truncated_svd"],
    "factorgo" : ["identical", "factorgo"],
}
# Display name of each method (used for the legend labels).
method_labels = {
    "rpca" : "RobustPCA",
    "nnm" : "NNM",
    "nnm_sparse" : "NNM-Sparse",
    "truncated_svd": "tSVD",
    "factorgo": "FactorGO",
}

# Color of each method, taken from the NYGC palette.
method_colors = {
    "rpca" : nygc_colors['brown'], # NYGC brown
    "nnm" : nygc_colors['darkred'], # NYGC dark red
    "nnm_sparse" : nygc_colors['darkyellow'], # NYGC dark yellow
    "truncated_svd" : nygc_colors['lightblue'], # NYGC light blue
    "factorgo" : nygc_colors['lightgreen'], # NYGC light green
}

# method_colors = {
#     "rpca" : '#FF6800', # Vivid Orange
#     "nnm" : '#C10020', # Vivid Red
#     "nnm_sparse" : '#803E75', # Strong Purple
#     "truncated_svd" : '#535154', # gray
#     "factorgo" : '#A6BDD7', # Very Light Blue
# }

# Base parameters: the value each `simulate.<name>` column is held at
# whenever that parameter is not the one being varied.
simparams = {'p': 2000, 'k': 10, 'h2': 0.2, 'h2_shared_frac': 0.5, 'aq': 0.6}
# Scores to plot: key of the `score.<name>` column -> axis label for its panel.
score_names = {
    'L_rmse': r"$\| L - \hat{L}\|_F$",
    'F_rmse': r"$\| F - \hat{F}\|_F$",
    'Z_rmse': r"$\| LF^{T} - \hat{L}\hat{F}^{T}\|_F$",
    'adj_MI': "Adjusted Mutual Information Score",
}

def get_simulation_with_variable(df, var_name, var_values):
    """
    Select the simulations in which only `var_name` deviates from the base
    parameters.

    Every parameter in `simparams` other than `var_name` is pinned to its base
    value; `simulate.<var_name>` is restricted to the entries of `var_values`.
    """
    fixed_conditions = []
    for param, base_value in simparams.items():
        if param == var_name:
            continue
        fixed_conditions.append((f'simulate.{param}', base_value))
    baseline_rows = stratify_dfcols(df, fixed_conditions)
    return stratify_dfcols_in_list(baseline_rows, f'simulate.{var_name}', var_values)

def get_scores_from_dataframe(df, score_name, variable_name, variable_values,
        methods = methods):
    """
    Collect the values of `score.<score_name>` for every method and every
    value of the varied simulation parameter.

    Returns
    -------
        dict(<method key>, list) with one numpy array of scores per entry of
        `variable_values`, in the same order as `variable_values`.
    """
    variable_col = f'simulate.{variable_name}'
    score_col = f'score.{score_name}'
    simdf = get_simulation_with_variable(df, variable_name, variable_values)

    scores = dict()
    for method, mlist in methods.items():
        # Rows produced by this method's (lowrankfit, matfactor) combination.
        selectors = [('lowrankfit', mlist[0]), ('matfactor', mlist[1])]
        method_rows = stratify_dfcols(simdf, selectors)
        scores[method] = [
            stratify_dfcol(method_rows, variable_col, value)[score_col].to_numpy()
            for value in variable_values
        ]
    return scores

def random_jitter(xvals, yvals, d = 0.1):
    """
    For every (x, y-group) pair, return an array with one x-coordinate per
    point of the group: `x` plus standard normal noise scaled by `d`.
    """
    xjitter = list()
    for center, points in zip(xvals, yvals):
        noise = np.random.randn(len(points)) * d
        xjitter.append(center + noise)
    return xjitter

def boxplot_scores(variable, variable_values, 
        methods = methods, score_names = score_names,
        dscout = dscout, method_colors = method_colors,
        custom_font = 'Futura Std', xlabel = "No. of variants"):
    '''
    Draw one panel per score; each panel shows, for every value of the varied
    simulation parameter, a group of box plots (one box per method) overlaid
    with the jittered individual scores.

    Parameters
    ----------
        variable:
            name of the simulation parameter on the x-axis
            (without the 'simulate.' prefix).
        variable_values:
            values of `variable` to show; one group of boxes per value.
        methods:
            dict(<method key>, [<lowrankfit>, <matfactor>]) of methods to plot.
        score_names:
            dict(<score key>, <y-axis label>); one panel per entry.
        dscout:
            DataFrame with the DSC results.
        method_colors:
            dict(<method key>, <color>). Colors must be '#RRGGBB' strings because
            the translucent face color is built by appending an alpha byte.
        custom_font:
            currently unused; kept so that existing calls keep working.
        xlabel:
            label of the x-axis of every panel.

    Returns
    -------
        axs:
            list of axes, one per score.
        boxs:
            dict(<method key>, <return value of Axes.boxplot>) for the last
            panel drawn (earlier panels are overwritten).
    '''
    nmethods = len(methods)
    nvariables = len(variable_values)
    nscores = len(score_names)

    figh = 8
    figw = (nscores * figh) + (nscores - 1)
    fig = plt.figure(figsize = (figw, figh))
    axs = [fig.add_subplot(1, nscores, x+1) for x in range(nscores)]
    boxs = {x: None for x in methods.keys()}

    for i, (score_name, score_label) in enumerate(score_names.items()):
        # Forward `methods` so that the scores are collected for the same
        # methods that are iterated below (previously the global default was
        # always used, which breaks calls with a custom `methods` dict).
        scores = get_scores_from_dataframe(dscout, score_name, variable, variable_values,
                                           methods = methods)
        for j, mkey in enumerate(methods.keys()):
            boxcolor = method_colors[mkey]
            # '#RRGGBB' + '80' -> same color at roughly 50% opacity.
            boxface = f'#{boxcolor[1:]}80'
            medianprops = dict(linewidth=0, color = boxcolor)
            whiskerprops = dict(linewidth=2, color = boxcolor)
            boxprops = dict(linewidth=2, color = boxcolor, facecolor = boxface)
            flierprops = dict(marker='o', markerfacecolor=boxface, markersize=3, markeredgecolor = boxcolor)

            # Each group occupies nmethods slots followed by one empty slot;
            # method j sits at offset j inside its group.
            xpos = [x * (nmethods + 1) + j for x in range(nvariables)]
            boxs[mkey] = axs[i].boxplot(scores[mkey], positions = xpos,
                showcaps = False, showfliers = False,
                widths = 0.7, patch_artist = True, notch = False,
                flierprops = flierprops, boxprops = boxprops,
                medianprops = medianprops, whiskerprops = whiskerprops)

            # Outliers are hidden in the box plot; every point is drawn here instead.
            axs[i].scatter(random_jitter(xpos, scores[mkey]), scores[mkey], 
                           edgecolor = boxcolor, facecolor = boxface, linewidths = 1, 
                           s = 10)

        # One tick per group, centered under its boxes.
        xcenter = [x * (nmethods + 1) + (nmethods - 1) / 2 for x in range(nvariables)]
        axs[i].set_xticks(xcenter)
        axs[i].set_xticklabels(variable_values)
        axs[i].set_xlabel(xlabel)
        axs[i].set_ylabel(score_label)
        xlim_low = 0 - (nvariables - 1) / 2
        xlim_high = (nmethods + 1.5) * nvariables - 2.5
        axs[i].set_xlim( xlim_low, xlim_high )

    plt.tight_layout()
    return axs, boxs

# Vary the simulation parameter 'p' (the default x-axis label of
# boxplot_scores is "No. of variants").
variable = 'p'
variable_values = [500, 1000, 2000, 5000, 10000]

axs, boxs = boxplot_scores(variable, variable_values)

# Save before show(); both act on the figure created inside boxplot_scores.
plt.savefig('../plots/probgen24/numerical_experiments.png', bbox_inches='tight')
plt.show()

Code
# Standalone legend figure for the methods, saved as a separate image.
fig = plt.figure(figsize = (5, 4.5))
ax1 = fig.add_subplot(111)
# Reuse the first box artist of every method from the preceding
# boxplot_scores call as the legend handle.
handles = [boxs[mkey]["boxes"][0] for mkey in methods.keys()]
labels = [method_labels[mkey] for mkey in methods.keys()]
ax1.legend(handles = handles, labels = labels, loc = 'upper left', frameon = False, handlelength = 4, ncol = 1)

# Hide spines, ticks and tick labels so that only the legend remains visible.
for side, border in ax1.spines.items():
    border.set_visible(False)
ax1.tick_params(bottom = False, top = False, left = False, right = False,
                   labelbottom = False, labeltop = False, labelleft = False, labelright = False)


plt.savefig('../plots/probgen24/numerical_experiments_legend.png', bbox_inches='tight')
plt.show()

Code
# Same panels as above, now varying the simulation parameter 'k'.
variable = 'k'
variable_values = [2,5,10,15,20]

# Note: this rebinds `axs` and `boxs` from the previous figure.
axs, boxs = boxplot_scores(variable, variable_values, xlabel = "No. of hidden factors")

plt.savefig('../plots/probgen24/numerical_experiments_hidden_factors.png', bbox_inches='tight')
plt.show()

Code
# Load one pickled simulation data set from the DSC output directory.
# The code below reads its 'Z', 'Ltrue' and (in a later cell) 'Ctrue' entries.
simdata_filename = os.path.join(dsc_output, "blockdiag/blockdiag_7.pkl")
with open(simdata_filename, "rb") as fh:
    simdata = pickle.load(fh)

# Extend the import path before importing the plotting helpers.
# NOTE(review): presumably plot_functions lives in ../utils/ -- confirm.
sys.path.append("../utils/")
import plot_functions as mpy_plotfn

fig = plt.figure(figsize = (8, 8))
ax1 = fig.add_subplot(111)

# `nsample` and `p` are only needed by the commented-out alternative below.
nsample, p = simdata['Z'].shape
# Number of columns of Ltrue.
k = simdata['Ltrue'].shape[1]
# mpy_plotfn.plot_covariance_heatmap(ax1, simdata['Z'] * np.sqrt(p) / np.sqrt(nsample))
# Heatmap computed from Ltrue, rescaled by sqrt(k).
mpy_plotfn.plot_covariance_heatmap(ax1, simdata['Ltrue'] * np.sqrt(k), vmax = 0.2)

plt.tight_layout()
plt.savefig('../plots/probgen24/numerical_experiments_Ltrue_covariance_heatmap.png', bbox_inches='tight')
plt.show()

Code
# Square canvas with a single axis for the scatter plot drawn below.
fig = plt.figure(figsize = (6, 6))
ax1 = fig.add_subplot(111)

def make_plot_pca(ax, comp, labels, unique_labels, colorlist, bgcolor = "#F0F0F0"):
    '''
    Scatter the first two columns of `comp`, one color per label, on a
    frameless axis without ticks.

    Parameters
    ----------
        ax:
            matplotlib axis to draw on.
        comp:
            2D array; column 0 is used as x-coordinate and column 1 as y-coordinate.
        labels:
            label of every row of `comp`.
        unique_labels:
            labels to draw, in drawing order. Labels without any sample are
            drawn as an empty group.
        colorlist:
            colors, indexed by the position of the label in `unique_labels`.
            Raises IndexError if it is shorter than `unique_labels`.
        bgcolor:
            face color of the axis background (drawn with alpha 0.3).
    '''
    pc1 = comp[:, 0]
    pc2 = comp[:, 1]
    for i, label in enumerate(unique_labels):
        # Explicit integer dtype: an empty selection would otherwise become a
        # float array, which numpy rejects as an index.
        idx = np.array([k for k, x in enumerate(labels) if x == label], dtype = int)
        ax.scatter(pc1[idx], pc2[idx], s = 100, alpha = 0.7, label = label, color = colorlist[i])
    ax.tick_params(bottom = False, top = False, left = False, right = False,
        labelbottom = False, labeltop = False, labelleft = False, labelright = False)
    ax.patch.set_facecolor(bgcolor)
    ax.patch.set_alpha(0.3)
    for side, border in ax.spines.items():
        border.set_visible(False)

# One palette color per class label; at most three classes are supported here.
colorlist = [nygc_colors[x] for x in ['orange', 'blue', 'yellowgreen']]
# sorted(): iteration order of a set is not guaranteed (and changes between
# runs for string labels), so sort to keep the label -> color assignment
# reproducible.
make_plot_pca(ax1, simdata['Ltrue'], simdata['Ctrue'], sorted(set(simdata['Ctrue'])), colorlist)
ax1.set_xlabel("PC1")
ax1.set_ylabel("PC2")
plt.savefig('../plots/probgen24/numerical_experiments_Ltrue_principal_components.png', bbox_inches='tight')
plt.show()

Structure Plot from PGC

Code
# Keys of the methods whose precomputed low-rank matrices are loaded below.
mf_methods = ['ialm', 'nnm', 'nnm_sparse']
# method key -> object unpickled from ../data/lowrank_X_<method>.pkl
lowrank_X = dict()

def get_nan_centered_Z(df):
    '''
    Transpose `df`, center every column of the transposed matrix on its mean
    over the non-missing entries, and replace missing entries by zero.

    Parameters
    ----------
        df:
            DataFrame (or 2D array-like) of values; NaN marks a missing entry.

    Returns
    -------
        2D numpy array with shape (df.shape[1], df.shape[0]). Entries that were
        NaN in the input are 0.0. A column of the transposed matrix that is
        entirely NaN becomes all zeros (numpy emits a RuntimeWarning for it).
        Note that np.nan_to_num also maps infinities to large finite numbers.
    '''
    # Use the argument, not a module-level variable of the notebook.
    X_nan = np.array(df).T
    X_nan_cent = X_nan - np.nanmean(X_nan, axis = 0, keepdims = True)
    X_cent = np.nan_to_num(X_nan_cent, copy = True, nan = 0.0)
    return X_cent

# Pickled DataFrame of z-scores; its columns are later mapped to readable labels.
zscore_df = pd.read_pickle(f"../data/zscore_df.pkl")
# Transposed, mean-centered copy with missing entries set to zero.
X_cent = get_nan_centered_Z(zscore_df)

# Load the precomputed low-rank matrix of every method.
for method in mf_methods:
    with open (f"../data/lowrank_X_{method}.pkl", 'rb') as handle:
        lowrank_X[method] = pickle.load(handle)
Code
def compute_loadings_factors(X, k = None):
    '''
    Truncated singular value decomposition of `X`.

    Parameters
    ----------
        X:
            2D array to decompose.
        k:
            number of components to keep. If None, it is chosen from the
            fraction of variance explained (squared singular values divided by
            their sum): with `j` the index of the first component explaining
            less than 1e-4, k = j - 1. If no component falls below the
            threshold, all components are kept.

    Returns
    -------
        U_low:
            first k left singular vectors (columns).
        S_low:
            first k singular values.
        loadings:
            U_low scaled by the singular values, i.e. U_low @ diag(S_low).
        factors:
            first k right singular vectors (columns).
    '''
    U, S, Vt = np.linalg.svd(X, full_matrices = False)
    if k is None:
        S2 = np.square(S)
        explained_variance = S2 / np.sum(S2)
        below = np.where(explained_variance < 1e-4)[0]
        if below.size > 0:
            # NOTE(review): the "- 1" also drops the last component that is
            # still above the threshold -- kept as in the original, confirm intent.
            # Clamped at 0 so that a negative k cannot turn into
            # "all but the last component" through negative slicing.
            k = max(int(below[0]) - 1, 0)
        else:
            # Nothing is below the threshold (previously an IndexError).
            k = S.shape[0]
    U_low = U[:, :k]
    S_low = S[:k]
    factors = Vt[:k, :].T
    loadings = U_low @ np.diag(S_low)
    return U_low, S_low, loadings, factors

# Per-method results of compute_loadings_factors, keyed by method name.
loadings  = dict()
factors   = dict()
# Holds the second return value (the retained singular values) of every method.
eigenvals = dict()

# Centered input matrix: fixed number of components (k = 30).
_, eigenvals['tsvd'], loadings['tsvd'], factors['tsvd'] = compute_loadings_factors(X_cent, k = 30)
# Precomputed low-rank matrices: number of components chosen automatically.
for m in mf_methods:
     _, eigenvals[m], loadings[m], factors[m] = compute_loadings_factors(lowrank_X[m])
Code
# def get_cos2_scores(pcomps):
#     ntrait, npcomp = pcomps.shape
#     x = np.zeros((ntrait, npcomp))
#     for i in range(ntrait):
#         cos2_trait = np.array([np.square(pcomps[i, pcidx]) for pcidx in range(npcomp)])
#         x[i, :] = cos2_trait / np.sum(cos2_trait)
#     return x

def compute_cos(factor):
    """
    Square every entry of the 2D array `factor` and divide it by the sum of
    squares of its row, so that every row of the result sums to one.
    """
    squared = factor ** 2
    row_totals = np.sum(squared, axis = 1)
    return squared / row_totals.reshape((factor.shape[0], 1))

def stacked_barplot(ax, data, xlabels, colors, bar_width = 1.0, alpha = 1.0, showxlabels = False):
    '''
    Draw a stacked bar chart on `ax`: one bar per x-label, one stacked segment
    per item of `data`, on an axis without spines.

    Parameters
    ----------
        data:
            dict() of scores.
            - <key> : item of the stack (e.g. trait or component), used as legend label
            - <value> : scores of the item, one per x-label. All values must have the same length.
        xlabels:
            label of each bar. Must have the same length as every data <value>.
        colors:
            dict(<key>, <color>) with the color of each data <key>.
        bar_width:
            width of the bars.
        alpha:
            opacity of the bars.
        showxlabels:
            if true, show ticks and vertical tick labels on the bottom axis;
            otherwise all ticks and tick labels are hidden.
    '''
    nbars = len(xlabels)
    xpos = np.arange(nbars)
    # Running top of the stack; each item starts where the previous one ended.
    baseline = np.zeros(nbars)

    for item, heights in data.items():
        ax.bar(xpos, heights, bar_width, label = item, bottom = baseline,
               color = colors[item], alpha = alpha)
        baseline += heights

    if showxlabels:
        ax.set_xticks(xpos)
        ax.set_xticklabels(xlabels, rotation=90, ha='center')
    # Only the bottom ticks / labels are ever shown, and only on request.
    show_bottom = True if showxlabels else False
    ax.tick_params(bottom = show_bottom, top = False, left = False, right = False,
               labelbottom = show_bottom, labeltop = False, labelleft = False, labelright = False)

    for border in ax.spines.values():
        border.set_visible(False)


def structure_plot(ax, pcomps, trait_labels, comp_colors, npcomp, showxlabels = False):
    '''
    Stacked bar chart of the row-normalized squared entries of `pcomps`:
    one bar per row (trait), one segment for each of the first `npcomp` columns.
    Segments are keyed "1", "2", ... and colored through `comp_colors`.
    '''
    cos2_scores = compute_cos(pcomps)
    cos2_plot_data = dict()
    for comp_idx in range(npcomp):
        cos2_plot_data[str(comp_idx + 1)] = cos2_scores[:, comp_idx]
    stacked_barplot(ax, cos2_plot_data, trait_labels, comp_colors,
                    alpha = 0.8, showxlabels = showxlabels)
Code
phenotype_dict_readable = {
    'AD_sumstats_Jansenetal_2019sept.txt.gz' : 'Alzheimers Disease - Jansen 2019',
    'anxiety.meta.full.cc.txt.gz' : 'Anxiety',
    'anxiety.meta.full.fs.txt.gz' : 'Anxiety',
    'CNCR_Insomnia_all' : 'Insomnia',
    'daner_adhd_meta_filtered_NA_iPSYCH23_PGC11_sigPCs_woSEX_2ell6sd_EUR_Neff_70.txt.gz' : 
        'PGC - ADHD 2019',
    'daner_PGC_BIP32b_mds7a_0416a.txt.gz' : 'PGC - Bipolar Disorder 2019',
    'daner_PGC_BIP32b_mds7a_mds7a_BD1.0416a_INFO6_A5_NTOT.txt.gz' : 'PGC - Bipolar Disorder I 2019',
    'daner_PGC_BIP32b_mds7a_mds7a_BD2.0416a_INFO6_A5_NTOT.txt.gz' : 'PGC - Bipolar Disorder II 2019',
    'ENIGMA_Intracraneal_Volume' : 'ENIGMA - Intracraneal Volume',
    'ieu-a-1000' : 'Neuroticism',
    'ieu-a-1041' : 'Intracranial volume',
    'ieu-a-1042' : 'Nucleus accumbens volume',
    'ieu-a-1043' : 'Amygdala volume',
    'ieu-a-1044' : 'Caudate volume',
    'ieu-a-1045' : 'Hippocampus volume',
    'ieu-a-1046' : 'Pallidum volume',
    'ieu-a-1047' : 'Putamen volume',
    'ieu-a-1048' : 'Thalamus volume',
    'ieu-a-1085' : 'Amyotrophic lateral sclerosis',
    'ieu-a-118' : 'Neuroticism',
    'ieu-a-1183' : 'ADHD',
    'ieu-a-1184' : 'Autism Spectrum Disorder',
    'ieu-a-1185' : 'Autism Spectrum Disorder',
    'ieu-a-1186' : 'Anorexia Nervosa',
    'ieu-a-1188' : 'Major Depressive Disorder',
    'ieu-a-1189' : 'Obsessive Compulsive Disorder',
    'ieu-a-22' : 'Schizophrenia',
    'ieu-a-297' : 'Alzheimers disease',
    'ieu-a-806' : 'Autism',
    'ieu-a-990' : 'Bulimia nervosa',
    'ieu-b-10' : 'Focal epilepsy',
    'ieu-b-11' : 'Focal epilepsy',
    'ieu-b-12' : 'Juvenile absence epilepsy',
    'ieu-b-13' : 'Childhood absence epilepsy',
    'ieu-b-14' : 'Focal epilepsy',
    'ieu-b-15' : 'Focal epilepsy',
    'ieu-b-16' : 'Generalized epilepsy',
    'ieu-b-17' : 'Juvenile myoclonic epilepsy',
    'ieu-b-18' : 'Multiple sclerosis',
    'ieu-b-2' : 'Alzheimers disease',
    'ieu-b-41' : 'Bipolar Disorder',
    'ieu-b-42' : 'Schizophrenia',
    'ieu-b-7' : 'Parkinsons',
    'ieu-b-8' : 'Epilepsy',
    'ieu-b-9' : 'Generalized epilepsy',
    'IGAP_Alzheimer' : 'Alzheimers Disease - IGAP 2018',
    'iPSYCH-PGC_ASD_Nov2017.txt.gz' : 'iPSYCH-PGC - ASD 2019',
    'Jones_et_al_2016_Chronotype' : 'Chronotype - Jones 2016',
    'Jones_et_al_2016_SleepDuration' : 'Sleep Duration - Jones 2016',
    'MDD_MHQ_BIP_METACARPA_INFO6_A5_NTOT_no23andMe_noUKBB.txt.gz' : 'PGC MDD + BD w/o 23andMe, UKB',
    'MDD_MHQ_METACARPA_INFO6_A5_NTOT_no23andMe_noUKBB.txt.gz' : 'PGC MDD w/o 23andMe, UKB',
    'MHQ_Depression_WG_MAF1_INFO4_HRC_Only_Filtered_Dups_FOR_METACARPA_INFO6_A5_NTOT.txt.gz' : 'PGC - Depression',
    'MHQ_Recurrent_Depression_WG_MAF1_INFO4_HRC_Only_Filtered_Dups_FOR_METACARPA_INFO6_A5_NTOT.txt.gz' : 'PGC - Recurrent Depression',
    'MHQ_Single_Depression_WG_MAF1_INFO4_HRC_Only_Filtered_Dups_FOR_METACARPA_INFO6_A5_NTOT.txt.gz' : 'PGC - Single Depression',
    'MHQ_Subthreshold_WG_MAF1_INFO4_HRC_Only_Filtered_Dups_FOR_METACARPA_INFO6_A5_NTOT.txt.gz' : 'PGC - Subthreshold MDD',
    'ocd_aug2017.txt.gz' : 'PGC - OCD 2017',
    'PGC_ADHD_EUR_2017' : 'PGC - ADHD EUR 2017',
    'PGC_ASD_2017_CEU' : 'PGC - ASD CEU 2017',
    'pgc-bip2021-all.vcf.txt.gz' : 'PGC - Bipolar Disorder 2021',
    'pgc-bip2021-BDI.vcf.txt.gz' : 'PGC - Bipolar Disorder I 2021',
    'pgc-bip2021-BDII.vcf.txt.gz' : 'PGC - Bipolar Disorder II 2021',
    'pgc.scz2' : 'PGC - Schizophrenia (2014)',
    'PGC3_SCZ_wave3_public.v2.txt.gz' : 'PGC - Schizophrenia 2022',
    'pgcAN2.2019-07.vcf.txt.gz' : 'PGC - Anorexia Nervosa 2019',
    'pts_all_freeze2_overall.txt.gz' : 'PGC - PTSD 2019',
    'SSGAC_Depressive_Symptoms' : 'SSGAC - Depressive Symptoms',
    'SSGAC_Education_Years_Pooled' : 'SSGAC - Education Years Pooled',
    'UKB_1160_Sleep_duration' : 'UKB_1160_Sleep_duration',
    'UKB_1180_Morning_or_evening_person_chronotype' : 'UKB - Chronotype',
    'UKB_1200_Sleeplessness_or_insomnia' : 'UKB - Sleeplessness/insomnia',
    'UKB_20002_1243_self_reported_psychological_or_psychiatric_problem' : 'UKB - Self-reported Psychiatric',
    'UKB_20002_1262_self_reported_parkinsons_disease' : 'UKB - Self-reported Parkinsons',
    'UKB_20002_1265_self_reported_migraine' : 'UKB - Self-reported Migraine',
    'UKB_20002_1289_self_reported_schizophrenia' : 'UKB - Self-reported Schizophrenia',
    'UKB_20002_1616_self_reported_insomnia' : 'UKB - Self-reported Insomnia',
    'UKB_20016_Fluid_intelligence_score' : 'UKB - Fluid intelligence score',
    'UKB_20127_Neuroticism_score' : 'UKB - Neuroticism score',
    'UKB_G40_Diagnoses_main_ICD10_G40_Epilepsy' : 'UKB - G40 Epilepsy',
    'UKB_G43_Diagnoses_main_ICD10_G43_Migraine' : 'UKB - G43 Migraine',
    'ieu-b-5070' : 'Schizophrenia',
    'GPC-NEO-NEUROTICISM' : 'GPC - NEO Neuroticism',
    'ieu-a-1009' : 'Subjective well being',
    'ieu-a-1018' : 'Subjective well being',
    'ieu-a-1019' : 'Migraine in bipolar disorder',
    'ieu-a-1029' : 'Internalizing problems',
    'ieu-a-1061' : 'G speed factor',
    'ieu-a-1062' : 'Symbol search',
    'ieu-a-1063' : '8-choice reaction time',
    'ieu-a-1064' : '2-choice reaction time',
    'ieu-a-1065' : 'Inspection time',
    'ieu-a-1066' : 'Simple reaction time',
    'ieu-a-1067' : 'Digit symbol',
    'ieu-a-1068' : '4-choice reaction time',
    'ieu-a-45' : 'Anorexia nervosa',
    'ieu-a-298' : 'Alzheimers Disease',
    'ieu-a-808' : 'Bipolar Disorder',
    'ieu-a-810' : 'Schizophrenia',
    'ieu-a-812' : 'Parkinsons',
    'ieu-a-818' : 'Parkinsons',
    'ieu-a-824' : 'Alzheimers Disease',
    'ieu-b-43' : 'frontotemporal dementia',
    'ILAE_Genetic_generalised_epilepsy' : 'ILAE_Genetic_generalised_epilepsy'
}

# Readable label of every column of zscore_df, in column order.
# Raises KeyError for a column name missing from phenotype_dict_readable.
labels_readable = [phenotype_dict_readable[x] for x in zscore_df.columns]
# Bare expression: displays the list as the notebook cell output.
labels_readable
['Alzheimers Disease - Jansen 2019',
 'Insomnia',
 'GPC - NEO Neuroticism',
 'Alzheimers Disease - IGAP 2018',
 'Chronotype - Jones 2016',
 'Sleep Duration - Jones 2016',
 'PGC MDD + BD w/o 23andMe, UKB',
 'PGC MDD w/o 23andMe, UKB',
 'PGC - Depression',
 'PGC - Recurrent Depression',
 'PGC - Single Depression',
 'PGC - Subthreshold MDD',
 'PGC - Schizophrenia 2022',
 'PGC - ADHD EUR 2017',
 'PGC - ASD CEU 2017',
 'SSGAC - Depressive Symptoms',
 'SSGAC - Education Years Pooled',
 'UKB_1160_Sleep_duration',
 'UKB - Chronotype',
 'UKB - Sleeplessness/insomnia',
 'UKB - Self-reported Psychiatric',
 'UKB - Self-reported Parkinsons',
 'UKB - Self-reported Migraine',
 'UKB - Self-reported Schizophrenia',
 'UKB - Self-reported Insomnia',
 'UKB - Fluid intelligence score',
 'UKB - Neuroticism score',
 'UKB - G40 Epilepsy',
 'UKB - G43 Migraine',
 'Anxiety',
 'Anxiety',
 'PGC - Bipolar Disorder 2019',
 'PGC - Bipolar Disorder I 2019',
 'PGC - Bipolar Disorder II 2019',
 'PGC - ADHD 2019',
 'iPSYCH-PGC - ASD 2019',
 'Neuroticism',
 'Internalizing problems',
 'G speed factor',
 '8-choice reaction time',
 '2-choice reaction time',
 'Inspection time',
 'Simple reaction time',
 'Digit symbol',
 '4-choice reaction time',
 'Amyotrophic lateral sclerosis',
 'Neuroticism',
 'Alzheimers disease',
 'Bulimia nervosa',
 'Focal epilepsy',
 'Focal epilepsy',
 'Juvenile absence epilepsy',
 'Childhood absence epilepsy',
 'Focal epilepsy',
 'Focal epilepsy',
 'Generalized epilepsy',
 'Juvenile myoclonic epilepsy',
 'Multiple sclerosis',
 'Alzheimers disease',
 'Parkinsons',
 'Epilepsy',
 'Generalized epilepsy',
 'PGC - OCD 2017',
 'PGC - Bipolar Disorder I 2021',
 'PGC - Bipolar Disorder II 2021',
 'PGC - Bipolar Disorder 2021',
 'PGC - Schizophrenia (2014)',
 'PGC - Anorexia Nervosa 2019',
 'PGC - PTSD 2019']
Code
from matplotlib.gridspec import GridSpec

# Methods to show (one panel each, top to bottom) and their panel titles.
plot_methods = {
    'tsvd' : 'tSVD',
    'ialm' : 'RobustPCA',
    'nnm'  : 'NNM',
    'nnm_sparse' : 'NNM-Sparse',
}
# Number of leading components stacked in every bar.
npcomp = 10

# trait_indices_sorted = list()
# for idx in trait_indices:
#     trait_indices_sorted += list(idx)

# trait_labels_sorted = [labels_readable[i] for i in trait_indices_sorted]

# Component key ("1", "2", ...) -> color, matching the keys built by structure_plot.
pcomp_colors  = {f"{i+1}": color for i, color in enumerate(mpl_stylesheet.kelly_colors() + mpl_stylesheet.banskt_colors())}

fig = plt.figure(figsize = (32, 32))
# Row 0 is a short row that only hosts the shared legend; one full-height row per method follows.
gs = GridSpec(nrows = len(plot_methods) + 1, ncols=1, figure=fig, height_ratios=[0.3] + [1 for i in plot_methods])
ax = [None for i in range(len(plot_methods) + 1)]
ax[0] = fig.add_subplot(gs[0, 0])

for i, m in enumerate(plot_methods.keys()):
    iplot = i + 1
    # Trait names are shown only below the last panel.
    showxlabels = (iplot == len(plot_methods))
    ax[iplot] = fig.add_subplot(gs[iplot, 0])
    structure_plot(ax[iplot], loadings[m], labels_readable, pcomp_colors, npcomp, showxlabels = showxlabels)
    ax[iplot].set_title(plot_methods[m], fontsize = 32)

# Take the legend entries from the last panel. The loop index `i` must not be
# used here: ax[i] is the panel before the last one, and it is the blank
# legend axis itself when only a single method is plotted.
plt_handles, plt_labels = ax[-1].get_legend_handles_labels()
ax[0].legend(plt_handles, plt_labels, 
             loc = 'lower center', bbox_to_anchor=(0.5, 0), title = "Principal Components",
             frameon = False, handlelength = 8, ncol = 5)
# Hide spines, ticks and tick labels of the legend row.
for side, border in ax[0].spines.items():
    border.set_visible(False)
ax[0].tick_params(bottom = False, top = False, left = False, right = False,
                   labelbottom = False, labeltop = False, labelleft = False, labelright = False)

#legend(bbox_to_anchor=(1.04, 1), loc="upper left")

plt.tight_layout(h_pad = 2.0)

plt.savefig('../plots/probgen24/pgc_structure_plot.png', bbox_inches='tight')
plt.show()