Compare trans-eQTL p-values with and without non-EUR samples
Path to all data files
#collapse-hide
teqtl_all_dir = '/usr/users/sbanerj/trans-eQTL/gtex_v8_tejaas_trans_eQTLs_20200309/protein_coding_lncRNA_fixed_gamma_alt-haa-spl-pan-wb_combined_cut5e-8'
teqtl_eur_dir = '/cbscratch/sbanerj/trans-eqtl/dev-pipeline/gtex_v8_EUR_202006'
teqtl_amx_dir = '/cbscratch/sbanerj/trans-eqtl/dev-pipeline/gtex_v8_AMX_202006'
tissue_file = 'gtex/tissue_table.txt'
json_file = 'gtex/gtex_v8_metadata.json'
nsample_file = "gtex/tissue_nsamples.txt"
Python libraries
#collapse-hide
import os
import numpy as np
import json
import collections
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from utils import mpl_stylesheet
mpl_stylesheet.banskt_presentation(fontfamily = 'latex-clearsans', fontsize = 18, colors = 'banskt', dpi = 72)
from utils import utils
Read metadata for our trans-eQTLs
#collapse-hide
tshorts, tfulls, tstrings = utils.read_tissues(tissue_file)
with open(json_file) as instream:
gtex_meta = json.load(instream)
tissue_colors = dict()
tissue_names = dict()
tissue_nsamples = dict()
for tshort, tfull, tstring in zip(tshorts, tfulls, tstrings):
if tshort in tshorts:
tissue_names[tshort] = tstring
tissue_colors[tshort] = "#" + gtex_meta[tfull]["colorHex"]
tissue_nsamples = dict()
with open(nsample_file, 'r') as instream:
for line in instream:
tshort = line.strip().split()[0].strip()
tissue_nsamples[tshort] = int(line.strip().split()[1].strip())
brain_tissues = ['bam', 'ban', 'bca', 'bceh', 'bce', 'bco', 'bfr', 'bhi', 'bhy', 'bnu', 'bpu', 'bsp', 'bsu']
altsb_tissues = ['haa', 'pan', 'spl', 'wb']
Define some functions to read results
#collapse-hide
def read_transeqtls(filename):
idcol = 0
chrcol = 1
bpcol = 2
pvcol = 7
rsidlist = list()
chrmlist = list()
bplist = list()
pvallist = list()
with open(filename, 'r') as instream:
next(instream)
for line in instream:
linesplit = line.strip().split()
rsid = linesplit[idcol]
chrm = int(linesplit[chrcol])
bppos = int(linesplit[bpcol])
pval = float(linesplit[pvcol])
rsidlist.append(rsid)
chrmlist.append(chrm)
bplist.append(bppos)
pvallist.append(pval)
return rsidlist, chrmlist, pvallist, bplist
Read the results for trans-eQTLs
#collapse-hide
whichtissues = ['as', 'ms', 'sse']
RES_FIELDS = ['vall', 'veur']
class CompareRes(collections.namedtuple('_CompareRes', RES_FIELDS)):
__slots__ = ()
eurres = dict()
amxres = dict()
for tshort in whichtissues:
eurres[tshort] = list()
amxres[tshort] = list()
teqtl_all_file = os.path.join(teqtl_all_dir, tshort, 'trans_eqtls_ldpruned.txt')
teqtl_eur_file = os.path.join(teqtl_eur_dir, tshort,
'tejaas/raw_std/permnull_sb0.1_knn30',
'all_lead_transeqtl_rr.txt'
)
teqtl_amx_file = os.path.join(teqtl_amx_dir, tshort,
'tejaas/raw_std/permnull_sb0.1_knn30',
'all_lead_transeqtl_rr.txt'
)
allvarids, allchrms, allpvals, allbppos = read_transeqtls(teqtl_all_file)
eurvarids, eurchrms, eurpvals, eurbppos = read_transeqtls(teqtl_eur_file)
amxvarids, amxchrms, amxpvals, amxbppos = read_transeqtls(teqtl_amx_file)
for i, varid in enumerate(allvarids):
if varid in eurvarids:
j = eurvarids.index(varid)
eurres[tshort].append(CompareRes(vall = allpvals[i], veur = eurpvals[j]))
if varid in amxvarids:
j = amxvarids.index(varid)
amxres[tshort].append(CompareRes(vall = allpvals[i], veur = amxpvals[j]))
#collapse-hide
fig = plt.figure(figsize = (18, 12))
ax = list([0, 0, 0, 0, 0, 0])
ax[0] = fig.add_subplot(231)
ax[1] = fig.add_subplot(232)
ax[2] = fig.add_subplot(233)
ax[3] = fig.add_subplot(234)
ax[4] = fig.add_subplot(235)
ax[5] = fig.add_subplot(236)
for i, tshort in enumerate(whichtissues):
xvals = [-np.log10(x.vall) for x in eurres[tshort]]
yvals = [-np.log10(x.veur) for x in eurres[tshort]]
ax[i].scatter(xvals, yvals, alpha = 0.4, facecolor = 'tomato', edgecolor = 'maroon')
ax[i].set_xlabel('All samples')
ax[i].set_ylabel('Only EUR samples')
boxprops = dict(boxstyle='square, pad=0.6', facecolor='white',
edgecolor=tissue_colors[tshort], linewidth = 4)
ax[i].text(0.93, 0.93, tissue_names[tshort],
transform=ax[i].transAxes, verticalalignment='top', horizontalalignment = 'right', bbox=boxprops)
xvals = [-np.log10(x.vall) for x in amxres[tshort]]
yvals = [-np.log10(x.veur) for x in amxres[tshort]]
ax[i+3].scatter(xvals, yvals, alpha = 0.4, facecolor = 'tomato', edgecolor = 'maroon')
ax[i+3].set_xlabel('All samples')
ax[i+3].set_ylabel('Remove fraction of samples')
boxprops = dict(boxstyle='square, pad=0.6', facecolor='white',
edgecolor=tissue_colors[tshort], linewidth = 4)
ax[i+3].text(0.93, 0.93, tissue_names[tshort],
transform=ax[i+3].transAxes, verticalalignment='top', horizontalalignment = 'right', bbox=boxprops)
ax[i].xaxis.set_major_locator(MaxNLocator(integer=True))
ax[i].yaxis.set_major_locator(MaxNLocator(integer=True))
ax[i+3].xaxis.set_major_locator(MaxNLocator(integer=True))
ax[i+3].yaxis.set_major_locator(MaxNLocator(integer=True))
plt.tight_layout()
plt.show()
Load $p$ values for all samples
, EUR samples
and simulations
#collapse-hide
tshort = 'sse'
teqtl_eur_file = os.path.join(teqtl_eur_dir, tshort,
'tejaas/raw_std/permnull_sb0.1_knn30/chr21',
'rr.txt')
teqtl_all_file = os.path.join('/scratch/sbanerj/trans-eqtl/dev-pipeline/gtex_v8_202003', tshort,
'tejaas/raw_std/permnull_sb0.1_knn30/chr21',
'rr.txt'
)
teqtl_sim_file = os.path.join('/cbscratch/sbanerj/trans-eqtl/simulation',
'12639_450_10_800_30_100_100_0.01_0.5_0.0_1.0_0.6_4.0_0.1_20_0.02/sim001',
'tejaas/permnull_sb0.2/raw_knn30/peer0',
'rr.txt'
)
eurvarids, eurchrms, eurpvals, eurbppos = read_transeqtls(teqtl_eur_file)
allvarids, allchrms, allpvals, allbppos = read_transeqtls(teqtl_all_file)
simvarids, simchrms, simpvals, simbppos = read_transeqtls(teqtl_sim_file)
- Look at the $p$ value distribution without non-EUR samples, with all samples and simulations
#collapse-hide
fig = plt.figure(figsize = (18,6))
ax1 = fig.add_subplot(131)
ax2 = fig.add_subplot(132)
ax3 = fig.add_subplot(133)
ax1.hist(eurpvals, alpha = 0.7, bins = 50, lw = 0, density = True)
ax2.hist(allpvals, alpha = 0.7, bins = 50, lw = 0, density = True)
ax3.hist(simpvals, alpha = 0.7, bins = 50, lw = 0, density = True)
ax1.text(0.93, 0.93, 'Only EUR',
transform=ax1.transAxes, verticalalignment='top', horizontalalignment = 'right')
ax2.text(0.93, 0.93, 'All samples',
transform=ax2.transAxes, verticalalignment='top', horizontalalignment = 'right')
ax3.text(0.93, 0.93, 'Simulation',
transform=ax3.transAxes, verticalalignment='top', horizontalalignment = 'right')
for ax in [ax1, ax2, ax3]:
ax.set_ylim([0, 4.5])
plt.tight_layout()
plt.show()