# Small qualitative palette of eight hex colour codes. Elsewhere in this file
# it is zipped against the unique trait types to assign one colour to each.
hex_colors = [
    "#2D69C4",  # blue
    "#CC2529",  # red
    "#93AA00",  # Vivid Yellowish Green
    "#535154",  # gray
    "#6B4C9A",  # purple
    "#FFB300",  # Vivid Yellow
    "#922428",  # dark brown
    "#948B3D",  # olive
]
# Larger palette of hex colour codes for plots that need many distinct colours.
# NOTE(review): despite the "_40" suffix this list holds 41 entries, and the
# first one is an 8-digit code (read as #RRGGBBAA by parsers that accept an
# alpha channel) rather than a 6-digit one. It looks like a stray extra entry,
# but removing it would shift every positional index -- confirm before changing.
hex_colors_40 = [
    "#3c3c3c0d",
    "#084609", "#ff4ff4", "#01d94a", "#b700ce", "#91c900",
    "#5f42ed", "#5fa200", "#8d6dff", "#c9f06b", "#0132a7",
    "#ffbb1f", "#0080ed", "#f56600", "#3afaf5", "#c10001",
    "#01e698", "#a20096", "#00e2c1", "#ff5ac8", "#008143",
    "#cd0057", "#4aeeff", "#8c001a", "#b5f2a2", "#5d177d",
    "#a99900", "#e299ff", "#5b6b00", "#96aeff", "#a46f00",
    "#007acb", "#ff9757", "#00a8e0", "#ff708e", "#baefc7",
    "#622b25", "#c8c797", "#885162", "#ffb7a5", "#ffa3c3",
]
# Distinct trait types, in the order pandas' unique() reports them.
trait_type_unique = trait_df['trait_type'].unique().tolist()

# Map each trait type to a colour from `hex_colors`, pairing them positionally.
# zip() stops at the shorter input, so no explicit slice of the palette is needed.
# NOTE(review): if there are more trait types than colours in `hex_colors`,
# the surplus trait types silently get no entry in this dict.
trait_type_dict = dict(zip(trait_type_unique, hex_colors))
# Method identifiers for which cluster files are loaded below. Each identifier
# is also used verbatim as a relative sub-path under `llm_outdir`.
llm_methods = [
    "ls-da3m0ns/bge_large_medical",
    "medicalai/ClinicalBERT",
    "emilyalsentzer/Bio_ClinicalBERT",
]

# Clustering types; one "<ctype>_clusters.pkl" file is read per (method, ctype).
llm_ctypes = ["community", "kmeans"]

# Root directory that holds the per-method cluster pickles.
llm_outdir = "/gpfs/commons/home/sbanerjee/work/npd/PanUKB/results/llm"

# Nested table llm_clusters[method][ctype]; every slot starts as None and is
# replaced by the unpickled object in the loop below.
llm_clusters = {method: dict.fromkeys(llm_ctypes) for method in llm_methods}

for method in llm_methods:
    method_clusters = llm_clusters[method]
    for ctype in llm_ctypes:
        m_filename = os.path.join(llm_outdir, f"{method}/{ctype}_clusters.pkl")
        # pickle.load executes arbitrary code from the file; these are assumed
        # to be locally generated result files -- do not point this at
        # untrusted data.
        with open(m_filename, "rb") as fh:
            method_clusters[ctype] = pickle.load(fh)
def get_llm_cluster_index(selectidx, method, ctype):
    """Build a per-position cluster label array for one (method, ctype) pair.

    Parameters
    ----------
    selectidx
        Array-like object with a ``shape`` attribute; only ``shape[0]`` is
        used, to size the output. (Presumably the array of selected item
        indices -- confirm against the caller.)
    method
        Key into the module-level ``llm_clusters`` table.
    ctype
        Key into ``llm_clusters[method]``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``selectidx.shape[0]``. Position ``idx`` holds
        ``i`` when ``idx`` is a member of the ``i``-th element of
        ``llm_clusters[method][ctype]``, and ``-1`` when it is a member of
        none. If a position occurs in several elements, the last one wins.
    """
    n_items = selectidx.shape[0]
    # -1 marks positions that are not assigned to any cluster.
    labels = np.full(n_items, -1)
    components = llm_clusters[method][ctype]
    for label, members in enumerate(components):
        for member in members:
            labels[member] = label
    return labels