Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
12 lines
624 B
Python
12 lines
624 B
Python
### Create the reference file for the borzoi-immun CIBERSORTx step
|
|
import pandas as pd
|
|
|
|
# Load the two tab-separated inputs from the current working directory.
MANE_data = pd.read_csv("MANE.GRCh38.v1.3.update.tsv", sep="\t")
LM22_sourceGEP = pd.read_csv("LM22_sourceGEP.txt", sep="\t")

# Keep only the text before the first '.' of each Ensembl gene ID
# (presumably this strips the version suffix -- confirm against the MANE file).
MANE_data["ENSG"] = MANE_data["Ensembl_Gene"].map(
    lambda gene_id: gene_id.split(".")[0]
)

# Two-column lookup table: gene symbol -> truncated Ensembl gene ID.
mane_map = MANE_data.loc[:, ["symbol", "ENSG"]]

# Attach the ENSG column to the LM22 rows by matching MANE `symbol` against
# LM22 `genesinput` (default inner join), discard rows containing any missing
# value, keep the first row per symbol, then drop both symbol columns so that
# ENSG is the remaining gene identifier column.
LM22_sourceGEP_ensg = (
    pd.merge(mane_map, LM22_sourceGEP, left_on="symbol", right_on="genesinput")
    .dropna()
    .drop_duplicates(subset=["symbol"])
    .drop(columns=["symbol", "genesinput"])
)

# Write the result as a tab-separated file without the DataFrame index.
LM22_sourceGEP_ensg.to_csv("LM22_sourceGEP_ensg.txt", sep="\t", index=False)
|