Initial commit: digital-patients pipeline (clean, no large files)

Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
This commit is contained in:
2026-03-26 15:15:23 +01:00
commit 9e6a16c19b
45 changed files with 7207 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
### Create reference file for borzoi-immun cibersortx step
import pandas as pd
# Inputs: the MANE table (used for its 'symbol' and 'Ensembl_Gene' columns)
# and the LM22 source gene expression profile (keyed by 'genesinput').
# Both are tab-separated and are resolved relative to the working directory.
MANE_data = pd.read_csv("MANE.GRCh38.v1.3.update.tsv", sep='\t')
LM22_sourceGEP = pd.read_csv("LM22_sourceGEP.txt", sep='\t')

# Keep only the text before the first '.' of each Ensembl gene ID
# (presumably this strips the version suffix -- confirm against the MANE file).
MANE_data['ENSG'] = [
    gene_id.partition('.')[0] for gene_id in MANE_data['Ensembl_Gene']
]

# Two-column lookup table: gene symbol -> ENSG identifier.
mane_map = MANE_data[['symbol', 'ENSG']]

# Inner join: only LM22 rows whose 'genesinput' matches a MANE 'symbol' survive.
LM22_sourceGEP_ensg = pd.merge(
    mane_map,
    LM22_sourceGEP,
    how='inner',
    left_on='symbol',
    right_on='genesinput',
)
# Discard rows containing any missing value, then keep the first row seen
# for each symbol.
LM22_sourceGEP_ensg = LM22_sourceGEP_ensg.dropna()
LM22_sourceGEP_ensg = LM22_sourceGEP_ensg.drop_duplicates(subset=['symbol'])

# The symbol columns were only needed for the join; 'ENSG' becomes the
# leading column of the output table.
LM22_sourceGEP_ensg = LM22_sourceGEP_ensg.drop(columns=['symbol', 'genesinput'])
LM22_sourceGEP_ensg.to_csv('LM22_sourceGEP_ensg.txt', sep='\t', index=False)