Initial commit: digital-patients pipeline (clean, no large files)

Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
This commit is contained in:
2026-03-26 15:15:23 +01:00
commit 9e6a16c19b
45 changed files with 7207 additions and 0 deletions

32
app_filter/vep.ini Normal file
View File

@@ -0,0 +1,32 @@
# Core run settings: annotate from the local cache under /data against GRCh38
# without contacting the Ensembl databases.
cache 1
dir_cache /data
assembly 'GRCh38'
offline 1
# Replace an existing output file instead of aborting.
force_overwrite 1
# COSMIC custom annotation: exact-match lookup in the VCF below, exposing its
# LEGACY_ID and CNT INFO fields under the COSMIC short name ('%' separates the
# field names).
custom file=/opt/vep/.vep/Cosmic_GenomeScreensMutant_v103_GRCh38.vcf.gz,short_name=COSMIC,format=vcf,type=exact,fields=LEGACY_ID%CNT
# Functional impact predictions
# "b" asks for both the prediction term and the numeric score.
# Notes live on their own lines: VEP skips only config lines that START with
# '#', so text trailing a value is not a documented comment form.
# Downstream: calculate 1 - score for SIFT.
sift b
# Downstream: the SIFT and PolyPhen values can be averaged.
polyphen b
plugin REVEL,/opt/vep/.vep/REVEL/new_tabbed_revel_grch38.tsv.gz
# TODO: calculate their L2 norm
# plugin CADD,/opt/vep/.vep/CADD/GRCh38/whole_genome_SNVs.tsv.gz,/opt/vep/.vep/CADD/GRCh38/InDels.tsv.gz
# Clinical significance
# check_existing looks up known variants co-located with each input variant;
# cached clinical-significance values are reported through that lookup.
check_existing
# NOTE(review): `clinvar` does not appear among VEP's documented options —
# confirm it has any effect; ClinVar data is normally supplied through
# check_existing or a `custom` ClinVar VCF.
clinvar
# Allele frequencies
# af         - global 1000 Genomes allele frequency
# af_1kg     - 1000 Genomes continental-population frequencies
# af_gnomade - gnomAD exomes frequencies
# af_gnomadg - gnomAD genomes frequencies
# max_af     - highest frequency seen across the sources above
# NOTE(review): these values come from the cache, so the cache under dir_cache
# must include gnomAD genomes data for af_gnomadg to populate — confirm the
# cache release.
af
af_1kg
af_gnomade
af_gnomadg
max_af
# Additional useful flags
# numbers   - exon/intron numbering
# hgvs      - HGVS nomenclature
# symbol    - gene symbol
# canonical - flag the canonical transcript
numbers
# NOTE(review): with `offline 1`, HGVS generation needs a reference FASTA
# (a `fasta` option or one discovered in the cache directory) — confirm one is
# provided by the caller or the image.
hgvs
symbol
canonical