Initial commit: digital-patients pipeline (clean, no large files)
Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
This commit is contained in:
217
test.nf
Normal file
217
test.nf
Normal file
@@ -0,0 +1,217 @@
|
||||
nextflow.enable.dsl=2
|
||||
|
||||
|
||||
// ================= IMAGES GO HERE ================================================================

// Container images, one per pipeline stage.
params.container_synthea            = 'harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf'
params.container_borzoi             = 'harbor.cluster.omic.ai/omic/digital-patients/borzoi:latest'
params.container_vcf2prot           = 'harbor.cluster.omic.ai/omic/digital-patients/vcf2prot:latest'
params.container_rna2protexpression = 'harbor.cluster.omic.ai/omic/digital-patients/rna2protexpression:latest'
params.container_corto              = 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
params.container_ecotyper           = 'harbor.cluster.omic.ai/omic/digital-patients/ecotyper:latest'

// Container runtime options.
// Earlier variants also bind-mounted host directories ('-v /mnt:/mnt', '-v /dbs:/dbs')
// and/or requested GPUs for the default options ('--gpus all --rm -v /mnt:/mnt').
params.containerOptions                    = '--rm'
params.containerOptions_synthea            = '--rm --privileged --gpus all'
params.containerOptions_borzoi             = '--rm --gpus all'
params.containerOptions_rna2protexpression = '--gpus all --rm'
// params.containerOptions_rna2protexpression = '--gpus all --rm -v /mnt:/mnt -v /dbs:/dbs'

// =================================================================================================
|
||||
|
||||
// SYNTHEA
// Number of patients to generate; must be >= 2 (one male, one female). Previously 10.
params.n_pat        = 2
// Fraction of male patients, between 0 and 1.
params.percent_male = 0.5

// Currently disabled cohort options:
//params.age = '18-80' // age range of the population, expressed as age-age
//params.state = '"District of Columbia"' // other values tried: 'Hawaii', 'Texas', 'Georgia'
//params.city = '' // other values tried: 'Washington', 'Honolulu', 'Houston', 'Atlanta'

// Disease to simulate. 'healthy' selects the healthy-cohort branch of the workflow;
// any other value selects the disease branch.
// Other values tried: 'leukaemia', 'Purpura and other haemorrhagic conditions',
// 'tongue cancer', 'dementia', and the list form ['tongue cancer', 'dementia', 'arthritis'].
params.disease      = 'schizophrenia'

//params.n_var = 100

params.project_name = 'test'
|
||||
|
||||
//CIBERSORT
|
||||
// CIBERSORTx credentials.
// Prefer supplying these through the CIBERSORTX_USERNAME / CIBERSORTX_TOKEN environment
// variables (or --cibersortx_username / --cibersortx_token on the command line).
// SECURITY: the literal fallbacks below are real credentials committed to version control.
// They are kept only for backward compatibility; rotate the token and remove the literals
// once every caller provides the values externally.
params.cibersortx_username = System.getenv('CIBERSORTX_USERNAME') ?: "gabriel.richman.2009@anderson.ucla.edu"
params.cibersortx_token    = System.getenv('CIBERSORTX_TOKEN') ?: "b5b39e563fb947df4cfd4843d40fdb99"
|
||||
|
||||
|
||||
|
||||
// ====================== FILEPATHS HERE =========================================================================

// SYNTHEA inputs
params.imputed_store       = '/mnt/Avatar/imputed/ukbb/imputed'                                            // was: '/rosalind/ukbb/imputed'
params.pheno_store         = '/mnt/dreamdock-data/digital-patient-data/data/ukbb_phenotypes_filtered.csv'  // was: '/mnt/Avatar/dd/synthea/metadata/ukbb_phenotypes_filtered.csv'
params.healthy_dir         = '/mnt/dreamdock-data/digital-patient-data/healthy'                            // was: '/Workspace/next/registry/pipelines/digital_patient', '/mnt/Avatar/digital_patient' (data copy is here)
params.synthea_support_dir = '/mnt/Avatar/dd/synthea/supporting-data/'

// CORTO
params.regulon             = '/mnt/dreamdock-data/digital-patient-data/data/regulon.rda'                   // was: '/Workspace/next/registry/pipelines/digital_patient/regulon.rda'

// BORZOI
params.mane                = '/mnt/dreamdock-data/digital-patient-data/data/MANE.GRCh38.v1.3.update.tsv'   // was: '/Workspace/next/registry/pipelines/digital_patient/MANE.GRCh38.v1.3.update.tsv'

// CIBERSORT
params.signature_matrix    = '/mnt/dreamdock-data/digital-patient-data/data/LM22_sourceGEP_ensg.txt'       // was: "/Workspace/next/registry/pipelines/digital_patient/LM22_sourceGEP_ensg.txt"

// Output locations. ecotyper_outdir is derived from outdir, so outdir must be set first.
params.outdir              = '/mnt/dreamdock-data/digital-patient-data/out'                                // was: '/mnt/OmicNAS/dd/digital_patient/new'
// params.outdir = '/data/digital-patients-data' // '/mnt/OmicNAS/dd/digital_patient/new'
params.ecotyper_outdir     = "${params.outdir}/ecotyper"
|
||||
|
||||
// ====================== FILTERING PARAMETERS ====================================================================

// Master switch: when false, the workflow skips the whole filtering stage.
params.enable_filtering               = true

// Filtering thresholds (based on Gabe's requirements).
params.top_n_variants                 = 500
params.top_n_genes                    = 1000
params.top_n_proteins                 = 500
params.top_n_immune_cells             = 20
params.top_n_metabolites              = 50
params.transcriptome_log2fc_threshold = 1.5
params.metabolome_pvalue_threshold    = 0.05

// ===============================================================================================================
|
||||
|
||||
//SYNTHEA
|
||||
include {get_disease_stats_no_patients} from './main_synthea.nf'
|
||||
include {generate_m_variants_cudf} from './main_synthea.nf'
|
||||
include {generate_f_variants_cudf} from './main_synthea.nf'
|
||||
include {make_vcfs} from './main_synthea.nf'
|
||||
include {generate_m_healthy_cudf} from './main_synthea.nf'
|
||||
include {generate_f_healthy_cudf} from './main_synthea.nf'
|
||||
|
||||
//BORZOI
|
||||
include {FILTER_VCF} from './main_borzoi.nf'
|
||||
include {PREDICT_EXPRESSION} from './main_borzoi.nf'
|
||||
include {CREATE_PROTEIN_CLUSTER} from './main_borzoi.nf'
|
||||
|
||||
//VCF2PROT
|
||||
include {VCF2PROT} from './main_vcf2prot.nf'
|
||||
|
||||
//RNA2PROTEINEXPRESSION
|
||||
include {RNA2PROTEXPRESSION} from './main_rna2proteinexpression.nf'
|
||||
|
||||
//CORTO
|
||||
include {CORTO} from './main_corto.nf'
|
||||
|
||||
//CIBERSORT
|
||||
include {CONVERT_TO_TXT} from './main_cibersortx.nf'
|
||||
include {CIBERSORTx_FRACTIONS} from './main_cibersortx.nf'
|
||||
include {CIBERSORTx_HIRES} from './main_cibersortx.nf'
|
||||
include {ADD_TISSUE_NAMES_TO_CIBERSORTX} from './main_cibersortx.nf'
|
||||
|
||||
//FILTERING PROCESSES
|
||||
include {FILTER_VARIANTS} from './main_filter_outputs.nf'
|
||||
include {FILTER_TRANSCRIPTOME} from './main_filter_outputs.nf'
|
||||
include {FILTER_PROTEOME} from './main_filter_outputs.nf'
|
||||
include {FILTER_IMMUNE_CELLS} from './main_filter_outputs.nf'
|
||||
include {FILTER_METABOLOME} from './main_filter_outputs.nf'
|
||||
include {FILTER_MUTATED_PROTEINS} from './main_filter_outputs.nf'
|
||||
include {CREATE_SUMMARY_REPORT} from './main_filter_outputs.nf'
|
||||
|
||||
|
||||
// Entry workflow of the digital-patients pipeline.
//
// Stages, in order:
//   1. SYNTHEA    - generate per-patient variant files (healthy or disease branch) and VCFs
//   2. BORZOI     - filter each VCF and predict expression
//   3. VCF2PROT   - run on the VCFs together with the filtered VCF output
//   4. RNA2PROTEXPRESSION, CORTO, CIBERSORTx - consume the predicted expression
//   5. Optional filtering stage and summary report (params.enable_filtering)
workflow {
    // Reference inputs. file() yields a single path value, which Nextflow lets every task
    // of a process reuse (unlike a one-item queue channel, which is consumed once).
    pheno_store_ch      = file(params.pheno_store)
    imputed_store_ch    = file(params.imputed_store)
    synthea_support_ch  = file(params.synthea_support_dir)
    regulon_ch          = file(params.regulon)
    // Fail early if the signature matrix is missing.
    signature_matrix_ch = file(params.signature_matrix, checkIfExists: true)
    mane_ch             = file(params.mane)
    health_dir_ch       = file(params.healthy_dir)

    //SYNTHEA
    switch (params.disease) {
        case 'healthy':
            //healthy
            generate_m_healthy_cudf(health_dir_ch)
            generate_f_healthy_cudf(health_dir_ch)
            m_healthy = generate_m_healthy_cudf.out
            f_healthy = generate_f_healthy_cudf.out
            txt_ch = f_healthy.mix(m_healthy).flatten()
            break
        default:
            //disease
            get_disease_stats_no_patients(pheno_store_ch, imputed_store_ch)
            generate_m_variants_cudf(get_disease_stats_no_patients.out)
            generate_f_variants_cudf(get_disease_stats_no_patients.out)
            f_var = generate_f_variants_cudf.out
            m_var = generate_m_variants_cudf.out
            txt_ch = f_var.mix(m_var).flatten()
    }
    make_vcfs(txt_ch, synthea_support_ch)

    // TODO: ADD STEP TO CREATE AVERAGED COHORT (HEALTHY)
    // TODO: ADD STEP TO CREATE AVERAGED DISEASED
    // TODO: ADD STEP TO CALCULATE STATISTICS LOG2FC and ADJ PVAL

    //BORZOI
    // Prepare the VCF file(s)
    //vcf_ch = Channel.fromPath(make_vcfs.out)
    // CREATE_PROTEIN_CLUSTER is used to create protein clusters on the same Borzoi input and to
    // split the ones that are too big for Borzoi (len = 524288).
    // It only needs to be run to create data for the next step if a new reference/RNAs are going to be used.
    //CREATE_PROTEIN_CLUSTER(params.mane)
    FILTER_VCF(mane_ch, make_vcfs.out)
    PREDICT_EXPRESSION(FILTER_VCF.out, mane_ch)

    //VCF2PROT
    VCF2PROT(make_vcfs.out, FILTER_VCF.out)

    //RNA2PROTEINEXPRESSION
    // Intended to run only a single RNA-to-protein process at a time to avoid CUDA out-of-memory errors.
    // NOTE(review): collect().flatten() only delays emission until every PREDICT_EXPRESSION output
    // is available; it does not by itself limit how many tasks run in parallel. Confirm that
    // RNA2PROTEXPRESSION restricts concurrency (e.g. via maxForks) in its own definition.
    PREDICT_EXPRESSION.out
        .collect()
        .flatten()
        .set { rna_input }
    RNA2PROTEXPRESSION(rna_input)

    //CORTO
    CORTO(PREDICT_EXPRESSION.out, regulon_ch)

    //CIBERSORT
    // The signature matrix is passed as a plain file value so that it is reused for every sample.
    // Wrapping it in Channel.fromPath() would produce a one-item queue channel and the
    // CIBERSORTx processes would stop after the first sample.
    CONVERT_TO_TXT(PREDICT_EXPRESSION.out)
    CIBERSORTx_FRACTIONS(CONVERT_TO_TXT.out, signature_matrix_ch)
    // NOTE(review): the queue-channel inputs below are paired by arrival order; verify against the
    // process definitions that outputs of the same sample are matched with each other.
    CIBERSORTx_HIRES(CONVERT_TO_TXT.out, CIBERSORTx_FRACTIONS.out, signature_matrix_ch)
    ADD_TISSUE_NAMES_TO_CIBERSORTX(CONVERT_TO_TXT.out, CIBERSORTx_HIRES.out)

    // ==================== FILTERING STAGE ====================
    if (params.enable_filtering) {
        // Filter variants
        FILTER_VARIANTS(make_vcfs.out)

        // Filter transcriptome
        FILTER_TRANSCRIPTOME(PREDICT_EXPRESSION.out)

        // Filter proteome
        FILTER_PROTEOME(RNA2PROTEXPRESSION.out)

        // Filter immune cells
        FILTER_IMMUNE_CELLS(ADD_TISSUE_NAMES_TO_CIBERSORTX.out)

        // Filter metabolome
        FILTER_METABOLOME(CORTO.out)

        // Filter mutated proteins
        FILTER_MUTATED_PROTEINS(VCF2PROT.out, FILTER_VARIANTS.out.filtered_vcf)

        // Create summary report
        CREATE_SUMMARY_REPORT(
            FILTER_TRANSCRIPTOME.out,
            FILTER_PROTEOME.out,
            FILTER_IMMUNE_CELLS.out,
            FILTER_METABOLOME.out,
            FILTER_VARIANTS.out.filtered_vcf
        )
    }
}
|
||||
Reference in New Issue
Block a user