nextflow.enable.dsl=2

// Default pipeline parameters. Every value below can be overridden on the
// command line (--<name> <value>) or from a config / params file.

// ================= IMAGES GO HERE ================================================================
// Container runtime options and image references, one set per pipeline stage.
params.containerOptions = '--rm' // '--gpus all --rm -v /mnt:/mnt'
params.containerOptions_synthea = "--rm --privileged --gpus all" // -v /mnt:/mnt"
params.containerOptions_borzoi = '--rm --gpus all' // '--gpus all --rm -v /mnt:/mnt'
params.container_borzoi = 'harbor.cluster.omic.ai/omic/digital-patients/borzoi:latest'
params.container_vcf2prot = "harbor.cluster.omic.ai/omic/digital-patients/vcf2prot:latest"
params.container_rna2protexpression = 'harbor.cluster.omic.ai/omic/digital-patients/rna2protexpression:latest'
params.containerOptions_rna2protexpression = '--gpus all --rm' // -v /mnt:/mnt -v /dbs:/dbs'
// params.containerOptions_rna2protexpression = '--gpus all --rm -v /mnt:/mnt -v /dbs:/dbs'
params.container_corto = 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
params.container_ecotyper = 'harbor.cluster.omic.ai/omic/digital-patients/ecotyper:latest'
params.container_synthea = 'harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf'
// =================================================================================================

//SYNTHEA
params.n_pat = 2 //10 // number of patients to generate, must be >= 2 (one male, one female)
params.percent_male = 0.5 // between 0-1, percent of male patients
//params.age = '18-80' //age range of the population, expressed as age-age
//params.state = '"District of Columbia"' //'Hawaii' //'Texas' //'Georgia'
//params.city = '' //'Washington' //'' //'Honolulu' //'Houston' //'Atlanta'

// Keep both versions of disease params
// The workflow takes the healthy-cohort branch when this is exactly 'healthy';
// any other value goes through the disease-statistics branch.
params.disease = 'schizophrenia' //'schizophrenia' //'healthy' //'leukaemia' //'leukaemia' //'Purpura and other haemorrhagic conditions' //['tongue cancer', 'dementia', 'arthritis'] //'tongue cancer' //'dementia'
//params.n_var = 100
params.project_name = 'test'

//CIBERSORT
// SECURITY NOTE(review): these credentials are committed in plain text. They
// can now be supplied through the CIBERSORTX_USERNAME / CIBERSORTX_TOKEN
// environment variables; the literals remain only as a backward-compatible
// fallback. Rotate the token and remove the fallback values once every
// deployment provides them externally.
params.cibersortx_username = System.getenv('CIBERSORTX_USERNAME') ?: "gabriel.richman.2009@anderson.ucla.edu"
params.cibersortx_token = System.getenv('CIBERSORTX_TOKEN') ?: "b5b39e563fb947df4cfd4843d40fdb99"

// ====================== FILEPATHS HERE =========================================================================
params.imputed_store = '/mnt/Avatar/imputed/ukbb/imputed' //'/rosalind/ukbb/imputed'
params.pheno_store ='/mnt/dreamdock-data/digital-patient-data/data/ukbb_phenotypes_filtered.csv' // '/mnt/Avatar/dd/synthea/metadata/ukbb_phenotypes_filtered.csv'

//CORTO
params.regulon = '/mnt/dreamdock-data/digital-patient-data/data/regulon.rda' // '/Workspace/next/registry/pipelines/digital_patient/regulon.rda'
params.healthy_dir = '/mnt/dreamdock-data/digital-patient-data/healthy' // '/Workspace/next/registry/pipelines/digital_patient' //'/mnt/Avatar/digital_patient' // data copy is here
params.synthea_support_dir = '/mnt/Avatar/dd/synthea/supporting-data/'

//BORZOI
params.mane = '/mnt/dreamdock-data/digital-patient-data/data/MANE.GRCh38.v1.3.update.tsv' // '/Workspace/next/registry/pipelines/digital_patient/MANE.GRCh38.v1.3.update.tsv'

//CIBERSORT
params.signature_matrix = '/mnt/dreamdock-data/digital-patient-data/data/LM22_sourceGEP_ensg.txt' // "/Workspace/next/registry/pipelines/digital_patient/LM22_sourceGEP_ensg.txt"

params.outdir = '/mnt/dreamdock-data/digital-patient-data/out' // '/mnt/OmicNAS/dd/digital_patient/new'
// params.outdir = '/data/digital-patients-data' // '/mnt/OmicNAS/dd/digital_patient/new'
// Derived from params.outdir at script-evaluation time.
params.ecotyper_outdir = "${params.outdir}/ecotyper"

// ====================== FILTERING PARAMETERS ====================================================================
// Enable/disable filtering (gates the whole filtering stage of the workflow)
params.enable_filtering = true

// Filtering thresholds based on Gabe's requirements
params.top_n_variants = 500
params.top_n_genes = 1000
params.transcriptome_log2fc_threshold = 1.5
params.top_n_proteins = 500
params.top_n_immune_cells = 20
params.top_n_metabolites = 50
params.metabolome_pvalue_threshold = 0.05
// ===============================================================================================================
//SYNTHEA
include {get_disease_stats_no_patients} from './main_synthea.nf'
include {generate_m_variants_cudf} from './main_synthea.nf'
include {generate_f_variants_cudf} from './main_synthea.nf'
include {make_vcfs} from './main_synthea.nf'
include {generate_m_healthy_cudf} from './main_synthea.nf'
include {generate_f_healthy_cudf} from './main_synthea.nf'

//BORZOI
include {FILTER_VCF} from './main_borzoi.nf'
include {PREDICT_EXPRESSION} from './main_borzoi.nf'
include {CREATE_PROTEIN_CLUSTER} from './main_borzoi.nf'

//VCF2PROT
include {VCF2PROT} from './main_vcf2prot.nf'

//RNA2PROTEINEXPRESSION
include {RNA2PROTEXPRESSION} from './main_rna2proteinexpression'

//CORTO
include {CORTO} from './main_corto.nf'

//CIBERSORT
include {CONVERT_TO_TXT} from './main_cibersortx.nf'
include {CIBERSORTx_FRACTIONS} from './main_cibersortx.nf'
include {CIBERSORTx_HIRES} from './main_cibersortx.nf'
include {ADD_TISSUE_NAMES_TO_CIBERSORTX} from './main_cibersortx.nf'

//FILTERING PROCESSES
include {FILTER_VARIANTS} from './main_filter_outputs.nf'
include {FILTER_TRANSCRIPTOME} from './main_filter_outputs.nf'
include {FILTER_PROTEOME} from './main_filter_outputs.nf'
include {FILTER_IMMUNE_CELLS} from './main_filter_outputs.nf'
include {FILTER_METABOLOME} from './main_filter_outputs.nf'
include {FILTER_MUTATED_PROTEINS} from './main_filter_outputs.nf'
include {CREATE_SUMMARY_REPORT} from './main_filter_outputs.nf'

/*
 * Entry workflow.
 *
 * Stage order, as wired below:
 *   1. SYNTHEA   - generate per-sex inputs (healthy or disease branch) and turn them into VCFs
 *   2. BORZOI    - FILTER_VCF, then PREDICT_EXPRESSION
 *   3. VCF2PROT  - fed by the VCFs and the FILTER_VCF output
 *   4. RNA2PROTEXPRESSION, CORTO and the CIBERSORTx chain - all fed by PREDICT_EXPRESSION
 *   5. Optional filtering + summary report, gated by params.enable_filtering
 */
workflow {
    // Despite the *_ch suffix these are plain file objects returned by file(),
    // not channels; they are passed to processes as single values.
    pheno_store_ch      = file(params.pheno_store)
    imputed_store_ch    = file(params.imputed_store)
    synthea_support_ch  = file(params.synthea_support_dir)
    regulon_ch          = file(params.regulon)
    signature_matrix_ch = file(params.signature_matrix)
    mane_ch             = file(params.mane)
    health_dir_ch       = file(params.healthy_dir)

    //SYNTHEA
    // if/else instead of switch/case: same branching, but also accepted by the
    // Nextflow strict-syntax parser, which does not support switch statements.
    if (params.disease == 'healthy') {
        //healthy
        generate_m_healthy_cudf(health_dir_ch)
        generate_f_healthy_cudf(health_dir_ch)
        txt_ch = generate_f_healthy_cudf.out
            .mix(generate_m_healthy_cudf.out)
            .flatten()
    }
    else {
        //disease
        get_disease_stats_no_patients(pheno_store_ch, imputed_store_ch)
        generate_m_variants_cudf(get_disease_stats_no_patients.out)
        generate_f_variants_cudf(get_disease_stats_no_patients.out)
        txt_ch = generate_f_variants_cudf.out
            .mix(generate_m_variants_cudf.out)
            .flatten()
    }

    make_vcfs(txt_ch, synthea_support_ch)

    // TODO: ADD STEP TO CREATE AVERAGED COHORT (HEALTHY)
    // TODO: ADD STEP TO CREATE AVERAGED DISEASED
    // TODO: ADD STEP TO CALCULATE STATISTICS LOG2FC and ADJ PVAL

    //BORZOI
    // Prepare the VCF file(s)
    //vcf_ch = Channel.fromPath(make_vcfs.out)

    // CREATE_PROTEIN_CLUSTER is used to create protein clusters on the same Borzoi
    // input and to split the ones that are too big for Borzoi (LEN = 524288).
    // It only needs to be run to create data for the next step when a new
    // reference / new RNAs are going to be used.
    //CREATE_PROTEIN_CLUSTER(params.mane)
    FILTER_VCF(mane_ch, make_vcfs.out)
    PREDICT_EXPRESSION(FILTER_VCF.out, mane_ch)

    //VCF2PROT
    // NOTE(review): both inputs are per-sample queue channels and are paired purely
    // by emission order; if the two upstream processes can finish in different
    // orders the pairs may not belong to the same sample - confirm, or join on a key.
    VCF2PROT(make_vcfs.out, FILTER_VCF.out)

    //RNA2PROTEINEXPRESSION
    // Updated RNA to protein expression step to run only a single process at a time,
    // avoids CUDA out-of-memory errors.
    // NOTE(review): collect().flatten() only holds items back until every
    // PREDICT_EXPRESSION task has finished and then re-emits them one by one;
    // limiting concurrency presumably relies on a maxForks setting in the process
    // definition - confirm.
    rna_input = PREDICT_EXPRESSION.out
        .collect()
        .flatten()
    RNA2PROTEXPRESSION(rna_input)

    //CORTO
    CORTO(PREDICT_EXPRESSION.out, regulon_ch)

    //CIBERSORT
    // .first() turns the one-item queue channel into a value channel. A queue
    // channel holding a single file is exhausted after one task, which would make
    // the CIBERSORTx processes run for the first sample only; a value channel is
    // re-read for every sample.
    signature_file = Channel
        .fromPath(signature_matrix_ch, checkIfExists: true)
        .first()
    CONVERT_TO_TXT(PREDICT_EXPRESSION.out)
    CIBERSORTx_FRACTIONS(CONVERT_TO_TXT.out, signature_file)
    // NOTE(review): the two calls below pair several per-sample queue channels by
    // emission order - verify the items are guaranteed to line up.
    CIBERSORTx_HIRES(CONVERT_TO_TXT.out, CIBERSORTx_FRACTIONS.out, signature_file)
    ADD_TISSUE_NAMES_TO_CIBERSORTX(CONVERT_TO_TXT.out, CIBERSORTx_HIRES.out)

    // ==================== FILTERING STAGE ====================
    if (params.enable_filtering) {
        // Filter variants
        FILTER_VARIANTS(make_vcfs.out)

        // Filter transcriptome
        FILTER_TRANSCRIPTOME(PREDICT_EXPRESSION.out)

        // Filter proteome
        FILTER_PROTEOME(RNA2PROTEXPRESSION.out)

        // Filter immune cells
        FILTER_IMMUNE_CELLS(ADD_TISSUE_NAMES_TO_CIBERSORTX.out)

        // Filter metabolome
        FILTER_METABOLOME(CORTO.out)

        // Filter mutated proteins
        FILTER_MUTATED_PROTEINS(VCF2PROT.out, FILTER_VARIANTS.out.filtered_vcf)

        // Create summary report
        CREATE_SUMMARY_REPORT(
            FILTER_TRANSCRIPTOME.out,
            FILTER_PROTEOME.out,
            FILTER_IMMUNE_CELLS.out,
            FILTER_METABOLOME.out,
            FILTER_VARIANTS.out.filtered_vcf
        )
    }
}