nextflow.enable.dsl=2

// Entry script of the digital-patients pipeline: declares default parameters,
// includes the per-tool process modules, and wires them together in the
// unnamed entry workflow below. Any parameter can be overridden on the
// command line or in a config file.

// ================= CONTAINER IMAGES ================================================================
params.container_borzoi             = 'harbor.cluster.omic.ai/omic/digital-patients/borzoi:latest'
params.container_vcf2prot           = 'harbor.cluster.omic.ai/omic/digital-patients/vcf2prot:latest'
params.container_rna2protexpression = 'harbor.cluster.omic.ai/omic/digital-patients/rna2protexpression:latest'
params.container_corto              = 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
params.container_ecotyper           = 'harbor.cluster.omic.ai/omic/digital-patients/ecotyper:latest'
params.container_synthea            = 'harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf'

// Container options - empty for k8s (k8s handles GPU scheduling via pod specs)
params.containerOptions                    = ''
params.containerOptions_synthea            = ''
params.containerOptions_borzoi             = ''
params.containerOptions_rna2protexpression = ''
// =================================================================================================

// SYNTHEA
params.n_pat        = 2
params.percent_male = 0.5
// 'healthy' selects the healthy-cohort generators in the workflow below;
// any other value goes through the disease-statistics path.
params.disease      = 'schizophrenia'
params.project_name = 'test'

// CIBERSORT
// NOTE(review): these credentials are committed in plain text. They are kept
// unchanged here so existing runs keep working, but they should be rotated and
// supplied at launch time (e.g. --cibersortx_token or a secrets mechanism).
params.cibersortx_username = "gabriel.richman.2009@anderson.ucla.edu"
params.cibersortx_token    = "b5b39e563fb947df4cfd4843d40fdb99"

// ====================== FILEPATHS (PVC mount paths) ===============================================
params.imputed_store       = '/omic/eureka/digital-patients/imputed'
params.pheno_store         = '/omic/eureka/digital-patients/ukbb_phenotypes_filtered.csv'
params.regulon             = '/omic/eureka/digital-patients/regulon.rda'
params.healthy_dir         = '/omic/eureka/digital-patients/healthy'
params.synthea_support_dir = '/omic/eureka/digital-patients/supporting-data'
params.mane                = '/omic/eureka/digital-patients/MANE.GRCh38.v1.3.update.tsv'
params.signature_matrix    = '/omic/eureka/digital-patients/LM22_sourceGEP_ensg.txt'
params.outdir              = '/omic/eureka/digital-patients/output'
params.ecotyper_outdir     = "${params.outdir}/ecotyper"

// ====================== FILTERING PARAMETERS =====================================================
// enable_filtering gates the whole filtering stage at the end of the workflow.
params.enable_filtering               = true
params.top_n_variants                 = 500
params.top_n_genes                    = 1000
params.transcriptome_log2fc_threshold = 1.5
params.top_n_proteins                 = 500
params.top_n_immune_cells             = 20
params.top_n_metabolites              = 50
params.metabolome_pvalue_threshold    = 0.05

// ====================== INCLUDES ================================================================
//SYNTHEA
include {get_disease_stats_no_patients} from './main_synthea.nf'
include {generate_m_variants_cudf} from './main_synthea.nf'
include {generate_f_variants_cudf} from './main_synthea.nf'
include {make_vcfs} from './main_synthea.nf'
include {generate_m_healthy_cudf} from './main_synthea.nf'
include {generate_f_healthy_cudf} from './main_synthea.nf'

//BORZOI
include {FILTER_VCF} from './main_borzoi.nf'
include {PREDICT_EXPRESSION} from './main_borzoi.nf'
include {CREATE_PROTEIN_CLUSTER} from './main_borzoi.nf'

//VCF2PROT
include {VCF2PROT} from './main_vcf2prot.nf'

//RNA2PROTEINEXPRESSION
// NOTE(review): this is the only include path written without the `.nf`
// suffix; left untouched because the module's on-disk name is not visible here.
include {RNA2PROTEXPRESSION} from './main_rna2proteinexpression'

//CORTO
include {CORTO} from './main_corto.nf'

//CIBERSORT
include {CONVERT_TO_TXT} from './main_cibersortx.nf'
include {CIBERSORTx_FRACTIONS} from './main_cibersortx.nf'
include {CIBERSORTx_HIRES} from './main_cibersortx.nf'
include {ADD_TISSUE_NAMES_TO_CIBERSORTX} from './main_cibersortx.nf'

//FILTERING PROCESSES
include {FILTER_VARIANTS} from './main_filter_outputs.nf'
include {FILTER_TRANSCRIPTOME} from './main_filter_outputs.nf'
include {FILTER_PROTEOME} from './main_filter_outputs.nf'
include {FILTER_IMMUNE_CELLS} from './main_filter_outputs.nf'
include {FILTER_METABOLOME} from './main_filter_outputs.nf'
include {FILTER_MUTATED_PROTEINS} from './main_filter_outputs.nf'
include {CREATE_SUMMARY_REPORT} from './main_filter_outputs.nf'

// Entry workflow: patient generation -> VCFs -> expression prediction ->
// protein / CORTO / CIBERSORTx branches -> optional filtering and summary.
workflow {
    // Reference inputs resolved with file(): these are plain path objects, not
    // queue channels (despite the `_ch` suffix), so a process receiving one
    // can reuse it for every item arriving on its other inputs.
    pheno_store_ch      = file(params.pheno_store)
    imputed_store_ch    = file(params.imputed_store)
    synthea_support_ch  = file(params.synthea_support_dir)
    regulon_ch          = file(params.regulon)
    // checkIfExists keeps the existence check that was previously done via
    // Channel.fromPath(..., checkIfExists: true).
    signature_matrix_ch = file(params.signature_matrix, checkIfExists: true)
    mane_ch             = file(params.mane)
    health_dir_ch       = file(params.healthy_dir)

    //SYNTHEA
    if (params.disease == 'healthy') {
        // Healthy cohort: both generators read from the healthy directory.
        generate_m_healthy_cudf(health_dir_ch)
        generate_f_healthy_cudf(health_dir_ch)
        m_healthy = generate_m_healthy_cudf.out
        f_healthy = generate_f_healthy_cudf.out
        txt_ch = f_healthy.mix(m_healthy).flatten()
    }
    else {
        // Disease cohort: both generators consume the output of
        // get_disease_stats_no_patients.
        get_disease_stats_no_patients(pheno_store_ch, imputed_store_ch)
        generate_m_variants_cudf(get_disease_stats_no_patients.out)
        generate_f_variants_cudf(get_disease_stats_no_patients.out)
        f_var = generate_f_variants_cudf.out
        m_var = generate_m_variants_cudf.out
        txt_ch = f_var.mix(m_var).flatten()
    }

    // txt_ch merges the female and male generator outputs and flattens them
    // into individual items before VCF creation.
    make_vcfs(txt_ch, synthea_support_ch)

    //BORZOI
    FILTER_VCF(mane_ch, make_vcfs.out)
    PREDICT_EXPRESSION(FILTER_VCF.out, mane_ch)

    //VCF2PROT
    VCF2PROT(make_vcfs.out, FILTER_VCF.out)

    //RNA2PROTEINEXPRESSION
    // collect() gathers every PREDICT_EXPRESSION output into one list before
    // flatten() re-emits the entries one by one.
    PREDICT_EXPRESSION.out
        .collect()
        .flatten()
        .set { rna_input }
    RNA2PROTEXPRESSION(rna_input)

    //CORTO
    CORTO(PREDICT_EXPRESSION.out, regulon_ch)

    //CIBERSORT
    // The signature matrix is passed as a plain path rather than through
    // Channel.fromPath(): a queue channel holding a single file would be
    // consumed by the first task, so the CIBERSORTx processes would run for
    // only one item of CONVERT_TO_TXT.out. A path value is reused for all of
    // them (presumably one per patient sample - confirm against the module).
    signature_file = signature_matrix_ch
    CONVERT_TO_TXT(PREDICT_EXPRESSION.out)
    CIBERSORTx_FRACTIONS(CONVERT_TO_TXT.out, signature_file)
    CIBERSORTx_HIRES(CONVERT_TO_TXT.out, CIBERSORTx_FRACTIONS.out, signature_file)
    ADD_TISSUE_NAMES_TO_CIBERSORTX(CONVERT_TO_TXT.out, CIBERSORTx_HIRES.out)

    // FILTERING STAGE
    if (params.enable_filtering) {
        FILTER_VARIANTS(make_vcfs.out)
        FILTER_TRANSCRIPTOME(PREDICT_EXPRESSION.out)
        FILTER_PROTEOME(RNA2PROTEXPRESSION.out)
        FILTER_IMMUNE_CELLS(ADD_TISSUE_NAMES_TO_CIBERSORTX.out)
        FILTER_METABOLOME(CORTO.out)
        FILTER_MUTATED_PROTEINS(VCF2PROT.out, FILTER_VARIANTS.out.filtered_vcf)

        // The summary report takes every filtered output plus the filtered VCF.
        CREATE_SUMMARY_REPORT(
            FILTER_TRANSCRIPTOME.out,
            FILTER_PROTEOME.out,
            FILTER_IMMUNE_CELLS.out,
            FILTER_METABOLOME.out,
            FILTER_VARIANTS.out.filtered_vcf
        )
    }
}