Configure pipeline for WES execution on k8s cluster
- Add main.nf as WES entry point with PVC-based paths
- Update nextflow.config with k8s/k8s_gpu profiles (eureka-pvc)
- Update params.json defaults to /omic/eureka/digital-patients/ paths
- Remove stageInMode 'copy' from corto and vcf2prot for k8s compat
- Add reassemble.nf for one-time chunk reassembly on PVC
This commit is contained in:
161
main.nf
Normal file
161
main.nf
Normal file
@@ -0,0 +1,161 @@
|
||||
nextflow.enable.dsl=2
|
||||
|
||||
// ================= CONTAINER IMAGES ================================================================
|
||||
|
||||
params.container_borzoi = 'harbor.cluster.omic.ai/omic/digital-patients/borzoi:latest'
|
||||
params.container_vcf2prot = 'harbor.cluster.omic.ai/omic/digital-patients/vcf2prot:latest'
|
||||
params.container_rna2protexpression = 'harbor.cluster.omic.ai/omic/digital-patients/rna2protexpression:latest'
|
||||
params.container_corto = 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
|
||||
params.container_ecotyper = 'harbor.cluster.omic.ai/omic/digital-patients/ecotyper:latest'
|
||||
params.container_synthea = 'harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf'
|
||||
|
||||
// Container options - empty for k8s (k8s handles GPU scheduling via pod specs)
|
||||
params.containerOptions = ''
|
||||
params.containerOptions_synthea = ''
|
||||
params.containerOptions_borzoi = ''
|
||||
params.containerOptions_rna2protexpression = ''
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// SYNTHEA
|
||||
params.n_pat = 2
|
||||
params.percent_male = 0.5
|
||||
params.disease = 'schizophrenia'
|
||||
params.project_name = 'test'
|
||||
|
||||
// CIBERSORT
// SECURITY: credentials must never be committed as defaults. They are read
// from the environment of the Nextflow launcher (CIBERSORTX_USERNAME /
// CIBERSORTX_TOKEN) and can still be overridden on the command line
// (--cibersortx_username / --cibersortx_token) or through a params file.
// Any token that was previously committed to this repository should be
// treated as leaked and rotated.
params.cibersortx_username = System.getenv('CIBERSORTX_USERNAME') ?: ''
params.cibersortx_token    = System.getenv('CIBERSORTX_TOKEN') ?: ''
|
||||
|
||||
// ====================== FILEPATHS (PVC mount paths) ===============================================
|
||||
|
||||
params.imputed_store = '/omic/eureka/digital-patients/imputed'
|
||||
params.pheno_store = '/omic/eureka/digital-patients/ukbb_phenotypes_filtered.csv'
|
||||
params.regulon = '/omic/eureka/digital-patients/regulon.rda'
|
||||
params.healthy_dir = '/omic/eureka/digital-patients/healthy'
|
||||
params.synthea_support_dir = '/omic/eureka/digital-patients/supporting-data'
|
||||
params.mane = '/omic/eureka/digital-patients/MANE.GRCh38.v1.3.update.tsv'
|
||||
params.signature_matrix = '/omic/eureka/digital-patients/LM22_sourceGEP_ensg.txt'
|
||||
params.outdir = '/omic/eureka/digital-patients/output'
|
||||
params.ecotyper_outdir = "${params.outdir}/ecotyper"
|
||||
|
||||
// ====================== FILTERING PARAMETERS =====================================================
|
||||
|
||||
params.enable_filtering = true
|
||||
params.top_n_variants = 500
|
||||
params.top_n_genes = 1000
|
||||
params.transcriptome_log2fc_threshold = 1.5
|
||||
params.top_n_proteins = 500
|
||||
params.top_n_immune_cells = 20
|
||||
params.top_n_metabolites = 50
|
||||
params.metabolome_pvalue_threshold = 0.05
|
||||
|
||||
// ====================== INCLUDES ================================================================
|
||||
|
||||
//SYNTHEA
|
||||
include {get_disease_stats_no_patients} from './main_synthea.nf'
|
||||
include {generate_m_variants_cudf} from './main_synthea.nf'
|
||||
include {generate_f_variants_cudf} from './main_synthea.nf'
|
||||
include {make_vcfs} from './main_synthea.nf'
|
||||
include {generate_m_healthy_cudf} from './main_synthea.nf'
|
||||
include {generate_f_healthy_cudf} from './main_synthea.nf'
|
||||
|
||||
//BORZOI
|
||||
include {FILTER_VCF} from './main_borzoi.nf'
|
||||
include {PREDICT_EXPRESSION} from './main_borzoi.nf'
|
||||
include {CREATE_PROTEIN_CLUSTER} from './main_borzoi.nf'
|
||||
|
||||
//VCF2PROT
|
||||
include {VCF2PROT} from './main_vcf2prot.nf'
|
||||
|
||||
//RNA2PROTEINEXPRESSION
|
||||
include {RNA2PROTEXPRESSION} from './main_rna2proteinexpression'
|
||||
|
||||
//CORTO
|
||||
include {CORTO} from './main_corto.nf'
|
||||
|
||||
//CIBERSORT
|
||||
include {CONVERT_TO_TXT} from './main_cibersortx.nf'
|
||||
include {CIBERSORTx_FRACTIONS} from './main_cibersortx.nf'
|
||||
include {CIBERSORTx_HIRES} from './main_cibersortx.nf'
|
||||
include {ADD_TISSUE_NAMES_TO_CIBERSORTX} from './main_cibersortx.nf'
|
||||
|
||||
//FILTERING PROCESSES
|
||||
include {FILTER_VARIANTS} from './main_filter_outputs.nf'
|
||||
include {FILTER_TRANSCRIPTOME} from './main_filter_outputs.nf'
|
||||
include {FILTER_PROTEOME} from './main_filter_outputs.nf'
|
||||
include {FILTER_IMMUNE_CELLS} from './main_filter_outputs.nf'
|
||||
include {FILTER_METABOLOME} from './main_filter_outputs.nf'
|
||||
include {FILTER_MUTATED_PROTEINS} from './main_filter_outputs.nf'
|
||||
include {CREATE_SUMMARY_REPORT} from './main_filter_outputs.nf'
|
||||
|
||||
|
||||
// Entry workflow: generates synthetic patients (Synthea), predicts expression
// (Borzoi), derives proteome / metabolome / immune-cell outputs, and optionally
// runs the filtering and summary stage.
workflow {
    // Reference inputs are resolved once with file(). Passed to a process they
    // act as value inputs, so they are reused for every task the process runs.
    pheno_store_ch     = file(params.pheno_store)
    imputed_store_ch   = file(params.imputed_store)
    synthea_support_ch = file(params.synthea_support_dir)
    regulon_ch         = file(params.regulon)
    mane_ch            = file(params.mane)
    health_dir_ch      = file(params.healthy_dir)

    // The signature matrix must also be a value input. It was previously built
    // with Channel.fromPath(), which yields a one-item queue channel; combined
    // with the per-sample CONVERT_TO_TXT output that limits the CIBERSORTx
    // processes to a single task. checkIfExists keeps the early failure when
    // the file is missing.
    signature_file     = file(params.signature_matrix, checkIfExists: true)

    //SYNTHEA
    switch (params.disease) {
        case 'healthy':
            generate_m_healthy_cudf(health_dir_ch)
            generate_f_healthy_cudf(health_dir_ch)
            m_healthy = generate_m_healthy_cudf.out
            f_healthy = generate_f_healthy_cudf.out
            txt_ch = f_healthy.mix(m_healthy).flatten()
            break
        default:
            get_disease_stats_no_patients(pheno_store_ch, imputed_store_ch)
            generate_m_variants_cudf(get_disease_stats_no_patients.out)
            generate_f_variants_cudf(get_disease_stats_no_patients.out)
            f_var = generate_f_variants_cudf.out
            m_var = generate_m_variants_cudf.out
            txt_ch = f_var.mix(m_var).flatten()
    }
    make_vcfs(txt_ch, synthea_support_ch)

    //BORZOI
    FILTER_VCF(mane_ch, make_vcfs.out)
    PREDICT_EXPRESSION(FILTER_VCF.out, mane_ch)

    //VCF2PROT
    VCF2PROT(make_vcfs.out, FILTER_VCF.out)

    //RNA2PROTEINEXPRESSION
    // collect() waits for every PREDICT_EXPRESSION task to finish before
    // flatten() re-emits the gathered items one by one.
    PREDICT_EXPRESSION.out
        .collect()
        .flatten()
        .set { rna_input }
    RNA2PROTEXPRESSION(rna_input)

    //CORTO
    CORTO(PREDICT_EXPRESSION.out, regulon_ch)

    //CIBERSORT
    CONVERT_TO_TXT(PREDICT_EXPRESSION.out)
    CIBERSORTx_FRACTIONS(CONVERT_TO_TXT.out, signature_file)
    CIBERSORTx_HIRES(CONVERT_TO_TXT.out, CIBERSORTx_FRACTIONS.out, signature_file)
    ADD_TISSUE_NAMES_TO_CIBERSORTX(CONVERT_TO_TXT.out, CIBERSORTx_HIRES.out)

    // FILTERING STAGE
    if (params.enable_filtering) {
        FILTER_VARIANTS(make_vcfs.out)
        FILTER_TRANSCRIPTOME(PREDICT_EXPRESSION.out)
        FILTER_PROTEOME(RNA2PROTEXPRESSION.out)
        FILTER_IMMUNE_CELLS(ADD_TISSUE_NAMES_TO_CIBERSORTX.out)
        FILTER_METABOLOME(CORTO.out)
        FILTER_MUTATED_PROTEINS(VCF2PROT.out, FILTER_VARIANTS.out.filtered_vcf)
        CREATE_SUMMARY_REPORT(
            FILTER_TRANSCRIPTOME.out,
            FILTER_PROTEOME.out,
            FILTER_IMMUNE_CELLS.out,
            FILTER_METABOLOME.out,
            FILTER_VARIANTS.out.filtered_vcf
        )
    }
}
|
||||
@@ -8,7 +8,6 @@ process CORTO {
|
||||
publishDir "${params.outdir}/${params.project_name}", mode: 'copy'
|
||||
// debug true
|
||||
// maxForks 1
|
||||
stageInMode 'copy'
|
||||
|
||||
input:
|
||||
path TPM
|
||||
|
||||
@@ -7,7 +7,6 @@ process VCF2PROT {
|
||||
containerOptions "${params.containerOptions}"
|
||||
// echo true
|
||||
publishDir "${params.outdir}/vcf2prot", mode: 'copy'
|
||||
stageInMode 'copy'
|
||||
maxForks 1
|
||||
|
||||
input:
|
||||
|
||||
105
nextflow.config
105
nextflow.config
@@ -1,98 +1,47 @@
|
||||
manifest {
|
||||
name = 'digital patients'
|
||||
name = 'digital-patients'
|
||||
author = 'omic'
|
||||
recurseSubmodules = true
|
||||
homePage = 'https://gitlab.com/omic/next/registry/pipelines/digitalpatients'
|
||||
homePage = 'https://trs-gitea.cluster.omic.ai/omic/digital-patients'
|
||||
description = 'generative digital patients and multi-omics pipeline'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=21.04.3'
|
||||
defaultBranch = 'master'
|
||||
defaultBranch = 'main'
|
||||
}
|
||||
|
||||
// docker {
|
||||
// enabled = true
|
||||
// temp = 'auto'
|
||||
// }
|
||||
|
||||
// process {
|
||||
// withLabel: 'gpu_process' {
|
||||
// containerOptions = '--gpus all --rm'
|
||||
// }
|
||||
//
|
||||
// withLabel: 'cpu_process' {
|
||||
// containerOptions = '--rm'
|
||||
// }
|
||||
// }
|
||||
|
||||
def sharedPod = [
|
||||
[env: 'NXF_DEBUG', value: '0'],
|
||||
[label: 'omic-app', value: 'digitalpatients'],
|
||||
[imagePullSecret: 'gitlab-registry-secret'],
|
||||
[volumeClaim: 'avatar-new', mountPath: '/mnt/Avatar/'],
|
||||
]
|
||||
|
||||
profiles {
|
||||
standard {
|
||||
docker {
|
||||
docker.enabled = true
|
||||
enabled = true
|
||||
temp = 'auto'
|
||||
}
|
||||
}
|
||||
|
||||
k8s_gpu {
|
||||
process {
|
||||
executor = 'local'
|
||||
|
||||
withLabel: 'gpu_process' {
|
||||
maxForks = 1 // Only one GPU task at a time on single GPU system
|
||||
containerOptions = '--gpus all --rm'
|
||||
}
|
||||
|
||||
withLabel: 'cpu_process' {
|
||||
containerOptions = '--rm'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
k8s {
|
||||
process {
|
||||
|
||||
executor = 'k8s'
|
||||
namespace = 'bioinformatics'
|
||||
debug = true
|
||||
|
||||
pod = sharedPod
|
||||
|
||||
withLabel: 'gpu_process' {
|
||||
|
||||
pod = sharedPod + [
|
||||
[nodeSelector: [gpu: 'yes', 'gpu-type': 'geforce-rtx-3090']]
|
||||
]
|
||||
pod = [[nodeSelector: 'nvidia.com/gpu.present=true']]
|
||||
accelerator = [request: 1, type: 'nvidia.com/gpu']
|
||||
}
|
||||
docker {
|
||||
enabled = true
|
||||
}
|
||||
k8s {
|
||||
storageClaimName = 'eureka-pvc'
|
||||
storageMountPath = '/omic/eureka'
|
||||
}
|
||||
}
|
||||
|
||||
workDir = "/mnt/dreamdock-data/digital-patient-data/work"
|
||||
|
||||
k8s {
|
||||
serviceAccount = 'nextflow-sa'
|
||||
namespace = 'bioinformatics'
|
||||
storageClaimName = 'dreamdock-data'
|
||||
storageMountPath = '/mnt/dreamdock-data'
|
||||
|
||||
pullPolicy = 'IfNotPresent'
|
||||
cleanup = true // delete pods after Ctrl+C or finished?
|
||||
// cleanup = false // delete pods after Ctrl+C or finished?
|
||||
|
||||
|
||||
// RUN AS DIFFERENT USERS
|
||||
// securityContext = [fsGroup: 1000]
|
||||
// securityContext = [
|
||||
// runAsUser: 1000,
|
||||
// fsGroup: 1000,
|
||||
// runAsNonRoot: true
|
||||
// ]
|
||||
process {
|
||||
executor = 'k8s'
|
||||
}
|
||||
docker {
|
||||
enabled = true
|
||||
}
|
||||
k8s {
|
||||
storageClaimName = 'eureka-pvc'
|
||||
storageMountPath = '/omic/eureka'
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Use container image
|
||||
// process.container = 'harbor.cluster.omic.ai/omic/faiss-indexer:latest'
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
352
params.json
352
params.json
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"params": {
|
||||
"// SYNTHEA PARAMETERS": {},
|
||||
"n_pat": {
|
||||
"type": "integer",
|
||||
"description": "Number of patients to generate (must be >= 2)",
|
||||
@@ -10,9 +9,7 @@
|
||||
"var_name": "params.n_pat",
|
||||
"examples": [2, 10, 100],
|
||||
"pattern": "^[0-9]+$",
|
||||
"validation": {
|
||||
"min": 2
|
||||
},
|
||||
"validation": { "min": 2 },
|
||||
"notes": "Minimum of 2 patients required (one male, one female)"
|
||||
},
|
||||
"percent_male": {
|
||||
@@ -23,42 +20,9 @@
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.percent_male",
|
||||
"examples": [0.0, 0.5, 1.0],
|
||||
"pattern": "^0(\\.\\d+)?|1(\\.0)?$",
|
||||
"validation": {
|
||||
"min": 0.0,
|
||||
"max": 1.0
|
||||
},
|
||||
"validation": { "min": 0.0, "max": 1.0 },
|
||||
"notes": "Value must be between 0 and 1 inclusive"
|
||||
},
|
||||
"imputed_store": {
|
||||
"type": "folder",
|
||||
"description": "Path to imputed UKBB data",
|
||||
"default": "/mnt/Avatar/imputed/ukbb/imputed",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.imputed_store",
|
||||
"examples": [
|
||||
"/mnt/Avatar/imputed/ukbb/imputed",
|
||||
"/rosalind/ukbb/imputed"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Directory containing imputed UKBB data"
|
||||
},
|
||||
"pheno_store": {
|
||||
"type": "file",
|
||||
"description": "Path to phenotype data file",
|
||||
"default": "/mnt/Avatar/dd/synthea/metadata/ukbb_phenotypes_filtered.csv",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.pheno_store",
|
||||
"examples": [
|
||||
"/mnt/Avatar/dd/synthea/metadata/ukbb_phenotypes_filtered.csv"
|
||||
],
|
||||
"pattern": ".*\\.csv$",
|
||||
"validation": {},
|
||||
"notes": "CSV file containing filtered UKBB phenotype data"
|
||||
},
|
||||
"disease": {
|
||||
"type": "string",
|
||||
"description": "Disease or condition to simulate",
|
||||
@@ -66,83 +30,10 @@
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.disease",
|
||||
"examples": [
|
||||
"schizophrenia",
|
||||
"healthy",
|
||||
"leukaemia",
|
||||
"Purpura and other haemorrhagic conditions"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"enum": [
|
||||
"schizophrenia",
|
||||
"healthy",
|
||||
"leukaemia",
|
||||
"Purpura and other haemorrhagic conditions"
|
||||
],
|
||||
"validation": {},
|
||||
"examples": ["schizophrenia", "healthy", "leukaemia"],
|
||||
"enum": ["schizophrenia", "healthy", "leukaemia", "Purpura and other haemorrhagic conditions"],
|
||||
"notes": "Use 'healthy' for healthy individuals or specify a disease condition"
|
||||
},
|
||||
"healthy_dir": {
|
||||
"type": "folder",
|
||||
"description": "Path to healthy patient data directory",
|
||||
"default": "/Workspace/next/registry/pipelines/digital_patient",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.healthy_dir",
|
||||
"examples": [
|
||||
"/Workspace/next/registry/pipelines/digital_patient",
|
||||
"/mnt/Avatar/digital_patient"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Directory containing data for healthy patient generation"
|
||||
},
|
||||
"outdir": {
|
||||
"type": "folder",
|
||||
"description": "Output directory for results",
|
||||
"default": "/mnt/omic-next-apis/wes/digital_patients",
|
||||
"required": true,
|
||||
"pipeline_io": "output",
|
||||
"var_name": "params.outdir",
|
||||
"examples": [
|
||||
"/mnt/OmicNAS/dd/digital_patient",
|
||||
"/path/to/custom/output"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Directory where all pipeline results will be stored"
|
||||
},
|
||||
"// BORZOI PARAMETERS": {},
|
||||
"container_borzoi": {
|
||||
"type": "string",
|
||||
"description": "Borzoi container image",
|
||||
"default": "borzoi:latest",
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.container_borzoi",
|
||||
"examples": [
|
||||
"borzoi:latest",
|
||||
"borzoi:v1.0"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Docker container image for Borzoi module"
|
||||
},
|
||||
"containerOptions": {
|
||||
"type": "string",
|
||||
"description": "Container runtime options",
|
||||
"default": "--gpus all --rm -v /mnt:/mnt",
|
||||
"required": false,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.containerOptions",
|
||||
"examples": [
|
||||
"--gpus all --rm -v /mnt:/mnt",
|
||||
"--rm -v /data:/data"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Docker container runtime options for GPU usage and volume mounts"
|
||||
},
|
||||
"project_name": {
|
||||
"type": "string",
|
||||
"description": "Project identifier",
|
||||
@@ -150,106 +41,92 @@
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.project_name",
|
||||
"examples": [
|
||||
"test",
|
||||
"production",
|
||||
"schizophrenia_study"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Identifier for the digital patient project"
|
||||
"examples": ["test", "production", "schizophrenia_study"]
|
||||
},
|
||||
"imputed_store": {
|
||||
"type": "folder",
|
||||
"description": "Path to imputed UKBB data directory",
|
||||
"default": "/omic/eureka/digital-patients/imputed",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.imputed_store",
|
||||
"examples": ["/omic/eureka/digital-patients/imputed"],
|
||||
"notes": "Directory containing imputed UKBB GWAS .bgz files"
|
||||
},
|
||||
"pheno_store": {
|
||||
"type": "file",
|
||||
"description": "Path to phenotype data file",
|
||||
"default": "/omic/eureka/digital-patients/ukbb_phenotypes_filtered.csv",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.pheno_store",
|
||||
"examples": ["/omic/eureka/digital-patients/ukbb_phenotypes_filtered.csv"],
|
||||
"pattern": ".*\\.csv$",
|
||||
"notes": "CSV file containing filtered UKBB phenotype data"
|
||||
},
|
||||
"healthy_dir": {
|
||||
"type": "folder",
|
||||
"description": "Path to healthy patient data directory (gnomad files)",
|
||||
"default": "/omic/eureka/digital-patients/healthy",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.healthy_dir",
|
||||
"examples": ["/omic/eureka/digital-patients/healthy"],
|
||||
"notes": "Directory containing gnomad.genomes.v4.1.sites.{male,female}.txt"
|
||||
},
|
||||
"synthea_support_dir": {
|
||||
"type": "folder",
|
||||
"description": "Path to Synthea supporting data (VCF templates, liftover, genome)",
|
||||
"default": "/omic/eureka/digital-patients/supporting-data",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.synthea_support_dir",
|
||||
"examples": ["/omic/eureka/digital-patients/supporting-data"],
|
||||
"notes": "Must contain vcf/vcf_template.vcf, ucsc-liftover/hg19ToHg38.over.chain.gz, genome/hg38.fa"
|
||||
},
|
||||
"mane": {
|
||||
"type": "file",
|
||||
"description": "Path to MANE reference file",
|
||||
"default": "/Workspace/next/registry/pipelines/digital_patient/MANE.GRCh38.v1.3.update.tsv",
|
||||
"default": "/omic/eureka/digital-patients/MANE.GRCh38.v1.3.update.tsv",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.mane",
|
||||
"examples": [
|
||||
"/Workspace/next/registry/pipelines/digital_patient/MANE.GRCh38.v1.3.update.tsv"
|
||||
],
|
||||
"examples": ["/omic/eureka/digital-patients/MANE.GRCh38.v1.3.update.tsv"],
|
||||
"pattern": ".*\\.tsv$",
|
||||
"validation": {},
|
||||
"notes": "MANE transcripts reference file in TSV format"
|
||||
},
|
||||
"// VCF2PROT PARAMETERS": {},
|
||||
"container_vcf2prot": {
|
||||
"type": "string",
|
||||
"description": "VCF2PROT container image",
|
||||
"default": "vcf2prot:latest",
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.container_vcf2prot",
|
||||
"examples": [
|
||||
"vcf2prot:latest",
|
||||
"vcf2prot:v1.0"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Docker container image for VCF2PROT module"
|
||||
},
|
||||
"// RNA2PROTEINEXPRESSION PARAMETERS": {},
|
||||
"container_rna2protexpression": {
|
||||
"type": "string",
|
||||
"description": "RNA2PROTEINEXPRESSION container image",
|
||||
"default": "rna2protexpression:latest",
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.container_rna2protexpression",
|
||||
"examples": [
|
||||
"rna2protexpression:latest",
|
||||
"rna2protexpression:v1.0"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Docker container image for RNA2PROTEINEXPRESSION module"
|
||||
},
|
||||
"containerOptions_rna2protexpression": {
|
||||
"type": "string",
|
||||
"description": "Container options for RNA2PROTEINEXPRESSION",
|
||||
"default": "--gpus all --rm -v /mnt:/mnt -v /dbs:/dbs",
|
||||
"required": false,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.containerOptions_rna2protexpression",
|
||||
"examples": [
|
||||
"--gpus all --rm -v /mnt:/mnt -v /dbs:/dbs"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Docker container runtime options for RNA2PROTEINEXPRESSION with GPU and database volume mounts"
|
||||
},
|
||||
"// CORTO PARAMETERS": {},
|
||||
"container_corto": {
|
||||
"type": "string",
|
||||
"description": "CORTO container image",
|
||||
"default": "corto:latest",
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.container_corto",
|
||||
"examples": [
|
||||
"corto:latest",
|
||||
"corto:v1.0"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Docker container image for CORTO module"
|
||||
},
|
||||
"regulon": {
|
||||
"type": "file",
|
||||
"description": "Path to regulon RDA file",
|
||||
"default": "/Workspace/next/registry/pipelines/digital_patient/regulon.rda",
|
||||
"default": "/omic/eureka/digital-patients/regulon.rda",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.regulon",
|
||||
"examples": [
|
||||
"/Workspace/next/registry/pipelines/digital_patient/regulon.rda"
|
||||
],
|
||||
"examples": ["/omic/eureka/digital-patients/regulon.rda"],
|
||||
"pattern": ".*\\.rda$",
|
||||
"validation": {},
|
||||
"notes": "Regulon data file in RDA format for CORTO module"
|
||||
},
|
||||
"// CIBERSORT PARAMETERS": {},
|
||||
"signature_matrix": {
|
||||
"type": "file",
|
||||
"description": "Path to signature matrix file for CIBERSORTx",
|
||||
"default": "/omic/eureka/digital-patients/LM22_sourceGEP_ensg.txt",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.signature_matrix",
|
||||
"examples": ["/omic/eureka/digital-patients/LM22_sourceGEP_ensg.txt"],
|
||||
"pattern": ".*\\.txt$",
|
||||
"notes": "Signature matrix file for CIBERSORTx analysis"
|
||||
},
|
||||
"outdir": {
|
||||
"type": "folder",
|
||||
"description": "Output directory for results",
|
||||
"default": "/omic/eureka/digital-patients/output",
|
||||
"required": true,
|
||||
"pipeline_io": "output",
|
||||
"var_name": "params.outdir",
|
||||
"examples": ["/omic/eureka/digital-patients/output"],
|
||||
"notes": "Directory where all pipeline results will be stored"
|
||||
},
|
||||
"cibersortx_username": {
|
||||
"type": "string",
|
||||
"description": "CIBERSORTx username for authentication",
|
||||
@@ -257,11 +134,6 @@
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.cibersortx_username",
|
||||
"examples": [
|
||||
"gabriel.richman.2009@anderson.ucla.edu"
|
||||
],
|
||||
"pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
|
||||
"validation": {},
|
||||
"notes": "Username (email) for CIBERSORTx authentication"
|
||||
},
|
||||
"cibersortx_token": {
|
||||
@@ -271,55 +143,55 @@
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.cibersortx_token",
|
||||
"examples": [
|
||||
"b5b39e563fb947df4cfd4843d40fdb99"
|
||||
],
|
||||
"pattern": "^[a-f0-9]{32}$",
|
||||
"validation": {},
|
||||
"notes": "Authentication token for CIBERSORTx API access"
|
||||
},
|
||||
"container_borzoi": {
|
||||
"type": "string",
|
||||
"description": "Borzoi container image",
|
||||
"default": "harbor.cluster.omic.ai/omic/digital-patients/borzoi:latest",
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.container_borzoi"
|
||||
},
|
||||
"container_vcf2prot": {
|
||||
"type": "string",
|
||||
"description": "VCF2PROT container image",
|
||||
"default": "harbor.cluster.omic.ai/omic/digital-patients/vcf2prot:latest",
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.container_vcf2prot"
|
||||
},
|
||||
"container_rna2protexpression": {
|
||||
"type": "string",
|
||||
"description": "RNA2PROTEINEXPRESSION container image",
|
||||
"default": "harbor.cluster.omic.ai/omic/digital-patients/rna2protexpression:latest",
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.container_rna2protexpression"
|
||||
},
|
||||
"container_corto": {
|
||||
"type": "string",
|
||||
"description": "CORTO container image",
|
||||
"default": "harbor.cluster.omic.ai/omic/digital-patients/corto:latest",
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.container_corto"
|
||||
},
|
||||
"container_ecotyper": {
|
||||
"type": "string",
|
||||
"description": "ECOTyper container image",
|
||||
"default": "ecotyper:latest",
|
||||
"description": "ECOTyper/CIBERSORTx container image",
|
||||
"default": "harbor.cluster.omic.ai/omic/digital-patients/ecotyper:latest",
|
||||
"required": true,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.container_ecotyper",
|
||||
"examples": [
|
||||
"ecotyper:latest",
|
||||
"ecotyper:v1.0"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Docker container image for ECOTyper module"
|
||||
"var_name": "params.container_ecotyper"
|
||||
},
|
||||
"signature_matrix": {
|
||||
"type": "file",
|
||||
"description": "Path to signature matrix file",
|
||||
"default": "/Workspace/next/registry/pipelines/digital_patient/LM22_sourceGEP_ensg.txt",
|
||||
"container_synthea": {
|
||||
"type": "string",
|
||||
"description": "Synthea container image",
|
||||
"default": "harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.signature_matrix",
|
||||
"examples": [
|
||||
"/Workspace/next/registry/pipelines/digital_patient/LM22_sourceGEP_ensg.txt"
|
||||
],
|
||||
"pattern": ".*\\.txt$",
|
||||
"validation": {},
|
||||
"notes": "Signature matrix file for CIBERSORTx analysis"
|
||||
},
|
||||
"ecotyper_outdir": {
|
||||
"type": "folder",
|
||||
"description": "Output directory for ECOTyper results",
|
||||
"default": "/mnt/omic-next-apis/wes/digital-patients/ecotyper",
|
||||
"required": true,
|
||||
"pipeline_io": "output",
|
||||
"var_name": "params.ecotyper_outdir",
|
||||
"examples": [
|
||||
"/mnt/OmicNAS/olamide/ecotyper/results/ecotyper"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"validation": {},
|
||||
"notes": "Directory where ECOTyper results will be stored"
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.container_synthea"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
66
reassemble.nf
Normal file
66
reassemble.nf
Normal file
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env nextflow
|
||||
nextflow.enable.dsl=2
|
||||
|
||||
// One-time script to reassemble chunked files on the PVC
|
||||
// Run once, then delete. Not part of the main pipeline.
|
||||
|
||||
params.base_dir = '/omic/eureka/digital-patients'
|
||||
params.outdir = '/omic/eureka/digital-patients'
|
||||
|
||||
// One-off maintenance process: for each known chunked file under
// params.base_dir, concatenates the pieces in "<file>_parts/part_*" back into
// "<file>". A target whose parts directory does not exist is skipped.
process REASSEMBLE {
    container 'alpine:latest'
    executor 'local'

    script:
    """
    # reassemble LABEL TARGET
    #   LABEL  - name printed in the progress message
    #   TARGET - absolute path of the file to rebuild; its chunks are read
    #            from "TARGET_parts/part_*"
    # NOTE(review): the glob expands in lexical order, so this assumes the
    # chunk names sort correctly (e.g. zero-padded suffixes) - verify.
    reassemble() {
        label="\$1"
        target="\$2"
        parts_dir="\${target}_parts"
        if [ -d "\$parts_dir" ]; then
            echo "Reassembling \$label..."
            # Paths are quoted so a base_dir containing spaces cannot be
            # word-split; the glob itself stays outside the quotes.
            cat "\$parts_dir"/part_* > "\$target"
            echo " Done: \$(du -h "\$target" | cut -f1)"
        fi
    }

    echo "=== Reassembling chunked files ==="

    # hg38.fa (35 parts)
    reassemble "hg38.fa" "${params.base_dir}/supporting-data/genome/hg38.fa"

    # gnomad male / female (12 parts each)
    reassemble "gnomad male" "${params.base_dir}/healthy/gnomad.genomes.v4.1.sites.male.txt"
    reassemble "gnomad female" "${params.base_dir}/healthy/gnomad.genomes.v4.1.sites.female.txt"

    # F5_SCHIZO male / female / both_sexes (7 parts each)
    reassemble "F5_SCHIZO male" "${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz"
    reassemble "F5_SCHIZO female" "${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz"
    reassemble "F5_SCHIZO both_sexes" "${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz"

    echo "=== All reassembly complete ==="
    """
}
|
||||
|
||||
workflow {
|
||||
REASSEMBLE()
|
||||
}
|
||||
Reference in New Issue
Block a user