Configure pipeline for WES execution on k8s cluster

- Add main.nf as WES entry point with PVC-based paths
- Update nextflow.config with k8s/k8s_gpu profiles (eureka-pvc)
- Update params.json defaults to /omic/eureka/digital-patients/ paths
- Remove stageInMode 'copy' from corto and vcf2prot for k8s compat
- Add reassemble.nf for one-time chunk reassembly on PVC
This commit is contained in:
2026-03-26 17:58:28 +01:00
parent 9e6a16c19b
commit 88627edc25
6 changed files with 371 additions and 325 deletions

View File

@@ -1,6 +1,5 @@
{
"params": {
"// SYNTHEA PARAMETERS": {},
"n_pat": {
"type": "integer",
"description": "Number of patients to generate (must be >= 2)",
@@ -10,9 +9,7 @@
"var_name": "params.n_pat",
"examples": [2, 10, 100],
"pattern": "^[0-9]+$",
"validation": {
"min": 2
},
"validation": { "min": 2 },
"notes": "Minimum of 2 patients required (one male, one female)"
},
"percent_male": {
@@ -23,42 +20,9 @@
"pipeline_io": "parameter",
"var_name": "params.percent_male",
"examples": [0.0, 0.5, 1.0],
"pattern": "^(0(\\.\\d+)?|1(\\.0+)?)$",
"validation": {
"min": 0.0,
"max": 1.0
},
"validation": { "min": 0.0, "max": 1.0 },
"notes": "Value must be between 0 and 1 inclusive"
},
"imputed_store": {
"type": "folder",
"description": "Path to imputed UKBB data",
"default": "/mnt/Avatar/imputed/ukbb/imputed",
"required": true,
"pipeline_io": "input",
"var_name": "params.imputed_store",
"examples": [
"/mnt/Avatar/imputed/ukbb/imputed",
"/rosalind/ukbb/imputed"
],
"pattern": ".*",
"validation": {},
"notes": "Directory containing imputed UKBB data"
},
"pheno_store": {
"type": "file",
"description": "Path to phenotype data file",
"default": "/mnt/Avatar/dd/synthea/metadata/ukbb_phenotypes_filtered.csv",
"required": true,
"pipeline_io": "input",
"var_name": "params.pheno_store",
"examples": [
"/mnt/Avatar/dd/synthea/metadata/ukbb_phenotypes_filtered.csv"
],
"pattern": ".*\\.csv$",
"validation": {},
"notes": "CSV file containing filtered UKBB phenotype data"
},
"disease": {
"type": "string",
"description": "Disease or condition to simulate",
@@ -66,83 +30,10 @@
"required": true,
"pipeline_io": "parameter",
"var_name": "params.disease",
"examples": [
"schizophrenia",
"healthy",
"leukaemia",
"Purpura and other haemorrhagic conditions"
],
"pattern": ".*",
"enum": [
"schizophrenia",
"healthy",
"leukaemia",
"Purpura and other haemorrhagic conditions"
],
"validation": {},
"examples": ["schizophrenia", "healthy", "leukaemia"],
"enum": ["schizophrenia", "healthy", "leukaemia", "Purpura and other haemorrhagic conditions"],
"notes": "Use 'healthy' for healthy individuals or specify a disease condition"
},
"healthy_dir": {
"type": "folder",
"description": "Path to healthy patient data directory",
"default": "/Workspace/next/registry/pipelines/digital_patient",
"required": true,
"pipeline_io": "input",
"var_name": "params.healthy_dir",
"examples": [
"/Workspace/next/registry/pipelines/digital_patient",
"/mnt/Avatar/digital_patient"
],
"pattern": ".*",
"validation": {},
"notes": "Directory containing data for healthy patient generation"
},
"outdir": {
"type": "folder",
"description": "Output directory for results",
"default": "/mnt/omic-next-apis/wes/digital_patients",
"required": true,
"pipeline_io": "output",
"var_name": "params.outdir",
"examples": [
"/mnt/OmicNAS/dd/digital_patient",
"/path/to/custom/output"
],
"pattern": ".*",
"validation": {},
"notes": "Directory where all pipeline results will be stored"
},
"// BORZOI PARAMETERS": {},
"container_borzoi": {
"type": "string",
"description": "Borzoi container image",
"default": "borzoi:latest",
"required": true,
"pipeline_io": "parameter",
"var_name": "params.container_borzoi",
"examples": [
"borzoi:latest",
"borzoi:v1.0"
],
"pattern": ".*",
"validation": {},
"notes": "Docker container image for Borzoi module"
},
"containerOptions": {
"type": "string",
"description": "Container runtime options",
"default": "--gpus all --rm -v /mnt:/mnt",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.containerOptions",
"examples": [
"--gpus all --rm -v /mnt:/mnt",
"--rm -v /data:/data"
],
"pattern": ".*",
"validation": {},
"notes": "Docker container runtime options for GPU usage and volume mounts"
},
"project_name": {
"type": "string",
"description": "Project identifier",
@@ -150,106 +41,92 @@
"required": true,
"pipeline_io": "parameter",
"var_name": "params.project_name",
"examples": [
"test",
"production",
"schizophrenia_study"
],
"pattern": ".*",
"validation": {},
"notes": "Identifier for the digital patient project"
"examples": ["test", "production", "schizophrenia_study"]
},
"imputed_store": {
"type": "folder",
"description": "Path to imputed UKBB data directory",
"default": "/omic/eureka/digital-patients/imputed",
"required": true,
"pipeline_io": "input",
"var_name": "params.imputed_store",
"examples": ["/omic/eureka/digital-patients/imputed"],
"notes": "Directory containing imputed UKBB GWAS .bgz files"
},
"pheno_store": {
"type": "file",
"description": "Path to phenotype data file",
"default": "/omic/eureka/digital-patients/ukbb_phenotypes_filtered.csv",
"required": true,
"pipeline_io": "input",
"var_name": "params.pheno_store",
"examples": ["/omic/eureka/digital-patients/ukbb_phenotypes_filtered.csv"],
"pattern": ".*\\.csv$",
"notes": "CSV file containing filtered UKBB phenotype data"
},
"healthy_dir": {
"type": "folder",
"description": "Path to healthy patient data directory (gnomAD files)",
"default": "/omic/eureka/digital-patients/healthy",
"required": true,
"pipeline_io": "input",
"var_name": "params.healthy_dir",
"examples": ["/omic/eureka/digital-patients/healthy"],
"notes": "Directory containing gnomad.genomes.v4.1.sites.{male,female}.txt"
},
"synthea_support_dir": {
"type": "folder",
"description": "Path to Synthea supporting data (VCF templates, liftover, genome)",
"default": "/omic/eureka/digital-patients/supporting-data",
"required": true,
"pipeline_io": "input",
"var_name": "params.synthea_support_dir",
"examples": ["/omic/eureka/digital-patients/supporting-data"],
"notes": "Must contain vcf/vcf_template.vcf, ucsc-liftover/hg19ToHg38.over.chain.gz, genome/hg38.fa"
},
"mane": {
"type": "file",
"description": "Path to MANE reference file",
"default": "/Workspace/next/registry/pipelines/digital_patient/MANE.GRCh38.v1.3.update.tsv",
"default": "/omic/eureka/digital-patients/MANE.GRCh38.v1.3.update.tsv",
"required": true,
"pipeline_io": "input",
"var_name": "params.mane",
"examples": [
"/Workspace/next/registry/pipelines/digital_patient/MANE.GRCh38.v1.3.update.tsv"
],
"examples": ["/omic/eureka/digital-patients/MANE.GRCh38.v1.3.update.tsv"],
"pattern": ".*\\.tsv$",
"validation": {},
"notes": "MANE transcripts reference file in TSV format"
},
"// VCF2PROT PARAMETERS": {},
"container_vcf2prot": {
"type": "string",
"description": "VCF2PROT container image",
"default": "vcf2prot:latest",
"required": true,
"pipeline_io": "parameter",
"var_name": "params.container_vcf2prot",
"examples": [
"vcf2prot:latest",
"vcf2prot:v1.0"
],
"pattern": ".*",
"validation": {},
"notes": "Docker container image for VCF2PROT module"
},
"// RNA2PROTEINEXPRESSION PARAMETERS": {},
"container_rna2protexpression": {
"type": "string",
"description": "RNA2PROTEINEXPRESSION container image",
"default": "rna2protexpression:latest",
"required": true,
"pipeline_io": "parameter",
"var_name": "params.container_rna2protexpression",
"examples": [
"rna2protexpression:latest",
"rna2protexpression:v1.0"
],
"pattern": ".*",
"validation": {},
"notes": "Docker container image for RNA2PROTEINEXPRESSION module"
},
"containerOptions_rna2protexpression": {
"type": "string",
"description": "Container options for RNA2PROTEINEXPRESSION",
"default": "--gpus all --rm -v /mnt:/mnt -v /dbs:/dbs",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.containerOptions_rna2protexpression",
"examples": [
"--gpus all --rm -v /mnt:/mnt -v /dbs:/dbs"
],
"pattern": ".*",
"validation": {},
"notes": "Docker container runtime options for RNA2PROTEINEXPRESSION with GPU and database volume mounts"
},
"// CORTO PARAMETERS": {},
"container_corto": {
"type": "string",
"description": "CORTO container image",
"default": "corto:latest",
"required": true,
"pipeline_io": "parameter",
"var_name": "params.container_corto",
"examples": [
"corto:latest",
"corto:v1.0"
],
"pattern": ".*",
"validation": {},
"notes": "Docker container image for CORTO module"
},
"regulon": {
"type": "file",
"description": "Path to regulon RDA file",
"default": "/Workspace/next/registry/pipelines/digital_patient/regulon.rda",
"default": "/omic/eureka/digital-patients/regulon.rda",
"required": true,
"pipeline_io": "input",
"var_name": "params.regulon",
"examples": [
"/Workspace/next/registry/pipelines/digital_patient/regulon.rda"
],
"examples": ["/omic/eureka/digital-patients/regulon.rda"],
"pattern": ".*\\.rda$",
"validation": {},
"notes": "Regulon data file in RDA format for CORTO module"
},
"// CIBERSORT PARAMETERS": {},
"signature_matrix": {
"type": "file",
"description": "Path to signature matrix file for CIBERSORTx",
"default": "/omic/eureka/digital-patients/LM22_sourceGEP_ensg.txt",
"required": true,
"pipeline_io": "input",
"var_name": "params.signature_matrix",
"examples": ["/omic/eureka/digital-patients/LM22_sourceGEP_ensg.txt"],
"pattern": ".*\\.txt$",
"notes": "Signature matrix file for CIBERSORTx analysis"
},
"outdir": {
"type": "folder",
"description": "Output directory for results",
"default": "/omic/eureka/digital-patients/output",
"required": true,
"pipeline_io": "output",
"var_name": "params.outdir",
"examples": ["/omic/eureka/digital-patients/output"],
"notes": "Directory where all pipeline results will be stored"
},
"cibersortx_username": {
"type": "string",
"description": "CIBERSORTx username for authentication",
@@ -257,11 +134,6 @@
"required": true,
"pipeline_io": "parameter",
"var_name": "params.cibersortx_username",
"examples": [
"user@example.com"
],
"pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
"validation": {},
"notes": "Username (email) for CIBERSORTx authentication"
},
"cibersortx_token": {
@@ -271,55 +143,55 @@
"required": true,
"pipeline_io": "parameter",
"var_name": "params.cibersortx_token",
"examples": [
"0123456789abcdef0123456789abcdef"
],
"pattern": "^[a-f0-9]{32}$",
"validation": {},
"notes": "Authentication token for CIBERSORTx API access"
},
"container_borzoi": {
"type": "string",
"description": "Borzoi container image",
"default": "harbor.cluster.omic.ai/omic/digital-patients/borzoi:latest",
"required": true,
"pipeline_io": "parameter",
"var_name": "params.container_borzoi"
},
"container_vcf2prot": {
"type": "string",
"description": "VCF2PROT container image",
"default": "harbor.cluster.omic.ai/omic/digital-patients/vcf2prot:latest",
"required": true,
"pipeline_io": "parameter",
"var_name": "params.container_vcf2prot"
},
"container_rna2protexpression": {
"type": "string",
"description": "RNA2PROTEINEXPRESSION container image",
"default": "harbor.cluster.omic.ai/omic/digital-patients/rna2protexpression:latest",
"required": true,
"pipeline_io": "parameter",
"var_name": "params.container_rna2protexpression"
},
"container_corto": {
"type": "string",
"description": "CORTO container image",
"default": "harbor.cluster.omic.ai/omic/digital-patients/corto:latest",
"required": true,
"pipeline_io": "parameter",
"var_name": "params.container_corto"
},
"container_ecotyper": {
"type": "string",
"description": "ECOTyper container image",
"default": "ecotyper:latest",
"description": "ECOTyper/CIBERSORTx container image",
"default": "harbor.cluster.omic.ai/omic/digital-patients/ecotyper:latest",
"required": true,
"pipeline_io": "parameter",
"var_name": "params.container_ecotyper",
"examples": [
"ecotyper:latest",
"ecotyper:v1.0"
],
"pattern": ".*",
"validation": {},
"notes": "Docker container image for ECOTyper module"
"var_name": "params.container_ecotyper"
},
"signature_matrix": {
"type": "file",
"description": "Path to signature matrix file",
"default": "/Workspace/next/registry/pipelines/digital_patient/LM22_sourceGEP_ensg.txt",
"container_synthea": {
"type": "string",
"description": "Synthea container image",
"default": "harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf",
"required": true,
"pipeline_io": "input",
"var_name": "params.signature_matrix",
"examples": [
"/Workspace/next/registry/pipelines/digital_patient/LM22_sourceGEP_ensg.txt"
],
"pattern": ".*\\.txt$",
"validation": {},
"notes": "Signature matrix file for CIBERSORTx analysis"
},
"ecotyper_outdir": {
"type": "folder",
"description": "Output directory for ECOTyper results",
"default": "/mnt/omic-next-apis/wes/digital-patients/ecotyper",
"required": true,
"pipeline_io": "output",
"var_name": "params.ecotyper_outdir",
"examples": [
"/mnt/OmicNAS/olamide/ecotyper/results/ecotyper"
],
"pattern": ".*",
"validation": {},
"notes": "Directory where ECOTyper results will be stored"
"pipeline_io": "parameter",
"var_name": "params.container_synthea"
}
}
}