Configure bioemu for WES deployment

- Update main.nf: use the Harbor container image and PVC mount paths; remove containerOptions and stageInMode
- Update nextflow.config: Add k8s/k8s_gpu profiles, minimal config for WES injection
- Update params.json: Correct paths for eureka PVC
- Update Dockerfile.wes: switch to a CUDA base image for GPU support
This commit is contained in:
2026-03-26 14:11:57 +01:00
parent ca4ceae21e
commit f776745722
4 changed files with 120 additions and 107 deletions

View File

@@ -1,15 +1,14 @@
FROM python:3.11-slim FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
wget git python3 python3-pip build-essential curl \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /opt/bioemu WORKDIR /opt/bioemu
# Install minimal system deps RUN python3 -m pip install --upgrade pip && \
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc g++ && rm -rf /var/lib/apt/lists/*
# Install bioemu with CPU-only PyTorch first (smaller)
# The k8s GPU nodes will have CUDA drivers available
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \
pip install --no-cache-dir bioemu mdtraj scikit-learn pandas matplotlib seaborn pip install --no-cache-dir bioemu mdtraj scikit-learn pandas matplotlib seaborn
RUN mkdir -p /opt/bioemu/scripts/ /data /results && chmod -R 777 /data /results RUN mkdir -p /opt/bioemu/scripts/ /data /results && chmod -R 777 /data /results

31
main.nf
View File

@@ -1,23 +1,22 @@
#!/usr/bin/env nextflow #!/usr/bin/env nextflow
nextflow.enable.dsl=2 nextflow.enable.dsl=2
// Define parameters // Define parameters — PVC mount paths for k8s execution
params.complex_name = "hgh_mab1" // Default complex name params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
params.protein1_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/hgh.fasta" params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
params.protein2_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/mab1.fasta" params.complex_name = "protein_complex"
params.exp_dG = -13.1 // kcal/mol from experimental database params.exp_dG = -10.0 // kcal/mol (placeholder experimental value)
params.outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output" params.outdir = '/omic/eureka/bioemu/output'
params.cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache" params.cache_dir = '/tmp/bioemu_cache'
// Parameters for structure generation and analysis // Parameters for structure generation and analysis
params.num_samples = 20 params.num_samples = 10
params.batch_size = 5 params.batch_size = 5
params.temperature = 300 params.temperature = 300
params.n_clusters = 5 params.n_clusters = 5
process GENERATE_STRUCTURE { process GENERATE_STRUCTURE {
container 'bioemu:latest' container 'harbor.cluster.omic.ai/omic/bioemu:latest'
containerOptions '--rm --gpus all -v /mnt:/mnt'
publishDir "${params.outdir}/${params.complex_name}/${protein_id}", mode: 'copy' publishDir "${params.outdir}/${params.complex_name}/${protein_id}", mode: 'copy'
input: input:
@@ -31,6 +30,9 @@ process GENERATE_STRUCTURE {
# Extract sequence from FASTA # Extract sequence from FASTA
SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\n') SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\n')
# Create cache dir
mkdir -p ${params.cache_dir}
# Run BioEmu # Run BioEmu
python3 -m bioemu.sample \\ python3 -m bioemu.sample \\
--sequence "\${SEQUENCE}" \\ --sequence "\${SEQUENCE}" \\
@@ -46,8 +48,7 @@ process GENERATE_STRUCTURE {
} }
process CALCULATE_BINDING { process CALCULATE_BINDING {
container 'bioemu:latest' container 'harbor.cluster.omic.ai/omic/bioemu:latest'
containerOptions '--rm --gpus all -v /mnt:/mnt -v /data:/data'
publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy' publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy'
input: input:
@@ -63,8 +64,8 @@ process CALCULATE_BINDING {
script: script:
""" """
# Run binding energy calculation with the existing script # Run binding energy calculation
python3 /data/olamide/fresh-bioemu2/scripts/calculate_binding.py \\ python3 /opt/bioemu/scripts/calculate_binding.py \\
--protein1_topology ${protein1_topology} \\ --protein1_topology ${protein1_topology} \\
--protein1_samples ${protein1_samples} \\ --protein1_samples ${protein1_samples} \\
--protein2_topology ${protein2_topology} \\ --protein2_topology ${protein2_topology} \\
@@ -119,7 +120,7 @@ workflow {
.map { it -> tuple(it[1], it[2]) } .map { it -> tuple(it[1], it[2]) }
.first() .first()
// Calculate binding energy (direct script reference) // Calculate binding energy
CALCULATE_BINDING( CALCULATE_BINDING(
protein1_files[0], // protein1_topology.pdb protein1_files[0], // protein1_topology.pdb
protein1_files[1], // protein1_samples.xtc protein1_files[1], // protein1_samples.xtc

View File

@@ -1,38 +1,55 @@
// Manifest for Nextflow metadata
manifest { manifest {
name = 'BioEmu-Nextflow' name = 'bioemu'
author = 'Generated from BioEmu repository' author = 'Olamide'
homePage = 'https://github.com/microsoft/bioemu' description = 'BioEmu - Biomolecular Emulator for protein structure sampling and binding energy analysis'
description = 'Nextflow pipeline for BioEmu - Biomolecular Emulator for protein structure sampling'
mainScript = 'main.nf' mainScript = 'main.nf'
version = '1.0.0' version = '1.0.0'
} }
// Global default parameters
params { params {
fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/villin_headpiece.fasta" protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output" protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache" complex_name = 'protein_complex'
exp_dG = -10.0
outdir = '/omic/eureka/bioemu/output'
cache_dir = '/tmp/bioemu_cache'
num_samples = 10 num_samples = 10
batch_size_100 = 10 batch_size = 5
temperature = 300
n_clusters = 5
} }
// Container configurations profiles {
standard {
docker { docker {
enabled = true enabled = true
runOptions = '--gpus all' runOptions = '--gpus all'
} }
}
// Process configurations k8s {
process { process {
cpus = 1 executor = 'k8s'
memory = '8 GB' }
docker {
enabled = true
}
} }
// Execution configurations k8s_gpu {
executor { process {
$local { executor = 'k8s'
cpus = 4 pod = [[nodeSelector: 'nvidia.com/gpu.present=true']]
accelerator = [request: 1, type: 'nvidia.com/gpu']
}
docker {
enabled = true
}
}
}
process {
container = 'harbor.cluster.omic.ai/omic/bioemu:latest'
cpus = 2
memory = '8 GB' memory = '8 GB'
} }
}

View File

@@ -1,78 +1,81 @@
{ {
"params": { "params": {
"fasta_list": { "protein1_fasta": {
"type": "file[]", "type": "file",
"description": "FASTA files containing protein sequences", "description": "FASTA file for protein 1",
"default": [], "default": "/omic/eureka/bioemu/input/trp_cage.fasta",
"required": true, "required": true,
"pipeline_io": "input", "pipeline_io": "input",
"var_name": "params.fasta_list", "var_name": "params.protein1_fasta",
"examples": [ "examples": [
["/omic/olamide/examples/prot1.fasta", "/omic/olamide/examples/prot2.fasta"] "/omic/eureka/bioemu/input/trp_cage.fasta"
], ],
"pattern": ".*\\.fasta$", "pattern": ".*\\.fasta$",
"validation": {}, "validation": {},
"notes": "Select one or more FASTA files with protein sequences" "notes": "FASTA file containing the first protein sequence"
},
"protein2_fasta": {
"type": "file",
"description": "FASTA file for protein 2",
"default": "/omic/eureka/bioemu/input/villin_headpiece.fasta",
"required": true,
"pipeline_io": "input",
"var_name": "params.protein2_fasta",
"examples": [
"/omic/eureka/bioemu/input/villin_headpiece.fasta"
],
"pattern": ".*\\.fasta$",
"validation": {},
"notes": "FASTA file containing the second protein sequence"
}, },
"outdir": { "outdir": {
"type": "folder", "type": "folder",
"description": "Output Directory", "description": "Output directory for results",
"default": "/omic/olamide/output", "default": "/omic/eureka/bioemu/output",
"required": true, "required": true,
"pipeline_io": "output", "pipeline_io": "output",
"var_name": "params.outdir", "var_name": "params.outdir",
"examples": [ "examples": [
"/omic/olamide/output" "/omic/eureka/bioemu/output"
], ],
"pattern": ".*", "pattern": ".*",
"validation": {}, "validation": {},
"notes": "Select where to save your analysis results" "notes": "Directory where prediction results will be stored"
},
"complex_name": {
"type": "string",
"description": "Name for this protein complex",
"default": "protein_complex",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.complex_name",
"examples": [
"protein_complex"
]
}, },
"num_samples": { "num_samples": {
"type": "integer", "type": "integer",
"description": "Number of protein structure samples", "description": "Number of protein structure samples",
"default": 10, "default": 10,
"required": true,
"pipeline_io": "parameter",
"var_name": "params.num_samples",
"examples": [
"10"
],
"pattern": "^\\d+$",
"validation": {
"min": 1
},
"notes": "More samples provide better coverage of conformational space"
},
"batch_size_100": {
"type": "integer",
"description": "Batch size parameter",
"default": 10,
"required": false, "required": false,
"pipeline_io": "parameter", "pipeline_io": "parameter",
"var_name": "params.batch_size_100", "var_name": "params.num_samples",
"hidden": true, "examples": ["10"],
"examples": [ "pattern": "^\\d+$",
"10" "validation": {"min": 1},
], "notes": "More samples provide better coverage of conformational space"
"pattern": "^\\d+$"
}, },
"temperature": { "temperature": {
"type": "integer", "type": "integer",
"description": "Temperature (K) for free energy", "description": "Temperature (K) for free energy calculations",
"default": 300, "default": 300,
"required": false, "required": false,
"pipeline_io": "parameter", "pipeline_io": "parameter",
"var_name": "params.temperature", "var_name": "params.temperature",
"examples": [ "examples": ["300"],
"300"
],
"pattern": "^\\d+$", "pattern": "^\\d+$",
"validation": { "validation": {"min": 200, "max": 500},
"min": 200, "notes": "Temperature in Kelvin"
"max": 500
},
"notes": "Temperature in Kelvin for free energy calculations"
}, },
"n_clusters": { "n_clusters": {
"type": "integer", "type": "integer",
@@ -81,26 +84,19 @@
"required": false, "required": false,
"pipeline_io": "parameter", "pipeline_io": "parameter",
"var_name": "params.n_clusters", "var_name": "params.n_clusters",
"examples": [ "examples": ["5"],
"5"
],
"pattern": "^\\d+$", "pattern": "^\\d+$",
"validation": { "validation": {"min": 2}
"min": 2
}, },
"notes": "Number of clusters for free energy analysis" "exp_dG": {
}, "type": "number",
"cache_dir": { "description": "Experimental binding free energy (kcal/mol)",
"type": "folder", "default": -10.0,
"description": "Embeddings cache directory",
"default": "/tmp/bioemu_cache",
"required": false, "required": false,
"pipeline_io": "parameter", "pipeline_io": "parameter",
"var_name": "params.cache_dir", "var_name": "params.exp_dG",
"hidden": true, "examples": ["-10.0", "-13.1"],
"examples": [ "notes": "Reference experimental value for comparison"
"/tmp/bioemu_cache"
]
} }
} }
} }