Configure bioemu for WES deployment

- Update main.nf: use the Harbor container image and PVC mount paths; remove containerOptions and stageInMode
- Update nextflow.config: add the k8s and k8s_gpu profiles; reduce to a minimal config for WES injection
- Update params.json: correct the paths for the eureka PVC
- Update Dockerfile.wes: switch to a CUDA base image for GPU support
This commit is contained in:
2026-03-26 14:11:57 +01:00
parent ca4ceae21e
commit f776745722
4 changed files with 120 additions and 107 deletions

View File

@@ -1,15 +1,14 @@
FROM python:3.11-slim
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
wget git python3 python3-pip build-essential curl \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /opt/bioemu
# Install minimal system deps
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc g++ && rm -rf /var/lib/apt/lists/*
# Install bioemu with CPU-only PyTorch first (smaller)
# The k8s GPU nodes will have CUDA drivers available
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \
RUN python3 -m pip install --upgrade pip && \
pip install --no-cache-dir bioemu mdtraj scikit-learn pandas matplotlib seaborn
RUN mkdir -p /opt/bioemu/scripts/ /data /results && chmod -R 777 /data /results

31
main.nf
View File

@@ -1,23 +1,22 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
// Define parameters
params.complex_name = "hgh_mab1" // Default complex name
params.protein1_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/hgh.fasta"
params.protein2_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/mab1.fasta"
params.exp_dG = -13.1 // kcal/mol from experimental database
params.outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output"
params.cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache"
// Define parameters — PVC mount paths for k8s execution
params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
params.complex_name = "protein_complex"
params.exp_dG = -10.0 // kcal/mol (placeholder experimental value)
params.outdir = '/omic/eureka/bioemu/output'
params.cache_dir = '/tmp/bioemu_cache'
// Parameters for structure generation and analysis
params.num_samples = 20
params.num_samples = 10
params.batch_size = 5
params.temperature = 300
params.n_clusters = 5
process GENERATE_STRUCTURE {
container 'bioemu:latest'
containerOptions '--rm --gpus all -v /mnt:/mnt'
container 'harbor.cluster.omic.ai/omic/bioemu:latest'
publishDir "${params.outdir}/${params.complex_name}/${protein_id}", mode: 'copy'
input:
@@ -31,6 +30,9 @@ process GENERATE_STRUCTURE {
# Extract sequence from FASTA
SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\n')
# Create cache dir
mkdir -p ${params.cache_dir}
# Run BioEmu
python3 -m bioemu.sample \\
--sequence "\${SEQUENCE}" \\
@@ -46,8 +48,7 @@ process GENERATE_STRUCTURE {
}
process CALCULATE_BINDING {
container 'bioemu:latest'
containerOptions '--rm --gpus all -v /mnt:/mnt -v /data:/data'
container 'harbor.cluster.omic.ai/omic/bioemu:latest'
publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy'
input:
@@ -63,8 +64,8 @@ process CALCULATE_BINDING {
script:
"""
# Run binding energy calculation with the existing script
python3 /data/olamide/fresh-bioemu2/scripts/calculate_binding.py \\
# Run binding energy calculation
python3 /opt/bioemu/scripts/calculate_binding.py \\
--protein1_topology ${protein1_topology} \\
--protein1_samples ${protein1_samples} \\
--protein2_topology ${protein2_topology} \\
@@ -119,7 +120,7 @@ workflow {
.map { it -> tuple(it[1], it[2]) }
.first()
// Calculate binding energy (direct script reference)
// Calculate binding energy
CALCULATE_BINDING(
protein1_files[0], // protein1_topology.pdb
protein1_files[1], // protein1_samples.xtc

View File

@@ -1,38 +1,55 @@
// Manifest for Nextflow metadata
manifest {
name = 'BioEmu-Nextflow'
author = 'Generated from BioEmu repository'
homePage = 'https://github.com/microsoft/bioemu'
description = 'Nextflow pipeline for BioEmu - Biomolecular Emulator for protein structure sampling'
name = 'bioemu'
author = 'Olamide'
description = 'BioEmu - Biomolecular Emulator for protein structure sampling and binding energy analysis'
mainScript = 'main.nf'
version = '1.0.0'
}
// Global default parameters
params {
fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/villin_headpiece.fasta"
outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output"
cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache"
protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
complex_name = 'protein_complex'
exp_dG = -10.0
outdir = '/omic/eureka/bioemu/output'
cache_dir = '/tmp/bioemu_cache'
num_samples = 10
batch_size_100 = 10
batch_size = 5
temperature = 300
n_clusters = 5
}
// Container configurations
profiles {
standard {
docker {
enabled = true
runOptions = '--gpus all'
}
}
// Process configurations
k8s {
process {
cpus = 1
memory = '8 GB'
executor = 'k8s'
}
docker {
enabled = true
}
}
// Execution configurations
executor {
$local {
cpus = 4
k8s_gpu {
process {
executor = 'k8s'
pod = [[nodeSelector: 'nvidia.com/gpu.present=true']]
accelerator = [request: 1, type: 'nvidia.com/gpu']
}
docker {
enabled = true
}
}
}
process {
container = 'harbor.cluster.omic.ai/omic/bioemu:latest'
cpus = 2
memory = '8 GB'
}
}

View File

@@ -1,78 +1,81 @@
{
"params": {
"fasta_list": {
"type": "file[]",
"description": "FASTA files containing protein sequences",
"default": [],
"protein1_fasta": {
"type": "file",
"description": "FASTA file for protein 1",
"default": "/omic/eureka/bioemu/input/trp_cage.fasta",
"required": true,
"pipeline_io": "input",
"var_name": "params.fasta_list",
"var_name": "params.protein1_fasta",
"examples": [
["/omic/olamide/examples/prot1.fasta", "/omic/olamide/examples/prot2.fasta"]
"/omic/eureka/bioemu/input/trp_cage.fasta"
],
"pattern": ".*\\.fasta$",
"validation": {},
"notes": "Select one or more FASTA files with protein sequences"
"notes": "FASTA file containing the first protein sequence"
},
"protein2_fasta": {
"type": "file",
"description": "FASTA file for protein 2",
"default": "/omic/eureka/bioemu/input/villin_headpiece.fasta",
"required": true,
"pipeline_io": "input",
"var_name": "params.protein2_fasta",
"examples": [
"/omic/eureka/bioemu/input/villin_headpiece.fasta"
],
"pattern": ".*\\.fasta$",
"validation": {},
"notes": "FASTA file containing the second protein sequence"
},
"outdir": {
"type": "folder",
"description": "Output Directory",
"default": "/omic/olamide/output",
"description": "Output directory for results",
"default": "/omic/eureka/bioemu/output",
"required": true,
"pipeline_io": "output",
"var_name": "params.outdir",
"examples": [
"/omic/olamide/output"
"/omic/eureka/bioemu/output"
],
"pattern": ".*",
"validation": {},
"notes": "Select where to save your analysis results"
"notes": "Directory where prediction results will be stored"
},
"complex_name": {
"type": "string",
"description": "Name for this protein complex",
"default": "protein_complex",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.complex_name",
"examples": [
"protein_complex"
]
},
"num_samples": {
"type": "integer",
"description": "Number of protein structure samples",
"default": 10,
"required": true,
"pipeline_io": "parameter",
"var_name": "params.num_samples",
"examples": [
"10"
],
"pattern": "^\\d+$",
"validation": {
"min": 1
},
"notes": "More samples provide better coverage of conformational space"
},
"batch_size_100": {
"type": "integer",
"description": "Batch size parameter",
"default": 10,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.batch_size_100",
"hidden": true,
"examples": [
"10"
],
"pattern": "^\\d+$"
"var_name": "params.num_samples",
"examples": ["10"],
"pattern": "^\\d+$",
"validation": {"min": 1},
"notes": "More samples provide better coverage of conformational space"
},
"temperature": {
"type": "integer",
"description": "Temperature (K) for free energy",
"description": "Temperature (K) for free energy calculations",
"default": 300,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.temperature",
"examples": [
"300"
],
"examples": ["300"],
"pattern": "^\\d+$",
"validation": {
"min": 200,
"max": 500
},
"notes": "Temperature in Kelvin for free energy calculations"
"validation": {"min": 200, "max": 500},
"notes": "Temperature in Kelvin"
},
"n_clusters": {
"type": "integer",
@@ -81,26 +84,19 @@
"required": false,
"pipeline_io": "parameter",
"var_name": "params.n_clusters",
"examples": [
"5"
],
"examples": ["5"],
"pattern": "^\\d+$",
"validation": {
"min": 2
"validation": {"min": 2}
},
"notes": "Number of clusters for free energy analysis"
},
"cache_dir": {
"type": "folder",
"description": "Embeddings cache directory",
"default": "/tmp/bioemu_cache",
"exp_dG": {
"type": "number",
"description": "Experimental binding free energy (kcal/mol)",
"default": -10.0,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.cache_dir",
"hidden": true,
"examples": [
"/tmp/bioemu_cache"
]
"var_name": "params.exp_dG",
"examples": ["-10.0", "-13.1"],
"notes": "Reference experimental value for comparison"
}
}
}