From f776745722c2c392f2522d19d9eb38fa9b1dd03a Mon Sep 17 00:00:00 2001 From: Olamide Isreal Date: Thu, 26 Mar 2026 14:11:57 +0100 Subject: [PATCH] Configure bioemu for WES deployment - Update main.nf: Harbor container image, PVC mount paths, remove containerOptions and stageInMode - Update nextflow.config: Add k8s/k8s_gpu profiles, minimal config for WES injection - Update params.json: Correct paths for eureka PVC - Update Dockerfile.wes: CUDA base image for GPU support --- Dockerfile.wes | 17 ++++---- main.nf | 31 +++++++------- nextflow.config | 67 ++++++++++++++++++----------- params.json | 112 +++++++++++++++++++++++------------------------- 4 files changed, 120 insertions(+), 107 deletions(-) diff --git a/Dockerfile.wes b/Dockerfile.wes index 58f85de..be21812 100644 --- a/Dockerfile.wes +++ b/Dockerfile.wes @@ -1,15 +1,14 @@ -FROM python:3.11-slim +FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y \ + wget git python3 python3-pip build-essential curl \ + && rm -rf /var/lib/apt/lists/* WORKDIR /opt/bioemu -# Install minimal system deps -RUN apt-get update && apt-get install -y --no-install-recommends \ - gcc g++ && rm -rf /var/lib/apt/lists/* - -# Install bioemu with CPU-only PyTorch first (smaller) -# The k8s GPU nodes will have CUDA drivers available -RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \ +RUN python3 -m pip install --upgrade pip && \ pip install --no-cache-dir bioemu mdtraj scikit-learn pandas matplotlib seaborn RUN mkdir -p /opt/bioemu/scripts/ /data /results && chmod -R 777 /data /results diff --git a/main.nf b/main.nf index 142bcb3..fd85c26 100644 --- a/main.nf +++ b/main.nf @@ -1,23 +1,22 @@ #!/usr/bin/env nextflow nextflow.enable.dsl=2 -// Define parameters -params.complex_name = "hgh_mab1" // Default complex name -params.protein1_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/hgh.fasta" -params.protein2_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/mab1.fasta" -params.exp_dG = -13.1 // kcal/mol from experimental database -params.outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output" -params.cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache" +// Define parameters — PVC mount paths for k8s execution +params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta' +params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta' +params.complex_name = "protein_complex" +params.exp_dG = -10.0 // kcal/mol (placeholder experimental value) +params.outdir = '/omic/eureka/bioemu/output' +params.cache_dir = '/tmp/bioemu_cache' // Parameters for structure generation and analysis -params.num_samples = 20 +params.num_samples = 10 params.batch_size = 5 params.temperature = 300 params.n_clusters = 5 process GENERATE_STRUCTURE { - container 'bioemu:latest' - containerOptions '--rm --gpus all -v /mnt:/mnt' + container 'harbor.cluster.omic.ai/omic/bioemu:latest' publishDir "${params.outdir}/${params.complex_name}/${protein_id}", mode: 'copy' input: @@ -31,6 +30,9 @@ process GENERATE_STRUCTURE { # Extract sequence from FASTA SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\n') + # Create cache dir + mkdir -p ${params.cache_dir} + # Run BioEmu python3 -m bioemu.sample \\ --sequence "\${SEQUENCE}" \\ @@ -46,8 +48,7 @@ process GENERATE_STRUCTURE { } process CALCULATE_BINDING { - container 'bioemu:latest' - containerOptions '--rm --gpus all -v /mnt:/mnt -v /data:/data' + container 'harbor.cluster.omic.ai/omic/bioemu:latest' publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy' input: @@ -63,8 +64,8 @@ process CALCULATE_BINDING { script: """ - # Run binding energy calculation with the existing script - python3 /data/olamide/fresh-bioemu2/scripts/calculate_binding.py \\ + # Run binding energy calculation + python3 /opt/bioemu/scripts/calculate_binding.py \\ --protein1_topology ${protein1_topology} \\ --protein1_samples ${protein1_samples} \\ --protein2_topology ${protein2_topology} \\ @@ -119,7 +120,7 @@ workflow { .map { it -> tuple(it[1], it[2]) } .first() - // Calculate binding energy (direct script reference) + // Calculate binding energy CALCULATE_BINDING( protein1_files[0], // protein1_topology.pdb protein1_files[1], // protein1_samples.xtc diff --git a/nextflow.config b/nextflow.config index 7c45752..ab3a7ab 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,38 +1,55 @@ -// Manifest for Nextflow metadata manifest { - name = 'BioEmu-Nextflow' - author = 'Generated from BioEmu repository' - homePage = 'https://github.com/microsoft/bioemu' - description = 'Nextflow pipeline for BioEmu - Biomolecular Emulator for protein structure sampling' + name = 'bioemu' + author = 'Olamide' + description = 'BioEmu - Biomolecular Emulator for protein structure sampling and binding energy analysis' mainScript = 'main.nf' version = '1.0.0' } -// Global default parameters params { - fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/villin_headpiece.fasta" - outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output" - cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache" + protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta' + protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta' + complex_name = 'protein_complex' + exp_dG = -10.0 + outdir = '/omic/eureka/bioemu/output' + cache_dir = '/tmp/bioemu_cache' num_samples = 10 - batch_size_100 = 10 + batch_size = 5 + temperature = 300 + n_clusters = 5 } -// Container configurations -docker { - enabled = true - runOptions = '--gpus all' -} +profiles { + standard { + docker { + enabled = true + runOptions = '--gpus all' + } + } -// Process configurations -process { - cpus = 1 - memory = '8 GB' -} + k8s { + process { + executor = 'k8s' + } + docker { + enabled = true + } + } -// Execution configurations -executor { - $local { - cpus = 4 - memory = '8 GB' + k8s_gpu { + process { + executor = 'k8s' + pod = [[nodeSelector: 'nvidia.com/gpu.present=true']] + accelerator = [request: 1, type: 'nvidia.com/gpu'] + } + docker { + enabled = true + } } } + +process { + container = 'harbor.cluster.omic.ai/omic/bioemu:latest' + cpus = 2 + memory = '8 GB' +} diff --git a/params.json b/params.json index 65877f2..030ee7b 100644 --- a/params.json +++ b/params.json @@ -1,78 +1,81 @@ { "params": { - "fasta_list": { - "type": "file[]", - "description": "FASTA files containing protein sequences", - "default": [], + "protein1_fasta": { + "type": "file", + "description": "FASTA file for protein 1", + "default": "/omic/eureka/bioemu/input/trp_cage.fasta", "required": true, "pipeline_io": "input", - "var_name": "params.fasta_list", + "var_name": "params.protein1_fasta", "examples": [ - ["/omic/olamide/examples/prot1.fasta", "/omic/olamide/examples/prot2.fasta"] + "/omic/eureka/bioemu/input/trp_cage.fasta" ], "pattern": ".*\\.fasta$", "validation": {}, - "notes": "Select one or more FASTA files with protein sequences" + "notes": "FASTA file containing the first protein sequence" + }, + "protein2_fasta": { + "type": "file", + "description": "FASTA file for protein 2", + "default": "/omic/eureka/bioemu/input/villin_headpiece.fasta", + "required": true, + "pipeline_io": "input", + "var_name": "params.protein2_fasta", + "examples": [ + "/omic/eureka/bioemu/input/villin_headpiece.fasta" + ], + "pattern": ".*\\.fasta$", + "validation": {}, + "notes": "FASTA file containing the second protein sequence" }, "outdir": { "type": "folder", - "description": "Output Directory", - "default": "/omic/olamide/output", + "description": "Output directory for results", + "default": "/omic/eureka/bioemu/output", "required": true, "pipeline_io": "output", "var_name": "params.outdir", "examples": [ - "/omic/olamide/output" + "/omic/eureka/bioemu/output" ], "pattern": ".*", "validation": {}, - "notes": "Select where to save your analysis results" + "notes": "Directory where prediction results will be stored" + }, + "complex_name": { + "type": "string", + "description": "Name for this protein complex", + "default": "protein_complex", + "required": false, + "pipeline_io": "parameter", + "var_name": "params.complex_name", + "examples": [ + "protein_complex" + ] }, "num_samples": { "type": "integer", "description": "Number of protein structure samples", "default": 10, - "required": true, - "pipeline_io": "parameter", - "var_name": "params.num_samples", - "examples": [ - "10" - ], - "pattern": "^\\d+$", - "validation": { - "min": 1 - }, - "notes": "More samples provide better coverage of conformational space" - }, - "batch_size_100": { - "type": "integer", - "description": "Batch size parameter", - "default": 10, "required": false, "pipeline_io": "parameter", - "var_name": "params.batch_size_100", - "hidden": true, - "examples": [ - "10" - ], - "pattern": "^\\d+$" + "var_name": "params.num_samples", + "examples": ["10"], + "pattern": "^\\d+$", + "validation": {"min": 1}, + "notes": "More samples provide better coverage of conformational space" }, "temperature": { "type": "integer", - "description": "Temperature (K) for free energy", + "description": "Temperature (K) for free energy calculations", "default": 300, "required": false, "pipeline_io": "parameter", "var_name": "params.temperature", - "examples": [ - "300" - ], + "examples": ["300"], "pattern": "^\\d+$", - "validation": { - "min": 200, - "max": 500 - }, - "notes": "Temperature in Kelvin for free energy calculations" + "validation": {"min": 200, "max": 500}, + "notes": "Temperature in Kelvin" }, "n_clusters": { "type": "integer", @@ -81,26 +84,19 @@ "required": false, "pipeline_io": "parameter", "var_name": "params.n_clusters", - "examples": [ - "5" - ], + "examples": ["5"], "pattern": "^\\d+$", - "validation": { - "min": 2 - }, - "notes": "Number of clusters for free energy analysis" + "validation": {"min": 2} }, - "cache_dir": { - "type": "folder", - "description": "Embeddings cache directory", - "default": "/tmp/bioemu_cache", + "exp_dG": { + "type": "number", + "description": "Experimental binding free energy (kcal/mol)", + "default": -10.0, "required": false, "pipeline_io": "parameter", - "var_name": "params.cache_dir", - "hidden": true, - "examples": [ - "/tmp/bioemu_cache" - ] + "var_name": "params.exp_dG", + "examples": ["-10.0", "-13.1"], + "notes": "Reference experimental value for comparison" } } }