Configure bioemu for WES deployment
- Update main.nf: Harbor container image, PVC mount paths, remove containerOptions and stageInMode
- Update nextflow.config: Add k8s/k8s_gpu profiles, minimal config for WES injection
- Update params.json: Correct paths for eureka PVC
- Update Dockerfile.wes: CUDA base image for GPU support
This commit is contained in:
@@ -1,15 +1,14 @@
|
|||||||
FROM python:3.11-slim
|
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04
|
||||||
|
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
wget git python3 python3-pip build-essential curl \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
WORKDIR /opt/bioemu
|
WORKDIR /opt/bioemu
|
||||||
|
|
||||||
# Install minimal system deps
|
RUN python3 -m pip install --upgrade pip && \
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
gcc g++ && rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install bioemu with CPU-only PyTorch first (smaller)
|
|
||||||
# The k8s GPU nodes will have CUDA drivers available
|
|
||||||
RUN pip install --no-cache-dir --upgrade pip && \
|
|
||||||
pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \
|
|
||||||
pip install --no-cache-dir bioemu mdtraj scikit-learn pandas matplotlib seaborn
|
pip install --no-cache-dir bioemu mdtraj scikit-learn pandas matplotlib seaborn
|
||||||
|
|
||||||
RUN mkdir -p /opt/bioemu/scripts/ /data /results && chmod -R 777 /data /results
|
RUN mkdir -p /opt/bioemu/scripts/ /data /results && chmod -R 777 /data /results
|
||||||
|
|||||||
31
main.nf
31
main.nf
@@ -1,23 +1,22 @@
|
|||||||
#!/usr/bin/env nextflow
|
#!/usr/bin/env nextflow
|
||||||
nextflow.enable.dsl=2
|
nextflow.enable.dsl=2
|
||||||
|
|
||||||
// Define parameters
|
// Define parameters — PVC mount paths for k8s execution
|
||||||
params.complex_name = "hgh_mab1" // Default complex name
|
params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
|
||||||
params.protein1_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/hgh.fasta"
|
params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
|
||||||
params.protein2_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/mab1.fasta"
|
params.complex_name = "protein_complex"
|
||||||
params.exp_dG = -13.1 // kcal/mol from experimental database
|
params.exp_dG = -10.0 // kcal/mol (placeholder experimental value)
|
||||||
params.outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output"
|
params.outdir = '/omic/eureka/bioemu/output'
|
||||||
params.cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache"
|
params.cache_dir = '/tmp/bioemu_cache'
|
||||||
|
|
||||||
// Parameters for structure generation and analysis
|
// Parameters for structure generation and analysis
|
||||||
params.num_samples = 20
|
params.num_samples = 10
|
||||||
params.batch_size = 5
|
params.batch_size = 5
|
||||||
params.temperature = 300
|
params.temperature = 300
|
||||||
params.n_clusters = 5
|
params.n_clusters = 5
|
||||||
|
|
||||||
process GENERATE_STRUCTURE {
|
process GENERATE_STRUCTURE {
|
||||||
container 'bioemu:latest'
|
container 'harbor.cluster.omic.ai/omic/bioemu:latest'
|
||||||
containerOptions '--rm --gpus all -v /mnt:/mnt'
|
|
||||||
publishDir "${params.outdir}/${params.complex_name}/${protein_id}", mode: 'copy'
|
publishDir "${params.outdir}/${params.complex_name}/${protein_id}", mode: 'copy'
|
||||||
|
|
||||||
input:
|
input:
|
||||||
@@ -31,6 +30,9 @@ process GENERATE_STRUCTURE {
|
|||||||
# Extract sequence from FASTA
|
# Extract sequence from FASTA
|
||||||
SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\n')
|
SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\n')
|
||||||
|
|
||||||
|
# Create cache dir
|
||||||
|
mkdir -p ${params.cache_dir}
|
||||||
|
|
||||||
# Run BioEmu
|
# Run BioEmu
|
||||||
python3 -m bioemu.sample \\
|
python3 -m bioemu.sample \\
|
||||||
--sequence "\${SEQUENCE}" \\
|
--sequence "\${SEQUENCE}" \\
|
||||||
@@ -46,8 +48,7 @@ process GENERATE_STRUCTURE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
process CALCULATE_BINDING {
|
process CALCULATE_BINDING {
|
||||||
container 'bioemu:latest'
|
container 'harbor.cluster.omic.ai/omic/bioemu:latest'
|
||||||
containerOptions '--rm --gpus all -v /mnt:/mnt -v /data:/data'
|
|
||||||
publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy'
|
publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy'
|
||||||
|
|
||||||
input:
|
input:
|
||||||
@@ -63,8 +64,8 @@ process CALCULATE_BINDING {
|
|||||||
|
|
||||||
script:
|
script:
|
||||||
"""
|
"""
|
||||||
# Run binding energy calculation with the existing script
|
# Run binding energy calculation
|
||||||
python3 /data/olamide/fresh-bioemu2/scripts/calculate_binding.py \\
|
python3 /opt/bioemu/scripts/calculate_binding.py \\
|
||||||
--protein1_topology ${protein1_topology} \\
|
--protein1_topology ${protein1_topology} \\
|
||||||
--protein1_samples ${protein1_samples} \\
|
--protein1_samples ${protein1_samples} \\
|
||||||
--protein2_topology ${protein2_topology} \\
|
--protein2_topology ${protein2_topology} \\
|
||||||
@@ -119,7 +120,7 @@ workflow {
|
|||||||
.map { it -> tuple(it[1], it[2]) }
|
.map { it -> tuple(it[1], it[2]) }
|
||||||
.first()
|
.first()
|
||||||
|
|
||||||
// Calculate binding energy (direct script reference)
|
// Calculate binding energy
|
||||||
CALCULATE_BINDING(
|
CALCULATE_BINDING(
|
||||||
protein1_files[0], // protein1_topology.pdb
|
protein1_files[0], // protein1_topology.pdb
|
||||||
protein1_files[1], // protein1_samples.xtc
|
protein1_files[1], // protein1_samples.xtc
|
||||||
|
|||||||
@@ -1,38 +1,55 @@
|
|||||||
// Manifest for Nextflow metadata
|
|
||||||
manifest {
|
manifest {
|
||||||
name = 'BioEmu-Nextflow'
|
name = 'bioemu'
|
||||||
author = 'Generated from BioEmu repository'
|
author = 'Olamide'
|
||||||
homePage = 'https://github.com/microsoft/bioemu'
|
description = 'BioEmu - Biomolecular Emulator for protein structure sampling and binding energy analysis'
|
||||||
description = 'Nextflow pipeline for BioEmu - Biomolecular Emulator for protein structure sampling'
|
|
||||||
mainScript = 'main.nf'
|
mainScript = 'main.nf'
|
||||||
version = '1.0.0'
|
version = '1.0.0'
|
||||||
}
|
}
|
||||||
|
|
||||||
// Global default parameters
|
|
||||||
params {
|
params {
|
||||||
fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/villin_headpiece.fasta"
|
protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
|
||||||
outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output"
|
protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
|
||||||
cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache"
|
complex_name = 'protein_complex'
|
||||||
|
exp_dG = -10.0
|
||||||
|
outdir = '/omic/eureka/bioemu/output'
|
||||||
|
cache_dir = '/tmp/bioemu_cache'
|
||||||
num_samples = 10
|
num_samples = 10
|
||||||
batch_size_100 = 10
|
batch_size = 5
|
||||||
|
temperature = 300
|
||||||
|
n_clusters = 5
|
||||||
}
|
}
|
||||||
|
|
||||||
// Container configurations
|
profiles {
|
||||||
|
standard {
|
||||||
docker {
|
docker {
|
||||||
enabled = true
|
enabled = true
|
||||||
runOptions = '--gpus all'
|
runOptions = '--gpus all'
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Process configurations
|
k8s {
|
||||||
process {
|
process {
|
||||||
cpus = 1
|
executor = 'k8s'
|
||||||
memory = '8 GB'
|
}
|
||||||
|
docker {
|
||||||
|
enabled = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execution configurations
|
k8s_gpu {
|
||||||
executor {
|
process {
|
||||||
$local {
|
executor = 'k8s'
|
||||||
cpus = 4
|
pod = [[nodeSelector: 'nvidia.com/gpu.present=true']]
|
||||||
|
accelerator = [request: 1, type: 'nvidia.com/gpu']
|
||||||
|
}
|
||||||
|
docker {
|
||||||
|
enabled = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
process {
|
||||||
|
container = 'harbor.cluster.omic.ai/omic/bioemu:latest'
|
||||||
|
cpus = 2
|
||||||
memory = '8 GB'
|
memory = '8 GB'
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|||||||
112
params.json
112
params.json
@@ -1,78 +1,81 @@
|
|||||||
{
|
{
|
||||||
"params": {
|
"params": {
|
||||||
"fasta_list": {
|
"protein1_fasta": {
|
||||||
"type": "file[]",
|
"type": "file",
|
||||||
"description": "FASTA files containing protein sequences",
|
"description": "FASTA file for protein 1",
|
||||||
"default": [],
|
"default": "/omic/eureka/bioemu/input/trp_cage.fasta",
|
||||||
"required": true,
|
"required": true,
|
||||||
"pipeline_io": "input",
|
"pipeline_io": "input",
|
||||||
"var_name": "params.fasta_list",
|
"var_name": "params.protein1_fasta",
|
||||||
"examples": [
|
"examples": [
|
||||||
["/omic/olamide/examples/prot1.fasta", "/omic/olamide/examples/prot2.fasta"]
|
"/omic/eureka/bioemu/input/trp_cage.fasta"
|
||||||
],
|
],
|
||||||
"pattern": ".*\\.fasta$",
|
"pattern": ".*\\.fasta$",
|
||||||
"validation": {},
|
"validation": {},
|
||||||
"notes": "Select one or more FASTA files with protein sequences"
|
"notes": "FASTA file containing the first protein sequence"
|
||||||
|
},
|
||||||
|
"protein2_fasta": {
|
||||||
|
"type": "file",
|
||||||
|
"description": "FASTA file for protein 2",
|
||||||
|
"default": "/omic/eureka/bioemu/input/villin_headpiece.fasta",
|
||||||
|
"required": true,
|
||||||
|
"pipeline_io": "input",
|
||||||
|
"var_name": "params.protein2_fasta",
|
||||||
|
"examples": [
|
||||||
|
"/omic/eureka/bioemu/input/villin_headpiece.fasta"
|
||||||
|
],
|
||||||
|
"pattern": ".*\\.fasta$",
|
||||||
|
"validation": {},
|
||||||
|
"notes": "FASTA file containing the second protein sequence"
|
||||||
},
|
},
|
||||||
"outdir": {
|
"outdir": {
|
||||||
"type": "folder",
|
"type": "folder",
|
||||||
"description": "Output Directory",
|
"description": "Output directory for results",
|
||||||
"default": "/omic/olamide/output",
|
"default": "/omic/eureka/bioemu/output",
|
||||||
"required": true,
|
"required": true,
|
||||||
"pipeline_io": "output",
|
"pipeline_io": "output",
|
||||||
"var_name": "params.outdir",
|
"var_name": "params.outdir",
|
||||||
"examples": [
|
"examples": [
|
||||||
"/omic/olamide/output"
|
"/omic/eureka/bioemu/output"
|
||||||
],
|
],
|
||||||
"pattern": ".*",
|
"pattern": ".*",
|
||||||
"validation": {},
|
"validation": {},
|
||||||
"notes": "Select where to save your analysis results"
|
"notes": "Directory where prediction results will be stored"
|
||||||
|
},
|
||||||
|
"complex_name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Name for this protein complex",
|
||||||
|
"default": "protein_complex",
|
||||||
|
"required": false,
|
||||||
|
"pipeline_io": "parameter",
|
||||||
|
"var_name": "params.complex_name",
|
||||||
|
"examples": [
|
||||||
|
"protein_complex"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"num_samples": {
|
"num_samples": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "Number of protein structure samples",
|
"description": "Number of protein structure samples",
|
||||||
"default": 10,
|
"default": 10,
|
||||||
"required": true,
|
|
||||||
"pipeline_io": "parameter",
|
|
||||||
"var_name": "params.num_samples",
|
|
||||||
"examples": [
|
|
||||||
"10"
|
|
||||||
],
|
|
||||||
"pattern": "^\\d+$",
|
|
||||||
"validation": {
|
|
||||||
"min": 1
|
|
||||||
},
|
|
||||||
"notes": "More samples provide better coverage of conformational space"
|
|
||||||
},
|
|
||||||
"batch_size_100": {
|
|
||||||
"type": "integer",
|
|
||||||
"description": "Batch size parameter",
|
|
||||||
"default": 10,
|
|
||||||
"required": false,
|
"required": false,
|
||||||
"pipeline_io": "parameter",
|
"pipeline_io": "parameter",
|
||||||
"var_name": "params.batch_size_100",
|
"var_name": "params.num_samples",
|
||||||
"hidden": true,
|
"examples": ["10"],
|
||||||
"examples": [
|
"pattern": "^\\d+$",
|
||||||
"10"
|
"validation": {"min": 1},
|
||||||
],
|
"notes": "More samples provide better coverage of conformational space"
|
||||||
"pattern": "^\\d+$"
|
|
||||||
},
|
},
|
||||||
"temperature": {
|
"temperature": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "Temperature (K) for free energy",
|
"description": "Temperature (K) for free energy calculations",
|
||||||
"default": 300,
|
"default": 300,
|
||||||
"required": false,
|
"required": false,
|
||||||
"pipeline_io": "parameter",
|
"pipeline_io": "parameter",
|
||||||
"var_name": "params.temperature",
|
"var_name": "params.temperature",
|
||||||
"examples": [
|
"examples": ["300"],
|
||||||
"300"
|
|
||||||
],
|
|
||||||
"pattern": "^\\d+$",
|
"pattern": "^\\d+$",
|
||||||
"validation": {
|
"validation": {"min": 200, "max": 500},
|
||||||
"min": 200,
|
"notes": "Temperature in Kelvin"
|
||||||
"max": 500
|
|
||||||
},
|
|
||||||
"notes": "Temperature in Kelvin for free energy calculations"
|
|
||||||
},
|
},
|
||||||
"n_clusters": {
|
"n_clusters": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
@@ -81,26 +84,19 @@
|
|||||||
"required": false,
|
"required": false,
|
||||||
"pipeline_io": "parameter",
|
"pipeline_io": "parameter",
|
||||||
"var_name": "params.n_clusters",
|
"var_name": "params.n_clusters",
|
||||||
"examples": [
|
"examples": ["5"],
|
||||||
"5"
|
|
||||||
],
|
|
||||||
"pattern": "^\\d+$",
|
"pattern": "^\\d+$",
|
||||||
"validation": {
|
"validation": {"min": 2}
|
||||||
"min": 2
|
|
||||||
},
|
},
|
||||||
"notes": "Number of clusters for free energy analysis"
|
"exp_dG": {
|
||||||
},
|
"type": "number",
|
||||||
"cache_dir": {
|
"description": "Experimental binding free energy (kcal/mol)",
|
||||||
"type": "folder",
|
"default": -10.0,
|
||||||
"description": "Embeddings cache directory",
|
|
||||||
"default": "/tmp/bioemu_cache",
|
|
||||||
"required": false,
|
"required": false,
|
||||||
"pipeline_io": "parameter",
|
"pipeline_io": "parameter",
|
||||||
"var_name": "params.cache_dir",
|
"var_name": "params.exp_dG",
|
||||||
"hidden": true,
|
"examples": ["-10.0", "-13.1"],
|
||||||
"examples": [
|
"notes": "Reference experimental value for comparison"
|
||||||
"/tmp/bioemu_cache"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user