Files
bioemu/main.nf

94 lines
3.2 KiB
Plaintext

#!/usr/bin/env nextflow
nextflow.enable.dsl=2
// Pipeline parameters. Every value below is a default and can be overridden on the
// command line (e.g. --num_samples 100) or from a config/params file.
// The file-system defaults are PVC mount paths for k8s execution.
// Input FASTA files for the two proteins; each one is sampled by GENERATE_STRUCTURE.
params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
// Label for this run: used as the sub-directory under params.outdir and in the report title.
params.complex_name = 'protein_complex'
// Reference value printed in binding_energy_report.txt as "Experimental Value ... kcal/mol".
params.exp_dG = -10.0
// Root directory that publishDir copies results into.
params.outdir = '/omic/eureka/bioemu/output'
// Directory passed to bioemu.sample as --cache_embeds_dir; created by the task if missing.
params.cache_dir = '/tmp/bioemu_cache'
// Passed to bioemu.sample as --num_samples.
params.num_samples = 10
// Passed to bioemu.sample as --batch_size_100 (note the flag name differs from the parameter name).
params.batch_size = 5
// Passed to calculate_binding.py as --temperature; presumably Kelvin — TODO confirm against the script.
params.temperature = 300
// Passed to calculate_binding.py as --n_clusters.
params.n_clusters = 5
/*
 * GENERATE_STRUCTURE
 *
 * Runs `python3 -m bioemu.sample` on the sequence read from one FASTA file and
 * renames the sampler's outputs so they carry the protein id.
 *
 * Input : tuple (protein_id, fasta)
 *           protein_id - label used as the output file-name prefix
 *           fasta      - FASTA file; all non-header lines are concatenated, so it is
 *                        expected to hold a single record
 * Output: tuple (protein_id, <protein_id>_topology.pdb, <protein_id>_samples.xtc)
 *
 * Results are copied to <params.outdir>/<params.complex_name>.
 */
process GENERATE_STRUCTURE {
    container 'harbor.cluster.omic.ai/omic/bioemu:latest'
    publishDir "${params.outdir}/${params.complex_name}", mode: 'copy'

    input:
    tuple val(protein_id), path(fasta)

    output:
    tuple val(protein_id), path("${protein_id}_topology.pdb"), path("${protein_id}_samples.xtc")

    script:
    """
    # Drop header lines (those starting with '>') and strip every whitespace
    # character, including the carriage returns of CRLF-encoded files, so the
    # sampler receives one clean residue string.
    SEQUENCE=\$(grep -v '^>' "${fasta}" | tr -d '[:space:]')

    # Fail here with a clear message instead of handing an empty sequence to the sampler.
    if [ -z "\${SEQUENCE}" ]; then
        echo "ERROR: no sequence found in ${fasta}" >&2
        exit 1
    fi

    mkdir -p "${params.cache_dir}"

    python3 -m bioemu.sample \\
        --sequence "\${SEQUENCE}" \\
        --num_samples ${params.num_samples} \\
        --batch_size_100 ${params.batch_size} \\
        --output_dir . \\
        --cache_embeds_dir "${params.cache_dir}"

    # The sampler is expected to leave topology.pdb and samples.xtc in the output
    # directory; prefix them so both proteins can be published side by side.
    mv topology.pdb "${protein_id}_topology.pdb"
    mv samples.xtc "${protein_id}_samples.xtc"
    """
}
/*
 * CALCULATE_BINDING
 *
 * Calls /opt/bioemu/scripts/calculate_binding.py on the topology/trajectory pair
 * of each protein, then writes a short text report next to the script's outputs.
 *
 * Input : four staged files - topology and samples for protein 1, then for protein 2
 * Output: binding_energy.csv, binding_energy_report.txt, energy_comparison.png
 *
 * Results are copied to <params.outdir>/<params.complex_name>/analysis.
 */
process CALCULATE_BINDING {
    container 'harbor.cluster.omic.ai/omic/bioemu:latest'
    publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy'

    input:
    path protein1_topology
    path protein1_samples
    path protein2_topology
    path protein2_samples

    output:
    path "binding_energy.csv"
    path "binding_energy_report.txt"
    path "energy_comparison.png"

    script:
    """
    python3 /opt/bioemu/scripts/calculate_binding.py \\
        --protein1_topology ${protein1_topology} \\
        --protein1_samples ${protein1_samples} \\
        --protein2_topology ${protein2_topology} \\
        --protein2_samples ${protein2_samples} \\
        --temperature ${params.temperature} \\
        --n_clusters ${params.n_clusters} \\
        --output binding_energy.csv \\
        --plot energy_comparison.png

    # Report header: title, rule, the experimental reference value and a blank line.
    {
        echo "# Binding Free Energy Analysis: ${params.complex_name}"
        echo "======================================================"
        echo "## Experimental Value: ${params.exp_dG} kcal/mol"
        echo ""
    } > binding_energy_report.txt

    # Take the last line of the grep match plus its following line, and keep the
    # second comma-separated field as the predicted value.
    PREDICTED_DG=\$(grep -A1 "binding_free_energy" binding_energy.csv | tail -n1 | cut -d',' -f2)
    echo "## BioEmu Prediction: \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
    """
}
/*
 * Entry workflow: sample structures for both proteins with GENERATE_STRUCTURE,
 * then run CALCULATE_BINDING once on the two resulting topology/trajectory pairs.
 */
workflow {
    // Tag each FASTA with a fixed id; GENERATE_STRUCTURE uses the id as the
    // output file-name prefix. checkIfExists makes a wrong path fail when the
    // channel is created instead of later inside the sampling task.
    protein1_ch = Channel.fromPath(params.protein1_fasta, checkIfExists: true)
        .map { fasta -> tuple("protein1", fasta) }
    protein2_ch = Channel.fromPath(params.protein2_fasta, checkIfExists: true)
        .map { fasta -> tuple("protein2", fasta) }

    all_proteins = protein1_ch.mix(protein2_ch)

    GENERATE_STRUCTURE(all_proteins)

    // Results are selected by id rather than by arrival order. first() turns
    // each selection into a value channel holding one
    // (protein_id, topology, samples) tuple.
    p1 = GENERATE_STRUCTURE.out.filter { rec -> rec[0] == "protein1" }.first()
    p2 = GENERATE_STRUCTURE.out.filter { rec -> rec[0] == "protein2" }.first()

    // Tuple fields are extracted with map: indexing p1/p2 directly would index
    // the channel object, not the tuple it will eventually carry.
    CALCULATE_BINDING(
        p1.map { rec -> rec[1] },
        p1.map { rec -> rec[2] },
        p2.map { rec -> rec[1] },
        p2.map { rec -> rec[2] }
    )
}