#!/usr/bin/env nextflow nextflow.enable.dsl=2 // Define parameters params.complex_name = "mhc_tcr" params.protein1_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/mhc_peptide.fasta" params.protein2_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/tcr_jm22.fasta" params.exp_dG = -7.21 // kcal/mol from experimental database params.outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output" params.cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache" params.num_samples = 20 params.batch_size = 5 params.temperature = 300 params.n_clusters = 5 // First process: Generate structure for protein1 process GENERATE_PROTEIN1 { container 'bioemu:latest' containerOptions '--rm --gpus all -v /mnt:/mnt' publishDir "${params.outdir}/${params.complex_name}/protein1", mode: 'copy' output: path "protein1_topology.pdb", emit: topology path "protein1_samples.xtc", emit: samples script: """ # Extract sequence from FASTA SEQUENCE=\$(grep -v ">" ${params.protein1_fasta} | tr -d '\\n') # Run BioEmu python3 -m bioemu.sample \\ --sequence "\${SEQUENCE}" \\ --num_samples ${params.num_samples} \\ --batch_size_100 ${params.batch_size} \\ --output_dir . \\ --cache_embeds_dir ${params.cache_dir} # Rename output files mv topology.pdb protein1_topology.pdb mv samples.xtc protein1_samples.xtc """ } // Second process: Generate structure for protein2 process GENERATE_PROTEIN2 { container 'bioemu:latest' containerOptions '--rm --gpus all -v /mnt:/mnt' publishDir "${params.outdir}/${params.complex_name}/protein2", mode: 'copy' output: path "protein2_topology.pdb", emit: topology path "protein2_samples.xtc", emit: samples script: """ # Extract sequence from FASTA SEQUENCE=\$(grep -v ">" ${params.protein2_fasta} | tr -d '\\n') # Run BioEmu python3 -m bioemu.sample \\ --sequence "\${SEQUENCE}" \\ --num_samples ${params.num_samples} \\ --batch_size_100 ${params.batch_size} \\ --output_dir . \\ --cache_embeds_dir ${params.cache_dir} # Rename output files mv topology.pdb protein2_topology.pdb mv samples.xtc protein2_samples.xtc """ } // Third process: Calculate binding energy process CALCULATE_BINDING { container 'bioemu:latest' containerOptions '--rm --gpus all -v /mnt:/mnt -v /data:/data' publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy' input: path protein1_topology path protein1_samples path protein2_topology path protein2_samples output: path "binding_energy.csv" path "binding_energy_report.txt" path "energy_comparison.png" script: """ # Run binding energy calculation python3 /data/olamide/fresh-bioemu2/scripts/calculate_binding.py \\ --protein1_topology ${protein1_topology} \\ --protein1_samples ${protein1_samples} \\ --protein2_topology ${protein2_topology} \\ --protein2_samples ${protein2_samples} \\ --temperature ${params.temperature} \\ --n_clusters ${params.n_clusters} \\ --output binding_energy.csv \\ --plot energy_comparison.png # Generate report echo "# Binding Free Energy Analysis: ${params.complex_name}" > binding_energy_report.txt echo "======================================================" >> binding_energy_report.txt echo "## Experimental Value (Database)" >> binding_energy_report.txt echo "ΔG = ${params.exp_dG} kcal/mol" >> binding_energy_report.txt echo "" >> binding_energy_report.txt # Extract predicted value PREDICTED_DG=\$(grep -A1 "binding_free_energy" binding_energy.csv | tail -n1 | cut -d',' -f2) echo "## BioEmu Prediction" >> binding_energy_report.txt echo "ΔG = \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt echo "" >> binding_energy_report.txt # Calculate comparison metrics echo "## Comparison" >> binding_energy_report.txt ABS_DIFF=\$(python3 -c "print('%.2f' % abs(float('\${PREDICTED_DG}') - (${params.exp_dG})))") REL_ERROR=\$(python3 -c "print('%.2f' % (((float('\${PREDICTED_DG}') - (${params.exp_dG}))/(${params.exp_dG}))*100))") echo "Absolute Difference: \${ABS_DIFF} kcal/mol" >> binding_energy_report.txt echo "Relative Error: \${REL_ERROR}%" >> binding_energy_report.txt """ } workflow { // Generate structures for both proteins protein1_results = GENERATE_PROTEIN1() protein2_results = GENERATE_PROTEIN2() // Calculate binding energy CALCULATE_BINDING( protein1_results.topology, protein1_results.samples, protein2_results.topology, protein2_results.samples ) }