Implement interface-adaptive binding energy calculator
This commit is contained in:
140
test.nf
Normal file
140
test.nf
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env nextflow
|
||||
nextflow.enable.dsl=2
|
||||
|
||||
// Define parameters
|
||||
params.complex_name = "mhc_tcr"
|
||||
params.protein1_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/mhc_peptide.fasta"
|
||||
params.protein2_fasta = "/mnt/OmicNAS/private/old/olamide/bioemu/input/complexes/tcr_jm22.fasta"
|
||||
params.exp_dG = -7.21 // kcal/mol from experimental database
|
||||
params.outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output"
|
||||
params.cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache"
|
||||
params.num_samples = 20
|
||||
params.batch_size = 5
|
||||
params.temperature = 300
|
||||
params.n_clusters = 5
|
||||
|
||||
// First process: Generate structure for protein1
|
||||
process GENERATE_PROTEIN1 {
|
||||
container 'bioemu:latest'
|
||||
containerOptions '--rm --gpus all -v /mnt:/mnt'
|
||||
publishDir "${params.outdir}/${params.complex_name}/protein1", mode: 'copy'
|
||||
|
||||
output:
|
||||
path "protein1_topology.pdb", emit: topology
|
||||
path "protein1_samples.xtc", emit: samples
|
||||
|
||||
script:
|
||||
"""
|
||||
# Extract sequence from FASTA
|
||||
SEQUENCE=\$(grep -v ">" ${params.protein1_fasta} | tr -d '\\n')
|
||||
|
||||
# Run BioEmu
|
||||
python3 -m bioemu.sample \\
|
||||
--sequence "\${SEQUENCE}" \\
|
||||
--num_samples ${params.num_samples} \\
|
||||
--batch_size_100 ${params.batch_size} \\
|
||||
--output_dir . \\
|
||||
--cache_embeds_dir ${params.cache_dir}
|
||||
|
||||
# Rename output files
|
||||
mv topology.pdb protein1_topology.pdb
|
||||
mv samples.xtc protein1_samples.xtc
|
||||
"""
|
||||
}
|
||||
|
||||
// Second process: Generate structure for protein2
|
||||
process GENERATE_PROTEIN2 {
|
||||
container 'bioemu:latest'
|
||||
containerOptions '--rm --gpus all -v /mnt:/mnt'
|
||||
publishDir "${params.outdir}/${params.complex_name}/protein2", mode: 'copy'
|
||||
|
||||
output:
|
||||
path "protein2_topology.pdb", emit: topology
|
||||
path "protein2_samples.xtc", emit: samples
|
||||
|
||||
script:
|
||||
"""
|
||||
# Extract sequence from FASTA
|
||||
SEQUENCE=\$(grep -v ">" ${params.protein2_fasta} | tr -d '\\n')
|
||||
|
||||
# Run BioEmu
|
||||
python3 -m bioemu.sample \\
|
||||
--sequence "\${SEQUENCE}" \\
|
||||
--num_samples ${params.num_samples} \\
|
||||
--batch_size_100 ${params.batch_size} \\
|
||||
--output_dir . \\
|
||||
--cache_embeds_dir ${params.cache_dir}
|
||||
|
||||
# Rename output files
|
||||
mv topology.pdb protein2_topology.pdb
|
||||
mv samples.xtc protein2_samples.xtc
|
||||
"""
|
||||
}
|
||||
|
||||
// Third process: Calculate binding energy
|
||||
process CALCULATE_BINDING {
|
||||
container 'bioemu:latest'
|
||||
containerOptions '--rm --gpus all -v /mnt:/mnt -v /data:/data'
|
||||
publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy'
|
||||
|
||||
input:
|
||||
path protein1_topology
|
||||
path protein1_samples
|
||||
path protein2_topology
|
||||
path protein2_samples
|
||||
|
||||
output:
|
||||
path "binding_energy.csv"
|
||||
path "binding_energy_report.txt"
|
||||
path "energy_comparison.png"
|
||||
|
||||
script:
|
||||
"""
|
||||
# Run binding energy calculation
|
||||
python3 /data/olamide/fresh-bioemu2/scripts/calculate_binding.py \\
|
||||
--protein1_topology ${protein1_topology} \\
|
||||
--protein1_samples ${protein1_samples} \\
|
||||
--protein2_topology ${protein2_topology} \\
|
||||
--protein2_samples ${protein2_samples} \\
|
||||
--temperature ${params.temperature} \\
|
||||
--n_clusters ${params.n_clusters} \\
|
||||
--output binding_energy.csv \\
|
||||
--plot energy_comparison.png
|
||||
|
||||
# Generate report
|
||||
echo "# Binding Free Energy Analysis: ${params.complex_name}" > binding_energy_report.txt
|
||||
echo "======================================================" >> binding_energy_report.txt
|
||||
echo "## Experimental Value (Database)" >> binding_energy_report.txt
|
||||
echo "ΔG = ${params.exp_dG} kcal/mol" >> binding_energy_report.txt
|
||||
echo "" >> binding_energy_report.txt
|
||||
|
||||
# Extract predicted value
|
||||
PREDICTED_DG=\$(grep -A1 "binding_free_energy" binding_energy.csv | tail -n1 | cut -d',' -f2)
|
||||
|
||||
echo "## BioEmu Prediction" >> binding_energy_report.txt
|
||||
echo "ΔG = \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
|
||||
echo "" >> binding_energy_report.txt
|
||||
|
||||
# Calculate comparison metrics
|
||||
echo "## Comparison" >> binding_energy_report.txt
|
||||
ABS_DIFF=\$(python3 -c "print('%.2f' % abs(float('\${PREDICTED_DG}') - (${params.exp_dG})))")
|
||||
REL_ERROR=\$(python3 -c "print('%.2f' % (((float('\${PREDICTED_DG}') - (${params.exp_dG}))/(${params.exp_dG}))*100))")
|
||||
|
||||
echo "Absolute Difference: \${ABS_DIFF} kcal/mol" >> binding_energy_report.txt
|
||||
echo "Relative Error: \${REL_ERROR}%" >> binding_energy_report.txt
|
||||
"""
|
||||
}
|
||||
|
||||
workflow {
|
||||
// Generate structures for both proteins
|
||||
protein1_results = GENERATE_PROTEIN1()
|
||||
protein2_results = GENERATE_PROTEIN2()
|
||||
|
||||
// Calculate binding energy
|
||||
CALCULATE_BINDING(
|
||||
protein1_results.topology,
|
||||
protein1_results.samples,
|
||||
protein2_results.topology,
|
||||
protein2_results.samples
|
||||
)
|
||||
}
|
||||
Reference in New Issue
Block a user