#!/usr/bin/env nextflow

// Pipeline defaults. Every value below can be overridden on the command line
// with the matching --<name> option.
nextflow.enable.dsl=2

// FASTA inputs for the two binding partners.
params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'

// Name of the sub-directory under params.outdir where results are published;
// also printed in the report header.
params.complex_name = 'protein_complex'

// Experimental binding free energy, written verbatim to the report (kcal/mol).
params.exp_dG = -10.0

// Root directory for published results.
params.outdir = '/omic/eureka/bioemu/output'

// Directory created by each sampling task and passed to bioemu.sample
// as --cache_embeds_dir.
params.cache_dir = '/tmp/bioemu_cache'

// Forwarded to bioemu.sample as --num_samples and --batch_size_100.
params.num_samples = 10
params.batch_size = 5

// Forwarded to calculate_binding.py as --temperature and --n_clusters.
// NOTE(review): temperature is presumably in Kelvin — confirm against the script.
params.temperature = 300
params.n_clusters = 5

// GENERATE_STRUCTURE_1 — run bioemu.sample on the first protein.
//
// Input:
//   fasta    — FASTA file; all non-header lines are joined into one sequence.
// Output:
//   topology — protein1_topology.pdb (renamed from topology.pdb)
//   samples  — protein1_samples.xtc  (renamed from samples.xtc)
// Both outputs are copied to <outdir>/<complex_name>/protein1.
//
// NOTE(review): a multi-record FASTA is concatenated into a single sequence;
// confirm inputs always hold exactly one record.
process GENERATE_STRUCTURE_1 {
    container 'harbor.cluster.omic.ai/omic/bioemu:v3'
    publishDir "${params.outdir}/${params.complex_name}/protein1", mode: 'copy'

    input:
    path fasta

    output:
    path "protein1_topology.pdb", emit: topology
    path "protein1_samples.xtc", emit: samples

    script:
    """
    # Drop header lines, then strip every whitespace character (newlines and
    # any carriage returns from CRLF files) to get one contiguous sequence.
    SEQUENCE=\$(grep -v '^>' "${fasta}" | tr -d '[:space:]' || true)
    if [ -z "\${SEQUENCE}" ]; then
        echo "ERROR: no sequence found in ${fasta}" >&2
        exit 1
    fi

    mkdir -p "${params.cache_dir}"

    python3 -m bioemu.sample \\
        --sequence "\${SEQUENCE}" \\
        --num_samples ${params.num_samples} \\
        --batch_size_100 ${params.batch_size} \\
        --output_dir . \\
        --cache_embeds_dir "${params.cache_dir}"

    # Give the outputs protein-specific names so both partners can be staged
    # side by side in the downstream task.
    mv topology.pdb protein1_topology.pdb
    mv samples.xtc protein1_samples.xtc
    """
}

// GENERATE_STRUCTURE_2 — run bioemu.sample on the second protein.
//
// Input:
//   fasta    — FASTA file; all non-header lines are joined into one sequence.
// Output:
//   topology — protein2_topology.pdb (renamed from topology.pdb)
//   samples  — protein2_samples.xtc  (renamed from samples.xtc)
// Both outputs are copied to <outdir>/<complex_name>/protein2.
//
// NOTE(review): a multi-record FASTA is concatenated into a single sequence;
// confirm inputs always hold exactly one record.
process GENERATE_STRUCTURE_2 {
    container 'harbor.cluster.omic.ai/omic/bioemu:v3'
    publishDir "${params.outdir}/${params.complex_name}/protein2", mode: 'copy'

    input:
    path fasta

    output:
    path "protein2_topology.pdb", emit: topology
    path "protein2_samples.xtc", emit: samples

    script:
    """
    # Drop header lines, then strip every whitespace character (newlines and
    # any carriage returns from CRLF files) to get one contiguous sequence.
    SEQUENCE=\$(grep -v '^>' "${fasta}" | tr -d '[:space:]' || true)
    if [ -z "\${SEQUENCE}" ]; then
        echo "ERROR: no sequence found in ${fasta}" >&2
        exit 1
    fi

    mkdir -p "${params.cache_dir}"

    python3 -m bioemu.sample \\
        --sequence "\${SEQUENCE}" \\
        --num_samples ${params.num_samples} \\
        --batch_size_100 ${params.batch_size} \\
        --output_dir . \\
        --cache_embeds_dir "${params.cache_dir}"

    # Give the outputs protein-specific names so both partners can be staged
    # side by side in the downstream task.
    mv topology.pdb protein2_topology.pdb
    mv samples.xtc protein2_samples.xtc
    """
}

// CALCULATE_BINDING — run the container's calculate_binding.py on both
// ensembles and write a short text report next to its outputs.
//
// Inputs (in order):
//   protein1_topology, protein1_samples — topology and samples of protein 1
//   protein2_topology, protein2_samples — topology and samples of protein 2
// Outputs (copied to <outdir>/<complex_name>/analysis):
//   binding_energy.csv        — written by calculate_binding.py (--output)
//   binding_energy_report.txt — experimental vs. predicted value
//   energy_comparison.png     — written by calculate_binding.py (--plot)
process CALCULATE_BINDING {
    container 'harbor.cluster.omic.ai/omic/bioemu:v3'
    publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy'

    input:
    path protein1_topology
    path protein1_samples
    path protein2_topology
    path protein2_samples

    output:
    path "binding_energy.csv"
    path "binding_energy_report.txt"
    path "energy_comparison.png"

    script:
    """
    python3 /opt/bioemu/scripts/calculate_binding.py \\
        --protein1_topology ${protein1_topology} \\
        --protein1_samples ${protein1_samples} \\
        --protein2_topology ${protein2_topology} \\
        --protein2_samples ${protein2_samples} \\
        --temperature ${params.temperature} \\
        --n_clusters ${params.n_clusters} \\
        --output binding_energy.csv \\
        --plot energy_comparison.png

    # Take the second comma-separated field of the last line in the
    # "binding_free_energy" match window (the matching line plus one after it).
    # NOTE(review): this depends on the CSV layout produced by
    # calculate_binding.py; confirm which line carries the value.
    PREDICTED_DG=\$(grep -A1 "binding_free_energy" binding_energy.csv | tail -n1 | cut -d',' -f2 || true)
    # Report an explicit placeholder instead of an empty value when the key
    # is absent from the CSV.
    PREDICTED_DG=\${PREDICTED_DG:-NA}

    echo "# Binding Free Energy Analysis: ${params.complex_name}" > binding_energy_report.txt
    echo "Experimental: ${params.exp_dG} kcal/mol" >> binding_energy_report.txt
    echo "Predicted: \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
    """
}

// Entry workflow: sample both proteins independently, then feed the four
// resulting files into the binding calculation.
workflow {
    // checkIfExists makes a missing or mistyped FASTA path fail at startup
    // instead of surfacing later as a staging error inside a task.
    fasta1 = Channel.fromPath(params.protein1_fasta, checkIfExists: true)
    fasta2 = Channel.fromPath(params.protein2_fasta, checkIfExists: true)

    GENERATE_STRUCTURE_1(fasta1)
    GENERATE_STRUCTURE_2(fasta2)

    CALCULATE_BINDING(
        GENERATE_STRUCTURE_1.out.topology,
        GENERATE_STRUCTURE_1.out.samples,
        GENERATE_STRUCTURE_2.out.topology,
        GENERATE_STRUCTURE_2.out.samples
    )
}