#!/usr/bin/env nextflow

/*
 * Pipeline overview (DSL2):
 *   1. GENERATE_STRUCTURE_1 / GENERATE_STRUCTURE_2 each read one FASTA file,
 *      run `python3 -m bioemu.sample` on the sequence, and emit a topology
 *      (.pdb) and a samples (.xtc) file.
 *   2. CALCULATE_BINDING feeds both topology/samples pairs to
 *      /opt/bioemu/scripts/calculate_binding.py and writes a CSV, a plot and
 *      a short text report.
 *
 * All results are copied under ${params.outdir}/${params.complex_name}/.
 */

nextflow.enable.dsl=2

// ---------------------------------------------------------------------------
// Default parameters (each can be overridden on the command line, e.g.
// `--num_samples 100`).
// ---------------------------------------------------------------------------
params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
params.complex_name   = 'protein_complex'    // sub-directory name under outdir; also printed in the report
params.exp_dG         = -10.0                // written to the report as "Experimental: ... kcal/mol"
params.outdir         = '/omic/eureka/bioemu/output'
params.cache_dir      = '/tmp/bioemu_cache'  // passed to bioemu.sample as --cache_embeds_dir
params.num_samples    = 10
params.batch_size     = 5                    // passed to bioemu.sample as --batch_size_100
params.temperature    = 300                  // passed to calculate_binding.py as --temperature
params.n_clusters     = 5                    // passed to calculate_binding.py as --n_clusters

/*
 * Sample structures for protein 1.
 *
 * Input:  fasta - FASTA file; every line not containing ">" is concatenated
 *                 into the sequence handed to bioemu.sample.
 * Output: topology - protein1_topology.pdb
 *         samples  - protein1_samples.xtc
 *
 * The task fails with an explicit message when no sequence can be extracted.
 */
process GENERATE_STRUCTURE_1 {
    container 'harbor.cluster.omic.ai/omic/bioemu:latest'
    publishDir "${params.outdir}/${params.complex_name}/protein1", mode: 'copy'

    input:
    path fasta

    output:
    path "protein1_topology.pdb", emit: topology
    path "protein1_samples.xtc", emit: samples

    // Inside the script string, `\$` defers expansion to bash and `\\` yields a
    // literal backslash (shell line continuation / tr escape sequences).
    // Carriage returns are stripped together with newlines so that CRLF-encoded
    // FASTA files do not leak `\r` into the sequence.
    script:
    """
    SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\r\\n')
    if [ -z "\${SEQUENCE}" ]; then
        echo "ERROR: no sequence found in ${fasta}" >&2
        exit 1
    fi

    mkdir -p ${params.cache_dir}

    python3 -m bioemu.sample \\
        --sequence "\${SEQUENCE}" \\
        --num_samples ${params.num_samples} \\
        --batch_size_100 ${params.batch_size} \\
        --output_dir . \\
        --cache_embeds_dir ${params.cache_dir}

    mv topology.pdb protein1_topology.pdb
    mv samples.xtc protein1_samples.xtc
    """
}

/*
 * Sample structures for protein 2.
 *
 * Same steps as GENERATE_STRUCTURE_1; only the publish directory and the
 * output file prefix ("protein2_") differ.
 *
 * Input:  fasta - FASTA file; every line not containing ">" is concatenated
 *                 into the sequence handed to bioemu.sample.
 * Output: topology - protein2_topology.pdb
 *         samples  - protein2_samples.xtc
 */
process GENERATE_STRUCTURE_2 {
    container 'harbor.cluster.omic.ai/omic/bioemu:latest'
    publishDir "${params.outdir}/${params.complex_name}/protein2", mode: 'copy'

    input:
    path fasta

    output:
    path "protein2_topology.pdb", emit: topology
    path "protein2_samples.xtc", emit: samples

    // See GENERATE_STRUCTURE_1 for the escaping conventions used below.
    script:
    """
    SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\r\\n')
    if [ -z "\${SEQUENCE}" ]; then
        echo "ERROR: no sequence found in ${fasta}" >&2
        exit 1
    fi

    mkdir -p ${params.cache_dir}

    python3 -m bioemu.sample \\
        --sequence "\${SEQUENCE}" \\
        --num_samples ${params.num_samples} \\
        --batch_size_100 ${params.batch_size} \\
        --output_dir . \\
        --cache_embeds_dir ${params.cache_dir}

    mv topology.pdb protein2_topology.pdb
    mv samples.xtc protein2_samples.xtc
    """
}

/*
 * Run the binding analysis script on both sampled proteins and write a report.
 *
 * Inputs:  topology + samples files of protein 1 and protein 2.
 * Outputs: binding_energy.csv        - written by calculate_binding.py (--output)
 *          energy_comparison.png     - written by calculate_binding.py (--plot)
 *          binding_energy_report.txt - header, experimental value (params.exp_dG)
 *                                      and the value parsed from the CSV.
 */
process CALCULATE_BINDING {
    container 'harbor.cluster.omic.ai/omic/bioemu:latest'
    publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy'

    input:
    path protein1_topology
    path protein1_samples
    path protein2_topology
    path protein2_samples

    output:
    path "binding_energy.csv"
    path "binding_energy_report.txt"
    path "energy_comparison.png"

    // NOTE(review): the PREDICTED_DG extraction takes the last of (matching line,
    // following line) for "binding_free_energy" and reads comma-separated field 2.
    // The CSV layout produced by calculate_binding.py is not visible here --
    // confirm it matches.
    script:
    """
    python3 /opt/bioemu/scripts/calculate_binding.py \\
        --protein1_topology ${protein1_topology} \\
        --protein1_samples ${protein1_samples} \\
        --protein2_topology ${protein2_topology} \\
        --protein2_samples ${protein2_samples} \\
        --temperature ${params.temperature} \\
        --n_clusters ${params.n_clusters} \\
        --output binding_energy.csv \\
        --plot energy_comparison.png

    echo "# Binding Free Energy Analysis: ${params.complex_name}" > binding_energy_report.txt
    echo "Experimental: ${params.exp_dG} kcal/mol" >> binding_energy_report.txt
    PREDICTED_DG=\$(grep -A1 "binding_free_energy" binding_energy.csv | tail -n1 | cut -d',' -f2)
    echo "Predicted: \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
    """
}

/*
 * Entry point: sample both proteins, then analyse binding.
 */
workflow {
    // checkIfExists makes a wrong FASTA path fail at channel creation with a
    // clear message instead of surfacing later during task input staging.
    fasta1 = Channel.fromPath(params.protein1_fasta, checkIfExists: true)
    fasta2 = Channel.fromPath(params.protein2_fasta, checkIfExists: true)

    GENERATE_STRUCTURE_1(fasta1)
    GENERATE_STRUCTURE_2(fasta2)

    CALCULATE_BINDING(
        GENERATE_STRUCTURE_1.out.topology,
        GENERATE_STRUCTURE_1.out.samples,
        GENERATE_STRUCTURE_2.out.topology,
        GENERATE_STRUCTURE_2.out.samples
    )
}