#!/usr/bin/env nextflow

/*
 * Two-protein BioEmu pipeline.
 *
 *   1. GENERATE_STRUCTURE runs `bioemu.sample` once per input FASTA and emits
 *      a topology (.pdb) and a samples trajectory (.xtc) for each protein.
 *   2. CALCULATE_BINDING passes both ensembles to calculate_binding.py and
 *      writes a CSV, a plot and a short plain-text report.
 */

nextflow.enable.dsl=2

// ---------------------------------------------------------------------------
// Parameters — PVC mount paths for k8s execution
// ---------------------------------------------------------------------------
params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
params.complex_name   = 'protein_complex'       // sub-directory of outdir used for published results
params.exp_dG         = -10.0                   // experimental value echoed into the report (kcal/mol)
params.outdir         = '/omic/eureka/bioemu/output'
params.cache_dir      = '/tmp/bioemu_cache'     // passed to bioemu.sample as --cache_embeds_dir
params.num_samples    = 10                      // passed to bioemu.sample as --num_samples
params.batch_size     = 5                       // passed to bioemu.sample as --batch_size_100
params.temperature    = 300                     // passed to calculate_binding.py as --temperature
params.n_clusters     = 5                       // passed to calculate_binding.py as --n_clusters

/*
 * Sample a structural ensemble for a single protein.
 *
 * Input : tuple (protein_id, fasta)
 * Output: tuple (protein_id, <protein_id>_topology.pdb, <protein_id>_samples.xtc)
 *
 * The outputs are renamed with the protein id so that the files of both
 * proteins can later be staged side by side in one task directory.
 */
process GENERATE_STRUCTURE {
    container 'harbor.cluster.omic.ai/omic/bioemu:latest'
    publishDir "${params.outdir}/${params.complex_name}", mode: 'copy'

    input:
    tuple val(protein_id), path(fasta)

    output:
    tuple val(protein_id), path("${protein_id}_topology.pdb"), path("${protein_id}_samples.xtc")

    script:
    """
    # Join every non-header line into one sequence string. Carriage returns
    # are dropped as well so CRLF-encoded FASTA files work. All records are
    # concatenated, so the file is expected to hold a single record.
    SEQUENCE=\$(grep -v '^>' "${fasta}" | tr -d '\\r\\n')

    if [ -z "\${SEQUENCE}" ]; then
        echo "ERROR: no sequence found in ${fasta}" >&2
        exit 1
    fi

    mkdir -p "${params.cache_dir}"

    python3 -m bioemu.sample \\
        --sequence "\${SEQUENCE}" \\
        --num_samples ${params.num_samples} \\
        --batch_size_100 ${params.batch_size} \\
        --output_dir . \\
        --cache_embeds_dir "${params.cache_dir}"

    mv topology.pdb "${protein_id}_topology.pdb"
    mv samples.xtc "${protein_id}_samples.xtc"
    """
}

/*
 * Run the binding analysis on the two sampled ensembles and write a report.
 *
 * Input : topology and samples files of protein 1, then of protein 2.
 * Output: binding_energy.csv, binding_energy_report.txt, energy_comparison.png
 */
process CALCULATE_BINDING {
    container 'harbor.cluster.omic.ai/omic/bioemu:latest'
    publishDir "${params.outdir}/${params.complex_name}/analysis", mode: 'copy'

    input:
    path protein1_topology
    path protein1_samples
    path protein2_topology
    path protein2_samples

    output:
    path "binding_energy.csv"
    path "binding_energy_report.txt"
    path "energy_comparison.png"

    script:
    """
    python3 /opt/bioemu/scripts/calculate_binding.py \\
        --protein1_topology "${protein1_topology}" \\
        --protein1_samples "${protein1_samples}" \\
        --protein2_topology "${protein2_topology}" \\
        --protein2_samples "${protein2_samples}" \\
        --temperature ${params.temperature} \\
        --n_clusters ${params.n_clusters} \\
        --output binding_energy.csv \\
        --plot energy_comparison.png

    echo "# Binding Free Energy Analysis: ${params.complex_name}" > binding_energy_report.txt
    echo "======================================================" >> binding_energy_report.txt
    echo "## Experimental Value: ${params.exp_dG} kcal/mol" >> binding_energy_report.txt
    echo "" >> binding_energy_report.txt

    # Takes the second comma-separated field of the last line that grep
    # returns for "binding_free_energy" (with one line of trailing context).
    # NOTE(review): this depends on the CSV layout written by
    # calculate_binding.py, which is not visible here - confirm.
    PREDICTED_DG=\$(grep -A1 "binding_free_energy" binding_energy.csv | tail -n1 | cut -d',' -f2)
    echo "## BioEmu Prediction: \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
    """
}

workflow {
    // Tag each FASTA with a fixed id so the results can be told apart after
    // the two inputs are mixed into one channel. checkIfExists makes a wrong
    // path fail immediately instead of producing an empty channel.
    protein1_ch = Channel
        .fromPath(params.protein1_fasta, checkIfExists: true)
        .map { fasta -> tuple("protein1", fasta) }

    protein2_ch = Channel
        .fromPath(params.protein2_fasta, checkIfExists: true)
        .map { fasta -> tuple("protein2", fasta) }

    all_proteins = protein1_ch.mix(protein2_ch)

    GENERATE_STRUCTURE(all_proteins)

    // Select each protein's result tuple; first() turns it into a value channel.
    p1 = GENERATE_STRUCTURE.out
        .filter { protein_id, topology, samples -> protein_id == "protein1" }
        .first()

    p2 = GENERATE_STRUCTURE.out
        .filter { protein_id, topology, samples -> protein_id == "protein2" }
        .first()

    // A channel cannot be subscripted to reach into the tuple it emits, so
    // every process input is projected out of the tuple with its own map().
    CALCULATE_BINDING(
        p1.map { protein_id, topology, samples -> topology },
        p1.map { protein_id, topology, samples -> samples },
        p2.map { protein_id, topology, samples -> topology },
        p2.map { protein_id, topology, samples -> samples }
    )
}