From ce1f5dff035af9e76b6baeca7d2f6b79ed09d0be Mon Sep 17 00:00:00 2001
From: Olamide Isreal <olamide@omicmd.com>
Date: Thu, 26 Mar 2026 14:30:02 +0100
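Subject: [PATCH] Fix StackOverflowError: use Channel.fromPath instead of
 Channel.fromList with file()

Build the workflow input from one Channel.fromPath(...).map { ... } per
protein and mix() the two channels, replacing
Channel.fromList([tuple(id, file(path)), ...]).

This commit also:
* publishes GENERATE_STRUCTURE outputs to <outdir>/<complex_name>
  instead of a per-protein subdirectory
* reduces the binding report to the experimental and predicted values,
  dropping the absolute-difference and relative-error lines
* removes inline comments from main.nf and the process scripts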
---
 main.nf | 65 +++++++++++++--------------------------------------------
 1 file changed, 14 insertions(+), 51 deletions(-)

diff --git a/main.nf b/main.nf
index fd85c26..fc377bf 100644
--- a/main.nf
+++ b/main.nf
@@ -4,12 +4,10 @@ nextflow.enable.dsl=2
 // Define parameters — PVC mount paths for k8s execution
 params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
 params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
-params.complex_name = "protein_complex"
-params.exp_dG = -10.0  // kcal/mol (placeholder experimental value)
+params.complex_name = 'protein_complex'  // names the publish subdirectory and the report title
+params.exp_dG = -10.0  // experimental binding free energy, kcal/mol (placeholder value)
 params.outdir = '/omic/eureka/bioemu/output'
 params.cache_dir = '/tmp/bioemu_cache'
-
-// Parameters for structure generation and analysis
 params.num_samples = 10
 params.batch_size = 5
 params.temperature = 300
@@ -17,7 +15,7 @@ params.n_clusters = 5
 
 process GENERATE_STRUCTURE {
     container 'harbor.cluster.omic.ai/omic/bioemu:latest'
-    publishDir "${params.outdir}/${params.complex_name}/${protein_id}", mode: 'copy'
+    publishDir "${params.outdir}/${params.complex_name}", mode: 'copy'
 
     input:
         tuple val(protein_id), path(fasta)
@@ -27,13 +25,9 @@ process GENERATE_STRUCTURE {
 
     script:
     """
-    # Extract sequence from FASTA
     SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\n')
-
-    # Create cache dir
     mkdir -p ${params.cache_dir}
 
-    # Run BioEmu
     python3 -m bioemu.sample \\
         --sequence "\${SEQUENCE}" \\
         --num_samples ${params.num_samples} \\
@@ -41,7 +35,6 @@ process GENERATE_STRUCTURE {
         --output_dir . \\
         --cache_embeds_dir ${params.cache_dir}
 
-    # Rename output files
     mv topology.pdb ${protein_id}_topology.pdb
     mv samples.xtc ${protein_id}_samples.xtc
     """
@@ -64,7 +57,6 @@ process CALCULATE_BINDING {
 
     script:
     """
-    # Run binding energy calculation
     python3 /opt/bioemu/scripts/calculate_binding.py \\
         --protein1_topology ${protein1_topology} \\
         --protein1_samples ${protein1_samples} \\
@@ -75,56 +67,27 @@ process CALCULATE_BINDING {
         --output binding_energy.csv \\
         --plot energy_comparison.png
 
-    # Generate report
     echo "# Binding Free Energy Analysis: ${params.complex_name}" > binding_energy_report.txt
     echo "======================================================" >> binding_energy_report.txt
-    echo "## Experimental Value (Database)" >> binding_energy_report.txt
-    echo "ΔG = ${params.exp_dG} kcal/mol" >> binding_energy_report.txt
+    echo "## Experimental Value: ${params.exp_dG} kcal/mol" >> binding_energy_report.txt
     echo "" >> binding_energy_report.txt
-
-    # Extract predicted value
     PREDICTED_DG=\$(grep -A1 "binding_free_energy" binding_energy.csv | tail -n1 | cut -d',' -f2)
-
-    echo "## BioEmu Prediction" >> binding_energy_report.txt
-    echo "ΔG = \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
-    echo "" >> binding_energy_report.txt
-
-    # Calculate comparison metrics
-    echo "## Comparison" >> binding_energy_report.txt
-    ABS_DIFF=\$(python3 -c "print('%.2f' % abs(float('\${PREDICTED_DG}') - (${params.exp_dG})))")
-    REL_ERROR=\$(python3 -c "print('%.2f' % (((float('\${PREDICTED_DG}') - (${params.exp_dG}))/(${params.exp_dG}))*100))")
-
-    echo "Absolute Difference: \${ABS_DIFF} kcal/mol" >> binding_energy_report.txt
-    echo "Relative Error: \${REL_ERROR}%" >> binding_energy_report.txt
+    echo "## BioEmu Prediction: \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
     """
 }
 
 workflow {
-    // Create channel for proteins
-    protein_ch = Channel.fromList([
-        tuple("protein1", file(params.protein1_fasta)),
-        tuple("protein2", file(params.protein2_fasta))
-    ])
+    protein1_ch = Channel.fromPath(params.protein1_fasta, checkIfExists: true)
+                         .map { fasta -> tuple("protein1", fasta) }  // checkIfExists: fail fast on a missing FASTA instead of emitting nothing
+    protein2_ch = Channel.fromPath(params.protein2_fasta, checkIfExists: true)
+                         .map { fasta -> tuple("protein2", fasta) }  // tag each FASTA with its protein id
 
-    // Generate structures
-    GENERATE_STRUCTURE(protein_ch)
+    all_proteins = protein1_ch.mix(protein2_ch)  // one queue channel, so GENERATE_STRUCTURE runs once per protein
 
-    // Extract structure files for each protein
-    protein1_files = GENERATE_STRUCTURE.out
-                      .filter { it[0] == "protein1" }
-                      .map { it -> tuple(it[1], it[2]) }
-                      .first()
+    GENERATE_STRUCTURE(all_proteins)
 
-    protein2_files = GENERATE_STRUCTURE.out
-                      .filter { it[0] == "protein2" }
-                      .map { it -> tuple(it[1], it[2]) }
-                      .first()
+    p1 = GENERATE_STRUCTURE.out.filter { it[0] == "protein1" }.first()  // value channel holding [id, topology, samples]
+    p2 = GENERATE_STRUCTURE.out.filter { it[0] == "protein2" }.first()
 
-    // Calculate binding energy
-    CALCULATE_BINDING(
-        protein1_files[0],  // protein1_topology.pdb
-        protein1_files[1],  // protein1_samples.xtc
-        protein2_files[0],  // protein2_topology.pdb
-        protein2_files[1]   // protein2_samples.xtc
-    )
+    CALCULATE_BINDING(p1.map { it[1] }, p1.map { it[2] }, p2.map { it[1] }, p2.map { it[2] })  // pick tuple elements with map rather than indexing a channel
 }