Simplify workflow: separate processes per protein, avoid filter/map chains
Split GENERATE_STRUCTURE into two explicit processes (GENERATE_STRUCTURE_1 and GENERATE_STRUCTURE_2, one per protein) to avoid the channel filter/map/first() chains that cause a StackOverflowError in Nextflow 24.10.x.
This commit is contained in:
69
main.nf
69
main.nf
@@ -1,7 +1,6 @@
|
||||
#!/usr/bin/env nextflow
|
||||
nextflow.enable.dsl=2
|
||||
|
||||
// Define parameters — PVC mount paths for k8s execution
|
||||
params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
|
||||
params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
|
||||
params.complex_name = 'protein_complex'
|
||||
@@ -13,15 +12,16 @@ params.batch_size = 5
|
||||
params.temperature = 300
|
||||
params.n_clusters = 5
|
||||
|
||||
process GENERATE_STRUCTURE {
|
||||
process GENERATE_STRUCTURE_1 {
|
||||
container 'harbor.cluster.omic.ai/omic/bioemu:latest'
|
||||
publishDir "${params.outdir}/${params.complex_name}", mode: 'copy'
|
||||
publishDir "${params.outdir}/${params.complex_name}/protein1", mode: 'copy'
|
||||
|
||||
input:
|
||||
tuple val(protein_id), path(fasta)
|
||||
path fasta
|
||||
|
||||
output:
|
||||
tuple val(protein_id), path("${protein_id}_topology.pdb"), path("${protein_id}_samples.xtc")
|
||||
path "protein1_topology.pdb", emit: topology
|
||||
path "protein1_samples.xtc", emit: samples
|
||||
|
||||
script:
|
||||
"""
|
||||
@@ -35,8 +35,36 @@ process GENERATE_STRUCTURE {
|
||||
--output_dir . \\
|
||||
--cache_embeds_dir ${params.cache_dir}
|
||||
|
||||
mv topology.pdb ${protein_id}_topology.pdb
|
||||
mv samples.xtc ${protein_id}_samples.xtc
|
||||
mv topology.pdb protein1_topology.pdb
|
||||
mv samples.xtc protein1_samples.xtc
|
||||
"""
|
||||
}
|
||||
|
||||
process GENERATE_STRUCTURE_2 {
|
||||
container 'harbor.cluster.omic.ai/omic/bioemu:latest'
|
||||
publishDir "${params.outdir}/${params.complex_name}/protein2", mode: 'copy'
|
||||
|
||||
input:
|
||||
path fasta
|
||||
|
||||
output:
|
||||
path "protein2_topology.pdb", emit: topology
|
||||
path "protein2_samples.xtc", emit: samples
|
||||
|
||||
script:
|
||||
"""
|
||||
SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\n')
|
||||
mkdir -p ${params.cache_dir}
|
||||
|
||||
python3 -m bioemu.sample \\
|
||||
--sequence "\${SEQUENCE}" \\
|
||||
--num_samples ${params.num_samples} \\
|
||||
--batch_size_100 ${params.batch_size} \\
|
||||
--output_dir . \\
|
||||
--cache_embeds_dir ${params.cache_dir}
|
||||
|
||||
mv topology.pdb protein2_topology.pdb
|
||||
mv samples.xtc protein2_samples.xtc
|
||||
"""
|
||||
}
|
||||
|
||||
@@ -68,26 +96,23 @@ process CALCULATE_BINDING {
|
||||
--plot energy_comparison.png
|
||||
|
||||
echo "# Binding Free Energy Analysis: ${params.complex_name}" > binding_energy_report.txt
|
||||
echo "======================================================" >> binding_energy_report.txt
|
||||
echo "## Experimental Value: ${params.exp_dG} kcal/mol" >> binding_energy_report.txt
|
||||
echo "" >> binding_energy_report.txt
|
||||
echo "Experimental: ${params.exp_dG} kcal/mol" >> binding_energy_report.txt
|
||||
PREDICTED_DG=\$(grep -A1 "binding_free_energy" binding_energy.csv | tail -n1 | cut -d',' -f2)
|
||||
echo "## BioEmu Prediction: \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
|
||||
echo "Predicted: \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
|
||||
"""
|
||||
}
|
||||
|
||||
workflow {
|
||||
protein1_ch = Channel.fromPath(params.protein1_fasta)
|
||||
.map { fasta -> tuple("protein1", fasta) }
|
||||
protein2_ch = Channel.fromPath(params.protein2_fasta)
|
||||
.map { fasta -> tuple("protein2", fasta) }
|
||||
fasta1 = Channel.fromPath(params.protein1_fasta)
|
||||
fasta2 = Channel.fromPath(params.protein2_fasta)
|
||||
|
||||
all_proteins = protein1_ch.mix(protein2_ch)
|
||||
GENERATE_STRUCTURE_1(fasta1)
|
||||
GENERATE_STRUCTURE_2(fasta2)
|
||||
|
||||
GENERATE_STRUCTURE(all_proteins)
|
||||
|
||||
p1 = GENERATE_STRUCTURE.out.filter { it[0] == "protein1" }.map { [it[1], it[2]] }.first()
|
||||
p2 = GENERATE_STRUCTURE.out.filter { it[0] == "protein2" }.map { [it[1], it[2]] }.first()
|
||||
|
||||
CALCULATE_BINDING(p1[0], p1[1], p2[0], p2[1])
|
||||
CALCULATE_BINDING(
|
||||
GENERATE_STRUCTURE_1.out.topology,
|
||||
GENERATE_STRUCTURE_1.out.samples,
|
||||
GENERATE_STRUCTURE_2.out.topology,
|
||||
GENERATE_STRUCTURE_2.out.samples
|
||||
)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user