Fix StackOverflowError: use Channel.fromPath instead of Channel.fromList with file()
This commit is contained in:
65
main.nf
65
main.nf
@@ -4,12 +4,10 @@ nextflow.enable.dsl=2
|
|||||||
// Define parameters — PVC mount paths for k8s execution
|
// Define parameters — PVC mount paths for k8s execution
|
||||||
params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
|
params.protein1_fasta = '/omic/eureka/bioemu/input/trp_cage.fasta'
|
||||||
params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
|
params.protein2_fasta = '/omic/eureka/bioemu/input/villin_headpiece.fasta'
|
||||||
params.complex_name = "protein_complex"
|
params.complex_name = 'protein_complex'
|
||||||
params.exp_dG = -10.0 // kcal/mol (placeholder experimental value)
|
params.exp_dG = -10.0
|
||||||
params.outdir = '/omic/eureka/bioemu/output'
|
params.outdir = '/omic/eureka/bioemu/output'
|
||||||
params.cache_dir = '/tmp/bioemu_cache'
|
params.cache_dir = '/tmp/bioemu_cache'
|
||||||
|
|
||||||
// Parameters for structure generation and analysis
|
|
||||||
params.num_samples = 10
|
params.num_samples = 10
|
||||||
params.batch_size = 5
|
params.batch_size = 5
|
||||||
params.temperature = 300
|
params.temperature = 300
|
||||||
@@ -17,7 +15,7 @@ params.n_clusters = 5
|
|||||||
|
|
||||||
process GENERATE_STRUCTURE {
|
process GENERATE_STRUCTURE {
|
||||||
container 'harbor.cluster.omic.ai/omic/bioemu:latest'
|
container 'harbor.cluster.omic.ai/omic/bioemu:latest'
|
||||||
publishDir "${params.outdir}/${params.complex_name}/${protein_id}", mode: 'copy'
|
publishDir "${params.outdir}/${params.complex_name}", mode: 'copy'
|
||||||
|
|
||||||
input:
|
input:
|
||||||
tuple val(protein_id), path(fasta)
|
tuple val(protein_id), path(fasta)
|
||||||
@@ -27,13 +25,9 @@ process GENERATE_STRUCTURE {
|
|||||||
|
|
||||||
script:
|
script:
|
||||||
"""
|
"""
|
||||||
# Extract sequence from FASTA
|
|
||||||
SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\n')
|
SEQUENCE=\$(grep -v ">" ${fasta} | tr -d '\\n')
|
||||||
|
|
||||||
# Create cache dir
|
|
||||||
mkdir -p ${params.cache_dir}
|
mkdir -p ${params.cache_dir}
|
||||||
|
|
||||||
# Run BioEmu
|
|
||||||
python3 -m bioemu.sample \\
|
python3 -m bioemu.sample \\
|
||||||
--sequence "\${SEQUENCE}" \\
|
--sequence "\${SEQUENCE}" \\
|
||||||
--num_samples ${params.num_samples} \\
|
--num_samples ${params.num_samples} \\
|
||||||
@@ -41,7 +35,6 @@ process GENERATE_STRUCTURE {
|
|||||||
--output_dir . \\
|
--output_dir . \\
|
||||||
--cache_embeds_dir ${params.cache_dir}
|
--cache_embeds_dir ${params.cache_dir}
|
||||||
|
|
||||||
# Rename output files
|
|
||||||
mv topology.pdb ${protein_id}_topology.pdb
|
mv topology.pdb ${protein_id}_topology.pdb
|
||||||
mv samples.xtc ${protein_id}_samples.xtc
|
mv samples.xtc ${protein_id}_samples.xtc
|
||||||
"""
|
"""
|
||||||
@@ -64,7 +57,6 @@ process CALCULATE_BINDING {
|
|||||||
|
|
||||||
script:
|
script:
|
||||||
"""
|
"""
|
||||||
# Run binding energy calculation
|
|
||||||
python3 /opt/bioemu/scripts/calculate_binding.py \\
|
python3 /opt/bioemu/scripts/calculate_binding.py \\
|
||||||
--protein1_topology ${protein1_topology} \\
|
--protein1_topology ${protein1_topology} \\
|
||||||
--protein1_samples ${protein1_samples} \\
|
--protein1_samples ${protein1_samples} \\
|
||||||
@@ -75,56 +67,27 @@ process CALCULATE_BINDING {
|
|||||||
--output binding_energy.csv \\
|
--output binding_energy.csv \\
|
||||||
--plot energy_comparison.png
|
--plot energy_comparison.png
|
||||||
|
|
||||||
# Generate report
|
|
||||||
echo "# Binding Free Energy Analysis: ${params.complex_name}" > binding_energy_report.txt
|
echo "# Binding Free Energy Analysis: ${params.complex_name}" > binding_energy_report.txt
|
||||||
echo "======================================================" >> binding_energy_report.txt
|
echo "======================================================" >> binding_energy_report.txt
|
||||||
echo "## Experimental Value (Database)" >> binding_energy_report.txt
|
echo "## Experimental Value: ${params.exp_dG} kcal/mol" >> binding_energy_report.txt
|
||||||
echo "ΔG = ${params.exp_dG} kcal/mol" >> binding_energy_report.txt
|
|
||||||
echo "" >> binding_energy_report.txt
|
echo "" >> binding_energy_report.txt
|
||||||
|
|
||||||
# Extract predicted value
|
|
||||||
PREDICTED_DG=\$(grep -A1 "binding_free_energy" binding_energy.csv | tail -n1 | cut -d',' -f2)
|
PREDICTED_DG=\$(grep -A1 "binding_free_energy" binding_energy.csv | tail -n1 | cut -d',' -f2)
|
||||||
|
echo "## BioEmu Prediction: \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
|
||||||
echo "## BioEmu Prediction" >> binding_energy_report.txt
|
|
||||||
echo "ΔG = \${PREDICTED_DG} kcal/mol" >> binding_energy_report.txt
|
|
||||||
echo "" >> binding_energy_report.txt
|
|
||||||
|
|
||||||
# Calculate comparison metrics
|
|
||||||
echo "## Comparison" >> binding_energy_report.txt
|
|
||||||
ABS_DIFF=\$(python3 -c "print('%.2f' % abs(float('\${PREDICTED_DG}') - (${params.exp_dG})))")
|
|
||||||
REL_ERROR=\$(python3 -c "print('%.2f' % (((float('\${PREDICTED_DG}') - (${params.exp_dG}))/(${params.exp_dG}))*100))")
|
|
||||||
|
|
||||||
echo "Absolute Difference: \${ABS_DIFF} kcal/mol" >> binding_energy_report.txt
|
|
||||||
echo "Relative Error: \${REL_ERROR}%" >> binding_energy_report.txt
|
|
||||||
"""
|
"""
|
||||||
}
|
}
|
||||||
|
|
||||||
workflow {
|
workflow {
|
||||||
// Create channel for proteins
|
protein1_ch = Channel.fromPath(params.protein1_fasta)
|
||||||
protein_ch = Channel.fromList([
|
.map { fasta -> tuple("protein1", fasta) }
|
||||||
tuple("protein1", file(params.protein1_fasta)),
|
protein2_ch = Channel.fromPath(params.protein2_fasta)
|
||||||
tuple("protein2", file(params.protein2_fasta))
|
.map { fasta -> tuple("protein2", fasta) }
|
||||||
])
|
|
||||||
|
|
||||||
// Generate structures
|
all_proteins = protein1_ch.mix(protein2_ch)
|
||||||
GENERATE_STRUCTURE(protein_ch)
|
|
||||||
|
|
||||||
// Extract structure files for each protein
|
GENERATE_STRUCTURE(all_proteins)
|
||||||
protein1_files = GENERATE_STRUCTURE.out
|
|
||||||
.filter { it[0] == "protein1" }
|
|
||||||
.map { it -> tuple(it[1], it[2]) }
|
|
||||||
.first()
|
|
||||||
|
|
||||||
protein2_files = GENERATE_STRUCTURE.out
|
p1 = GENERATE_STRUCTURE.out.filter { it[0] == "protein1" }.map { [it[1], it[2]] }.first()
|
||||||
.filter { it[0] == "protein2" }
|
p2 = GENERATE_STRUCTURE.out.filter { it[0] == "protein2" }.map { [it[1], it[2]] }.first()
|
||||||
.map { it -> tuple(it[1], it[2]) }
|
|
||||||
.first()
|
|
||||||
|
|
||||||
// Calculate binding energy
|
CALCULATE_BINDING(p1[0], p1[1], p2[0], p2[1])
|
||||||
CALCULATE_BINDING(
|
|
||||||
protein1_files[0], // protein1_topology.pdb
|
|
||||||
protein1_files[1], // protein1_samples.xtc
|
|
||||||
protein2_files[0], // protein2_topology.pdb
|
|
||||||
protein2_files[1] // protein2_samples.xtc
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user