#!/usr/bin/env nextflow

nextflow.enable.dsl=2

/*
 * Pipeline overview:
 *   1. BIOEMU                - runs `python3 -m bioemu.sample` on the sequence of each input FASTA.
 *   2. CALCULATE_FREE_ENERGY - runs calculate_gibbs.py on the topology/trajectory pair emitted by step 1.
 */

// FASTA files to process; one BIOEMU task is launched per entry.
params.fasta_list = [
    "/mnt/OmicNAS/private/old/olamide/bioemu/input/villin_headpiece.fasta",
    "/mnt/OmicNAS/private/old/olamide/bioemu/input/trp_cage.fasta"
]

// Root directory for published results (one sub-directory per protein id).
params.outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output"

// Directory handed to bioemu.sample via --cache_embeds_dir.
params.cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache"

// NOTE(review): not referenced anywhere in this file; kept because other code may read it.
params.scripts_dir = "${baseDir}/scripts"

// Values forwarded verbatim to bioemu.sample (--num_samples, --batch_size_100).
params.num_samples = 10
params.batch_size_100 = 10

// Values forwarded verbatim to calculate_gibbs.py (--temperature, --n_clusters).
params.temperature = 300
params.n_clusters = 5

/*
 * BIOEMU
 *
 * Input : tuple (protein_id, fasta)
 * Output: `structures` -> tuple (protein_id, topology.pdb, samples.xtc)
 *         plus run.log and, when present, sequence.fasta and batch_*.npz.
 * All declared outputs are copied to ${params.outdir}/<protein_id>.
 */
process BIOEMU {
    container 'bioemu:latest'
    containerOptions '--rm --gpus all -v /mnt:/mnt -v /tmp:/tmp'
    publishDir "${params.outdir}/${protein_id}", mode: 'copy'

    input:
    tuple val(protein_id), path(fasta)

    output:
    tuple val(protein_id), path("topology.pdb"), path("samples.xtc"), emit: structures
    path "sequence.fasta", optional: true
    path "batch_*.npz", optional: true
    path "run.log"

    script:
    """
    # Make sure cache directory exists
    mkdir -p "${params.cache_dir}"

    # Extract the sequence from the FASTA file: drop header lines, then strip
    # line breaks (including CR from CRLF files) so the residues form one string.
    SEQUENCE=\$(grep -v '^>' "${fasta}" | tr -d '\\n\\r')

    # Fail early with a clear message instead of handing an empty sequence to the sampler.
    if [ -z "\${SEQUENCE}" ]; then
        echo "ERROR: no sequence found in ${fasta}" >&2
        exit 1
    fi

    # Without pipefail the pipeline below would report tee's exit status and
    # hide a failed sampling run.
    set -o pipefail

    # Run BioEmu with the extracted sequence
    python3 -m bioemu.sample \\
        --sequence "\${SEQUENCE}" \\
        --num_samples ${params.num_samples} \\
        --batch_size_100 ${params.batch_size_100} \\
        --output_dir . \\
        --cache_embeds_dir "${params.cache_dir}" 2>&1 | tee run.log
    """
}

/*
 * CALCULATE_FREE_ENERGY
 *
 * Input : tuple (protein_id, topology, samples) as emitted by BIOEMU.out.structures
 * Output: `free_energy` -> tuple (protein_id, free_energy.csv)
 *         plus energy_plot.png when the script produces it.
 * All declared outputs are copied to ${params.outdir}/<protein_id>/analysis.
 */
process CALCULATE_FREE_ENERGY {
    container 'bioemu:latest'
    containerOptions '--rm --gpus all -v /mnt:/mnt'
    publishDir "${params.outdir}/${protein_id}/analysis", mode: 'copy'

    input:
    tuple val(protein_id), path(topology), path(samples)

    output:
    tuple val(protein_id), path("free_energy.csv"), emit: free_energy
    path "energy_plot.png", optional: true

    script:
    """
    # Calculate free energy from sampled structures
    python3 /opt/bioemu/scripts/calculate_gibbs.py \\
        --samples "${samples}" \\
        --topology "${topology}" \\
        --temperature ${params.temperature} \\
        --n_clusters ${params.n_clusters} \\
        --output free_energy.csv \\
        --plot energy_plot.png
    """
}

workflow {
    // Convert fasta_list to a channel of [protein_id, fasta_file] tuples.
    // The protein id is the FASTA file name without its extension.
    Channel.fromList(params.fasta_list)
        .map { fasta_path ->
            // Named `fastaFile` so the built-in file() function is not shadowed;
            // checkIfExists aborts here if an input path is missing.
            def fastaFile = file(fasta_path, checkIfExists: true)
            return [fastaFile.baseName, fastaFile]
        }
        .set { fasta_ch }

    // Run BioEmu for each protein sequence
    BIOEMU(fasta_ch)

    // Calculate Gibbs free energy for each protein
    CALCULATE_FREE_ENERGY(BIOEMU.out.structures)
}