Add BioEmu Nextflow pipeline implementation
This commit is contained in:
89
main.nf
Normal file
89
main.nf
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env nextflow

nextflow.enable.dsl=2

// ---------------------------------------------------------------------------
// Pipeline parameters. These are defaults; each one can be overridden at
// launch time (for example with --outdir <path> or a -params-file).
// ---------------------------------------------------------------------------

// FASTA files to process. The workflow turns every entry into one
// [protein_id, fasta] tuple, so each file is sampled independently.
params.fasta_list = [
    "/mnt/OmicNAS/private/old/olamide/bioemu/input/villin_headpiece.fasta",
    "/mnt/OmicNAS/private/old/olamide/bioemu/input/trp_cage.fasta"
]

// Root of the published results; processes publish into per-protein
// sub-directories underneath it.
params.outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output"

// Directory handed to bioemu.sample as --cache_embeds_dir.
params.cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache"

// Location of helper scripts shipped next to this pipeline script.
// `projectDir` is used instead of `baseDir`, which is a deprecated alias for
// the same directory.
// NOTE(review): no process visible in this file reads params.scripts_dir.
params.scripts_dir = "${projectDir}/scripts"

// Forwarded to bioemu.sample as --num_samples.
params.num_samples = 10

// Forwarded to bioemu.sample as --batch_size_100.
params.batch_size_100 = 10

// Forwarded to calculate_gibbs.py as --temperature.
// NOTE(review): presumably Kelvin - confirm against calculate_gibbs.py.
params.temperature = 300

// Forwarded to calculate_gibbs.py as --n_clusters.
params.n_clusters = 5
|
||||
|
||||
/*
 * BIOEMU
 *
 * Runs `python3 -m bioemu.sample` on the sequence contained in one FASTA file.
 *
 * Input:
 *   tuple(protein_id, fasta) - identifier used for the publish directory and
 *                              the FASTA file holding the sequence.
 * Output:
 *   structures      - tuple(protein_id, topology.pdb, samples.xtc)
 *   sequence.fasta  - published when the sampler writes it (optional)
 *   batch_*.npz     - published when the sampler writes them (optional)
 *   run.log         - combined stdout/stderr of the sampler
 *
 * Everything is copied to ${params.outdir}/${protein_id}.
 */
process BIOEMU {
    container 'bioemu:latest'
    containerOptions '--rm --gpus all -v /mnt:/mnt -v /tmp:/tmp'
    publishDir "${params.outdir}/${protein_id}", mode: 'copy'

    input:
    tuple val(protein_id), path(fasta)

    output:
    tuple val(protein_id), path("topology.pdb"), path("samples.xtc"), emit: structures
    path "sequence.fasta", optional: true
    path "batch_*.npz", optional: true
    path "run.log"

    script:
    """
    # The sampler output is piped through tee; without pipefail the pipeline
    # would report the exit status of tee and hide a failed sampling run.
    set -o pipefail

    # Make sure cache directory exists
    mkdir -p "${params.cache_dir}"

    # Extract the sequence from the FASTA file: drop header lines and remove
    # all whitespace (newlines, carriage returns from CRLF files, blanks).
    # "|| true" keeps a no-match grep from aborting before the check below.
    SEQUENCE=\$(grep -v '^>' "${fasta}" | tr -d '[:space:]' || true)

    if [ -z "\${SEQUENCE}" ]; then
        echo "ERROR: no sequence found in ${fasta}" >&2
        exit 1
    fi

    # Run BioEmu with the extracted sequence
    python3 -m bioemu.sample \\
        --sequence "\${SEQUENCE}" \\
        --num_samples ${params.num_samples} \\
        --batch_size_100 ${params.batch_size_100} \\
        --output_dir . \\
        --cache_embeds_dir "${params.cache_dir}" 2>&1 | tee run.log
    """
}
|
||||
|
||||
/*
 * CALCULATE_FREE_ENERGY
 *
 * Invokes /opt/bioemu/scripts/calculate_gibbs.py on one sampled ensemble.
 *
 * Input:
 *   tuple(protein_id, topology, samples) - identifier plus the topology and
 *                                          trajectory files to analyse.
 * Output:
 *   free_energy      - tuple(protein_id, free_energy.csv)
 *   energy_plot.png  - published when the script produces it (optional)
 *
 * Results are copied to ${params.outdir}/${protein_id}/analysis.
 */
process CALCULATE_FREE_ENERGY {
    publishDir "${params.outdir}/${protein_id}/analysis", mode: 'copy'
    container 'bioemu:latest'
    containerOptions '--rm --gpus all -v /mnt:/mnt'

    input:
    tuple val(protein_id), path(topology), path(samples)

    output:
    tuple val(protein_id), path("free_energy.csv"), emit: free_energy
    path("energy_plot.png"), optional: true

    // temperature and n_clusters come straight from the pipeline parameters.
    script:
    """
    # Calculate free energy from sampled structures
    python3 /opt/bioemu/scripts/calculate_gibbs.py \\
    --samples ${samples} \\
    --topology ${topology} \\
    --temperature ${params.temperature} \\
    --n_clusters ${params.n_clusters} \\
    --output free_energy.csv \\
    --plot energy_plot.png
    """
}
|
||||
|
||||
/*
 * Entry workflow: sample structures for every FASTA file in params.fasta_list,
 * then run the free-energy analysis on each sampled ensemble.
 */
workflow {
    // params.fasta_list is a list by default, but a value supplied on the
    // command line (--fasta_list a.fasta,b.fasta) arrives as a single string.
    // Accept both forms and normalise to a list of non-empty path strings.
    def raw_list = params.fasta_list ?: []
    def requested = raw_list instanceof List ? raw_list : raw_list.toString().tokenize(',')
    def fasta_paths = requested
        .collect { entry -> entry.toString().trim() }
        .findAll { entry -> entry }

    if( !fasta_paths ) {
        error "params.fasta_list is empty: provide at least one FASTA file"
    }

    // Convert the list to a channel of [protein_id, fasta_file] tuples.
    // The local is deliberately not called `file`, which would shadow the
    // built-in file() function; checkIfExists makes a wrong path fail here
    // instead of inside a running task.
    fasta_ch = Channel.fromList(fasta_paths)
        .map { fasta_path ->
            def fasta = file(fasta_path, checkIfExists: true)
            [fasta.baseName, fasta]
        }

    // Run BioEmu for each protein sequence
    BIOEMU(fasta_ch)

    // Calculate Gibbs free energy for each protein
    CALCULATE_FREE_ENERGY(BIOEMU.out.structures)
}
|
||||
Reference in New Issue
Block a user