90 lines
2.7 KiB
Plaintext
90 lines
2.7 KiB
Plaintext
#!/usr/bin/env nextflow
|
|
|
|
nextflow.enable.dsl=2
|
|
|
|
// ---------------------------------------------------------------------------
// Default pipeline parameters. Each one can be overridden on the command line
// (e.g. `--num_samples 100`) or through a params/config file.
// ---------------------------------------------------------------------------

// FASTA files to process; the workflow runs the pipeline once per file.
params.fasta_list = [
    "/mnt/OmicNAS/private/old/olamide/bioemu/input/villin_headpiece.fasta",
    "/mnt/OmicNAS/private/old/olamide/bioemu/input/trp_cage.fasta"
]

// Root directory for published results (one sub-directory per protein id).
params.outdir = "/mnt/OmicNAS/private/old/olamide/bioemu/output"

// Directory handed to the sampler as --cache_embeds_dir; created on demand.
params.cache_dir = "/mnt/OmicNAS/private/old/olamide/bioemu/cache"

// Helper-script directory that ships next to this pipeline script.
// `projectDir` replaces the deprecated `baseDir` alias (same value).
params.scripts_dir = "${projectDir}/scripts"

// Forwarded to `bioemu.sample` as --num_samples and --batch_size_100.
params.num_samples = 10
params.batch_size_100 = 10

// Forwarded to calculate_gibbs.py as --temperature and --n_clusters.
// NOTE(review): temperature is presumably in Kelvin -- confirm against the script.
params.temperature = 300
params.n_clusters = 5
|
|
|
|
/*
 * BIOEMU
 *
 * Runs `python3 -m bioemu.sample` on the sequence read from one FASTA file.
 *
 * Input : tuple (protein_id, fasta)
 * Output: `structures` -> tuple (protein_id, topology.pdb, samples.xtc)
 *         plus run.log (always) and sequence.fasta / batch_*.npz when present.
 * Results are copied to ${params.outdir}/<protein_id>.
 */
process BIOEMU {
    container 'bioemu:latest'
    containerOptions '--rm --gpus all -v /mnt:/mnt -v /tmp:/tmp'
    publishDir "${params.outdir}/${protein_id}", mode: 'copy'

    input:
    tuple val(protein_id), path(fasta)

    output:
    tuple val(protein_id), path("topology.pdb"), path("samples.xtc"), emit: structures
    path "sequence.fasta", optional: true
    path "batch_*.npz", optional: true
    path "run.log"

    script:
    """
    # Without pipefail the exit status of `sampler | tee` is tee's, so a
    # crashing sampler would not fail the task with its own exit code.
    set -o pipefail

    # Make sure the embedding cache directory exists
    mkdir -p "${params.cache_dir}"

    # Join every non-header line into one sequence string. Only lines that
    # START with '>' are headers; all whitespace (including CR from CRLF
    # files) is removed. A multi-record FASTA therefore yields one joined
    # sequence. `|| true` keeps grep's "no lines selected" status from
    # aborting before the explicit check below can report the problem.
    SEQUENCE=\$(grep -v '^>' "${fasta}" | tr -d '[:space:]' || true)
    if [ -z "\${SEQUENCE}" ]; then
        echo "ERROR: no sequence residues found in ${fasta}" >&2
        exit 1
    fi

    # Run BioEmu with the extracted sequence; keep a copy of all output in run.log
    python3 -m bioemu.sample \\
        --sequence "\${SEQUENCE}" \\
        --num_samples ${params.num_samples} \\
        --batch_size_100 ${params.batch_size_100} \\
        --output_dir . \\
        --cache_embeds_dir "${params.cache_dir}" 2>&1 | tee run.log
    """
}
|
|
|
|
/*
 * CALCULATE_FREE_ENERGY
 *
 * Invokes calculate_gibbs.py on one protein's topology/trajectory pair.
 *
 * Input : tuple (protein_id, topology, samples)
 * Output: `free_energy` -> tuple (protein_id, free_energy.csv)
 *         plus energy_plot.png when the script produces it.
 * Results are copied to ${params.outdir}/<protein_id>/analysis.
 */
process CALCULATE_FREE_ENERGY {
    publishDir path: "${params.outdir}/${protein_id}/analysis", mode: 'copy'
    container 'bioemu:latest'
    containerOptions '--rm --gpus all -v /mnt:/mnt'

    input:
    tuple val(protein_id), path(topology), path(samples)

    output:
    tuple val(protein_id), path("free_energy.csv"), emit: free_energy
    path "energy_plot.png", optional: true

    script:
    // Location of the analysis script as referenced by the command below.
    def gibbs_script = '/opt/bioemu/scripts/calculate_gibbs.py'
    """
    # Calculate free energy from sampled structures
    python3 ${gibbs_script} \\
        --samples ${samples} \\
        --topology ${topology} \\
        --temperature ${params.temperature} \\
        --n_clusters ${params.n_clusters} \\
        --output free_energy.csv \\
        --plot energy_plot.png
    """
}
|
|
|
|
/*
 * Entry workflow: one BIOEMU sampling task per FASTA file, followed by one
 * CALCULATE_FREE_ENERGY task per sampled protein.
 */
workflow {
    // `params.fasta_list` is a list by default, but a value given on the
    // command line (`--fasta_list a.fasta,b.fasta`) arrives as a single
    // string; accept both forms.
    def fasta_paths = (params.fasta_list instanceof List) ?
        params.fasta_list :
        params.fasta_list.toString().tokenize(',')*.trim()

    // Build a channel of [protein_id, fasta_file] tuples. The protein id is
    // the file's base name. `checkIfExists` makes a missing FASTA fail here,
    // before any task is submitted. The local is deliberately not called
    // `file`, so it does not shadow the built-in file() function.
    fasta_ch = Channel.fromList(fasta_paths)
        .map { fasta_path ->
            def fasta_file = file(fasta_path, checkIfExists: true)
            [fasta_file.baseName, fasta_file]
        }

    // Run BioEmu for each protein sequence
    BIOEMU(fasta_ch)

    // Calculate Gibbs free energy for each protein
    CALCULATE_FREE_ENERGY(BIOEMU.out.structures)
}
|