Initialize foldseek pipeline for WES

This commit is contained in:
2026-03-30 19:21:29 +01:00
commit ec54fa1c22
4 changed files with 703 additions and 0 deletions

151
main.nf Normal file
View File

@@ -0,0 +1,151 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
// ---------------------------------------------------------------------------
// Pipeline parameters (defaults; each can be overridden with --<name> on the
// nextflow command line).
// ---------------------------------------------------------------------------
// Query path. Used as the query in 'search' and 'multimersearch' modes and as
// the structure input in 'cluster' and 'createdb' modes.
params.query = '/mnt/OmicNAS/private/old/olamide/foldseek/input/1CRN.pdb'
// Target path searched against in 'search' and 'multimersearch' modes.
params.target = '/mnt/OmicNAS/private/old/olamide/foldseek/input/'
// Directory every process publishes (copies) its outputs into.
params.outdir = '/mnt/OmicNAS/private/old/olamide/foldseek/output/'
// Selects the workflow branch: 'search', 'cluster', 'multimersearch' or 'createdb'.
params.mode = 'search'
// Passed to foldseek as `-s` (easy-search only).
params.sensitivity = 9.5
// Passed to foldseek as `-e` (easy-search, easy-cluster, easy-multimersearch).
params.evalue = 0.001
// Passed to foldseek as `--threads` by every process.
params.threads = 4
// Passed to foldseek as `--alignment-type` (easy-search, easy-cluster).
// NOTE(review): 2 is presumably the 3Di+AA alignment mode; confirm against `foldseek easy-search -h`.
params.alignment_type = 2
// Passed to foldseek as `-c` (easy-search, easy-cluster).
params.coverage = 0.0
// Comma-separated column list passed to `--format-output` (easy-search only).
params.format_output = 'query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits'
// FOLDSEEK_SEARCH
// Runs `foldseek easy-search` for a query against a target.
//
// Inputs:
//   query  - query structure path (staged by copy into the task directory)
//   target - target path(s) to search against (staged by copy)
// Outputs (published by copy to params.outdir):
//   <query.simpleName>_results.m8 - alignment table, columns per params.format_output
//   run.log                       - combined stdout/stderr of the foldseek run
//
// NOTE(review): no `cpus` directive is declared, so the executor is not told
// that foldseek is started with params.threads threads.
process FOLDSEEK_SEARCH {
    container 'foldseek:latest'
    containerOptions '-v /mnt:/mnt'
    publishDir params.outdir, mode: 'copy'
    stageInMode 'copy'

    input:
    path query
    path target

    output:
    path "${query.simpleName}_results.m8"
    path "run.log"

    script:
    """
    # Without pipefail the pipeline status is the one of tee, which would hide
    # a foldseek failure; enable it explicitly instead of relying on the
    # configured process shell.
    set -o pipefail

    foldseek easy-search \\
        ${query} \\
        ${target} \\
        ${query.simpleName}_results.m8 \\
        tmp \\
        --threads ${params.threads} \\
        -s ${params.sensitivity} \\
        -e ${params.evalue} \\
        --alignment-type ${params.alignment_type} \\
        -c ${params.coverage} \\
        --format-output "${params.format_output}" \\
        2>&1 | tee run.log

    # Scratch directory handed to foldseek above; not an output.
    rm -rf tmp
    """
}
// FOLDSEEK_CLUSTER
// Runs `foldseek easy-cluster` over the given structures using the output
// prefix `cluster`.
//
// Inputs:
//   structures - structure path(s) to cluster (staged by copy)
// Outputs (published by copy to params.outdir):
//   cluster_cluster.tsv, cluster_rep_seq.fasta, cluster_all_seqs.fasta
//   run.log - combined stdout/stderr of the foldseek run
//
// Only -e, --alignment-type and -c are forwarded here; params.sensitivity is
// not passed to easy-cluster.
process FOLDSEEK_CLUSTER {
    container 'foldseek:latest'
    containerOptions '-v /mnt:/mnt'
    publishDir params.outdir, mode: 'copy'
    stageInMode 'copy'

    input:
    path structures

    output:
    path "cluster_cluster.tsv"
    path "cluster_rep_seq.fasta"
    path "cluster_all_seqs.fasta"
    path "run.log"

    script:
    """
    # Without pipefail the pipeline status is the one of tee, which would hide
    # a foldseek failure; enable it explicitly instead of relying on the
    # configured process shell.
    set -o pipefail

    foldseek easy-cluster \\
        ${structures} \\
        cluster \\
        tmp \\
        --threads ${params.threads} \\
        -e ${params.evalue} \\
        --alignment-type ${params.alignment_type} \\
        -c ${params.coverage} \\
        2>&1 | tee run.log

    # Scratch directory handed to foldseek above; not an output.
    rm -rf tmp
    """
}
// FOLDSEEK_MULTIMER_SEARCH
// Runs `foldseek easy-multimersearch` for a query complex against a target.
//
// Inputs:
//   query  - query structure path (staged by copy)
//   target - target path(s) to search against (staged by copy)
// Outputs (published by copy to params.outdir):
//   <query.simpleName>_multimer_results.m8     - alignment table
//   <query.simpleName>_multimer_results_report - multimer report
//   run.log                                    - combined stdout/stderr
//
// The result path given to foldseek has no extension, so the alignment file is
// renamed to `.m8` afterwards. Passing `<prefix>.m8` directly would instead
// move the report to `<prefix>.m8_report` and break the second declared output.
process FOLDSEEK_MULTIMER_SEARCH {
    container 'foldseek:latest'
    containerOptions '-v /mnt:/mnt'
    publishDir params.outdir, mode: 'copy'
    stageInMode 'copy'

    input:
    path query
    path target

    output:
    path "${query.simpleName}_multimer_results.m8"
    path "${query.simpleName}_multimer_results_report"
    path "run.log"

    script:
    def prefix = "${query.simpleName}_multimer_results"
    """
    # Without pipefail the pipeline status is the one of tee, which would hide
    # a foldseek failure; enable it explicitly instead of relying on the
    # configured process shell.
    set -o pipefail

    foldseek easy-multimersearch \\
        ${query} \\
        ${target} \\
        ${prefix} \\
        tmp \\
        --threads ${params.threads} \\
        -e ${params.evalue} \\
        2>&1 | tee run.log

    # foldseek writes the alignment to the exact result path it was given
    # (no extension); give it the .m8 name declared in the output block.
    if [ ! -e "${prefix}.m8" ]; then
        mv "${prefix}" "${prefix}.m8"
    fi

    # Scratch directory handed to foldseek above; not an output.
    rm -rf tmp
    """
}
// FOLDSEEK_CREATEDB
// Runs `foldseek createdb` to build a database named `structureDB` from the
// given structures.
//
// Inputs:
//   structures - structure path(s) to index (staged by copy)
// Outputs (published by copy to params.outdir):
//   structureDB* - every file whose name starts with the database prefix
//   run.log      - combined stdout/stderr of the foldseek run
process FOLDSEEK_CREATEDB {
    container 'foldseek:latest'
    containerOptions '-v /mnt:/mnt'
    publishDir params.outdir, mode: 'copy'
    stageInMode 'copy'

    input:
    path structures

    output:
    path "structureDB*"
    path "run.log"

    script:
    """
    # Without pipefail the pipeline status is the one of tee, which would hide
    # a foldseek failure; enable it explicitly instead of relying on the
    # configured process shell.
    set -o pipefail

    foldseek createdb \\
        ${structures} \\
        structureDB \\
        --threads ${params.threads} \\
        2>&1 | tee run.log
    """
}
// Entry workflow: dispatches on params.mode to exactly one foldseek process.
//
//   search         -> FOLDSEEK_SEARCH(query, target)
//   cluster        -> FOLDSEEK_CLUSTER(query paths collected into one task)
//   multimersearch -> FOLDSEEK_MULTIMER_SEARCH(query, target)
//   createdb       -> FOLDSEEK_CREATEDB(query paths collected into one task)
//
// Input paths are created with checkIfExists so a wrong --query/--target
// aborts the run instead of producing an empty channel and a silent no-op.
// An unrecognised mode is reported as an error for the same reason.
workflow {
    if (params.mode == 'search') {
        query_ch  = Channel.fromPath(params.query, checkIfExists: true)
        // collect() turns the target into a single value so it is reused for every query.
        target_ch = Channel.fromPath(params.target, checkIfExists: true).collect()
        FOLDSEEK_SEARCH(query_ch, target_ch)
    }
    else if (params.mode == 'cluster') {
        // All matched paths go to one clustering task.
        structures_ch = Channel.fromPath(params.query, checkIfExists: true).collect()
        FOLDSEEK_CLUSTER(structures_ch)
    }
    else if (params.mode == 'multimersearch') {
        query_ch  = Channel.fromPath(params.query, checkIfExists: true)
        target_ch = Channel.fromPath(params.target, checkIfExists: true).collect()
        FOLDSEEK_MULTIMER_SEARCH(query_ch, target_ch)
    }
    else if (params.mode == 'createdb') {
        // All matched paths go to one database-building task.
        structures_ch = Channel.fromPath(params.query, checkIfExists: true).collect()
        FOLDSEEK_CREATEDB(structures_ch)
    }
    else {
        error "Unknown --mode '${params.mode}'. Expected one of: search, cluster, multimersearch, createdb"
    }
}