Configure foldseek pipeline for WES execution

- Update container image to harbor.cluster.omic.ai/omic/foldseek:latest
- Update default input/output paths to the /omic/eureka/foldseek/ PVC mount
- Add k8s profile with eureka-pvc storage
- Remove `stageInMode 'copy'` and the `containerOptions '-v /mnt:/mnt'` bind mount from all processes for k8s compatibility
- Add params.json with defaults pointing to s3://omic/eureka/foldseek/ paths
This commit is contained in:
2026-03-30 20:54:07 +01:00
parent ec54fa1c22
commit bf04f8971e
3 changed files with 197 additions and 31 deletions

22
main.nf
View File

@@ -2,9 +2,9 @@
nextflow.enable.dsl=2
params.query = '/mnt/OmicNAS/private/old/olamide/foldseek/input/1CRN.pdb'
params.target = '/mnt/OmicNAS/private/old/olamide/foldseek/input/'
params.outdir = '/mnt/OmicNAS/private/old/olamide/foldseek/output/'
params.query = '/omic/eureka/foldseek/input/1CRN.pdb'
params.target = '/omic/eureka/foldseek/input/'
params.outdir = '/omic/eureka/foldseek/output'
params.mode = 'search'
params.sensitivity = 9.5
params.evalue = 0.001
@@ -14,10 +14,8 @@ params.coverage = 0.0
params.format_output = 'query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits'
process FOLDSEEK_SEARCH {
container 'foldseek:latest'
containerOptions '-v /mnt:/mnt'
container 'harbor.cluster.omic.ai/omic/foldseek:latest'
publishDir params.outdir, mode: 'copy'
stageInMode 'copy'
input:
path query
@@ -47,10 +45,8 @@ process FOLDSEEK_SEARCH {
}
process FOLDSEEK_CLUSTER {
container 'foldseek:latest'
containerOptions '-v /mnt:/mnt'
container 'harbor.cluster.omic.ai/omic/foldseek:latest'
publishDir params.outdir, mode: 'copy'
stageInMode 'copy'
input:
path structures
@@ -77,10 +73,8 @@ process FOLDSEEK_CLUSTER {
"""
}
process FOLDSEEK_MULTIMER_SEARCH {
container 'foldseek:latest'
containerOptions '-v /mnt:/mnt'
container 'harbor.cluster.omic.ai/omic/foldseek:latest'
publishDir params.outdir, mode: 'copy'
stageInMode 'copy'
input:
path query
@@ -107,10 +101,8 @@ process FOLDSEEK_MULTIMER_SEARCH {
}
process FOLDSEEK_CREATEDB {
container 'foldseek:latest'
containerOptions '-v /mnt:/mnt'
container 'harbor.cluster.omic.ai/omic/foldseek:latest'
publishDir params.outdir, mode: 'copy'
stageInMode 'copy'
input:
path structures

View File

@@ -1,18 +1,15 @@
// Manifest for Nextflow metadata
manifest {
name = 'Foldseek-Nextflow'
author = 'Generated from Foldseek repository'
homePage = 'https://github.com/steineggerlab/foldseek'
name = 'foldseek'
author = 'Olamide'
description = 'Nextflow pipeline for Foldseek - Fast protein structure search and clustering'
mainScript = 'main.nf'
version = '1.0.0'
}
// Global default parameters
params {
query = "/mnt/OmicNAS/private/old/olamide/foldseek/input/1CRN.pdb"
target = "/mnt/OmicNAS/private/old/olamide/foldseek/input/"
outdir = "/mnt/OmicNAS/private/old/olamide/foldseek/output"
query = "/omic/eureka/foldseek/input/1CRN.pdb"
target = "/omic/eureka/foldseek/input/"
outdir = "/omic/eureka/foldseek/output"
mode = "search"
sensitivity = 9.5
evalue = 0.001
@@ -22,22 +19,30 @@ params {
format_output = "query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits"
}
// Container configurations
docker {
enabled = true
runOptions = '-v /mnt:/mnt'
}
// Process configurations
process {
container = 'harbor.cluster.omic.ai/omic/foldseek:latest'
cpus = 4
memory = '16 GB'
}
// Execution configurations
executor {
$local {
cpus = 8
memory = '32 GB'
profiles {
standard {
docker {
temp = 'auto'
}
}
k8s {
process {
executor = 'k8s'
}
k8s {
storageClaimName = 'eureka-pvc'
storageMountPath = '/omic/eureka'
}
}
}

169
params.json Normal file
View File

@@ -0,0 +1,169 @@
{
"params": {
"query": {
"type": "file",
"description": "Path to input protein structure files (PDB/mmCIF format)",
"default": "s3://omic/eureka/foldseek/input/1CRN.pdb",
"required": true,
"pipeline_io": "input",
"var_name": "params.query",
"examples": [
"s3://omic/eureka/foldseek/input/1CRN.pdb",
"s3://omic/eureka/foldseek/input/*.pdb"
],
"pattern": ".*\\.(pdb|cif|mmcif)(\\.gz)?$",
"enum": [],
"validation": {},
"notes": "Can be a single PDB/mmCIF file or a pattern to match multiple files. Supports gzipped files."
},
"target": {
"type": "file",
"description": "Path to target database or directory for structure search",
"default": "s3://omic/eureka/foldseek/input/",
"required": true,
"pipeline_io": "input",
"var_name": "params.target",
"examples": [
"s3://omic/eureka/foldseek/input/",
"s3://omic/eureka/foldseek/databases/"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Can be a Foldseek database or a directory containing structure files."
},
"outdir": {
"type": "folder",
"description": "Directory for Foldseek output results",
"default": "s3://omic/eureka/foldseek/output",
"required": true,
"pipeline_io": "output",
"var_name": "params.outdir",
"examples": [
"s3://omic/eureka/foldseek/output"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Directory where all output files will be stored. Will be created if it doesn't exist."
},
"mode": {
"type": "string",
"description": "Operation mode for Foldseek pipeline",
"default": "search",
"required": true,
"pipeline_io": "parameter",
"var_name": "params.mode",
"examples": [
"search",
"cluster",
"multimersearch",
"createdb"
],
"pattern": "^(search|cluster|multimersearch|createdb)$",
"enum": ["search", "cluster", "multimersearch", "createdb"],
"validation": {},
"notes": "search: structure search, cluster: clustering, multimersearch: complex search, createdb: create database"
},
"sensitivity": {
"type": "number",
"description": "Search sensitivity (higher = more sensitive but slower)",
"default": 9.5,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.sensitivity",
"examples": [
7.5,
9.5
],
"pattern": "^[0-9]+(\\.[0-9]+)?$",
"enum": [],
"validation": {},
"notes": "Fast: 7.5, Default: 9.5. Range: 1.0 to 15.0"
},
"evalue": {
"type": "number",
"description": "E-value threshold for reporting matches",
"default": 0.001,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.evalue",
"examples": [
0.001,
0.01,
10
],
"pattern": "^[0-9]+(\\.[0-9]+)?([eE][+-]?[0-9]+)?$",
"enum": [],
"validation": {},
"notes": "Lower values are more stringent. Default: 0.001"
},
"threads": {
"type": "integer",
"description": "Number of CPU threads to use",
"default": 4,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.threads",
"examples": [
4,
8,
16
],
"pattern": "^[0-9]+$",
"enum": [],
"validation": {},
"notes": "More threads improve speed for larger searches."
},
"alignment_type": {
"type": "integer",
"description": "Type of structural alignment algorithm",
"default": 2,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.alignment_type",
"examples": [
0,
1,
2
],
"pattern": "^[0-3]$",
"enum": [0, 1, 2, 3],
"validation": {},
"notes": "0: 3Di only, 1: TMalign (global), 2: 3Di+AA (default), 3: LoLalign"
},
"coverage": {
"type": "number",
"description": "Minimum fraction of aligned residues",
"default": 0.0,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.coverage",
"examples": [
0.0,
0.5,
0.8
],
"pattern": "^[0-1](\\.[0-9]+)?$",
"enum": [],
"validation": {},
"notes": "Range: 0.0 to 1.0. Higher values produce more global alignments."
},
"format_output": {
"type": "string",
"description": "Output format columns (comma-separated)",
"default": "query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.format_output",
"examples": [
"query,target,fident,evalue,bits",
"query,target,alntmscore,qtmscore,ttmscore"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Available columns: query, target, fident, alnlen, mismatch, gapopen, qstart, qend, tstart, tend, evalue, bits, alntmscore, qtmscore, ttmscore, lddt, prob"
}
}
}