diff --git a/main.nf b/main.nf index 871fe9c..8b2b1b0 100644 --- a/main.nf +++ b/main.nf @@ -2,9 +2,9 @@ nextflow.enable.dsl=2 -params.query = '/mnt/OmicNAS/private/old/olamide/foldseek/input/1CRN.pdb' -params.target = '/mnt/OmicNAS/private/old/olamide/foldseek/input/' -params.outdir = '/mnt/OmicNAS/private/old/olamide/foldseek/output/' +params.query = '/omic/eureka/foldseek/input/1CRN.pdb' +params.target = '/omic/eureka/foldseek/input/' +params.outdir = '/omic/eureka/foldseek/output' params.mode = 'search' params.sensitivity = 9.5 params.evalue = 0.001 @@ -14,10 +14,8 @@ params.coverage = 0.0 params.format_output = 'query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits' process FOLDSEEK_SEARCH { - container 'foldseek:latest' - containerOptions '-v /mnt:/mnt' + container 'harbor.cluster.omic.ai/omic/foldseek:latest' publishDir params.outdir, mode: 'copy' - stageInMode 'copy' input: path query @@ -47,10 +45,8 @@ process FOLDSEEK_SEARCH { } process FOLDSEEK_CLUSTER { - container 'foldseek:latest' - containerOptions '-v /mnt:/mnt' + container 'harbor.cluster.omic.ai/omic/foldseek:latest' publishDir params.outdir, mode: 'copy' - stageInMode 'copy' input: path structures @@ -77,10 +73,8 @@ process FOLDSEEK_CLUSTER { """ } process FOLDSEEK_MULTIMER_SEARCH { - container 'foldseek:latest' - containerOptions '-v /mnt:/mnt' + container 'harbor.cluster.omic.ai/omic/foldseek:latest' publishDir params.outdir, mode: 'copy' - stageInMode 'copy' input: path query @@ -107,10 +101,8 @@ process FOLDSEEK_MULTIMER_SEARCH { } process FOLDSEEK_CREATEDB { - container 'foldseek:latest' - containerOptions '-v /mnt:/mnt' + container 'harbor.cluster.omic.ai/omic/foldseek:latest' publishDir params.outdir, mode: 'copy' - stageInMode 'copy' input: path structures diff --git a/nextflow.config b/nextflow.config index a345d0e..23dccea 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,18 +1,15 @@ -// Manifest for Nextflow metadata manifest { - name = 'Foldseek-Nextflow' - author = 'Generated from Foldseek repository' - homePage = 'https://github.com/steineggerlab/foldseek' + name = 'foldseek' + author = 'Olamide' description = 'Nextflow pipeline for Foldseek - Fast protein structure search and clustering' mainScript = 'main.nf' version = '1.0.0' } -// Global default parameters params { - query = "/mnt/OmicNAS/private/old/olamide/foldseek/input/1CRN.pdb" - target = "/mnt/OmicNAS/private/old/olamide/foldseek/input/" - outdir = "/mnt/OmicNAS/private/old/olamide/foldseek/output" + query = "/omic/eureka/foldseek/input/1CRN.pdb" + target = "/omic/eureka/foldseek/input/" + outdir = "/omic/eureka/foldseek/output" mode = "search" sensitivity = 9.5 evalue = 0.001 @@ -22,22 +19,30 @@ params { format_output = "query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits" } -// Container configurations docker { enabled = true - runOptions = '-v /mnt:/mnt' } -// Process configurations process { + container = 'harbor.cluster.omic.ai/omic/foldseek:latest' cpus = 4 memory = '16 GB' } -// Execution configurations -executor { - $local { - cpus = 8 - memory = '32 GB' +profiles { + standard { + docker { + temp = 'auto' + } + } + + k8s { + process { + executor = 'k8s' + } + k8s { + storageClaimName = 'eureka-pvc' + storageMountPath = '/omic/eureka' + } } } diff --git a/params.json b/params.json new file mode 100644 index 0000000..cdfb5ac --- /dev/null +++ b/params.json @@ -0,0 +1,169 @@ +{ + "params": { + "query": { + "type": "file", + "description": "Path to input protein structure files (PDB/mmCIF format)", + "default": "s3://omic/eureka/foldseek/input/1CRN.pdb", + "required": true, + "pipeline_io": "input", + "var_name": "params.query", + "examples": [ + "s3://omic/eureka/foldseek/input/1CRN.pdb", + "s3://omic/eureka/foldseek/input/*.pdb" + ], + "pattern": ".*\\.(pdb|cif|mmcif)(\\.gz)?$", + "enum": [], + "validation": {}, + "notes": "Can be a single PDB/mmCIF file or a pattern to match multiple files. Supports gzipped files." + }, + "target": { + "type": "file", + "description": "Path to target database or directory for structure search", + "default": "s3://omic/eureka/foldseek/input/", + "required": true, + "pipeline_io": "input", + "var_name": "params.target", + "examples": [ + "s3://omic/eureka/foldseek/input/", + "s3://omic/eureka/foldseek/databases/" + ], + "pattern": ".*", + "enum": [], + "validation": {}, + "notes": "Can be a Foldseek database or a directory containing structure files." + }, + "outdir": { + "type": "folder", + "description": "Directory for Foldseek output results", + "default": "s3://omic/eureka/foldseek/output", + "required": true, + "pipeline_io": "output", + "var_name": "params.outdir", + "examples": [ + "s3://omic/eureka/foldseek/output" + ], + "pattern": ".*", + "enum": [], + "validation": {}, + "notes": "Directory where all output files will be stored. Will be created if it doesn't exist." + }, + "mode": { + "type": "string", + "description": "Operation mode for Foldseek pipeline", + "default": "search", + "required": true, + "pipeline_io": "parameter", + "var_name": "params.mode", + "examples": [ + "search", + "cluster", + "multimersearch", + "createdb" + ], + "pattern": "^(search|cluster|multimersearch|createdb)$", + "enum": ["search", "cluster", "multimersearch", "createdb"], + "validation": {}, + "notes": "search: structure search, cluster: clustering, multimersearch: complex search, createdb: create database" + }, + "sensitivity": { + "type": "number", + "description": "Search sensitivity (higher = more sensitive but slower)", + "default": 9.5, + "required": false, + "pipeline_io": "parameter", + "var_name": "params.sensitivity", + "examples": [ + 7.5, + 9.5 + ], + "pattern": "^[0-9]+(\\.[0-9]+)?$", + "enum": [], + "validation": {}, + "notes": "Fast: 7.5, Default: 9.5. Range: 1.0 to 15.0" + }, + "evalue": { + "type": "number", + "description": "E-value threshold for reporting matches", + "default": 0.001, + "required": false, + "pipeline_io": "parameter", + "var_name": "params.evalue", + "examples": [ + 0.001, + 0.01, + 10 + ], + "pattern": "^[0-9]+(\\.[0-9]+)?([eE][+-]?[0-9]+)?$", + "enum": [], + "validation": {}, + "notes": "Lower values are more stringent. Default: 0.001" + }, + "threads": { + "type": "integer", + "description": "Number of CPU threads to use", + "default": 4, + "required": false, + "pipeline_io": "parameter", + "var_name": "params.threads", + "examples": [ + 4, + 8, + 16 + ], + "pattern": "^[0-9]+$", + "enum": [], + "validation": {}, + "notes": "More threads improve speed for larger searches." + }, + "alignment_type": { + "type": "integer", + "description": "Type of structural alignment algorithm", + "default": 2, + "required": false, + "pipeline_io": "parameter", + "var_name": "params.alignment_type", + "examples": [ + 0, + 1, + 2 + ], + "pattern": "^[0-3]$", + "enum": [0, 1, 2, 3], + "validation": {}, + "notes": "0: 3Di only, 1: TMalign (global), 2: 3Di+AA (default), 3: LoLalign" + }, + "coverage": { + "type": "number", + "description": "Minimum fraction of aligned residues", + "default": 0.0, + "required": false, + "pipeline_io": "parameter", + "var_name": "params.coverage", + "examples": [ + 0.0, + 0.5, + 0.8 + ], + "pattern": "^[0-1](\\.[0-9]+)?$", + "enum": [], + "validation": {}, + "notes": "Range: 0.0 to 1.0. Higher values produce more global alignments." + }, + "format_output": { + "type": "string", + "description": "Output format columns (comma-separated)", + "default": "query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits", + "required": false, + "pipeline_io": "parameter", + "var_name": "params.format_output", + "examples": [ + "query,target,fident,evalue,bits", + "query,target,alntmscore,qtmscore,ttmscore" + ], + "pattern": ".*", + "enum": [], + "validation": {}, + "notes": "Available columns: query, target, fident, alnlen, mismatch, gapopen, qstart, qend, tstart, tend, evalue, bits, alntmscore, qtmscore, ttmscore, lddt, prob" + } + } +}