Initial commit: RoseTTAFold-All-Atom configured for Wes with Harbor images and s3:// paths

2026-03-17 17:57:24 +01:00
commit 6eef3bb748
108 changed files with 28144 additions and 0 deletions
--- a/params.json
+++ b/params.json
@@ -0,0 +1,202 @@
+{
+    "params": {
+        "protein_fasta": {
+            "type": "file",
+            "description": "Path to protein FASTA file for structure prediction",
+            "default": "s3://omic/eureka/rosettafold-all-atom/input/protein.fasta",
+            "required": true,
+            "pipeline_io": "input",
+            "var_name": "params.protein_fasta",
+            "examples": [
+                "s3://omic/eureka/rosettafold-all-atom/input/protein.fasta"
+            ],
+            "pattern": ".*\\.(fasta|fa)$",
+            "enum": [],
+            "validation": {},
+            "notes": "FASTA file containing protein sequence(s). Required for all prediction modes."
+        },
+        "na_fasta": {
+            "type": "file",
+            "description": "Path to nucleic acid FASTA file (DNA or RNA)",
+            "default": "",
+            "required": false,
+            "pipeline_io": "input",
+            "var_name": "params.na_fasta",
+            "examples": [
+                "s3://omic/eureka/rosettafold-all-atom/input/dna_strand.fasta"
+            ],
+            "pattern": ".*\\.(fasta|fa)$",
+            "enum": [],
+            "validation": {},
+            "notes": "Optional. Provide only when predicting a protein-nucleic acid complex; set na_type (dna or rna) to match the sequence."
+        },
+        "na_type": {
+            "type": "string",
+            "description": "Type of nucleic acid input",
+            "default": "dna",
+            "required": false,
+            "pipeline_io": "parameter",
+            "var_name": "params.na_type",
+            "examples": [
+                "dna",
+                "rna"
+            ],
+            "pattern": "^(dna|rna)$",
+            "enum": ["dna", "rna"],
+            "validation": {},
+            "notes": "Specify whether the nucleic acid is DNA or RNA."
+        },
+        "sm_file": {
+            "type": "file",
+            "description": "Path to small molecule file (SDF format)",
+            "default": "",
+            "required": false,
+            "pipeline_io": "input",
+            "var_name": "params.sm_file",
+            "examples": [
+                "s3://omic/eureka/rosettafold-all-atom/input/ligand.sdf"
+            ],
+            "pattern": ".*\\.sdf$",
+            "enum": [],
+            "validation": {},
+            "notes": "Optional. Provide only when predicting a protein-small molecule complex. The path must end in .sdf."
+        },
+        "sm_type": {
+            "type": "string",
+            "description": "Type of small molecule input",
+            "default": "sdf",
+            "required": false,
+            "pipeline_io": "parameter",
+            "var_name": "params.sm_type",
+            "examples": [
+                "sdf",
+                "smiles"
+            ],
+            "pattern": "^(sdf|smiles)$",
+            "enum": ["sdf", "smiles"],
+            "validation": {},
+            "notes": "Format of small molecule input. SDF recommended for covalent modifications."
+        },
+        "covale_inputs": {
+            "type": "string",
+            "description": "Covalent bond specifications for modified proteins",
+            "default": "",
+            "required": false,
+            "pipeline_io": "parameter",
+            "var_name": "params.covale_inputs",
+            "examples": [
+                "[((\"A\", \"74\", \"ND2\"), (\"B\", \"1\"), (\"CW\", \"null\"))]"
+            ],
+            "pattern": ".*",
+            "enum": [],
+            "validation": {},
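+            "notes": "Optional; leave empty when there are no covalent modifications. Format: a list with one entry per bond, [((protein_chain, residue_number, atom_name), (sm_chain, atom_index), (chirality1, chirality2))]."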
+        },
+        "outdir": {
+            "type": "folder",
+            "description": "Directory for RFAA prediction results",
+            "default": "s3://omic/eureka/rosettafold-all-atom/output",
+            "required": true,
+            "pipeline_io": "output",
+            "var_name": "params.outdir",
+            "examples": [
+                "s3://omic/eureka/rosettafold-all-atom/output"
+            ],
+            "pattern": ".*",
+            "enum": [],
+            "validation": {},
+            "notes": "Output directory for PDB structures and confidence metrics."
+        },
+        "job_name": {
+            "type": "string",
+            "description": "Name for the prediction job",
+            "default": "rfaa_prediction",
+            "required": false,
+            "pipeline_io": "parameter",
+            "var_name": "params.job_name",
+            "examples": [
+                "rfaa_prediction",
+                "my_protein"
+            ],
+            "pattern": "^[a-zA-Z0-9_-]+$",
+            "enum": [],
+            "validation": {},
+            "notes": "Used to name output files."
+        },
+        "max_cycle": {
+            "type": "integer",
+            "description": "Number of inference cycles for structure refinement",
+            "default": 4,
+            "required": false,
+            "pipeline_io": "parameter",
+            "var_name": "params.max_cycle",
+            "examples": [
+                4,
+                10
+            ],
+            "enum": [],
+            "validation": {},
+            "notes": "Default is 4. Increase to 10 for difficult cases."
+        },
+        "db_uniref30": {
+            "type": "folder",
+            "description": "Path to UniRef30_2020_06 database directory",
+            "default": "/mnt/databases/UniRef30_2020_06",
+            "required": true,
+            "pipeline_io": "input",
+            "var_name": "params.db_uniref30",
+            "examples": [
+                "/mnt/databases/UniRef30_2020_06"
+            ],
+            "pattern": ".*",
+            "enum": [],
+            "validation": {},
+            "notes": "Required for MSA generation. ~46GB."
+        },
+        "db_bfd": {
+            "type": "folder",
+            "description": "Path to BFD database directory",
+            "default": "/mnt/databases/bfd",
+            "required": true,
+            "pipeline_io": "input",
+            "var_name": "params.db_bfd",
+            "examples": [
+                "/mnt/databases/bfd"
+            ],
+            "pattern": ".*",
+            "enum": [],
+            "validation": {},
+            "notes": "Required for MSA generation. ~272GB."
+        },
+        "db_pdb100": {
+            "type": "folder",
+            "description": "Path to pdb100_2021Mar03 template database",
+            "default": "/mnt/databases/pdb100_2021Mar03",
+            "required": true,
+            "pipeline_io": "input",
+            "var_name": "params.db_pdb100",
+            "examples": [
+                "/mnt/databases/pdb100_2021Mar03"
+            ],
+            "pattern": ".*",
+            "enum": [],
+            "validation": {},
+            "notes": "Required for template search. ~81GB."
+        },
+        "weights": {
+            "type": "file",
+            "description": "Path to RFAA model weights file",
+            "default": "/mnt/databases/RFAA_paper_weights.pt",
+            "required": true,
+            "pipeline_io": "input",
+            "var_name": "params.weights",
+            "examples": [
+                "/mnt/databases/RFAA_paper_weights.pt"
+            ],
+            "pattern": ".*\\.pt$",
+            "enum": [],
+            "validation": {},
+            "notes": "Download from: http://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFAA_paper_weights.pt"
+        }
+    }
+}