Initial commit: RoseTTAFold-All-Atom configured for Wes with Harbor images and s3:// paths

This commit is contained in:
2026-03-17 17:57:24 +01:00
commit 6eef3bb748
108 changed files with 28144 additions and 0 deletions

202
params.json Normal file
View File

@@ -0,0 +1,202 @@
{
"params": {
"protein_fasta": {
"type": "file",
"description": "Path to protein FASTA file for structure prediction",
"default": "s3://omic/eureka/rosettafold-all-atom/input/protein.fasta",
"required": true,
"pipeline_io": "input",
"var_name": "params.protein_fasta",
"examples": [
"s3://omic/eureka/rosettafold-all-atom/input/protein.fasta"
],
"pattern": ".*\\.(fasta|fa)$",
"enum": [],
"validation": {},
"notes": "FASTA file containing protein sequence(s). Required for all prediction modes."
},
"na_fasta": {
"type": "file",
"description": "Path to nucleic acid FASTA file (DNA or RNA)",
"default": "",
"required": false,
"pipeline_io": "input",
"var_name": "params.na_fasta",
"examples": [
"s3://omic/eureka/rosettafold-all-atom/input/dna_strand.fasta"
],
"pattern": ".*\\.(fasta|fa)$",
"enum": [],
"validation": {},
"notes": "Optional. For protein-nucleic acid complex prediction."
},
"na_type": {
"type": "string",
"description": "Type of nucleic acid input",
"default": "dna",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.na_type",
"examples": [
"dna",
"rna"
],
"pattern": "^(dna|rna)$",
"enum": ["dna", "rna"],
"validation": {},
"notes": "Specify whether the nucleic acid is DNA or RNA."
},
"sm_file": {
"type": "file",
"description": "Path to small molecule file (SDF format)",
"default": "",
"required": false,
"pipeline_io": "input",
"var_name": "params.sm_file",
"examples": [
"s3://omic/eureka/rosettafold-all-atom/input/ligand.sdf"
],
"pattern": ".*\\.sdf$",
"enum": [],
"validation": {},
"notes": "Optional. For protein-small molecule complex prediction."
},
"sm_type": {
"type": "string",
"description": "Type of small molecule input",
"default": "sdf",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.sm_type",
"examples": [
"sdf",
"smiles"
],
"pattern": "^(sdf|smiles)$",
"enum": ["sdf", "smiles"],
"validation": {},
"notes": "Format of small molecule input. SDF recommended for covalent modifications. Note: sm_file's pattern only matches .sdf paths, so a 'smiles' input does not satisfy it."
},
"covale_inputs": {
"type": "string",
"description": "Covalent bond specifications for modified proteins",
"default": "",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.covale_inputs",
"examples": [
"[((\"A\", \"74\", \"ND2\"), (\"B\", \"1\"), (\"CW\", \"null\"))]"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Format: [((chain, residue, atom), (sm_chain, atom_idx), (chirality1, chirality2))]"
},
"outdir": {
"type": "folder",
"description": "Directory for RFAA prediction results",
"default": "s3://omic/eureka/rosettafold-all-atom/output",
"required": true,
"pipeline_io": "output",
"var_name": "params.outdir",
"examples": [
"s3://omic/eureka/rosettafold-all-atom/output"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Output directory for PDB structures and confidence metrics."
},
"job_name": {
"type": "string",
"description": "Name for the prediction job",
"default": "rfaa_prediction",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.job_name",
"examples": [
"rfaa_prediction",
"my_protein"
],
"pattern": "^[a-zA-Z0-9_-]+$",
"enum": [],
"validation": {},
"notes": "Used to name output files."
},
"max_cycle": {
"type": "integer",
"description": "Number of inference cycles for structure refinement",
"default": 4,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.max_cycle",
"examples": [
4,
10
],
"enum": [],
"validation": {},
"notes": "Default is 4. Increase to 10 for difficult cases."
},
"db_uniref30": {
"type": "folder",
"description": "Path to UniRef30_2020_06 database directory",
"default": "/mnt/databases/UniRef30_2020_06",
"required": true,
"pipeline_io": "input",
"var_name": "params.db_uniref30",
"examples": [
"/mnt/databases/UniRef30_2020_06"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Required for MSA generation. ~46GB."
},
"db_bfd": {
"type": "folder",
"description": "Path to BFD database directory",
"default": "/mnt/databases/bfd",
"required": true,
"pipeline_io": "input",
"var_name": "params.db_bfd",
"examples": [
"/mnt/databases/bfd"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Required for MSA generation. ~272GB."
},
"db_pdb100": {
"type": "folder",
"description": "Path to pdb100_2021Mar03 template database directory",
"default": "/mnt/databases/pdb100_2021Mar03",
"required": true,
"pipeline_io": "input",
"var_name": "params.db_pdb100",
"examples": [
"/mnt/databases/pdb100_2021Mar03"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Required for template search. ~81GB."
},
"weights": {
"type": "file",
"description": "Path to RFAA model weights file",
"default": "/mnt/databases/RFAA_paper_weights.pt",
"required": true,
"pipeline_io": "input",
"var_name": "params.weights",
"examples": [
"/mnt/databases/RFAA_paper_weights.pt"
],
"pattern": ".*\\.pt$",
"enum": [],
"validation": {},
"notes": "Download from: https://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFAA_paper_weights.pt"
}
}
}