Add LigandMPNN Nextflow pipeline for protein sequence design

This commit is contained in:
2026-03-18 22:31:13 +01:00
commit e7261ba7ce
15 changed files with 6825 additions and 0 deletions

131
params.json Normal file
View File

@@ -0,0 +1,131 @@
{
"params": {
"pdb": {
"type": "file",
"description": "Path to input PDB file for protein sequence design",
"default": "/mnt/OmicNAS/private/old/olamide/ligandmpnn/input/1BC8.pdb",
"required": true,
"pipeline_io": "input",
"var_name": "params.pdb",
"examples": [
"/mnt/workflow/input/protein.pdb",
"/mnt/workflow/input/*.pdb"
],
"pattern": ".*\\.pdb$",
"enum": [],
"validation": {},
"notes": "Input PDB file containing the protein structure for sequence design."
},
"outdir": {
"type": "folder",
"description": "Directory for LigandMPNN output results",
"default": "/mnt/OmicNAS/private/old/olamide/ligandmpnn/output",
"required": true,
"pipeline_io": "output",
"var_name": "params.outdir",
"examples": [
"/mnt/workflow/output",
"/path/to/results"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Directory where designed sequences and backbone PDBs will be saved."
},
"model_type": {
"type": "string",
"description": "Type of MPNN model to use for sequence design",
"default": "ligand_mpnn",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.model_type",
"examples": [
"protein_mpnn",
"ligand_mpnn",
"soluble_mpnn"
],
"pattern": "^(protein_mpnn|ligand_mpnn|soluble_mpnn|global_label_membrane_mpnn|per_residue_label_membrane_mpnn)$",
"enum": ["protein_mpnn", "ligand_mpnn", "soluble_mpnn", "global_label_membrane_mpnn", "per_residue_label_membrane_mpnn"],
"validation": {},
"notes": "protein_mpnn: Original ProteinMPNN. ligand_mpnn: Context-aware with ligands. soluble_mpnn: Trained on soluble proteins."
},
"temperature": {
"type": "number",
"description": "Sampling temperature for sequence generation",
"default": 0.1,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.temperature",
"examples": [0.05, 0.1, 0.2],
"pattern": null,
"enum": [],
"validation": {},
"notes": "Higher temperature gives more sequence diversity. Recommended range: 0.05-0.5"
},
"seed": {
"type": "integer",
"description": "Random seed for reproducibility",
"default": 111,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.seed",
"examples": [111, 42, 12345],
"pattern": null,
"enum": [],
"validation": {},
"notes": "Set for reproducible results."
},
"batch_size": {
"type": "integer",
"description": "Number of sequences to generate per batch",
"default": 1,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.batch_size",
"examples": [1, 3, 5],
"pattern": null,
"enum": [],
"validation": {},
"notes": "Higher batch sizes require more GPU memory."
},
"number_of_batches": {
"type": "integer",
"description": "Number of batches to run",
"default": 1,
"required": false,
"pipeline_io": "parameter",
"var_name": "params.number_of_batches",
"examples": [1, 5, 10],
"pattern": null,
"enum": [],
"validation": {},
"notes": "Total sequences = batch_size × number_of_batches"
},
"chains_to_design": {
"type": "string",
"description": "Comma-separated chain IDs to redesign",
"default": "",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.chains_to_design",
"examples": ["A", "A,B", "A,B,C"],
"pattern": "^([A-Z],?)*$",
"enum": [],
"validation": {},
"notes": "Leave empty to design all chains."
},
"fixed_residues": {
"type": "string",
"description": "Space-separated list of residues to keep fixed",
"default": "",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.fixed_residues",
"examples": ["A1 A2 A3", "A12 B25 B26"],
"pattern": null,
"enum": [],
"validation": {},
"notes": "Format: ChainResidue (e.g., A12). Leave empty to design all residues."
}
}
}