Initial commit: Chai-1 protein structure prediction pipeline for WES

- Nextflow pipeline using chai1 Docker image from Harbor
- S3-based input/output paths (s3://omic/eureka/chai-lab/)
- GPU-accelerated protein folding with MSA support

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-16 12:55:08 +01:00
commit f971fd0e21
26 changed files with 1289 additions and 0 deletions

24
tests/test_rdkit.py Executable file
View File

@@ -0,0 +1,24 @@
# Copyright (c) 2024 Chai Discovery, Inc.
# Licensed under the Apache License, Version 2.0.
# See the LICENSE file for details.
from chai_lab.data.sources.rdkit import RefConformerGenerator
def test_ref_conformer_from_smiles():
    """Generate a reference conformer from a SMILES string.

    The conformer passes when every atom name is distinct, i.e. the number
    of unique names equals the reported atom count.
    """
    ligand_smiles = "Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C"
    generator = RefConformerGenerator()
    ref_conformer = generator.generate(ligand_smiles)

    # Collapsing the names into a set drops duplicates, so a size mismatch
    # against num_atoms means at least two atoms share a name.
    unique_names = set(ref_conformer.atom_names)
    assert len(unique_names) == ref_conformer.num_atoms
def test_ref_conformer_glycan_ccd():
    """Look up the reference conformer for the CCD code "MAN" (a sugar ring).

    The lookup must return a conformer, and that conformer must carry one
    distinct name per atom.
    """
    generator = RefConformerGenerator()
    ref_conformer = generator.get("MAN")

    # Check for a missing entry first so a failed lookup is reported as such
    # rather than as an attribute error on None.
    assert ref_conformer is not None

    unique_names = set(ref_conformer.atom_names)
    assert len(unique_names) == ref_conformer.num_atoms