- Nextflow pipeline using chai1 Docker image from Harbor - S3-based input/output paths (s3://omic/eureka/chai-lab/) - GPU-accelerated protein folding with MSA support Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
80 lines
2.5 KiB
Python
Executable File
80 lines
2.5 KiB
Python
Executable File
# Copyright (c) 2024 Chai Discovery, Inc.
|
|
# Licensed under the Apache License, Version 2.0.
|
|
# See the LICENSE file for details.
|
|
|
|
from pathlib import Path
|
|
from tempfile import TemporaryDirectory
|
|
|
|
from chai_lab.data.parsing.fasta import read_fasta
|
|
from chai_lab.data.parsing.input_validation import (
|
|
constituents_of_modified_fasta,
|
|
identify_potential_entity_types,
|
|
)
|
|
from chai_lab.data.parsing.structure.entity_type import EntityType
|
|
|
|
from .example_inputs import example_dna, example_ligands, example_proteins, example_rna
|
|
|
|
|
|
def test_simple_protein_fasta():
    """Check that a plain one-letter sequence splits into single residues.

    The result is compared with exact list equality. An
    ``all(x == y for x, y in zip(...))`` check would stop at the shorter
    input, so an empty or truncated result would pass unnoticed.
    """
    parts = constituents_of_modified_fasta("RKDES")
    assert parts is not None
    # list(...) keeps the comparison valid for any iterable of tokens.
    assert list(parts) == ["R", "K", "D", "E", "S"]
|
|
|
|
|
|
def test_modified_protein_fasta():
    """Check splitting of a sequence mixing ``(XXX)`` groups and plain letters.

    Each parenthesised group is expected as one token (without the
    parentheses) and each bare letter as its own token. The comparison is an
    exact list equality so that missing or extra tokens fail the test;
    a ``zip``-based check would silently ignore a length mismatch.
    """
    parts = constituents_of_modified_fasta("(KCJ)(SEP)(PPN)(B3S)(BAL)(PPN)KX(NH2)")
    assert parts is not None
    expected = ["KCJ", "SEP", "PPN", "B3S", "BAL", "PPN", "K", "X", "NH2"]
    # list(...) keeps the comparison valid for any iterable of tokens.
    assert list(parts) == expected
|
|
|
|
|
|
def test_rna_fasta():
    """Check that an RNA-alphabet string splits into one token per character.

    The expected value is the sequence exploded into single characters, and
    it is compared with exact list equality. A ``zip``-based check would
    accept a result that is shorter than the input sequence.
    """
    seq = "ACUGACG"
    parts = constituents_of_modified_fasta(seq)
    assert parts is not None
    # list(seq) turns the string into its individual characters.
    assert list(parts) == list(seq)
|
|
|
|
|
|
def test_dna_fasta():
    """Check that a DNA-alphabet string splits into one token per character.

    The expected value is the sequence exploded into single characters, and
    it is compared with exact list equality. A ``zip``-based check would
    accept a result that is shorter than the input sequence.
    """
    seq = "ACGACTAGCAT"
    parts = constituents_of_modified_fasta(seq)
    assert parts is not None
    # list(seq) turns the string into its individual characters.
    assert list(parts) == list(seq)
|
|
|
|
|
|
def test_parsing():
    """Each example input must be reported as a candidate of its own kind.

    For every ligand, protein, DNA and RNA example (checked in that order),
    the matching ``EntityType`` member has to be contained in the value
    returned by ``identify_potential_entity_types``.
    """
    cases = (
        (EntityType.LIGAND, example_ligands),
        (EntityType.PROTEIN, example_proteins),
        (EntityType.DNA, example_dna),
        (EntityType.RNA, example_rna),
    )
    for expected_type, examples in cases:
        for example in examples:
            candidates = identify_potential_entity_types(example)
            assert expected_type in candidates
|
|
|
|
|
|
def test_fasta_parsing():
    """Round-trip a two-record FASTA file through ``read_fasta``.

    The text is written to ``test.fasta`` inside a temporary directory and
    parsed back. The test then checks the record count and each record's
    ``header`` and ``sequence`` attributes.
    """
    fasta_text = ">foo\nRKDES\n>bar\nKEDESRRR"
    expected = (("foo", "RKDES"), ("bar", "KEDESRRR"))

    with TemporaryDirectory() as workdir:
        fasta_path = Path(workdir, "test.fasta")
        fasta_path.write_text(fasta_text)
        records = read_fasta(fasta_path)

    assert len(records) == 2
    for position, (header, sequence) in enumerate(expected):
        record = records[position]
        assert record.header == header
        assert record.sequence == sequence
|
|
|
|
|
|
def test_smiles_parsing():
|
|
smiles = ">smiles\nCc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C"
|
|
with TemporaryDirectory() as tmpdir:
|
|
fa_file = Path(tmpdir) / "test.fasta"
|
|
fa_file.write_text(smiles)
|
|
records = read_fasta(fa_file)
|
|
assert len(records) == 1
|