Configure ImmuneBuilder pipeline for WES execution
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
- Update container image to harbor.cluster.omic.ai/omic/immunebuilder:latest
- Update input/output paths to S3 (s3://omic/eureka/immunebuilder/)
- Remove local mount containerOptions (not needed in k8s)
- Update homepage to Gitea repo URL
- Clean history to remove large model weight blobs
This commit is contained in:
40
ImmuneBuilder/sequence_checks.py
Normal file
40
ImmuneBuilder/sequence_checks.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from anarci import validate_sequence, anarci, scheme_short_to_long
|
||||
|
||||
def number_single_sequence(sequence, chain, scheme="imgt", allowed_species=['human','mouse']):
    """Sanity-check one chain sequence with ANARCI and return its numbering.

    The sequence is always numbered with the IMGT scheme first so that the
    completeness checks below can rely on IMGT position numbers; it is only
    renumbered in the requested scheme once those checks have passed.

    Args:
        sequence: Sequence string for a single chain.
        chain: Chain type passed to ANARCI's ``allow`` filter. When it is
            ``"L"``, ``"K"`` is allowed as well.
        scheme: ``"raw"`` for plain sequential numbering starting at 1, or
            any key of ``scheme_short_to_long`` (looked up case-insensitively).
        allowed_species: Forwarded unchanged to ``anarci``. The default list
            is never mutated here.

    Returns:
        A list of ``((number, insertion_code), residue)`` tuples with every
        entry whose residue is ``"-"`` removed (presumably alignment gaps --
        confirm against the ANARCI documentation).

    Raises:
        NotImplementedError: If ``scheme`` is neither ``"raw"`` nor a key of
            ``scheme_short_to_long``.
        AssertionError: If the sequence has 70 or fewer characters, is not
            recognised by ANARCI as the requested chain type, or does not
            span IMGT positions below 8 and above 120.

    ``validate_sequence`` is called first and may raise on its own.
    """
    validate_sequence(sequence)

    try:
        if scheme != "raw":
            scheme = scheme_short_to_long[scheme.lower()]
    except KeyError:
        raise NotImplementedError(f"Unimplemented numbering scheme: {scheme}")

    # The checks below raise AssertionError explicitly instead of using the
    # ``assert`` statement: ``assert`` is stripped under ``python -O``, which
    # would let bad input through, while callers may already be catching
    # AssertionError for these conditions.
    if not len(sequence) > 70:
        raise AssertionError(
            f"Sequence too short to be an Ig domain. Please give whole sequence:\n{sequence}"
        )

    allowed_chains = {chain}
    if chain == "L":
        allowed_chains.add("K")

    # Use imgt scheme for numbering sanity checks
    numbered, _, _ = anarci(
        [("sequence", sequence)],
        scheme='imgt',
        output=False,
        allow=allowed_chains,
        allowed_species=allowed_species,
    )

    if not numbered[0]:
        raise AssertionError(
            f"Sequence provided as an {chain} chain is not recognised as an {chain} chain."
        )

    output = [x for x in numbered[0][0][0] if x[1] != "-"]
    numbers = [x[0][0] for x in output]

    # Check for missing residues assuming imgt numbering. An empty list is
    # reported through the same error rather than a bare ValueError from max().
    if not numbers or not ((max(numbers) > 120) and (min(numbers) < 8)):
        raise AssertionError(
            f"Sequence missing too many residues to model correctly. Please give whole sequence:\n{sequence}"
        )

    # Renumber once sanity checks done
    if scheme == "raw":
        output = [((i, " "), x[1]) for i, x in enumerate(output, start=1)]
    elif scheme != 'imgt':
        numbered, _, _ = anarci(
            [("sequence", sequence)],
            scheme=scheme,
            output=False,
            allow=allowed_chains,
            allowed_species=allowed_species,
        )
        output = [x for x in numbered[0][0][0] if x[1] != "-"]

    return output
|
||||
|
||||
|
||||
def number_sequences(seqs, scheme="imgt", allowed_species=['human','mouse']):
    """Number every chain in ``seqs`` and return the results keyed by chain.

    Args:
        seqs: Mapping from chain identifier to that chain's sequence.
        scheme: Numbering scheme forwarded to ``number_single_sequence``.
        allowed_species: Species filter forwarded to
            ``number_single_sequence``.

    Returns:
        A dict with the same keys as ``seqs`` whose values are the
        corresponding ``number_single_sequence`` results.
    """
    numbered_by_chain = {}
    for chain_id in seqs:
        numbered_by_chain[chain_id] = number_single_sequence(
            seqs[chain_id],
            chain_id,
            scheme=scheme,
            allowed_species=allowed_species,
        )
    return numbered_by_chain
|
||||
Reference in New Issue
Block a user