Configure ImmuneBuilder pipeline for WES execution
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
- Update container image to harbor.cluster.omic.ai/omic/immunebuilder:latest
- Update input/output paths to S3 (s3://omic/eureka/immunebuilder/)
- Remove local mount containerOptions (not needed in k8s)
- Update homepage to Gitea repo URL
- Clean history to remove large model weight blobs
This commit is contained in:
70
reformat_fasta.sh
Executable file
70
reformat_fasta.sh
Executable file
@@ -0,0 +1,70 @@
|
||||
#!/bin/bash
# reformat_fasta.sh
# Script to reformat RCSB FASTA files for ImmuneBuilder
#
# Rewrites each multi-record FASTA file so that its leading records carry the
# short chain labels listed in the summary at the end of this script
# (>H/>L, >H, >A/>B) and every sequence sits on a single line.
#
# Usage: reformat_fasta.sh [INPUT_DIR]
#   The directory is taken from the first argument, then from the INPUT_DIR
#   environment variable, then from the historical NAS default below.
#
# Exit status: 0 when all three files were reformatted, 1 otherwise.

# No `set -e` on purpose: the three files are independent, so a problem with
# one must not stop the others. Failures are collected and reported through
# the exit status instead.
set -uo pipefail

INPUT_DIR="${1:-${INPUT_DIR:-/mnt/OmicNAS/private/old/olamide/ImmuneBuilder/input}}"

#######################################
# Relabel the leading records of a FASTA file.
# The Nth record (in file order) receives the Nth LABEL; records beyond the
# last label are dropped. Sequence lines are joined, and carriage returns,
# spaces and tabs are stripped. The result is written to a sibling temporary
# file and renamed into place only on success, so a failed run never leaves a
# truncated OUTPUT behind.
# Arguments:
#   $1    - input FASTA path
#   $2    - output FASTA path
#   $3... - chain labels, one per record to keep
# Outputs:
#   Writes OUTPUT; diagnostics go to stderr.
# Returns:
#   0 on success, 1 when the input is unreadable, has fewer non-empty records
#   than labels, or the output cannot be written; 2 on wrong usage.
#######################################
reformat_chains() {
  if (( $# < 3 )); then
    printf 'usage: reformat_chains INPUT OUTPUT LABEL...\n' >&2
    return 2
  fi

  local input=$1
  local output=$2
  shift 2
  local labels="$*"
  local partial="${output}.partial.$$"

  if [[ ! -r "$input" ]]; then
    printf 'error: cannot read input FASTA: %s\n' "$input" >&2
    return 1
  fi

  # The input is fed on stdin so that an unusual path can never be mistaken
  # by awk for a var=value operand.
  if ! awk -v labels="$labels" '
    # Write the record collected so far, if it is one of the labelled chains
    # and actually contains residues.
    function emit() {
      if (chain >= 1 && chain <= n_labels && seq != "") {
        printf ">%s\n%s\n", label[chain], seq
        written++
      }
    }
    BEGIN { n_labels = split(labels, label, " "); chain = 0; seq = ""; written = 0 }
    /^>/ { emit(); chain++; seq = ""; next }
    chain > 0 { sub(/\r$/, ""); gsub(/[ \t]/, ""); seq = seq $0 }
    END {
      emit()
      if (written != n_labels) {
        printf("error: expected %d chain(s) with sequence data, found %d\n", n_labels, written) > "/dev/stderr"
        exit 1
      }
    }
  ' < "$input" > "$partial"; then
    printf 'error: failed to reformat %s; %s left untouched\n' "$input" "$output" >&2
    rm -f -- "$partial"
    return 1
  fi

  if ! mv -f -- "$partial" "$output"; then
    rm -f -- "$partial"
    return 1
  fi
}

#######################################
# Reformat the three example inputs found in INPUT_DIR and print a summary.
# Globals:
#   INPUT_DIR (read)
# Returns:
#   0 if every file was reformatted, 1 if any of them failed.
#######################################
main() {
  local status=0

  # Reformat antibody FASTA (1n8z.fasta)
  # Labels are assigned purely by record order: first record -> H, second -> L.
  # NOTE(review): nothing here compares sequence lengths, and the record order
  # of an RCSB download is not guaranteed to be heavy-then-light -- confirm the
  # labels against the source entry before using the output.
  echo "Reformatting antibody FASTA..."
  reformat_chains "${INPUT_DIR}/1n8z.fasta" \
    "${INPUT_DIR}/1n8z_reformatted.fasta" H L || status=1

  # Reformat nanobody FASTA (3ogo_nanobody.fasta)
  # Nanobodies have only one chain (heavy); only the first record is kept.
  echo "Reformatting nanobody FASTA..."
  reformat_chains "${INPUT_DIR}/3ogo_nanobody.fasta" \
    "${INPUT_DIR}/3ogo_nanobody_reformatted.fasta" H || status=1

  # Reformat TCR FASTA (1oga_tcr.fasta)
  # First record -> A (alpha), second record -> B (beta).
  # NOTE(review): assumes the alpha chain is listed first -- confirm.
  echo "Reformatting TCR FASTA..."
  reformat_chains "${INPUT_DIR}/1oga_tcr.fasta" \
    "${INPUT_DIR}/1oga_tcr_reformatted.fasta" A B || status=1

  echo ""
  echo "Reformatted files created:"
  # ls is used for display only; its error is replaced by a friendlier message.
  ls -la "${INPUT_DIR}"/*_reformatted.fasta 2>/dev/null || echo "No reformatted files found"
  echo ""
  echo "Please verify the reformatted files have correct chain labels:"
  echo "- Antibody: >H and >L"
  echo "- Nanobody: >H only"
  echo "- TCR: >A and >B"

  return "$status"
}

main
|
||||
Reference in New Issue
Block a user