Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
- Update container image to harbor.cluster.omic.ai/omic/immunebuilder:latest - Update input/output paths to S3 (s3://omic/eureka/immunebuilder/) - Remove local mount containerOptions (not needed in k8s) - Update homepage to Gitea repo URL - Clean history to remove large model weight blobs
71 lines
1.9 KiB
Bash
Executable File
71 lines
1.9 KiB
Bash
Executable File
#!/bin/bash
#
# reformat_fasta.sh
#
# Script to reformat RCSB FASTA files for ImmuneBuilder. It reads
# 1n8z.fasta, 3ogo_nanobody.fasta and 1oga_tcr.fasta from INPUT_DIR and writes
# a *_reformatted.fasta file next to each of them, with the record headers
# replaced by short chain labels (>H/>L, >H, >A/>B).
#
# Usage:
#   ./reformat_fasta.sh
#   INPUT_DIR=/path/to/fasta/dir ./reformat_fasta.sh
#
# Environment:
#   INPUT_DIR  Directory that holds the input FASTA files and receives the
#              outputs. Falls back to the NAS path below when unset or empty.

# Stop on unhandled command failures, unset variables and failed pipeline
# stages instead of carrying on with partial results.
set -euo pipefail

# Overridable from the environment so the script is not tied to one mount.
INPUT_DIR="${INPUT_DIR:-/mnt/OmicNAS/private/old/olamide/ImmuneBuilder/input}"

# Reformat antibody FASTA (1n8z.fasta)
# ImmuneBuilder expects the heavy chain under ">H" and the light chain under ">L".

#######################################
# Relabel the first two records of an antibody FASTA file as >H and >L.
# The record headers decide which chain is heavy when they mention "heavy" or
# "light" (case-insensitive) without contradicting each other; otherwise the
# longer sequence is taken as the heavy chain, keeping file order on a tie.
# Records after the second are ignored. Whitespace, including the CR of CRLF
# line endings, is removed from sequence lines.
# Arguments: $1 - path to the input FASTA file
# Outputs:   the ">H" record followed by the ">L" record on stdout (only
#            ">H" when the file has a single usable record)
# Returns:   0 on success; non-zero if the file cannot be read or its first
#            record has no sequence
#######################################
reformat_antibody_fasta() {
  awk '
    # Classify a lower-cased header line: "H", "L", or "" when it is unclear.
    function chain_kind(header,    has_heavy, has_light) {
      has_heavy = (header ~ /heavy/)
      has_light = (header ~ /light/)
      if (has_heavy && !has_light) return "H"
      if (has_light && !has_heavy) return "L"
      return ""
    }

    /^>/ { n++; header[n] = tolower($0); next }

    # Only the first two records are kept; text before the first header is skipped.
    n >= 1 && n <= 2 {
      gsub(/[ \t\r]/, "")
      residues[n] = residues[n] $0
    }

    END {
      if (residues[1] == "") exit 1
      if (n == 1 || residues[2] == "") {
        print ">H\n" residues[1]
        exit 0
      }

      k1 = chain_kind(header[1])
      k2 = chain_kind(header[2])
      if (k1 == "H" && k2 != "H") heavy = 1
      else if (k2 == "H" && k1 != "H") heavy = 2
      else if (k1 == "L" && k2 != "L") heavy = 2
      else if (k2 == "L" && k1 != "L") heavy = 1
      else heavy = (length(residues[2]) > length(residues[1])) ? 2 : 1

      print ">H\n" residues[heavy]
      print ">L\n" residues[3 - heavy]
    }
  ' "$1"
}

echo "Reformatting antibody FASTA..."
antibody_in="${INPUT_DIR}/1n8z.fasta"
antibody_out="${INPUT_DIR}/1n8z_reformatted.fasta"
if [[ ! -r "${antibody_in}" ]]; then
  # Skip instead of leaving an empty output file behind.
  echo "Warning: cannot read ${antibody_in}; skipping antibody FASTA" >&2
elif ! reformat_antibody_fasta "${antibody_in}" > "${antibody_out}"; then
  echo "Error: failed to reformat ${antibody_in}" >&2
  rm -f -- "${antibody_out}"
fi

# Reformat nanobody FASTA (3ogo_nanobody.fasta)
# Nanobodies have only one chain (heavy)

#######################################
# Emit the first record of a FASTA file as a single ">H" record.
# Text before the first header and every record after the first are ignored.
# Whitespace, including the CR of CRLF line endings, is removed from
# sequence lines.
# Arguments: $1 - path to the input FASTA file
# Outputs:   the ">H" record on stdout
# Returns:   0 on success; non-zero if the file cannot be read or its first
#            record has no sequence (nothing is printed in that case)
#######################################
reformat_nanobody_fasta() {
  awk '
    /^>/ { n++; next }

    n == 1 {
      gsub(/[ \t\r]/, "")
      residues = residues $0
    }

    END {
      # Never emit a bare ">H" header without a sequence.
      if (residues == "") exit 1
      print ">H\n" residues
    }
  ' "$1"
}

echo "Reformatting nanobody FASTA..."
nanobody_in="${INPUT_DIR}/3ogo_nanobody.fasta"
nanobody_out="${INPUT_DIR}/3ogo_nanobody_reformatted.fasta"
if [[ ! -r "${nanobody_in}" ]]; then
  # Skip instead of leaving an empty output file behind.
  echo "Warning: cannot read ${nanobody_in}; skipping nanobody FASTA" >&2
elif ! reformat_nanobody_fasta "${nanobody_in}" > "${nanobody_out}"; then
  echo "Error: failed to reformat ${nanobody_in}" >&2
  rm -f -- "${nanobody_out}"
fi

# Reformat TCR FASTA (1oga_tcr.fasta)
# Alpha chain first, then Beta chain

#######################################
# Relabel the first two records of a TCR FASTA file as >A and >B, in file
# order. Records after the second and text before the first header are
# ignored. Whitespace, including the CR of CRLF line endings, is removed from
# sequence lines.
# Arguments: $1 - path to the input FASTA file
# Outputs:   the ">A" record, then the ">B" record when the second record
#            has a sequence, on stdout
# Returns:   0 on success; non-zero if the file cannot be read or its first
#            record has no sequence (nothing is printed in that case)
#######################################
reformat_tcr_fasta() {
  awk '
    /^>/ { n++; next }

    n >= 1 && n <= 2 {
      gsub(/[ \t\r]/, "")
      residues[n] = residues[n] $0
    }

    END {
      if (residues[1] == "") exit 1
      print ">A\n" residues[1]
      # Never emit a ">B" header without a sequence.
      if (residues[2] != "") print ">B\n" residues[2]
    }
  ' "$1"
}

echo "Reformatting TCR FASTA..."
tcr_in="${INPUT_DIR}/1oga_tcr.fasta"
tcr_out="${INPUT_DIR}/1oga_tcr_reformatted.fasta"
if [[ ! -r "${tcr_in}" ]]; then
  # Skip instead of leaving an empty output file behind.
  echo "Warning: cannot read ${tcr_in}; skipping TCR FASTA" >&2
elif ! reformat_tcr_fasta "${tcr_in}" > "${tcr_out}"; then
  echo "Error: failed to reformat ${tcr_in}" >&2
  rm -f -- "${tcr_out}"
fi

# Summarise the run and remind the user to check the chain labels.
echo ""
echo "Reformatted files created:"
# ls is used only for a human-readable listing. It fails when the glob matches
# nothing, and that case is reported with a plain message instead of an error.
ls -la "${INPUT_DIR}"/*_reformatted.fasta 2>/dev/null || echo "No reformatted files found"
echo ""
echo "Please verify the reformatted files have correct chain labels:"
echo "- Antibody: >H and >L"
echo "- Nanobody: >H only"
echo "- TCR: >A and >B"

# Finish with a failing status when an expected output is missing or empty, so
# a caller does not mistake a failed run for a successful one.
missing_outputs=0
for reformatted_stem in 1n8z 3ogo_nanobody 1oga_tcr; do
  if [[ ! -s "${INPUT_DIR}/${reformatted_stem}_reformatted.fasta" ]]; then
    echo "Error: ${INPUT_DIR}/${reformatted_stem}_reformatted.fasta is missing or empty" >&2
    missing_outputs=1
  fi
done
if [[ "${missing_outputs}" -ne 0 ]]; then
  exit 1
fi