Files
immunebuilder/reformat_fasta.sh
Olamide Isreal 8887cbe592
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
Configure ImmuneBuilder pipeline for WES execution
- Update container image to harbor.cluster.omic.ai/omic/immunebuilder:latest
- Update input/output paths to S3 (s3://omic/eureka/immunebuilder/)
- Remove local mount containerOptions (not needed in k8s)
- Update homepage to Gitea repo URL
- Clean history to remove large model weight blobs
2026-03-16 15:31:53 +01:00

71 lines
1.9 KiB
Bash
Executable File

#!/bin/bash
# reformat_fasta.sh
#
# Reformat RCSB FASTA files for ImmuneBuilder: each input record keeps its
# sequence (joined onto a single line) but its header is replaced by a short
# chain label (>H/>L for antibodies, >H for nanobodies, >A/>B for TCRs).
#
# Usage:
#   reformat_fasta.sh
#
# Environment:
#   INPUT_DIR  Directory holding the input FASTA files; the reformatted files
#              are written next to them. Defaults to the original NAS path.
#
# Exit status: 0 if every file was reformatted, 1 if any conversion failed.

# errexit (-e) is deliberately not enabled: the three conversions are
# independent, so each one is attempted and failures are collected into the
# final exit status instead of aborting at the first problem.
set -uo pipefail

INPUT_DIR="${INPUT_DIR:-/mnt/OmicNAS/private/old/olamide/ImmuneBuilder/input}"

#######################################
# Rewrite a FASTA file, replacing record headers with fixed chain labels.
# Labels are assigned strictly by order of appearance: the first record with
# a non-empty sequence gets the first label, the second gets the second, and
# so on. Records beyond the number of labels are dropped. Multi-line
# sequences are joined onto one line and trailing carriage returns (CRLF
# line endings) are stripped. Sequence text that appears before any header
# line is treated as a record of its own.
# Globals:   none
# Arguments: $1  - path of the input FASTA file
#            $2  - path of the output FASTA file (overwritten)
#            $3… - chain labels, in order (e.g. H L)
# Outputs:   writes the relabelled FASTA to $2; diagnostics go to stderr
# Returns:   0 on success; 1 if the input is missing/unreadable, contains no
#            sequence, or the output cannot be written. On failure no output
#            file is left behind.
#######################################
relabel_fasta() {
  local input=$1
  local output=$2
  shift 2
  # Labels are handed to awk as one space-separated string.
  local labels="$*"

  # Check up front: the output redirection below would otherwise create an
  # empty output file even though awk cannot open its input.
  if [[ ! -f "${input}" || ! -r "${input}" ]]; then
    printf 'Error: cannot read input FASTA: %s\n' "${input}" >&2
    return 1
  fi

  if ! awk -v labels="${labels}" '
    # Emit the pending sequence under the next unused label, then reset it.
    function flush_record() {
      if (seq == "") return
      if (written < n_labels) printf ">%s\n%s\n", label[++written], seq
      seq = ""
    }
    BEGIN { n_labels = split(labels, label, " "); written = 0; seq = "" }
    # Drop a trailing carriage return so CRLF files do not pollute sequences.
    { sub(/\r$/, "") }
    /^>/ { flush_record(); next }
    { seq = seq $0 }
    END {
      flush_record()
      # Signal failure when the input held no sequence at all.
      if (written == 0) exit 1
    }
  ' "${input}" > "${output}"; then
    # Do not leave an empty or partial file that looks like a valid result.
    rm -f -- "${output}"
    printf 'Error: failed to reformat %s (no sequence found or write error)\n' \
      "${input}" >&2
    return 1
  fi
}

#######################################
# Reformat the three example inputs found in INPUT_DIR and print a summary.
# Globals:   INPUT_DIR (read)
# Arguments: none
# Outputs:   progress and summary on stdout, errors on stderr
# Returns:   0 if all conversions succeeded, 1 otherwise
#######################################
main() {
  local status=0

  # Reformat antibody FASTA (1n8z.fasta).
  # The first record becomes >H and the second >L; sequence length is not
  # inspected.
  # NOTE(review): confirm 1n8z.fasta lists the heavy chain first — labels are
  # assigned by record order only.
  echo "Reformatting antibody FASTA..."
  relabel_fasta "${INPUT_DIR}/1n8z.fasta" \
    "${INPUT_DIR}/1n8z_reformatted.fasta" H L || status=1

  # Reformat nanobody FASTA (3ogo_nanobody.fasta).
  # Nanobodies have only one chain (heavy), so only the first record is kept.
  echo "Reformatting nanobody FASTA..."
  relabel_fasta "${INPUT_DIR}/3ogo_nanobody.fasta" \
    "${INPUT_DIR}/3ogo_nanobody_reformatted.fasta" H || status=1

  # Reformat TCR FASTA (1oga_tcr.fasta).
  # Alpha chain first, then Beta chain.
  echo "Reformatting TCR FASTA..."
  relabel_fasta "${INPUT_DIR}/1oga_tcr.fasta" \
    "${INPUT_DIR}/1oga_tcr_reformatted.fasta" A B || status=1

  echo ""
  echo "Reformatted files created:"
  # ls errors are silenced on purpose: an unmatched glob just means nothing
  # was produced, which the fallback message reports.
  ls -la "${INPUT_DIR}"/*_reformatted.fasta 2>/dev/null || echo "No reformatted files found"
  echo ""
  echo "Please verify the reformatted files have correct chain labels:"
  echo "- Antibody: >H and >L"
  echo "- Nanobody: >H only"
  echo "- TCR: >A and >B"

  return "${status}"
}

main