#!/bin/bash
# reformat_fasta.sh
#
# Reformat RCSB FASTA files for ImmuneBuilder by replacing each record header
# with a bare chain label and joining wrapped sequence lines:
#   - antibody (1n8z.fasta)          -> >H and >L
#   - nanobody (3ogo_nanobody.fasta) -> >H only
#   - TCR      (1oga_tcr.fasta)      -> >A and >B
#
# Usage: reformat_fasta.sh [INPUT_DIR]
#   INPUT_DIR  directory holding the input files; the reformatted files are
#              written next to them as <name>_reformatted.fasta.
#              Defaults to the original hard-coded location.
#
# Exit status: 0 if every file was reformatted, 1 if any of them failed.

# No `set -e` on purpose: failures are collected explicitly so that one bad
# input does not stop the remaining files from being processed.
set -uo pipefail

#######################################
# Relabel the leading chain(s) of a FASTA file.
# Only the first one or two records that carry a sequence are used; any
# further records (e.g. an antigen chain) are dropped.
# Arguments:
#   $1 - input FASTA path
#   $2 - output FASTA path (left untouched when the conversion fails)
#   $3 - label for the first chain (e.g. H or A)
#   $4 - label for the second chain; empty/omitted keeps one chain only
#   $5 - 1 to decide which of the two chains is the heavy one (gets $3):
#        a "heavy"/"light" keyword in the header wins, otherwise the longer
#        sequence is taken as heavy, ties keep file order. Default 0 keeps
#        file order (first record -> $3, second -> $4).
# Outputs:
#   Diagnostics on stderr.
# Returns:
#   0 on success, 1 if the input is unreadable, holds no sequence, or the
#   output cannot be written.
#######################################
reformat_fasta() {
  local in_file=$1
  local out_file=$2
  local first_label=$3
  local second_label=${4:-}
  local by_heavy=${5:-0}
  local relabelled

  if [[ ! -r "$in_file" ]]; then
    printf 'Error: cannot read input FASTA: %s\n' "$in_file" >&2
    return 1
  fi

  # The result is captured first so the output file is only touched once the
  # conversion is known to have produced something.
  if ! relabelled=$(
    awk -v first="$first_label" -v second="$second_label" \
      -v by_heavy="$by_heavy" '
      # Decide whether the second kept record is the heavy chain.
      function second_is_heavy(h1, s1, h2, s2,    heavy1, heavy2, light1, light2) {
        heavy1 = (h1 ~ /heavy/); heavy2 = (h2 ~ /heavy/)
        light1 = (h1 ~ /light/); light2 = (h2 ~ /light/)
        if (heavy1 != heavy2) return heavy2
        if (light1 != light2) return light1
        return length(s2) > length(s1)
      }

      BEGIN { n = 0 }

      # Tolerate CRLF line endings.
      { sub(/\r$/, "") }

      /^>/ { n++; hdr[n] = tolower($0); seq[n] = ""; next }

      # Sequence line: drop stray blanks and append to the current record.
      # Lines before the first header belong to no record and are ignored.
      n >= 1 { gsub(/[ \t]/, ""); seq[n] = seq[n] $0 }

      END {
        want = (second == "") ? 1 : 2
        k = 0
        for (i = 1; i <= n && k < want; i++) {
          if (seq[i] != "") { k++; kept_hdr[k] = hdr[i]; kept_seq[k] = seq[i] }
        }
        if (k == 0) exit 1

        if (k == 2 && by_heavy && second_is_heavy(kept_hdr[1], kept_seq[1], kept_hdr[2], kept_seq[2])) {
          tmp = kept_seq[1]; kept_seq[1] = kept_seq[2]; kept_seq[2] = tmp
        }

        print ">" first
        print kept_seq[1]
        if (k == 2) {
          print ">" second
          print kept_seq[2]
        }
      }
    ' < "$in_file"
  ); then
    printf 'Error: no sequence records found in %s\n' "$in_file" >&2
    return 1
  fi

  if ! printf '%s\n' "$relabelled" > "$out_file"; then
    printf 'Error: cannot write output FASTA: %s\n' "$out_file" >&2
    return 1
  fi
}

#######################################
# Reformat the three expected input files found in one directory and print
# a summary of the files produced.
# Arguments:
#   $1 - directory containing 1n8z.fasta, 3ogo_nanobody.fasta, 1oga_tcr.fasta
# Returns:
#   0 if all three files were reformatted, 1 otherwise.
#######################################
main() {
  local input_dir=$1
  local status=0

  # Antibody: the record order of the download is not relied upon.
  # NOTE(review): the RCSB file for 1N8Z appears to list the light chain
  # before the heavy chain - confirm the labels in the output.
  echo "Reformatting antibody FASTA..."
  reformat_fasta "${input_dir}/1n8z.fasta" \
    "${input_dir}/1n8z_reformatted.fasta" H L 1 || status=1

  # Nanobody: a single (heavy) chain, so only the first record is kept.
  echo "Reformatting nanobody FASTA..."
  reformat_fasta "${input_dir}/3ogo_nanobody.fasta" \
    "${input_dir}/3ogo_nanobody_reformatted.fasta" H || status=1

  # TCR: file order is trusted - alpha chain first, then beta chain.
  echo "Reformatting TCR FASTA..."
  reformat_fasta "${input_dir}/1oga_tcr.fasta" \
    "${input_dir}/1oga_tcr_reformatted.fasta" A B || status=1

  echo ""
  echo "Reformatted files created:"
  # ls errors are silenced deliberately: an unmatched glob is reported by the
  # fallback message instead.
  ls -la "${input_dir}"/*_reformatted.fasta 2>/dev/null || echo "No reformatted files found"
  echo ""
  echo "Please verify the reformatted files have correct chain labels:"
  echo "- Antibody: >H and >L"
  echo "- Nanobody: >H only"
  echo "- TCR: >A and >B"

  return "$status"
}

INPUT_DIR="${1:-/mnt/OmicNAS/private/old/olamide/ImmuneBuilder/input}"

main "$INPUT_DIR"