302 lines
9.7 KiB
Python
302 lines
9.7 KiB
Python
import sys
|
|
from io import TextIOWrapper
|
|
from typing import Optional, TextIO, Union
|
|
|
|
from Bio.PDB.Model import Model
|
|
from Bio.PDB.NeighborSearch import NeighborSearch
|
|
#from Bio.PDB.Structure import Structure
|
|
|
|
from prodigy_prot.modules import aa_properties
|
|
from prodigy_prot.modules.freesasa_tools import execute_freesasa_api
|
|
from prodigy_prot.modules.models import IC_NIS
|
|
from prodigy_prot.modules.utils import dg_to_kd
|
|
|
|
|
|
def calculate_ic(
    model: Model, d_cutoff: float = 5.5, selection: Optional[dict[str, int]] = None
) -> list:
    """
    Collect the residue pairs of ``model`` that are in contact across chains.

    A ``NeighborSearch`` is built over every atom of the model and queried
    with ``search_all(radius=d_cutoff, level="R")`` to obtain candidate
    residue pairs.

    * With a non-empty ``selection`` (chain id -> group index), a pair is
      kept only when both chain ids are keys of ``selection`` and they map
      to different group indices.
    * Otherwise a pair is kept when the two residues belong to different
      chains.

    Returns the kept pairs as a sorted list.

    Raises ``ValueError`` when no pair is kept.
    """
    searcher = NeighborSearch(list(model.get_atoms()))
    residue_pairs = searcher.search_all(radius=d_cutoff, level="R")

    assert residue_pairs is not None

    contacts = []
    if selection:
        for pair in residue_pairs:
            chain_a = pair[0].parent.id
            chain_b = pair[1].parent.id
            # Both partners must belong to a selected chain ...
            if chain_a not in selection or chain_b not in selection:
                continue
            # ... and the two chains must sit in different groups.
            if selection[chain_a] != selection[chain_b]:
                contacts.append(pair)
    else:
        for pair in residue_pairs:
            if pair[0].parent.id != pair[1].parent.id:
                contacts.append(pair)

    if len(contacts) == 0:
        raise ValueError("No contacts found for selection")

    return sorted(contacts)
|
|
|
|
|
|
def analyse_contacts(contact_list: list) -> dict[str, float]:
    """
    Count contacts per pair of residue classes.

    Every contact in ``contact_list`` is classified twice: once with the
    ``aa_properties.aa_character_ic`` lookup and once with the
    ``aa_properties.aa_character_hydro`` lookup (both keyed by residue
    name). Under a given lookup, a contact is counted only when both
    residue names are present; the two class labels are then sorted and
    concatenated to form the bin key.

    Returns a dict of counts (as floats) for the keys
    ``AA, PP, CC, AP, CP, AC, LL, BL, BB``.
    """
    labels = ("AA", "PP", "CC", "AP", "CP", "AC", "LL", "BL", "BB")
    tally = dict.fromkeys(labels, 0.0)

    schemes = (aa_properties.aa_character_ic, aa_properties.aa_character_hydro)
    for scheme in schemes:
        for first, second in contact_list:
            class_a = scheme.get(first.resname)
            class_b = scheme.get(second.resname)
            # Residues unknown to this scheme do not contribute.
            if class_a is None or class_b is None:
                continue
            key = "".join(sorted((class_a, class_b)))
            tally[key] += 1

    return tally
|
|
|
|
|
|
def analyse_nis(sasa_dict: dict, acc_threshold: float = 0.05) -> list[float]:
    """
    Return the percentages of apolar, charged and polar residues among the
    residues that satisfy an accessibility criterion.

    ``sasa_dict`` maps 3-tuples, whose second element is the residue name,
    to an accessibility value. Only entries whose value is
    ``>= acc_threshold`` are considered; each one is classified through
    ``aa_properties.aa_character_protorp`` as ``"A"``, ``"C"`` or ``"P"``.

    Returns ``[pct_A, pct_C, pct_P]`` (in that order). When no residue
    reaches the threshold (including an empty ``sasa_dict``) the result is
    ``[0.0, 0.0, 0.0]`` instead of a division by zero.

    Raises ``KeyError`` for a residue name missing from the lookup table
    and ``ValueError`` when the table yields a class other than A/C/P.
    """
    accessible = []
    for res, rsa in sasa_dict.items():
        _, resn, _ = res
        if rsa >= acc_threshold:
            accessible.append(resn)

    if not accessible:
        # Nothing to classify: percentages of an empty set are reported as 0.
        return [0.0, 0.0, 0.0]

    character_of = aa_properties.aa_character_protorp
    index_of = {"A": 0, "C": 1, "P": 2}

    count = [0, 0, 0]
    for resn in accessible:
        aa_character = character_of[resn]
        if aa_character not in index_of:
            raise ValueError(
                f"Unexpected residue class {aa_character!r} for residue {resn!r}"
            )
        count[index_of[aa_character]] += 1

    # Every accessible residue was counted above, so total > 0 here.
    total = sum(count)
    return [100.0 * x / total for x in count]
|
|
|
|
|
|
class Prodigy:
    """
    Binding-affinity prediction for a single model.

    ``predict`` computes the intermolecular contacts between the selected
    chain groups, classifies them, computes the NIS percentages from the
    solvent accessibility of the model and combines them via ``IC_NIS``
    into ``ba_val``; ``kd_val`` is derived from ``ba_val`` and ``temp``
    via ``dg_to_kd``. The ``print_*`` methods report the stored results
    to a file or to standard output.
    """

    def __init__(
        self,
        model: "Model",
        name: str = "",
        selection: Optional[list[str]] = None,
        temp: float = 25.0,
    ):
        """
        Store the inputs and initialise all results to zero / empty.

        ``selection`` is a list of groups, each a comma-separated string of
        chain ids (e.g. ``["A,B", "C"]``). When omitted, every chain of
        ``model`` becomes its own group.
        """
        self.temp = float(temp)
        if selection is None:
            self.selection = [chain.id for chain in model.get_chains()]
        else:
            self.selection = selection
        self.model = model
        self.name = name
        self.ic_network: list = []
        self.bins: dict[str, float] = {
            "CC": 0.0,
            "CP": 0.0,
            "AC": 0.0,
            "PP": 0.0,
            "AP": 0.0,
            "AA": 0.0,
            "LL": 0.0,
            "BL": 0.0,
            "BB": 0.0,
        }

        self.nis_a = 0.0
        self.nis_c = 0.0
        self.nis_p = 0.0
        self.ba_val = 0.0
        self.kd_val = 0.0

    def predict(
        self,
        temp: Optional[float] = None,
        distance_cutoff: float = 5.5,
        acc_threshold: float = 0.05,
    ):
        """
        Run the prediction and store the results on the instance.

        ``temp`` overrides the stored temperature when given.
        ``distance_cutoff`` is forwarded to ``calculate_ic`` and
        ``acc_threshold`` to ``analyse_nis``.

        Raises ``ValueError`` when a chain id appears more than once in the
        selection.
        """
        if temp is not None:
            # Same coercion as in __init__ so self.temp is always a float.
            self.temp = float(temp)

        # Map every selected chain id to the index of its group.
        selection_dict: dict[str, int] = {}
        for igroup, group in enumerate(self.selection):
            for chain in group.split(","):
                if chain in selection_dict:
                    errmsg = f"Selections must be disjoint sets: {chain} is repeated"
                    raise ValueError(errmsg)
                selection_dict[chain] = igroup

        # Contacts
        self.ic_network = calculate_ic(
            self.model, d_cutoff=distance_cutoff, selection=selection_dict
        )
        self.bins = analyse_contacts(self.ic_network)

        # SASA
        _, cmplx_sasa = execute_freesasa_api(self.model)
        self.nis_a, self.nis_c, self.nis_p = analyse_nis(
            cmplx_sasa, acc_threshold=acc_threshold
        )

        # Affinity Calculation
        self.ba_val = IC_NIS(
            self.bins["CC"],
            self.bins["AC"],
            self.bins["PP"],
            self.bins["AP"],
            self.nis_a,
            self.nis_c,
        )
        self.kd_val = dg_to_kd(self.ba_val, self.temp)

    def as_dict(self) -> dict:
        """Return the stored inputs and results, plus all contact bins, as one dict."""
        return_dict = {
            "model": self.model.id,
            "selection": self.selection,
            "temp": self.temp,
            "ICs": len(self.ic_network),
            "nis_a": self.nis_a,
            "nis_c": self.nis_c,
            "nis_p": self.nis_p,
            "ba_val": self.ba_val,
            "kd_val": self.kd_val,
        }
        return_dict.update(self.bins)
        return return_dict

    @staticmethod
    def _write_lines(lines: list, outfile: str) -> None:
        """Write ``lines`` to ``outfile`` (UTF-8), or to stdout when it is empty."""
        if outfile:
            # The context manager closes the file even if writing fails.
            with open(outfile, "w", encoding="utf-8") as handle:
                handle.writelines(lines)
        else:
            sys.stdout.writelines(lines)

    def print_prediction(self, outfile: str = "", quiet: bool = False, showall: bool = False) -> None:
        """
        Report the prediction to ``outfile`` or, when it is empty, to stdout.

        ``quiet`` prints only the name and the binding affinity on one line.
        ``showall`` additionally prints the LL/BL/BB contact counts and the
        polar NIS percentage.
        """
        if quiet:
            lines = ["{0}\t{1:8.3f}\n".format(self.name, self.ba_val)]
        else:
            bins = self.bins
            lines = [
                f"[+] No. of intermolecular contacts: {len(self.ic_network)}\n",
                f"[+] No. of Charged-Charged contacts: {bins['CC']}\n",
                f"[+] No. of Charged-Polar contacts: {bins['CP']}\n",
                f"[+] No. of Charged-Apolar contacts: {bins['AC']}\n",
                f"[+] No. of Polar-Polar contacts: {bins['PP']}\n",
                f"[+] No. of Apolar-Polar contacts: {bins['AP']}\n",
                f"[+] No. of Apolar-Apolar contacts: {bins['AA']}\n",
            ]

            if showall:
                lines.extend(
                    [
                        f"[+] No. of hydrophiLic-hydrophiLic contacts: {bins['LL']}\n",
                        f"[+] No. of hydrophoBic-hydrophiLic contacts: {bins['BL']}\n",
                        f"[+] No. of hydrophoBic-hydrophoBic contacts: {bins['BB']}\n",
                        f"[+] Percentage of Polar NIS residues: {self.nis_p:3.2f}\n",
                    ]
                )

            lines.extend(
                [
                    f"[+] Percentage of Apolar NIS residues: {self.nis_a:3.2f}\n",
                    f"[+] Percentage of Charged NIS residues: {self.nis_c:3.2f}\n",
                    f"[++] predicted binding affinity (kcal.mol-1): {self.ba_val:8.1f}\n",
                    f"[++] predicted dissociation constant (M) at {self.temp:.1f}˚C: {self.kd_val:8.1e}\n",
                ]
            )

        self._write_lines(lines, outfile)

    def print_contacts(self, outfile: str = "") -> None:
        """
        Write one line per contact (residue name, number and chain of both
        partners) to ``outfile`` or, when it is empty, to stdout.

        The residue whose chain belongs to the first selection group is
        printed first.
        """
        _fmt_str = (
            "{0.resname:>5s} {0.id[1]:5} {0.parent.id:>3s} {1.resname:>5s}"
            " {1.id[1]:5} {1.parent.id:>3s}\n"
        )

        # Exact chain ids of the first group. Testing membership on the raw
        # "A,B" string would be a substring test and match "A" inside "AA".
        # Only evaluated when there are contacts to print.
        first_group = set(self.selection[0].split(",")) if self.ic_network else set()

        lines = []
        for res1, res2 in self.ic_network:
            if res1.parent.id not in first_group:
                res1, res2 = res2, res1
            lines.append(_fmt_str.format(res1, res2))

        self._write_lines(lines, outfile)

    def print_pymol_script(self, outfile: str = "") -> None:
        """
        Write a PyMOL (.pml) script highlighting the interface residues of
        the first two selection groups to ``outfile`` or, when it is empty,
        to stdout.

        Raises ``IndexError`` when fewer than two selection groups exist.
        The script is fully assembled before the output is opened, so a
        failure does not leave a truncated file behind.
        """
        # Chain selection strings in PyMOL syntax ("A,B" -> "A+B").
        selection_strings = [group.replace(",", "+") for group in self.selection]

        # Interface residue numbers per selected chain.
        chains: dict[str, set] = {
            chain_id: set() for group in self.selection for chain_id in group.split(",")
        }
        for pair in self.ic_network:
            for residue in pair:
                chains[residue.parent.id].add(residue.id[1])

        # Default setup commands.
        lines = [
            "color silver\n",
            "as cartoon\n",
            "bg_color white\n",
            "center\n",
            "color lightblue, chain {}\n".format(selection_strings[0]),
            "color lightpink, chain {}\n".format(selection_strings[1]),
        ]

        # One named selection per interface side.
        for color, iface in (("blue", 1), ("hotpink", 2)):
            p_sel_string = " or ".join(
                "chain {} and resi {}".format(
                    chain_id,
                    # Sorted so the script is identical from run to run.
                    "+".join(str(number) for number in sorted(chains[chain_id])),
                )
                for chain_id in selection_strings[iface - 1].split("+")
            )
            lines.append("select iface{}, {}\n".format(iface, p_sel_string))
            lines.append("color {}, iface{}\n".format(color, iface))
            lines.append("show sticks, iface{}\n".format(iface))

        self._write_lines(lines, outfile)
|