From d882a2f5d1c788ccdc3c0c7482c969191b62c895 Mon Sep 17 00:00:00 2001 From: Jimmy Charnley Kromann Date: Thu, 15 Nov 2012 17:47:47 +0100 Subject: [PATCH] downloaded from propka.ki.ku.dk --- .gitignore | 5 + README.md | 42 +- Source/__init__.py | 3 + Source/atom.py | 387 +++++++++ Source/bonds.py | 485 +++++++++++ Source/calculations.py | 891 ++++++++++++++++++++ Source/conformation_container.py | 439 ++++++++++ Source/coupled_groups.py | 318 ++++++++ Source/determinant.py | 26 + Source/determinants.py | 238 ++++++ Source/group.py | 1315 ++++++++++++++++++++++++++++++ Source/iterative.py | 358 ++++++++ Source/lib.py | 198 +++++ Source/ligand.py | 403 +++++++++ Source/ligand_pka_values.py | 118 +++ Source/molecular_container.py | 247 ++++++ Source/output.py | 391 +++++++++ Source/parameters.py | 529 ++++++++++++ Source/pdb.py | 323 ++++++++ Source/protein_bonds.dat | 568 +++++++++++++ Source/protonate.py | 443 ++++++++++ Source/vector_algebra.py | 315 +++++++ Source/version.py | 212 +++++ propka.cfg | 398 +++++++++ propka.py | 23 + 25 files changed, 8673 insertions(+), 2 deletions(-) create mode 100644 .gitignore create mode 100644 Source/__init__.py create mode 100644 Source/atom.py create mode 100644 Source/bonds.py create mode 100644 Source/calculations.py create mode 100644 Source/conformation_container.py create mode 100644 Source/coupled_groups.py create mode 100644 Source/determinant.py create mode 100644 Source/determinants.py create mode 100644 Source/group.py create mode 100644 Source/iterative.py create mode 100755 Source/lib.py create mode 100755 Source/ligand.py create mode 100644 Source/ligand_pka_values.py create mode 100755 Source/molecular_container.py create mode 100644 Source/output.py create mode 100644 Source/parameters.py create mode 100644 Source/pdb.py create mode 100644 Source/protein_bonds.dat create mode 100755 Source/protonate.py create mode 100644 Source/vector_algebra.py create mode 100644 Source/version.py create mode 100644 propka.cfg create mode 100755 propka.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..97a38a4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ + +# Compiled python files +*.py[cod] + + diff --git a/README.md b/README.md index f5171dd..30f5e8f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,40 @@ -propka-3.1 -========== \ No newline at end of file +# PROPKA 3.1 + + + See [propka.ki.ku.dk](http://propka.ki.ku.dk/) + for more information. + +## Installation + +No installation needed. Just clone and run. + +## Requirements + +* Python 3.1 or higher + +## Getting started + +1. Clone the code from GitHub +2. Run 'propka.py' with a .pdb file (see Examples) + +## Examples + +Calculate + + propka.py 1hpx.pdb + + +If you don't have Python 3.1 installed, but +Python 3.2 (most Ubuntu distributions), then run +propka like; + + python3.2 propka.py 1hpx.pdb + + +## Testing (for developers) + +Please run test before pushing commits. + + + + diff --git a/Source/__init__.py b/Source/__init__.py new file mode 100644 index 0000000..e286beb --- /dev/null +++ b/Source/__init__.py @@ -0,0 +1,3 @@ +# +__all__ = ['coupled_residues', 'lib', 'parameters', 'residue', 'bonds', 'debug', 'ligand', 'pdb', 'calculator', 'determinants', 'mutate', 'protein', 'chain', 'iterative', 'output', 'protonate'] + diff --git a/Source/atom.py b/Source/atom.py new file mode 100644 index 0000000..c964930 --- /dev/null +++ b/Source/atom.py @@ -0,0 +1,387 @@ +import string, Source.lib, Source.group + + +class Atom: + """ + Atom class - contains all atom information found in the pdbfile + """ + + def __init__(self, line=None, verbose=False): + + self.set_properties(line) + + self.residue_label = "%-3s%4d%2s" % (self.name,self.resNumb, self.chainID) + + self.groups_extracted = 0 + self.group = None + self.group_type = None + self.number_of_bonded_elements = {} + + self.cysteine_bridge = False + + self.bonded_atoms = [] + self.residue = None + self.conformation_container = None + self.molecular_container = None + self.is_protonated = False + self.steric_number_and_lone_pairs_set = False + + self.terminal = None + self.charge = 0 + self.charge_set = False + self.steric_number = 0 + self.number_of_lone_pairs = 0 + self.number_of_protons_to_add = 0 + self.number_of_pi_electrons_in_double_and_triple_bonds = 0 + self.number_of_pi_electrons_in_conjugate_double_and_triple_bonds = 0 + + # ligand atom types + self.sybyl_type = '' + self.sybyl_assigned = False + self.marvin_pka = False + + + return + + + + + def set_properties(self, line): + + self.name = '' + self.numb = 0 + self.x = 0.0 + self.y = 0.0 + self.z = 0.0 + self.resNumb = 0 + self.resName = '' + self.chainID = 'A' + self.type = '' + self.occ = '1.0' + self.beta = '0.0' + self.element = '' + self.icode = '' + + if line: + self.name = line[12:16].strip() + self.numb = int( line[ 6:11].strip() ) + self.x = float( line[30:38].strip() ) + self.y = float( line[38:46].strip() ) + self.z = float( line[46:54].strip() ) + self.resNumb = int( line[22:26].strip() ) + self.resName = "%-3s" % (line[17:20].strip()) + self.chainID = max(line[21], 'A') # set chain id to A in case of white space + self.type = line[:6].strip().lower() + + if self.resName in ['DA ','DC ','DG ','DT ']: + self.type = 'hetatm' + + self.occ = line[55:60].strip() + self.beta = line[60:66].strip() + self.icode = line[26:27] + + # Set the element using the position of the name in the pdb file + self.element = line[12:14].strip().strip(string.digits) + if len(self.name) == 4: + self.element = self.element[0] + if len(self.element)==2: + self.element = '%1s%1s'%(self.element[0], self.element[1].lower()) + + return + + def set_group_type(self, type): + self.group_type = type + return + + def count_bonded_elements(self, element): + res = 0 + for ba in self.bonded_atoms: + if element == ba.element: + res +=1 + return res + + + def get_bonded_elements(self, element): + res = [] + for ba in self.bonded_atoms: + if ba.element == element: + res.append(ba) + return res + + def get_bonded_heavy_atoms(self): + return [ba for ba in self.bonded_atoms if ba.element!='H'] + + + def is_atom_within_bond_distance(self, other_atom, max_bonds, cur_bond): + """ check if is found within bonds of self """ + for ba in self.bonded_atoms: + if ba == other_atom: + return True + if max_bonds > cur_bond: + if ba.is_atom_within_bond_distance(other_atom, max_bonds, cur_bond+1): + return True + + return False + + + + def setProperty(self, + numb = None, + name = None, + resName = None, + chainID = None, + resNumb = None, + x = None, + y = None, + z = None, + occ = None, + beta = None): + """ + sets properties of the atom object + """ + + if numb != None: self.numb = numb + if name != None: self.name = name + if resName != None: self.resName = resName + if chainID != None: self.chainID = chainID + if resNumb != None: self.resNumb = resNumb + if x != None: self.x = x + if y != None: self.y = y + if z != None: self.z = z + if occ != None: self.occ = occ + if beta != None: self.beta = beta + + + + def makeCopy(self): + """ + making a copy of this atom + """ + + newAtom = Atom() + newAtom.type = self.type + newAtom.numb = self.numb + newAtom.name = self.name + newAtom.element = self.element + newAtom.resName = self.resName + newAtom.resNumb = self.resNumb + newAtom.chainID = self.chainID + newAtom.x = self.x + newAtom.y = self.y + newAtom.z = self.z + newAtom.occ = self.occ + newAtom.beta = self.beta + newAtom.terminal = self.terminal + newAtom.residue_label = self.residue_label + newAtom.icode = self.icode + + + + return newAtom + + + def make_input_line(self): + # making string + group = '-' + model_pka = '-' + if self.group: + group = self.group.type + if self.terminal == 'C-': + group = 'C-' ## circumventing C-/COO parameter unification + + if self.group.titratable: + model_pka = '%6.2f'%self.group.model_pka + + + str = "%-6s%5d %s %s%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s \n" % (self.type.upper(), + self.numb, + Source.lib.makeTidyAtomLabel(self.name, + self.element), + self.resName, + self.chainID, + self.resNumb, + self.x, + self.y, + self.z, + group, + model_pka) + + + + + return str + + def make_conect_line(self): + res = 'CONECT%5d'%self.numb + + # extract and sort numbers of bonded residues + bonded = [] + for atom in self.bonded_atoms: + bonded.append(atom.numb) + bonded.sort() + + # write 'em out + for b in bonded: + res += '%5d'%b + res += '\n' + return res + + + def get_input_parameters(self): + """ Method for getting the input parameters stored in the + occupancy and b-factor fields in input files""" + + # Set the group type + if self.occ != '-': + # make sure to set the terminal + if self.occ in ['N+','C-']: + self.terminal = self.occ + + # save the ligand group charge + if self.occ =='BLG': + self.charge=+1 + elif self.occ =='ALG': + self.charge=-1 + + # generic ions + if self.occ in ['1P','2P','1N','2N']: + self.resName=self.occ + self.occ='Ion' + + # correct the group type + self.occ = self.occ.replace('N+','Nterm') + self.occ = self.occ.replace('C-','Cterm') + self.occ = self.occ.replace('ION','Ion') + self.occ = self.occ.replace('ALG','titratable_ligand') + self.occ = self.occ.replace('BLG','titratable_ligand') + self.occ = self.occ.replace('LG','non_titratable_ligand') + + # try to initialise the group + try: + exec('self.group = Source.group.%s_group(self)'%self.occ) + except: + raise Exception('%s in input_file is not recognized as a group'%self.occ) + + # set the model pKa value + if self.beta != '-': + self.group.model_pka = float(self.beta) + self.group.model_pka_set = True + + # set occ and beta to standard values + self.occ = '1.00' + self.beta = '0.00' + + + + return + + + + def make_pdb_line(self): + + # making string + str = "%-6s%5d %s %s%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s\n" % (self.type.upper(), + self.numb, + Source.lib.makeTidyAtomLabel(self.name, + self.element), + self.resName, + self.chainID, + self.resNumb, + self.x, + self.y, + self.z, + self.occ, + self.beta) + + + return str + + + + def make_mol2_line(self,id): +#1 S1 3.6147 2.0531 1.4795 S.3 1 noname -0.1785 + # making string + str = "%-4d %-4s %10.4f %10.4f %10.4f %6s %6d %10s %10.4f\n" % (id, + Source.lib.makeTidyAtomLabel(self.name, + self.element), + self.x, + self.y, + self.z, + self.sybyl_type.replace('-',''), + self.resNumb, + self.resName, + 0.0)#self.charge) + + + return str + + + + def makePDBLine(self, + numb = None, + name = None, + resName = None, + chainID = None, + resNumb = None, + x = None, + y = None, + z = None, + occ = None, + beta = None): + """ + returns a pdb ATOM-line for various purposes; + specifying arguments over-writes. + """ + if numb == None: numb = self.numb + if name == None: name = self.name + if resName == None: resName = self.resName + if chainID == None: chainID = self.chainID + if resNumb == None: resNumb = self.resNumb + if x == None: x = self.x + if y == None: y = self.y + if z == None: z = self.z + if occ == None: occ = self.occ + if beta == None: beta = self.beta + + # making string + str = "ATOM " + str += "%6d" % (numb) + str += " %s" % (Source.lib.makeTidyAtomLabel(name,self.element)) + str += " %s" % (resName) + str += "%2s" % (chainID) + str += "%4d" % (resNumb) + str += "%12.3lf" % (x) + str += "%8.3lf" % (y) + str += "%8.3lf" % (z) + str += "%6.2lf" % (occ) + str += "%6.2lf" % (beta) + str += '\n' + return str + + + def getTidyLabel(self): + """ + Returns a 'tidier' atom label for printing the new pdbfile + """ + return Source.lib.makeTidyAtomLabel(self.name,self.element) + + + def __str__(self): + return '%5d-%4s %5d-%3s (%1s) [%8.3f %8.3f %8.3f] %s' %(self.numb, self.name, self.resNumb, self.resName, self.chainID, self.x, self.y, self.z,self.element) + + +# def get_element(self): +# """ try to extract element if not already done""" +# if self.element == '': +# self.element = self.name.strip(string.digits)[0] +# return self.element + + + def set_residue(self, residue): + """ Makes a references to the parent residue""" + if self.residue == None: + self.residue = residue + + + + diff --git a/Source/bonds.py b/Source/bonds.py new file mode 100644 index 0000000..bd3e72f --- /dev/null +++ b/Source/bonds.py @@ -0,0 +1,485 @@ +import pickle,sys,os,math,Source.calculations + + +class bondmaker: + def __init__(self): + + # predefined bonding distances + self.distances = { + 'S-S':2.5, + 'F-F':1.7} + + self.distances_squared = {} + for k in self.distances.keys(): + self.distances_squared[k]=self.distances[k]*self.distances[k] + + self.H_dist = 1.5; + self.default_dist = 2.0; + + self.H_dist_squared = self.H_dist * self.H_dist + self.default_dist_squared = self.default_dist * self.default_dist + + self.max_sq_distance = max(list(self.distances_squared.values())+[self.default_dist_squared]) + + # protein bonding data + path = os.path.split(__file__)[0] + self.data_file_name = os.path.join(path,'protein_bonds.dat') + + data = open(self.data_file_name,'rb') + self.protein_bonds = pickle.load(data) + data.close() + + + self.intra_residue_backbone_bonds = {'N': ['CA'], + 'CA':['N','C'], + 'C': ['CA','O'], + 'O': ['C']} + + self.number_of_pi_electrons_in_bonds_in_backbone = {'C':1, + 'O':1} + + self.number_of_pi_electrons_in_conjugate_bonds_in_backbone = {'N':1} + + self.number_of_pi_electrons_in_bonds_in_sidechains = {'ARG-CZ' :1, + 'ARG-NH1':1, + 'ASN-OD1':1, + 'ASN-CG' :1, + 'ASP-OD1':1, + 'ASP-CG' :1, + 'GLU-OE1':1, + 'GLU-CD' :1, + 'GLN-OE1':1, + 'GLN-CD' :1, + 'HIS-CG' :1, + 'HIS-CD2':1, + 'HIS-ND1':1, + 'HIS-CE1':1, + 'PHE-CG' :1, + 'PHE-CD1':1, + 'PHE-CE1':1, + 'PHE-CZ' :1, + 'PHE-CE2':1, + 'PHE-CD2':1, + 'TRP-CG' :1, + 'TRP-CD1':1, + 'TRP-CE2':1, + 'TRP-CD2':1, + 'TRP-CE3':1, + 'TRP-CZ3':1, + 'TRP-CH2':1, + 'TRP-CZ2':1, + 'TYR-CG' :1, + 'TYR-CD1':1, + 'TYR-CE1':1, + 'TYR-CZ' :1, + 'TYR-CE2':1, + 'TYR-CD2':1} + + + self.number_of_pi_electrons_in_conjugate_bonds_in_sidechains = {'ARG-NE' :1, + 'ARG-NH2':1, + 'ASN-ND2':1, + 'GLN-NE2':1, + 'HIS-NE2':1, + 'TRP-NE1':1} + + self.number_of_pi_electrons_in_bonds_ligands = {'C.ar':1, + 'N.pl3':0, + 'C.2':1, + 'O.2':1, + 'O.co2':1, + 'N.ar':1, + 'C.1':2, + 'N.1':2} + + self.number_of_pi_electrons_in_conjugate_bonds_in_ligands = {'N.am':1,'N.pl3':1} + + + self.backbone_atoms = list(self.intra_residue_backbone_bonds.keys()) + + self.terminal_oxygen_names = ['OXT','O\'\''] + + + return + + + def find_bonds_for_protein(self, protein): + """ Finds bonds proteins based on the way atoms + normally bond in proteins""" + + print('++++ Side chains ++++') + # side chains + for chain in protein.chains: + for residue in chain.residues: + if residue.resName.replace(' ','') not in ['N+','C-']: + self.find_bonds_for_side_chain(residue.atoms) + + print('++++ Backbones ++++') + # backbone + last_residues = [] + for chain in protein.chains: + for i in range(1,len(chain.residues)): + if chain.residues[i-1].resName.replace(' ','') not in ['N+','C-']: + if chain.residues[i].resName.replace(' ','') not in ['N+','C-']: + self.connect_backbone(chain.residues[i-1], chain.residues[i]) + last_residues.append(chain.residues[i]) + + print('++++ terminal oxygen ++++') + # terminal OXT + for last_residue in last_residues: + self.find_bonds_for_terminal_oxygen(last_residue) + + print('++++ cysteines ++++') + # Cysteines + for chain in protein.chains: + for i in range(0,len(chain.residues)): + if chain.residues[i].resName == 'CYS': + for j in range(0,len(chain.residues)): + if chain.residues[j].resName == 'CYS' and j != i: + self.check_for_cysteine_bonds(chain.residues[i], + chain.residues[j]) + return + + def check_for_cysteine_bonds(self, cys1, cys2): + for atom1 in cys1.atoms: + if atom1.name == 'SG': + for atom2 in cys2.atoms: + if atom2.name == 'SG': + if Source.calculations.squared_distance(atom1,atom2) < self.SS_dist_squared: + self.make_bond(atom1, atom2) + + + return + + def find_bonds_for_terminal_oxygen(self, residue): + for atom1 in residue.atoms: + if atom1.name in self.terminal_oxygen_names: + for atom2 in residue.atoms: + if atom2.name == 'C': + self.make_bond(atom1, atom2) + + return + + + def connect_backbone(self, residue1, residue2): + """ Sets up bonds in the backbone """ + # residue 1 + self.find_bonds_for_residue_backbone(residue1) + + # residue 2 + self.find_bonds_for_residue_backbone(residue2) + + # inter-residue bond + for atom1 in residue1.atoms: + if atom1.name == 'C': + for atom2 in residue2.atoms: + if atom2.name == 'N': + if Source.calculations.squared_distance(atom1,atom2) < self.default_dist_squared: + self.make_bond(atom1, atom2) + + return + + def find_bonds_for_residue_backbone(self, residue): + for atom1 in residue.atoms: + if atom1.name in list(self.number_of_pi_electrons_in_bonds_in_backbone.keys()): + atom1.number_of_pi_electrons_in_double_and_triple_bonds = self.number_of_pi_electrons_in_bonds_in_backbone[atom1.name] + if atom1.name in list(self.number_of_pi_electrons_in_conjugate_bonds_in_backbone.keys()) and len(atom1.bonded_atoms)>1: # last part to avoid including N-term + atom1.number_of_pi_electrons_in_conjugate_double_and_triple_bonds = self.number_of_pi_electrons_in_conjugate_bonds_in_backbone[atom1.name] + + if atom1.name in self.backbone_atoms: + for atom2 in residue.atoms: + if atom2.name in self.intra_residue_backbone_bonds[atom1.name]: + self.make_bond(atom1, atom2) + + return + + + def find_bonds_for_side_chain(self, atoms): + """ Finds bonds for a side chain """ + for atom1 in atoms: + + key = '%s-%s'%(atom1.resName,atom1.name) + if key in list(self.number_of_pi_electrons_in_bonds_in_sidechains.keys()): + atom1.number_of_pi_electrons_in_double_and_triple_bonds = self.number_of_pi_electrons_in_bonds_in_sidechains[key] + if key in list(self.number_of_pi_electrons_in_conjugate_bonds_in_sidechains.keys()): + atom1.number_of_pi_electrons_in_conjugate_double_and_triple_bonds = self.number_of_pi_electrons_in_conjugate_bonds_in_sidechains[key] + + if not atom1.name in self.backbone_atoms: + if not atom1.name in self.terminal_oxygen_names: + for atom2 in atoms: + if atom2.name in self.protein_bonds[atom1.resName][atom1.name]: + self.make_bond(atom1,atom2) + + return + + + def find_bonds_for_ligand(self, ligand): + """ Finds bonds for all atoms in the molecule """ + # identify bonding atoms + self.find_bonds_for_atoms(ligand.atoms) + self.add_pi_electron_table_info(ligand.atoms) + + return + + def add_pi_electron_table_info(self, atoms): + # apply table information on pi-electrons + for atom in atoms: + # for ligands + if atom.type == 'hetatm': + if atom.sybyl_type in self.number_of_pi_electrons_in_bonds_ligands.keys(): + atom.number_of_pi_electrons_in_double_and_triple_bonds = self.number_of_pi_electrons_in_bonds_ligands[atom.sybyl_type] + if atom.sybyl_type in self.number_of_pi_electrons_in_conjugate_bonds_in_ligands.keys(): + atom.number_of_pi_electrons_in_conjugate_double_and_triple_bonds = self.number_of_pi_electrons_in_conjugate_bonds_in_ligands[atom.sybyl_type] + + # for protein + if atom.type == 'atom': + key = '%s-%s'%(atom.resName,atom.name) + if key in list(self.number_of_pi_electrons_in_bonds_in_sidechains.keys()): + atom.number_of_pi_electrons_in_double_and_triple_bonds = self.number_of_pi_electrons_in_bonds_in_sidechains[key] + if key in list(self.number_of_pi_electrons_in_conjugate_bonds_in_sidechains.keys()): + atom.number_of_pi_electrons_in_conjugate_double_and_triple_bonds = self.number_of_pi_electrons_in_conjugate_bonds_in_sidechains[key] + + if atom.name in list(self.number_of_pi_electrons_in_bonds_in_backbone.keys()): + atom.number_of_pi_electrons_in_double_and_triple_bonds = self.number_of_pi_electrons_in_bonds_in_backbone[atom.name] + if atom.name in list(self.number_of_pi_electrons_in_conjugate_bonds_in_backbone.keys()) and len(atom.bonded_atoms)>1: # last part to avoid including N-term + atom.number_of_pi_electrons_in_conjugate_double_and_triple_bonds = self.number_of_pi_electrons_in_conjugate_bonds_in_backbone[atom.name] + + return + + + def find_bonds_for_protein_by_distance(self, molecule): + """ Finds bonds for all atoms in the molecule """ + #self.find_bonds_for_protein(molecule) + atoms = [] + for chain in molecule.chains: + for residue in chain.residues: + if residue.resName.replace(' ','') not in ['N+','C-']: + for atom in residue.atoms: + atoms.append(atom) + + self.find_bonds_for_atoms_using_boxes(atoms) ##### + #self.find_bonds_for_atoms(atoms) ##### + return atoms + + def find_bonds_for_atoms(self, atoms): + """ Finds all bonds for a list of atoms""" + no_atoms = len(atoms) + + for i in range(no_atoms): + for j in range(i+1,no_atoms): + + if atoms[i] in atoms[j].bonded_atoms: + continue + + if self.check_distance(atoms[i], atoms[j]): + self.make_bond(atoms[i],atoms[j]) + # di-sulphide bonds + if atoms[i].element == 'S' and atoms[j].element == 'S': + atoms[i].cysteine_bridge = True + atoms[j].cysteine_bridge = True + + + return + + + def check_distance(self, atom1, atom2): + sq_dist = Source.calculations.squared_distance(atom1, atom2) + + if sq_dist > self.max_sq_distance: + return False + + key = '%s-%s'%(atom1.element,atom2.element) + h_count = key.count('H') + + if sq_dist < self.H_dist_squared and h_count==1: + return True + if sq_dist < self.default_dist_squared and h_count==0: + return True + if key in self.distances_squared.keys(): + if sq_dist < self.distances_squared[key]: + return True + + + + return False + + def find_bonds_for_molecules_using_boxes(self, molecules): + """ Finds all bonds for a molecular container""" + for name in molecules.conformation_names: + self.find_bonds_for_atoms_using_boxes(molecules.conformations[name].atoms) + #for chain in molecules.conformations[name].chains: + # self.find_bonds_for_atoms_using_boxes(molecules.conformations[name].get_chain(chain)) + + return + + def add_pi_electron_information(self, molecules): + for name in molecules.conformation_names: + self.add_pi_electron_table_info(molecules.conformations[name].atoms) + return + + + def find_bonds_for_atoms_using_boxes(self, atoms): + """ Finds all bonds for a list of atoms""" + + box_size = 2.5 + + # find min and max coordinates + xmin = 1e6; xmax = -1e6 + ymin = 1e6; ymax = -1e6 + zmin = 1e6; zmax = -1e6 + for atom in atoms: + if atom.x > xmax: + xmax = atom.x + if atom.y > ymax: + ymax = atom.y + if atom.z > zmax: + zmax = atom.z + if atom.x < xmin: + xmin = atom.x + if atom.y < ymin: + ymin = atom.y + if atom.z < zmin: + zmin = atom.z + + xlen = xmax-xmin + ylen = ymax-ymin + zlen = zmax-zmin + + #print('x range: [%6.2f;%6.2f] %6.2f'%(xmin,xmax,xlen)) + #print('y range: [%6.2f;%6.2f] %6.2f'%(ymin,ymax,ylen)) + #print('z range: [%6.2f;%6.2f] %6.2f'%(zmin,zmax,zlen)) + + # how many boxes do we need in each dimension? + self.no_box_x = max(1, math.ceil(xlen/box_size)) + self.no_box_y = max(1, math.ceil(ylen/box_size)) + self.no_box_z = max(1, math.ceil(zlen/box_size)) + + #print('No. box x: %6.2f'%self.no_box_x) + #print('No. box y: %6.2f'%self.no_box_y) + #print('No. box z: %6.2f'%self.no_box_z) + + # initialize boxes + self.boxes = {} + for x in range(self.no_box_x): + for y in range(self.no_box_y): + for z in range(self.no_box_z): + self.boxes[self.box_key(x,y,z)] = [] + + # put atoms into boxes + for atom in atoms: + x = math.floor((atom.x-xmin)/box_size) + y = math.floor((atom.y-ymin)/box_size) + z = math.floor((atom.z-zmin)/box_size) + self.put_atom_in_box(x,y,z,atom) + + # assign bonds + keys = self.boxes.keys() + for key in keys: + self.find_bonds_for_atoms(self.boxes[key]) + + + + return + + def put_atom_in_box(self,x,y,z,atom): + # atom in the x,y,z box and the up to 7 neighboring boxes on + # one side of the x,y,z box in each dimension + for bx in [x,x+1]: + for by in [y,y+1]: + for bz in [z,z+1]: + key = self.box_key(bx,by,bz) + if key in self.boxes.keys(): + self.boxes[key].append(atom) + + #print(atom,'->',key,':',len(self.boxes[key])) + + return + + def box_key(self, x, y, z): + return '%d-%d-%d'%(x,y,z) + + + def has_bond(self, atom1, atom2): + if atom1 in atom2.bonded_atoms or atom2 in atom1.bonded_atoms: + return True + return False + + + def make_bond(self, atom1, atom2): + """ Makes a bond between atom1 and atom2 """ + if atom1 == atom2: + return + + #print('making bond for',atom1,atom2) + if not atom1 in atom2.bonded_atoms: + atom2.bonded_atoms.append(atom1) + + if not atom2 in atom1.bonded_atoms: + atom1.bonded_atoms.append(atom2) + + return + + + def generate_protein_bond_dictionary(self, atoms): + + for atom in atoms: + for bonded_atom in atom.bonded_atoms: + resi_i = atom.resName + name_i = atom.name + resi_j = bonded_atom.resName + name_j = bonded_atom.name + + if not name_i in self.backbone_atoms or\ + not name_j in self.backbone_atoms: + if not name_i in self.terminal_oxygen_names and\ + not name_j in self.terminal_oxygen_names: + + if not resi_i in list(self.protein_bonds.keys()): + self.protein_bonds[resi_i] = {} + if not name_i in self.protein_bonds[resi_i]: + self.protein_bonds[resi_i][name_i] = [] + + if not name_j in self.protein_bonds[resi_i][name_i]: + self.protein_bonds[resi_i][name_i].append(name_j) + + + if not resi_j in list(self.protein_bonds.keys()): + self.protein_bonds[resi_j] = {} + if not name_j in self.protein_bonds[resi_j]: + self.protein_bonds[resi_j][name_j] = [] + + if not name_i in self.protein_bonds[resi_j][name_j]: + self.protein_bonds[resi_j][name_j].append(name_i) + + + return + + +if __name__ == '__main__': + # If called directly, set up protein bond dictionary + import protein, pdb, sys,os + arguments = sys.argv + if len(arguments) != 2: + print('Usage: bonds.py ') + sys.exit(0) + + filename = arguments[1] + if not os.path.isfile(filename): + print('Error: Could not find \"%s\"'%filename) + sys.exit(1) + + pdblist = pdb.readPDB(filename) + my_protein = protein.Protein(pdblist,'test.pdb') + + for chain in my_protein.chains: + for residue in chain.residues: + residue.atoms = [atom for atom in residue.atoms if atom.element != 'H'] + + b = bondmaker() + #b.protein_bonds = {} + atoms = b.find_bonds_for_protein_by_distance(my_protein) + # b.generate_protein_bond_dictionary(atoms) + + #file = open(b.data_file_name,'wb') + #pickle.dump(b.protein_bonds, file) + #file.close() diff --git a/Source/calculations.py b/Source/calculations.py new file mode 100644 index 0000000..ed9f6aa --- /dev/null +++ b/Source/calculations.py @@ -0,0 +1,891 @@ + + +import math, Source.protonate, Source.bonds,copy, sys + + +# +# methods for setting up atom naming, bonding, and protonation +# + +def setup_bonding_and_protonation(parameters, molecular_container): + # make bonds + my_bond_maker = setup_bonding(parameters, molecular_container) + + # set up ligand atom names + set_ligand_atom_names(molecular_container) + + # apply information on pi electrons + my_bond_maker.add_pi_electron_information(molecular_container) + + # Protonate atoms + if molecular_container.options.protonate_all: + my_protonator = Source.protonate.Protonate(verbose=False) + my_protonator.protonate(molecular_container) + + + return + + + +def setup_bonding(parameters, molecular_container): + # make bonds + my_bond_maker = Source.bonds.bondmaker() + my_bond_maker.find_bonds_for_molecules_using_boxes(molecular_container) + + return my_bond_maker + + +def set_ligand_atom_names(molecular_container): + for name in molecular_container.conformation_names: + molecular_container.conformations[name].set_ligand_atom_names() + return + + + +# +# The following methods imitates propka3.0 protonation! +# +def setup_bonding_and_protonation_30_style(parameters, molecular_container): + # Protonate atoms + protonate_30_style(molecular_container) + + # make bonds + my_bond_maker = Source.bonds.bondmaker() + my_bond_maker.find_bonds_for_molecules_using_boxes(molecular_container) + + return + + +def protonate_30_style(molecular_container): + for name in molecular_container.conformation_names: + print('Now protonating',name) + # split atom into residues + curres = -1000000 + residue = [] + O=None + C=None + for atom in molecular_container.conformations[name].atoms: + if atom.resNumb != curres: + curres = atom.resNumb + if len(residue)>0: + #backbone + [O, C]= addBackBoneHydrogen(residue,O,C) + #arginine + if residue[0].resName == 'ARG': + addArgHydrogen(residue) + #histidine + if residue[0].resName == 'HIS': + addHisHydrogen(residue) + #tryptophan + if residue[0].resName == 'TRP': + addTrpHydrogen(residue) + #amides + if residue[0].resName in ['GLN','ASN']: + addAmdHydrogen(residue) + + + residue = [] + if atom.type=='atom': + residue.append(atom) + + return + +def addArgHydrogen(residue): + """ + Adds Arg hydrogen atoms to residues according to the 'old way'. + """ + #print('Adding arg H',residue) + for atom in residue: + if atom.name == "CD": + CD = atom + elif atom.name == "CZ": + CZ = atom + elif atom.name == "NE": + NE = atom + elif atom.name == "NH1": + NH1 = atom + elif atom.name == "NH2": + NH2 = atom + + H1 = protonateSP2([CD, NE, CZ]) + H1.name = "HE" + H2 = protonateDirection([NH1, NE, CZ]) + H2.name = "HN1" + H3 = protonateDirection([NH1, NE, CD]) + H3.name = "HN2" + H4 = protonateDirection([NH2, NE, CZ]) + H4.name = "HN3" + H5 = protonateDirection([NH2, NE, H1]) + H5.name = "HN4" + + return [H1,H2,H3,H4,H5] + +def addHisHydrogen(residue): + """ + Adds His hydrogen atoms to residues according to the 'old way'. + """ + for atom in residue: + if atom.name == "CG": + CG = atom + elif atom.name == "ND1": + ND = atom + elif atom.name == "CD2": + CD = atom + elif atom.name == "CE1": + CE = atom + elif atom.name == "NE2": + NE = atom + HD = protonateSP2([CG, ND, CE]) + HD.name = "HND" + HE = protonateSP2([CD, NE, CE]) + HE.name = "HNE" + return + +def addTrpHydrogen(residue): + """ + Adds Trp hydrogen atoms to residues according to the 'old way'. + """ + CD = None + NE = None + DE = None + for atom in residue: + if atom.name == "CD1": + CD = atom + elif atom.name == "NE1": + NE = atom + elif atom.name == "CE2": + CE = atom + if CD == None or NE == None or CE == None: + str = "Did not find all atoms in %s%4d - in %s" % (self.resName, self.resNumb, "addTrpHydrogen()") + print(str) + sys.exit(0) + + HE = protonateSP2([CD, NE, CE]) + HE.name = "HNE" + + return + +def addAmdHydrogen(residue): + """ + Adds Gln & Asn hydrogen atoms to residues according to the 'old way'. + """ + C = None + O = None + N = None + for atom in residue: + if (atom.resName == "GLN" and atom.name == "CD") or (atom.resName == "ASN" and atom.name == "CG"): + C = atom + elif (atom.resName == "GLN" and atom.name == "OE1") or (atom.resName == "ASN" and atom.name == "OD1"): + O = atom + elif (atom.resName == "GLN" and atom.name == "NE2") or (atom.resName == "ASN" and atom.name == "ND2"): + N = atom + + if C == None or O == None or N == None: + str = "Did not find N, C and/or O in %s%4d - in %s" % (atom.resName, atom.resNumb, "addAmdHydrogen()") + print(str) + sys.exit(0) + + H1 = protonateDirection([N, O, C]) + H1.name = "HN1" + H2 = protonateAverageDirection([N, C, O]) + H2.name = "HN2" + + return + +def addBackBoneHydrogen(residue, O, C): + """ + Adds hydrogen backbone atoms to residues according to the old way; dR is wrong for the N-terminus + (i.e. first residue) but it doesn't affect anything at the moment. Could be improved, but works + for now. + """ + + new_C = None + new_O = None + N = None + + + for atom in residue: + if atom.name == "N": + N = atom + if atom.name == "C": + new_C = atom + if atom.name == "O": + new_O = atom + + + + + if None in [C,O,N]: + return [new_O,new_C] + + + if N.resName == "PRO": + """ PRO doesn't have an H-atom; do nothing """ + else: + H = protonateDirection([N, O, C]) + H.name = "H" + + return [new_O,new_C] + + + + + +def protonateDirection(list): + """ + Protonates an atom, X1, given a direction (X2 -> X3) [X1, X2, X3] + """ + X1 = list[0] + X2 = list[1] + X3 = list[2] + + dX = (X3.x - X2.x) + dY = (X3.y - X2.y) + dZ = (X3.z - X2.z) + length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) + x = X1.x + dX/length + y = X1.y + dY/length + z = X1.z + dZ/length + + + H = make_new_H(X1,x,y,z) + H.name = "H" + + + return H + + +def protonateAverageDirection(list): + """ + Protonates an atom, X1, given a direction (X1/X2 -> X3) [X1, X2, X3] + Note, this one uses the average of X1 & X2 (N & O) unlike the previous + N - C = O + """ + X1 = list[0] + X2 = list[1] + X3 = list[2] + + dX = (X3.x + X1.x)*0.5 - X2.x + dY = (X3.y + X1.y)*0.5 - X2.y + dZ = (X3.z + X1.z)*0.5 - X2.z + + length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) + x = X1.x + dX/length + y = X1.y + dY/length + z = X1.z + dZ/length + + H = make_new_H(X1,x,y,z) + H.name = "H" + + + + return H + + +def protonateSP2(list): + """ + Protonates a SP2 atom, H2, given a list of [X1, X2, X3] + X1-X2-X3 + """ + X1 = list[0] + X2 = list[1] + X3 = list[2] + + dX = (X1.x + X3.x)*0.5 - X2.x + dY = (X1.y + X3.y)*0.5 - X2.y + dZ = (X1.z + X3.z)*0.5 - X2.z + length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) + x = X2.x - dX/length + y = X2.y - dY/length + z = X2.z - dZ/length + + H = make_new_H(X2,x,y,z) + H.name = "H" + + return H + + +def make_new_H(atom, x,y,z): + + new_H = Source.atom.Atom() + new_H.setProperty(numb = None, + name = 'H%s'%atom.name[1:], + resName = atom.resName, + chainID = atom.chainID, + resNumb = atom.resNumb, + x = x, + y = y, + z = z, + occ = None, + beta = None) + new_H.element = 'H' + + + new_H.bonded_atoms = [atom] + new_H.charge = 0 + new_H.steric_number = 0 + new_H.number_of_lone_pairs = 0 + new_H.number_of_protons_to_add = 0 + new_H.number_of_pi_electrons_in_double_and_triple_bonds = 0 + + atom.bonded_atoms.append(new_H) + atom.conformation_container.add_atom(new_H) + + return new_H + +######## 3.0 style protonation methods end + + + + + + + +# +# Desolvation methods +# + + +def radial_volume_desolvation(parameters, group): + all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms() + volume = 0.0 + group.Nmass = 0 + min_distance_4th = 57.1914 # pow(2.75, 4) + + for atom in all_atoms: + # ignore atoms in the same residue + if atom.resNumb == group.atom.resNumb and atom.chainID == group.atom.chainID: + continue + + sq_dist = squared_distance(group, atom) + + # desolvation + if sq_dist < parameters.desolv_cutoff_squared: + # use a default relative volume of 1.0 if the volume of the element is not found in parameters + dv = 1.0 + if atom.element in parameters.VanDerWaalsVolume.keys(): + dv = parameters.VanDerWaalsVolume[atom.element] + # insert check for methyl groups + if atom.element == 'C' and atom.name not in ['CA','C']: + dv = parameters.VanDerWaalsVolume['C4'] + + dv_inc = dv/max(min_distance_4th, sq_dist*sq_dist) +# dv_inc = dv/(sq_dist*sq_dist) - dv/(parameters.desolv_cutoff_squared*parameters.desolv_cutoff_squared) + volume += dv_inc + # buried + if sq_dist < parameters.buried_cutoff_squared: + group.Nmass += 1 + + group.buried = calculate_weight(parameters, group.Nmass) + scale_factor = calculate_scale_factor(parameters, group.buried) + volume_after_allowance = max(0.00, volume-parameters.desolvationAllowance) + + group.Emass = group.charge * parameters.desolvationPrefactor * volume_after_allowance * scale_factor + # Emass, Nmass + # Elocl, Nlocl -> reorganisation energy (count backbone hydorgen bond acceptors, C=O) + + + + #print('%s %5.2f %5.2f %4d'%(group, group.buried, group.Emass, group.Nmass)) + return + + + +def contactDesolvation(parameters, group): + """ + calculates the desolvation according to the Contact Model, the old default + """ + + local_radius = {'ASP': 4.5, + 'GLU': 4.5, + 'HIS': 4.5, + 'CYS': 3.5, + 'TYR': 3.5, + 'LYS': 4.5, + 'ARG': 5.0, + 'C-': 4.5, + 'N+': 4.5} + + all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms() + if residue.resName in version.desolvationRadii: + local_cutoff = version.desolvationRadii[residue.resName] + else: + local_cutoff = 0.00 + residue.Nmass = 0 + residue.Nlocl = 0 + + for atom in all_atoms: + if atom.resNumb != group.atom.resNumb or atom.chainID != group.atom.chainID: + dX = atom.x - residue.x + dY = atom.y - residue.y + dZ = atom.z - residue.z + distance = math.sqrt(dX*dX + dY*dY + dZ*dZ) + if distance < local_cutoff: + group.Nlocl += 1 + if distance < parameters.buried_cutoff: + group.Nmass += 1 + if residue.Nmass > 400: + group.location = "BURIED " + else: + group.location = "SURFACE" + group.Emass = group.charge * parameters.desolvationPrefactor * max(0.00, group.Nmass-parameters.desolvationAllowance) + group.Elocl = group.charge * parameters.desolvationLocal * group.Nlocl + # Buried ratio - new feature in propka3.0 + # Note, there will be an unforseen problem: e.g. if one residue has Nmass > Nmax and + # the other Nmass < Nmax, the Npair will not be Nmass1 + Nmass2! + residue.buried = calculateWeight(residue.Nmass) + + return 0.00, 0.00, 0.00, 0.00 + + + + + + + +def calculate_scale_factor(parameters, weight): + scale_factor = 1.0 - (1.0 - parameters.desolvationSurfaceScalingFactor)*(1.0 - weight) + return scale_factor + + +def calculate_weight(parameters, Nmass): + """ + calculating the atom-based desolvation weight + """ + weight = float(Nmass - parameters.Nmin)/float(parameters.Nmax - parameters.Nmin) + weight = min(1.0, weight) + weight = max(0.0, weight) + + return weight + + + +def squared_distance(atom1, atom2): +# if atom1 in atom2.squared_distances: +# return atom2.squared_distances[atom1] + + dx = atom2.x - atom1.x + dy = atom2.y - atom1.y + dz = atom2.z - atom1.z + + res = dx*dx+dy*dy+dz*dz + + return res + + +def distance(atom1, atom2): + return math.sqrt(squared_distance(atom1,atom2)) + + + +def get_smallest_distance(atoms1, atoms2): + res_distance =1e6 + res_atom1 = None + res_atom2 = None + + for atom1 in atoms1: + for atom2 in atoms2: + dist = squared_distance(atom1, atom2) + if dist < res_distance: + res_distance = dist + res_atom1 = atom1 + res_atom2 = atom2 + + return [res_atom1, math.sqrt(res_distance), res_atom2] + + +def calculatePairWeight(parameters, Nmass1, Nmass2): + """ + calculating the atom-pair based desolvation weight + """ + Nmass = Nmass1 + Nmass2 + Nmin = 2*parameters.Nmin + Nmax = 2*parameters.Nmax + weight = float(Nmass - Nmin)/float(Nmax - Nmin) + weight = min(1.0, weight) + weight = max(0.0, weight) + + return weight + + +def HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle=1.0): + """ + returns a hydrogen-bond interaction pKa shift + """ + if distance < cutoff[0]: + value = 1.00 + elif distance > cutoff[1]: + value = 0.00 + else: + value = 1.0-(distance-cutoff[0])/(cutoff[1]-cutoff[0]) + + dpKa = dpka_max*value*f_angle + + return abs(dpKa) + + + +def AngleFactorX(atom1=None, atom2=None, atom3=None, center=None): + """ + Calculates the distance and angle-factor from three atoms for back-bone interactions, + IMPORTANT: you need to use atom1 to be the e.g. ASP atom if distance is reset at return: [O1 -- H2-N3] + Also generalized to be able to be used for residue 'centers' for C=O COO interactions. + """ + + dX_32 = atom2.x - atom3.x + dY_32 = atom2.y - atom3.y + dZ_32 = atom2.z - atom3.z + + distance_23 = math.sqrt( dX_32*dX_32 + dY_32*dY_32 + dZ_32*dZ_32 ) + + dX_32 = dX_32/distance_23 + dY_32 = dY_32/distance_23 + dZ_32 = dZ_32/distance_23 + + if atom1 == None: + dX_21 = center[0] - atom2.x + dY_21 = center[1] - atom2.y + dZ_21 = center[2] - atom2.z + else: + dX_21 = atom1.x - atom2.x + dY_21 = atom1.y - atom2.y + dZ_21 = atom1.z - atom2.z + + distance_12 = math.sqrt( dX_21*dX_21 + dY_21*dY_21 + dZ_21*dZ_21 ) + + dX_21 = dX_21/distance_12 + dY_21 = dY_21/distance_12 + dZ_21 = dZ_21/distance_12 + + f_angle = dX_21*dX_32 + dY_21*dY_32 + dZ_21*dZ_32 + + + return distance_12, f_angle, distance_23 + + + +def hydrogen_bond_interaction(group1, group2, version): + + # find the smallest distance between interacting atoms + atoms1 = group1.get_interaction_atoms(group2) + atoms2 = group2.get_interaction_atoms(group1) + [closest_atom1, distance, closest_atom2] = Source.calculations.get_smallest_distance(atoms1, atoms2) + + if None in [closest_atom1, closest_atom2]: + print('Warning: Side chain interaction failed for %s and %s'%(group1.label, group2.label)) + return None + + # get the parameters + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1,closest_atom2) + + if dpka_max==None or None in cutoff: + return None + + # check that the closest atoms are close enough + if distance >= cutoff[1]: + return None + + # check that bond distance criteria is met + bond_distance_too_short = group1.atom.is_atom_within_bond_distance(group2.atom, + version.parameters.min_bond_distance_for_hydrogen_bonds,1) + if bond_distance_too_short: + return None + + # set the angle factor + # + # ---closest_atom1/2 + # . + # . + # the_hydrogen---closest_atom2/1--- + f_angle = 1.0 + if group2.type in version.parameters.angular_dependent_sidechain_interactions: + if closest_atom2.element == 'H': + heavy_atom = closest_atom2.bonded_atoms[0] + hydrogen = closest_atom2 + distance, f_angle, nada = Source.calculations.AngleFactorX(closest_atom1, hydrogen, heavy_atom) + else: + # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to + # the titratable atom than the hydrogen. In either case we set the angle factor + # to 0 + f_angle = 0.0 + + elif group1.type in version.parameters.angular_dependent_sidechain_interactions: + if closest_atom1.element == 'H': + heavy_atom = closest_atom1.bonded_atoms[0] + hydrogen = closest_atom1 + distance, f_angle, nada = Source.calculations.AngleFactorX(closest_atom2, hydrogen, heavy_atom) + else: + # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to + # the titratable atom than the hydrogen. In either case we set the angle factor + # to 0 + f_angle = 0.0 + + weight = version.calculatePairWeight(group1.Nmass, group2.Nmass) + + exception, value = version.checkExceptions(group1, group2) + #exception = False # circumventing exception + if exception == True: + """ do nothing, value should have been assigned """ + #print(" exception for %s %s %6.2lf" % (group1.label, group2.label, value)) + else: + value = version.calculateSideChainEnergy(distance, dpka_max, cutoff, weight, f_angle) + + # print('distance',distance) + # print('dpka_max',dpka_max) + # print('cutoff',cutoff) + # print('f_angle',f_angle) + # print('weight',weight) + # print('value',value) + # print('===============================================') + + return value + + + +def HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle=1.0): + """ + returns a hydrogen-bond interaction pKa shift + """ + if distance < cutoff[0]: + value = 1.00 + elif distance > cutoff[1]: + value = 0.00 + else: + value = 1.0-(distance-cutoff[0])/(cutoff[1]-cutoff[0]) + + dpKa = dpka_max*value*f_angle + + return abs(dpKa) + + + + +def electrostatic_interaction(group1, group2, distance, version): + + # check if we should do coulomb interaction at all + if not version.checkCoulombPair(group1, group2, distance): + return None + + weight = version.calculatePairWeight(group1.Nmass, group2.Nmass) + value = version.calculateCoulombEnergy(distance, weight) + + return value + + +def checkCoulombPair(parameters, group1, group2, distance): + """ + Checks if this Coulomb interaction should be done - a propka2.0 hack + """ + Npair = group1.Nmass + group2.Nmass + do_coulomb = True + + # check if both groups are titratable (ions are taken care of in determinants::setIonDeterminants) + if not (group1.titratable and group2.titratable): + do_coulomb = False + + # check if the distance is not too big + if distance > parameters.coulomb_cutoff2: + do_coulomb = False + + # check that desolvation is ok + if Npair < parameters.Nmin: + do_coulomb = False + + return do_coulomb + + +def CoulombEnergy(distance, weight, parameters): + """ + calculates the Coulomb interaction pKa shift based on Coulombs law + eps = 60.0 for the moment; to be scaled with 'weight' + """ + #diel = 80.0 - 60.0*weight + + diel = 160 - (160 -30)*weight + R = max(distance, parameters.coulomb_cutoff1) + scale = (R - parameters.coulomb_cutoff2)/(parameters.coulomb_cutoff1 -parameters.coulomb_cutoff2) + scale = max(0.0, scale) + scale = min(1.0, scale) + + dpka = 244.12/(diel*R) *scale + + return abs(dpka) + + + +def BackBoneReorganization(parameters, conformation): + """ + adding test stuff + """ + titratable_groups = conformation.get_backbone_reorganisation_groups() + BBC_groups = conformation.get_backbone_CO_groups() + + for titratable_group in titratable_groups: + weight = titratable_group.buried + dpKa = 0.00 + for BBC_group in BBC_groups: + center = [titratable_group.x, titratable_group.y, titratable_group.z] + distance, f_angle, nada = AngleFactorX(atom2=BBC_group.get_interaction_atoms(titratable_group)[0], + atom3=BBC_group.atom, + center=center) + if distance < 6.0 and f_angle > 0.001: + value = 1.0-(distance-3.0)/(6.0-3.0) + dpKa += 0.80*min(1.0, value) + + titratable_group.Elocl = dpKa*weight + return + + +# +# Exception methods +# + +def checkExceptions(version, group1, group2): + """ + checks for exceptions for this version - using defaults + """ + resType1 = group1.type + resType2 = group2.type + + if (resType1 == "COO" and resType2 == "ARG"): + exception, value = checkCooArgException(group1, group2, version) + elif (resType1 == "ARG" and resType2 == "COO"): + exception, value = checkCooArgException(group2, group1, version) + elif (resType1 == "COO" and resType2 == "COO"): + exception, value = checkCooCooException(group1, group2, version) + elif (resType1 == "CYS" and resType2 == "CYS"): + exception, value = checkCysCysException(group1, group2, version) + elif (resType1 == "COO" and resType2 == "HIS") or \ + (resType1 == "HIS" and resType2 == "COO"): + exception, value = checkCooHisException(group1, group2, version) + elif (resType1 == "OCO" and resType2 == "HIS") or \ + (resType1 == "HIS" and resType2 == "OCO"): + exception, value = checkOcoHisException(group1, group2, version) + elif (resType1 == "CYS" and resType2 == "HIS") or \ + (resType1 == "HIS" and resType2 == "CYS"): + exception, value = checkCysHisException(group1, group2, version) + else: + # do nothing, no exception for this pair + exception = False; value = None + + return exception, value + + + +def checkCooArgException(group_coo, group_arg, version): + """ + checking Coo-Arg exception: uses the two shortes unique distances (involving 2+2 atoms) + """ + + str = "xxx" + exception = True + value_tot = 0.00 + + #dpka_max = parameters.sidechain_interaction.get_value(group_coo.type,group_arg.type) + #cutoff = parameters.sidechain_cutoffs.get_value(group_coo.type,group_arg.type) + + # needs to be this way since you want to find shortest distance first + #print("--- exception for %s %s ---" % (group_coo.label, group_arg.label)) + atoms_coo = [] + atoms_coo.extend(group_coo.get_interaction_atoms(group_arg)) + atoms_arg = [] + atoms_arg.extend(group_arg.get_interaction_atoms(group_coo)) + + + for iter in ["shortest", "runner-up"]: + # find the closest interaction pair + [closest_coo_atom, distance, closest_arg_atom] = get_smallest_distance(atoms_coo, atoms_arg) + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_coo_atom,closest_arg_atom) + # calculate and sum up interaction energy + f_angle = 1.00 + if group_arg.type in version.parameters.angular_dependent_sidechain_interactions: + atom3 = closest_arg_atom.bonded_atoms[0] + distance, f_angle, nada = AngleFactorX(closest_coo_atom, closest_arg_atom, atom3) + + value = HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle) + #print(iter, closest_coo_atom, closest_arg_atom,distance,value) + value_tot += value + # remove closest atoms before we attemp to find the runner-up pair + atoms_coo.remove(closest_coo_atom) + atoms_arg.remove(closest_arg_atom) + + + return exception, value_tot + + +def checkCooCooException(group1, group2, version): + """ + checking Coo-Coo hydrogen-bond exception + """ + exception = True + [closest_atom1, distance, closest_atom2] = get_smallest_distance(group1.get_interaction_atoms(group2), + group2.get_interaction_atoms(group1)) + + #dpka_max = parameters.sidechain_interaction.get_value(group1.type,group2.type) + #cutoff = parameters.sidechain_cutoffs.get_value(group1.type,group2.type) + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1,closest_atom2) + f_angle = 1.00 + value = HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle) + weight = calculatePairWeight(version.parameters, group1.Nmass, group2.Nmass) + value = value * (1.0 + weight) + + return exception, value + + + +def checkCooHisException(group1, group2, version): + """ + checking Coo-His exception + """ + exception = False + if checkBuried(group1.Nmass, group2.Nmass): + exception = True + + return exception, version.parameters.COO_HIS_exception + + +def checkOcoHisException(group1, group2, version): + """ + checking Coo-His exception + """ + exception = False + if checkBuried(group1.Nmass, group2.Nmass): + exception = True + + return exception, version.parameters.OCO_HIS_exception + + +def checkCysHisException(group1, group2, version): + """ + checking Cys-His exception + """ + exception = False + if checkBuried(group1.Nmass, group2.Nmass): + exception = True + + return exception, version.parameters.CYS_HIS_exception + + +def checkCysCysException(group1, group2, version): + """ + checking Cys-Cys exception + """ + exception = False + if checkBuried(group1.Nmass, group2.Nmass): + exception = True + + return exception, version.parameters.CYS_CYS_exception + + + + + +def checkBuried(Nmass1, Nmass2): + """ + returns True if an interaction is buried + """ + + if (Nmass1 + Nmass2 <= 900) and (Nmass1 <= 400 or Nmass2 <= 400): + return False + else: + return True diff --git a/Source/conformation_container.py b/Source/conformation_container.py new file mode 100644 index 0000000..c327390 --- /dev/null +++ b/Source/conformation_container.py @@ -0,0 +1,439 @@ +# +# Container for molecular conformations +# + +import Source.group, Source.determinants, Source.determinant, Source.ligand, Source.output, Source.coupled_groups, functools + +class Conformation_container: + def __init__(self, name='', parameters=None, molecular_container=None): + self.molecular_container = molecular_container + self.name=name + self.parameters=parameters + self.atoms = [] + self.groups = [] + self.chains = [] + self.current_iter_item = 0 + + self.marvin_pkas_calculated = False + self.non_covalently_coupled_groups = False + + return + + + # + # Group related methods + # + def extract_groups(self): + """ Generates at list of molecular groups needed for calculating pKa values """ + for atom in self.get_non_hydrogen_atoms(): + # has this atom been checked for groups? + if atom.groups_extracted == 0: + self.validate_group(Source.group.is_group(self.parameters, atom)) + # if the atom has been checked in a another conformation, check if it has a + # group that should be used in this conformation as well + elif atom.group: + self.validate_group(atom.group) + + return + + def additional_setup_when_reading_input_file(self): + + # if a group is coupled and we are reading a .propka_input file, + # some more configuration might be needed + + # print coupling map + map = Source.output.make_interaction_map('Covalent coupling map for %s'%self, + self.get_covalently_coupled_groups(), + lambda g1,g2: g1 in g2.covalently_coupled_groups) + print(map) + + # check if we should set a common charge centre as well + if self.parameters.common_charge_centre: + self.set_common_charge_centres() + + return + + def set_common_charge_centres(self): + for system in self.get_coupled_systems(self.get_covalently_coupled_groups(), Source.group.Group.get_covalently_coupled_groups): + # make a list of the charge centre coordinates + all_coordinates = list(map(lambda g: [g.x, g.y, g.z], system)) + # find the common charge center + ccc = functools.reduce(lambda g1,g2: [g1[0]+g2[0], g1[1]+g2[1], g1[2]+g2[2]], all_coordinates) + ccc = list(map(lambda c: c/len(system), ccc)) + # set the ccc for all coupled groups in this system + for g in system: + [g.x, g.y, g.z] = ccc + g.common_charge_centre = True + return + + + + + def find_covalently_coupled_groups(self): + """ Finds covalently coupled groups and sets common charge centres if needed """ + for group in [ group for group in self.groups if group.titratable]: + + # Find covalently bonded groups + bonded_groups = self.find_bonded_titratable_groups(group.atom, 1, group.atom) + + # couple groups + for cg in bonded_groups: + if cg in group.covalently_coupled_groups: + continue + if cg.atom.sybyl_type == group.atom.sybyl_type: + group.couple_covalently(cg) + + # check if we should set a common charge centre as well + if self.parameters.common_charge_centre: + self.set_common_charge_centres() + + # print coupling map + map = Source.output.make_interaction_map('Covalent coupling map for %s'%self, + #self.get_titratable_groups(), + self.get_covalently_coupled_groups(), + lambda g1,g2: g1 in g2.covalently_coupled_groups) + print(map) + + + return + + + def find_non_covalently_coupled_groups(self, verbose=False): + # check if non-covalent coupling has already been set up in an input file + if len(list(filter(lambda g: len(g.non_covalently_coupled_groups)>0, self.get_titratable_groups())))>0: + self.non_covalently_coupled_groups = True + + Source.coupled_groups.nccg.identify_non_covalently_coupled_groups(self,verbose=verbose) + + # re-do the check + if len(list(filter(lambda g: len(g.non_covalently_coupled_groups)>0, self.get_titratable_groups())))>0: + self.non_covalently_coupled_groups = True + + return + + + def find_bonded_titratable_groups(self, atom, no_bonds, original_atom): + res = set() + for ba in atom.bonded_atoms: + # skip the original atom + if ba == original_atom: + continue + # check if this atom has a titratable group + if ba.group and ba.group.titratable and no_bonds <= self.parameters.coupling_max_number_of_bonds: + res.add(ba.group) + # check for titratable groups bonded to this atom + if no_bonds < self.parameters.coupling_max_number_of_bonds: + res |= self.find_bonded_titratable_groups(ba,no_bonds+1, original_atom) + + return res + + + def validate_group(self, group): + """ Checks if we want to include this group in the calculations """ + # Is it recognized as a group at all? + if not group: + return + + # Other checks (include ligands, which chains etc.) + + # if all ok, accept the group + self.accept_group(group) + return + + + def accept_group(self, group): + # set up the group + group.parameters=self.parameters + group.setup() + + # and store it + self.groups.append(group) + return + + + # + # pka calculation methods + # + + def calculate_pka(self, version, options): + print('\nCalculating pKas for',self) + + # calculate desolvation + for group in self.get_titratable_groups()+self.get_ions(): + version.calculate_desolvation(group) + + # calculate backbone interactions + Source.determinants.setBackBoneDeterminants(self.get_titratable_groups(), self.get_backbone_groups(), version) + + # setting ion determinants + Source.determinants.setIonDeterminants(self, version) + + # calculating the back-bone reorganization/desolvation term + version.calculateBackBoneReorganization(self) + + # setting remaining non-iterative and iterative side-chain & Coulomb interaction determinants + Source.determinants.setDeterminants(self.get_sidechain_groups(), version=version, options=options) + + # calculating the total pKa values + for group in self.groups: group.calculate_total_pka() + + # take coupling effects into account + penalised_labels = self.coupling_effects() + + if self.parameters.remove_penalised_group and len(penalised_labels)>0: + print('Removing penalised groups!!!') + + for g in self.get_titratable_groups(): + g.remove_determinants(penalised_labels) + + # re-calculating the total pKa values + for group in self.groups: group.calculate_total_pka() + + + return + + + def coupling_effects(self): + # + # Bases: The group with the highest pKa (the most stable one in the + # charged form) will be the first one to adopt a proton as pH + # is lowered and this group is allowed to titrate. + # The remaining groups are penalised + # + # Acids: The group with the highest pKa (the least stable one in the + # charged form) will be the last group to loose the proton as + # pH is raised and will be penalised. + # The remaining groups are allowed to titrate. + # + penalised_labels = [] + + for all_groups in self.get_coupled_systems(self.get_covalently_coupled_groups(), + Source.group.Group.get_covalently_coupled_groups): + + # check if we should share determinants + if self.parameters.shared_determinants: + self.share_determinants(all_groups) + + # find the group that has the highest pKa value + first_group = max(all_groups, key=lambda g:g.pka_value) + + # In case of acids + if first_group.charge < 0: + first_group.coupled_titrating_group = min(all_groups, key=lambda g:g.pka_value) + penalised_labels.append(first_group.label) # group with the highest pKa is penalised + + # In case of bases + else: + for a in all_groups: + if a == first_group: + continue # group with the highest pKa is allowed to titrate... + a.coupled_titrating_group = first_group + penalised_labels.append(a.label) #... and the rest is penalised + + return penalised_labels + + + def share_determinants(self, groups): + + # make a list of the determinants to share + types = ['sidechain','backbone','coulomb'] + for type in types: + # find maximum value for each determinant + max_dets = {} + for g in groups: + for d in g.determinants[type]: + # update max dets + if d.group not in max_dets.keys(): + max_dets[d.group] = d.value + else: + max_dets[d.group] = max(d.value, max_dets[d.group], key= lambda v: abs(v)) + + # overwrite/add maximum value for each determinant + for det_group in max_dets.keys(): + new_determinant = Source.determinant.Determinant(det_group, max_dets[det_group]) + for g in groups: + g.set_determinant(new_determinant,type) + + + return + + + def get_coupled_systems(self, groups, get_coupled_groups): + """ This generator will yield one covalently coupled system at the time """ + groups = set(groups) + while len(groups)>0: + # extract a system of coupled groups ... + system = set() + self.get_a_coupled_system_of_groups(groups.pop(), system, get_coupled_groups) + # ... and remove them from the list + groups -= system + + yield system + + return + + + def get_a_coupled_system_of_groups(self, new_group, coupled_groups, get_coupled_groups): + coupled_groups.add(new_group) + [self.get_a_coupled_system_of_groups(c, coupled_groups, get_coupled_groups) for c in get_coupled_groups(new_group) if c not in coupled_groups] + return + + + # + # Energy/summary-related methods + # + def calculate_folding_energy(self, pH=None, reference=None): + ddg = 0.0 + for group in self.groups: + #print('Folding energy for %s at pH %f: %f'%(group,pH,group.calculate_folding_energy(self.parameters, pH=pH, reference=reference))) + ddg += group.calculate_folding_energy(self.parameters, pH=pH, reference=reference) + + return ddg + + def calculate_charge(self, parmaeters, pH=None): + unfolded = folded = 0.0 + for group in self.get_titratable_groups(): + unfolded += group.calculate_charge(parmaeters, pH=pH, state='unfolded') + folded += group.calculate_charge(parmaeters, pH=pH, state='folded') + + return unfolded,folded + + + # + # conformation/bookkeeping/atom methods + # + + def get_backbone_groups(self): + """ returns all backbone groups needed for the pKa calculations """ + return [group for group in self.groups if 'BB' in group.type] + + def get_sidechain_groups(self): + """ returns all sidechain groups needed for the pKa calculations """ + return [group for group in self.groups if ('BB' not in group.type\ + and not group.atom.cysteine_bridge)] + + def get_covalently_coupled_groups(self): + return [g for g in self.groups if len(g.covalently_coupled_groups)>0] + + def get_non_covalently_coupled_groups(self): + return [g for g in self.groups if len(g.non_covalently_coupled_groups)>0] + + def get_backbone_NH_groups(self): + """ returns all NH backbone groups needed for the pKa calculations """ + return [group for group in self.groups if group.type == 'BBN'] + + def get_backbone_CO_groups(self): + """ returns all CO backbone groups needed for the pKa calculations """ + return [group for group in self.groups if group.type == 'BBC'] + + def get_groups_in_residue(self, residue): + return [group for group in self.groups if group.residue_type == residue] + + def get_titratable_groups(self): + return [group for group in self.groups if group.titratable] + + def get_titratable_groups_and_cysteine_bridges(self): + return [group for group in self.groups if group.titratable or group.residue_type == 'CYS'] + + def get_acids(self): + return [group for group in self.groups if (group.residue_type in self.parameters.acid_list + and not group.atom.cysteine_bridge)] + + def get_backbone_reorganisation_groups(self): + return [group for group in self.groups if (group.residue_type in self.parameters.backbone_reorganisation_list + and not group.atom.cysteine_bridge)] + + def get_ions(self): + return [group for group in self.groups if group.residue_type in self.parameters.ions.keys()] + + def get_group_names(self, list): + return [group for group in self.groups if group.type in list] + + + def get_ligand_atoms(self): + return [atom for atom in self.atoms if atom.type=='hetatm'] + + def get_heavy_ligand_atoms(self): + return [atom for atom in self.atoms if atom.type=='hetatm' and atom.element != 'H'] + + def get_chain(self,chain): + return [atom for atom in self.atoms if atom.chainID != chain] + + + def add_atom(self, atom): + #print(self,'adding',atom) + self.atoms.append(atom) + if not atom.conformation_container: + atom.conformation_container = self + if not atom.molecular_container: + atom.molecular_container = self.molecular_container + + # store chain id for bookkeeping + if not atom.chainID in self.chains: + self.chains.append(atom.chainID) + + return + + def copy_atom(self, atom): + new_atom = atom.makeCopy() + self.atoms.append(new_atom) + new_atom.conformation_container = self + + return + + def get_non_hydrogen_atoms(self): + return [atom for atom in self.atoms if atom.element!='H'] + + + def top_up(self, other): + """ Tops up self with all atoms found in other but not in self """ + for atom in other.atoms: + if not self.have_atom(atom): + self.copy_atom(atom) + return + + def have_atom(self, atom): + res = False + for a in self.atoms: + if a.residue_label == atom.residue_label: + res = True + break + return res + + def find_group(self, group): + for g in self.groups: + if g.atom.residue_label == group.atom.residue_label: + if g.type == group.type: + return g + return False + + + def set_ligand_atom_names(self): + for atom in self.get_ligand_atoms(): + Source.ligand.assign_sybyl_type(atom) + return + + + + def __str__(self): + return'Conformation container %s with %d atoms and %d groups'%(self.name,len(self),len(self.groups)) + + def __len__(self): + return len(self.atoms) + + def sort_atoms(self): + # sort the atoms ... + self.atoms.sort(key=self.sort_atoms_key) + # ... and re-number them + for i in range(len(self.atoms)): + self.atoms[i].numb = i+1 + + return + + def sort_atoms_key(self, atom): + key = ord(atom.chainID)*1e7 + key += atom.resNumb*1000 + if len(atom.name) > len(atom.element): + key += ord(atom.name[len(atom.element)]) + #print(atom,ord(atom.name[len(atom.element)]), '|%s||%s|'%(atom.name,atom.element)) + return key diff --git a/Source/coupled_groups.py b/Source/coupled_groups.py new file mode 100644 index 0000000..06d966f --- /dev/null +++ b/Source/coupled_groups.py @@ -0,0 +1,318 @@ + +import math, Source.output, Source.group, Source.lib, itertools + + +class non_covalently_couple_groups: + def __init__(self): + + self.parameters = None + + # self.do_intrinsic = False + # self.do_pair_wise = False + self.do_prot_stat = True + + return + + + # + # Methods for finding coupled groups + # + def is_coupled_protonation_state_probability(self, group1, group2, energy_method, return_on_fail=True): + + # check if the interaction energy is high enough + interaction_energy = max(self.get_interaction(group1,group2), self.get_interaction(group2,group1)) + + if interaction_energy<=self.parameters.min_interaction_energy and return_on_fail: + return {'coupling_factor':-1.0} + + # calculate intrinsic pKa's, if not already done + for group in [group1, group2]: + if not hasattr(group, 'intrinsic_pKa'): + group.calculate_intrinsic_pka() + + use_pH = self.parameters.pH + if self.parameters.pH == 'variable': + use_pH = min(group1.pka_value, group2.pka_value) + + default_energy = energy_method(pH=use_pH, reference=self.parameters.reference) + default_pka1 = group1.pka_value + default_pka2 = group2.pka_value + + # check that pka values are within relevant limits + if max(default_pka1, default_pka2) < self.parameters.min_pka or \ + min(default_pka1, default_pka2) > self.parameters.max_pka: + if return_on_fail: + return {'coupling_factor':-1.0} + + # Swap interactions and re-calculate pKa values + self.swap_interactions([group1], [group2]) + group1.calculate_total_pka() + group2.calculate_total_pka() + + # store swapped energy and pka's + swapped_energy = energy_method(pH=use_pH, reference=self.parameters.reference) + swapped_pka1 = group1.pka_value + swapped_pka2 = group2.pka_value + + pka_shift1 = swapped_pka1 - default_pka1 + pka_shift2 = swapped_pka2 - default_pka2 + + # Swap back to original protonation state + self.swap_interactions([group1], [group2]) + group1.calculate_total_pka() + group2.calculate_total_pka() + + # check difference in free energy + if abs(default_energy - swapped_energy) > self.parameters.max_free_energy_diff and return_on_fail: + return {'coupling_factor':-1.0} + + # check pka shift + if max(abs(pka_shift1), abs(pka_shift2)) < self.parameters.min_swap_pka_shift and return_on_fail: + return {'coupling_factor':-1.0} + + # check intrinsic pka diff + if abs(group1.intrinsic_pKa - group2.intrinsic_pKa) > self.parameters.max_intrinsic_pKa_diff and return_on_fail: + return {'coupling_factor':-1.0} + + # if everything is OK, calculate the coupling factor and return all info + factor = self.get_free_energy_diff_factor(default_energy, swapped_energy)*\ + self.get_pka_diff_factor(group1.intrinsic_pKa, group2.intrinsic_pKa)*\ + self.get_interaction_factor(interaction_energy) + + return {'coupling_factor':factor, + 'default_energy':default_energy, + 'swapped_energy':swapped_energy, + 'interaction_energy':interaction_energy, + 'swapped_pka1':swapped_pka1, + 'swapped_pka2':swapped_pka2, + 'pka_shift1':pka_shift1, + 'pka_shift2':pka_shift2, + 'pH':use_pH} + + + + # + # Methods for calculating the coupling factor + # + + def get_pka_diff_factor(self, pka1, pka2): + intrinsic_pka_diff = abs(pka1-pka2) + res = 0.0 + if intrinsic_pka_diff <= self.parameters.max_intrinsic_pKa_diff: + res = 1-(intrinsic_pka_diff/self.parameters.max_intrinsic_pKa_diff)**2 + + return res + + def get_free_energy_diff_factor(self, energy1, energy2): + free_energy_diff = abs(energy1-energy2) + res = 0.0 + if free_energy_diff <= self.parameters.max_free_energy_diff: + res = 1-(free_energy_diff/self.parameters.max_free_energy_diff)**2 + return res + + def get_interaction_factor(self, interaction_energy): + res = 0.0 + interaction_energy = abs(interaction_energy) + if interaction_energy >= self.parameters.min_interaction_energy: + res = (interaction_energy-self.parameters.min_interaction_energy)/(1.0+interaction_energy-self.parameters.min_interaction_energy) + + return res + + + + + def identify_non_covalently_coupled_groups(self, conformation, verbose=True): + """ Finds coupled residues in protein """ + + self.parameters = conformation.parameters + if verbose: + print('') + print(' Warning: When using the -d option, pKa values based on \'swapped\' interactions') + print(' will be writting to the output .pka file') + print('') + print('-'*103) + print(' Detecting non-covalently coupled residues') + print('-'*103) + print(' Maximum pKa difference: %4.2f pKa units'%self.parameters.max_intrinsic_pKa_diff) + print(' Minimum interaction energy: %4.2f pKa units'%self.parameters.min_interaction_energy) + print(' Maximum free energy diff.: %4.2f pKa units'%self.parameters.max_free_energy_diff) + print(' Minimum swap pKa shift: %4.2f pKa units'%self.parameters.min_swap_pka_shift) + print(' pH: %6s ' %str(self.parameters.pH)) + print(' Reference: %s' %self.parameters.reference) + print(' Min pKa: %4.2f'%self.parameters.min_pka) + print(' Max pKa: %4.2f'%self.parameters.max_pka) + print('') + + # find coupled residues + titratable_groups = conformation.get_titratable_groups() + + if not conformation.non_covalently_coupled_groups: + for g1 in titratable_groups: + for g2 in titratable_groups: + if g1==g2: + break + + if not g1 in g2.non_covalently_coupled_groups and self.do_prot_stat: + data = self.is_coupled_protonation_state_probability(g1, g2,conformation.calculate_folding_energy) + if data['coupling_factor'] >0.0: + g1.couple_non_covalently(g2) + + if verbose: + self.print_out_swaps(conformation) + + return + + def print_out_swaps(self, conformation, verbose=True): + map = Source.output.make_interaction_map('Non-covalent coupling map for %s'%conformation, + conformation.get_non_covalently_coupled_groups(), + lambda g1,g2: g1 in g2.non_covalently_coupled_groups) + print(map) + + for system in conformation.get_coupled_systems(conformation.get_non_covalently_coupled_groups(),Source.group.Group.get_non_covalently_coupled_groups): + self.print_system(conformation, list(system)) + return + + def print_system(self, conformation, system): + + print('System containing %d groups:'%len(system)) + + # make list of interactions withi this system + interactions = list(itertools.combinations(system,2)) + + # print out coupling info for each interaction + coup_info = '' + for interaction in interactions: + data = self.is_coupled_protonation_state_probability(interaction[0], interaction[1],conformation.calculate_folding_energy, return_on_fail=False) + coup_info += self.make_data_to_string(data,interaction[0],interaction[1])+'\n\n' + print(coup_info) + + # make list of possible combinations of swap to try out + combinations = Source.lib.generate_combinations(interactions) + + # Make possible swap combinations + swap_info = '' + swap_info += self.print_determinants_section(system,'Original') + + for combination in combinations: + # Tell the user what is swap in this combination + swap_info += 'Swapping the following interactions:\n' + for interaction in combination: + swap_info += ' %s %s\n'%(interaction[0].label, interaction[1].label) + + # swap... + for interaction in combination: + self.swap_interactions([interaction[0]],[interaction[1]]) + + swap_info += self.print_determinants_section(system,'Swapped') + # ...and swap back + #for interaction in combination: + # self.swap_interactions([interaction[0]], [interaction[1]]) + + print(swap_info) + + return + # + # Interaction and swapping methods + # + + def get_interaction(self, group1, group2, include_side_chain_hbs = True): + determinants = group1.determinants['coulomb'] + if include_side_chain_hbs: + determinants = group1.determinants['sidechain'] + group1.determinants['coulomb'] + + interaction_energy = 0.0 + for det in determinants: + if group2 == det.group: + interaction_energy += det.value + + return interaction_energy + + + + def print_determinants_section(self, system, tag): + all_labels = [g.label for g in system] + s = ' '+'-'*113+'\n' + for group in system: + s += self.tagged_print(' %-8s|'%tag,group.getDeterminantString(), all_labels) + + return s+'\n' + + def swap_interactions(self, groups1, groups2, include_side_chain_hbs = True): + + for i in range(len(groups1)): + group1 = groups1[i] + group2 = groups2[i] + + # swap the interactions! + self.transfer_determinant(group1.determinants['coulomb'], group2.determinants['coulomb'], group1.label, group2.label) + if include_side_chain_hbs: + self.transfer_determinant(group1.determinants['sidechain'], group2.determinants['sidechain'], group1.label, group2.label) + + # re-calculate pKa values + group1.calculate_total_pka() + group2.calculate_total_pka() + + return + + + def transfer_determinant(self, determinants1, determinants2, label1, label2): + # find out what to transfer... + from1to2 = [] + from2to1 = [] + for det in determinants1: + if det.label == label2: + from1to2.append(det) + + for det in determinants2: + if det.label == label1: + from2to1.append(det) + + # ...and transfer it! + for det in from1to2: + det.label = label1 + determinants2.append(det) + determinants1.remove(det) + + for det in from2to1: + det.label = label2 + determinants1.append(det) + determinants2.remove(det) + + return + + # + # Output methods + # + + def tagged_print(self, tag, s, labels): + s = "%s %s"%(tag,s) + s = s.replace('\n','\n%s '%tag) + for label in labels: + s = s.replace(label, '\033[31m%s\033[30m'%label) + return s+'\n' + + + def make_data_to_string(self, data, group1, group2): + s = """ %s and %s coupled (prot.state): %5.2f + Energy levels: %6.2f, %6.2f (difference: %6.2f) at pH %6.2f + Interaction energy: %6.2f + Intrinsic pka's: %6.2f, %6.2f (difference: %6.2f) + Swapped pKa's: %6.2f, %6.2f (difference: %6.2f, %6.2f)"""%(group1.label, + group2.label, + data['coupling_factor'], + data['default_energy'], data['swapped_energy'], + data['default_energy'] - data['swapped_energy'], + data['pH'], + data['interaction_energy'], + group1.intrinsic_pKa, + group2.intrinsic_pKa, + group1.intrinsic_pKa-group2.intrinsic_pKa, + data['swapped_pka1'], + data['swapped_pka2'], + data['pka_shift1'], + data['pka_shift2']) + + return s + + +nccg = non_covalently_couple_groups() diff --git a/Source/determinant.py b/Source/determinant.py new file mode 100644 index 0000000..3be7a02 --- /dev/null +++ b/Source/determinant.py @@ -0,0 +1,26 @@ + +class Determinant: + """ + Determinant class - set up for later structurization + """ + + def __init__(self, group, value): + """ + Contructer of determinant object - simple, but helps in creating structure! + """ + self.group = group + self.label = group.label + self.value = value + + return + + def add(self, value): + """ + adding a value to determinant + """ + self.value += value + + return + + def __str__(self): + return '%s: %8.2f'%(self.label,self.value) diff --git a/Source/determinants.py b/Source/determinants.py new file mode 100644 index 0000000..8f2dc60 --- /dev/null +++ b/Source/determinants.py @@ -0,0 +1,238 @@ +import math, time + +import Source.iterative, Source.lib, Source.vector_algebra +import Source.calculations +from Source.determinant import Determinant + + +def setDeterminants(propka_groups, version=None, options=None): + """ + adding side-chain and coulomb determinants/perturbations to all residues - note, backbone determinants are set separately + """ + + iterative_interactions = [] + # --- NonIterative section ---# + for group1 in propka_groups: + for group2 in propka_groups: + if group1 == group2: + break + # do not calculate interactions for coupled groups + if group2 in group1.covalently_coupled_groups: + break + + distance = Source.calculations.distance(group1, group2) + + if distance < version.parameters.coulomb_cutoff2: + interaction_type = version.parameters.interaction_matrix.get_value(group1.type,group2.type) + if interaction_type == 'I': + Source.iterative.addtoDeterminantList(group1, group2, distance, iterative_interactions, version=version) + elif interaction_type == 'N': + addDeterminants(group1, group2, distance, version) + + + # --- Iterative section ---# + Source.iterative.addDeterminants(iterative_interactions, version, options=options) + + +def addDeterminants(group1, group2, distance, version): + """ + adding determinants/perturbations, distance(R1, R2) < coulomb_cutoff always + """ + + # side-chain determinant + addSidechainDeterminants(group1, group2, version) + + # Coulomb determinant + addCoulombDeterminants(group1, group2, distance, version) + + return + +def addSidechainDeterminants(group1, group2, version=None): + """ + adding side-chain determinants/perturbations + Note, resNumb1 > resNumb2 + """ + + hbond_interaction = version.hydrogen_bond_interaction(group1, group2) + + if hbond_interaction: + + if group1.charge == group2.charge: + # acid pair or base pair + if group1.model_pka < group2.model_pka: + newDeterminant1 = Determinant(group2, -hbond_interaction) + newDeterminant2 = Determinant(group1, hbond_interaction) + else: + newDeterminant1 = Determinant(group2, hbond_interaction) + newDeterminant2 = Determinant(group1, -hbond_interaction) + else: + newDeterminant1 = Determinant(group2, hbond_interaction*group1.charge) + newDeterminant2 = Determinant(group1, hbond_interaction*group2.charge) + + group1.determinants['sidechain'].append(newDeterminant1) + group2.determinants['sidechain'].append(newDeterminant2) + + return + +def addCoulombDeterminants(group1, group2, distance, version): + """ + adding NonIterative Coulomb determinants/perturbations + """ + + coulomb_interaction = version.electrostatic_interaction(group1, group2, distance) + + if coulomb_interaction: + Q1 = group1.charge + Q2 = group2.charge + + # assigning the Coulombic interaction + if Q1 < 0.0 and Q2 < 0.0: + """ both are acids """ + addCoulombAcidPair(group1, group2, coulomb_interaction) + elif Q1 > 0.0 and Q2 > 0.0: + """ both are bases """ + addCoulombBasePair(group1, group2, coulomb_interaction) + else: + """ one of each """ + addCoulombIonPair(group1, group2, coulomb_interaction) + + return + + +def addCoulombAcidPair(object1, object2, value): + """ + Adding the Coulomb interaction (an acid pair): + the higher pKa is raised + """ + + if object1.model_pka > object2.model_pka: + newDeterminant = Determinant(object2, value) + object1.determinants['coulomb'].append(newDeterminant) + else: + newDeterminant = Determinant(object1, value) + object2.determinants['coulomb'].append(newDeterminant) + + +def addCoulombBasePair(object1, object2, value): + """ + Adding the Coulomb interaction (a base pair): + the lower pKa is lowered + """ + if object1.model_pka < object2.model_pka: + newDeterminant = Determinant(object2, -value) + object1.determinants['coulomb'].append(newDeterminant) + else: + newDeterminant = Determinant(object1, -value) + object2.determinants['coulomb'].append(newDeterminant) + + +def addCoulombIonPair(object1, object2, value): + """ + Adding the Coulomb interaction (an acid-base pair): + the pKa of the acid is lowered & the pKa of the base is raised + """ + + # residue1 + Q1 = object1.charge + newDeterminant = Determinant(object2, Q1*value) + object1.determinants['coulomb'].append(newDeterminant) + + # residue2 + Q2 = object2.charge + newDeterminant = Determinant(object1, Q2*value) + object2.determinants['coulomb'].append(newDeterminant) + + + + +def setIonDeterminants(conformation_container, version): + """ + adding ion determinants/perturbations + """ + for titratable_group in conformation_container.get_titratable_groups(): + for ion_group in conformation_container.get_ions(): + squared_distance = Source.calculations.squared_distance(titratable_group, ion_group) + if squared_distance < version.parameters.coulomb_cutoff2_squared: + weight = version.calculatePairWeight(titratable_group.Nmass, ion_group.Nmass) + # the pKa of both acids and bases are shifted up by negative ions (and vice versa) + value = (-ion_group.charge) * version.calculateCoulombEnergy(math.sqrt(squared_distance), weight) + newDeterminant = Determinant(ion_group, value) + titratable_group.determinants['coulomb'].append(newDeterminant) + + return + +def setBackBoneDeterminants(titratable_groups, backbone_groups, version): + + for titratable_group in titratable_groups: + # find out which backbone groups this titratable is interacting with + for backbone_group in backbone_groups: + # find the interacting atoms + backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group) + titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids + + # find the smallest distance + [backbone_atom, distance, titratable_atom] = Source.calculations.get_smallest_distance(backbone_interaction_atoms, + titratable_group_interaction_atoms) + # get the parameters + parameters = version.get_backbone_hydrogen_bond_parameters(backbone_atom, titratable_atom) + if not parameters: + continue + [dpKa_max, [cutoff1, cutoff2]] = parameters + + + if distance < cutoff2: + # calculate angle factor + f_angle = 1.0 + # for BBC groups, the hydrogen is on the titratable group + # + # Titra. + # / + # H + # . + # O + # || + # C + if backbone_group.type == 'BBC': + if titratable_group.type in version.parameters.angular_dependent_sidechain_interactions: + if titratable_atom.element == 'H': + heavy_atom = titratable_atom.bonded_atoms[0] + hydrogen_atom = titratable_atom + [d1, f_angle, d2] = Source.calculations.AngleFactorX(atom1=heavy_atom, + atom2=hydrogen_atom, + atom3=backbone_atom) + else: + # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to + # the titratable atom than the hydrogen. In either case we set the angle factor + # to 0 + f_angle = 0.0 + + # for BBN groups, the hydrogen is on the backbone group + # + # Titra. + # . + # H + # | + # N + # / \ + if backbone_group.type == 'BBN': + if backbone_atom.element == 'H': + backbone_N = backbone_atom.bonded_atoms[0] + backbone_H = backbone_atom + [d1, f_angle, d2] = Source.calculations.AngleFactorX(atom1=titratable_atom, + atom2=backbone_H, + atom3=backbone_N) + else: + # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to + # the titratable atom than the hydrogen. In either case we set the angle factor + # to 0 + f_angle = 0.0 + + + if f_angle > 0.001: + value = titratable_group.charge * Source.calculations.HydrogenBondEnergy(distance, dpKa_max, [cutoff1,cutoff2], f_angle) + + newDeterminant = Determinant(backbone_group, value) + titratable_group.determinants['backbone'].append(newDeterminant) + + + return diff --git a/Source/group.py b/Source/group.py new file mode 100644 index 0000000..105b742 --- /dev/null +++ b/Source/group.py @@ -0,0 +1,1315 @@ +# +# Class for storing groups important for propka calculations +# + +import Source.ligand, Source.determinant, Source.ligand_pka_values, math, Source.protonate + +my_protonator = Source.protonate.Protonate(verbose=False) + +expected_atoms_acid_interactions = { + 'COO':{'O':2}, + 'HIS':{'H':2, 'N':2}, + 'CYS':{'S':1}, + 'TYR':{'O':1}, + 'LYS':{'N':1}, + 'ARG':{'H':5, 'N':3}, + 'ROH':{'O':1}, + 'AMD':{'H':2, 'N':1}, + 'TRP':{'H':1, 'N':1}, + 'N+': {'N':1}, + 'C-': {'O':2}, + 'BBN':{'H':1, 'N':1,}, + 'BBC':{'O':1}, + 'NAR':{'H':1, 'N':1}, + 'NAM':{'H':1, 'N':1}, + 'F': {'F':1}, + 'Cl': {'Cl':1}, + 'OH': {'H':1, 'O':1}, + 'OP': {'O':1}, + 'O3': {'O':1}, + 'O2': {'O':1}, + 'SH': {'S':1}, + 'CG': {'H':5, 'N':3}, + 'C2N':{'H':4, 'N':2}, + 'OCO':{'O':2}, + 'N30':{'H':4, 'N':1}, + 'N31':{'H':3, 'N':1}, + 'N32':{'H':2, 'N':1}, + 'N33':{'H':1, 'N':1}, + 'NP1':{'H':2, 'N':1}, + 'N1' :{'N':1} +} + +expected_atoms_base_interactions = { + 'COO':{'O':2}, + 'HIS':{'N':2}, + 'CYS':{'S':1}, + 'TYR':{'O':1}, + 'LYS':{'N':1}, + 'ARG':{'N':3}, + 'ROH':{'O':1}, + 'AMD':{'O':1}, + 'TRP':{'N':1}, + 'N+': {'N':1}, + 'C-': {'O':2}, + 'BBN':{'H':1, 'N':1,}, + 'BBC':{'O':1}, + 'NAR':{'H':1, 'N':1}, + 'NAM':{'H':1, 'N':1}, + 'F': {'F':1}, + 'Cl': {'Cl':1}, + 'OH': {'H':1, 'O':1}, + 'OP': {'O':1}, + 'O3': {'O':1}, + 'O2': {'O':1}, + 'SH': {'S':1}, + 'CG': {'N':3}, + 'C2N':{'N':2}, + 'OCO':{'O':2}, + 'N30':{'N':1}, + 'N31':{'N':1}, + 'N32':{'N':1}, + 'N33':{'N':1}, + 'NP1':{'N':1}, + 'N1' :{'N':1} +} + + +class Group: + def __init__(self, atom): + #print('Made new %s group from %s'%(type,atom)) + self.atom = atom + self.type = '' + atom.group = self + + # set up data structures + self.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} + self.pka_value = 0.0 + self.model_pka = 0.0 + + self.Emass = 0.0 + self.Nmass = 0.0 + self.Elocl = 0.0 + self.Nlocl = 0.0 + self.buried = 0.0 + self.x = 0.0 + self.y = 0.0 + self.z = 0.0 + self.charge = 0 + self.interaction_atoms_for_acids = [] + self.interaction_atoms_for_bases = [] + self.model_pka_set = False + + # information on covalent and non-covalent coupling + self.non_covalently_coupled_groups = [] + self.covalently_coupled_groups = [] + self.coupled_titrating_group = None + self.common_charge_centre = False + + + self.residue_type = self.atom.resName + if self.atom.terminal: + self.residue_type = self.atom.terminal + + if self.atom.type=='atom': + self.label = '%-3s%4d%2s'%(self.residue_type, atom.resNumb, atom.chainID) + elif self.atom.resName in ['DA ','DC ','DG ','DT ']: + self.label = '%1s%1s%1s%4d%2s'%(self.residue_type[1], + atom.element, + atom.name.replace('\'','')[-1], + atom.resNumb, + atom.chainID) + +# self.label = '%1s%1s%1s%4d%2s'%(self.residue_type[1], atom.element,atom.name[-1], atom.resNumb, atom.chainID) + else: + self.label = '%-3s%4s%2s'%(self.residue_type, atom.name, atom.chainID) + + + # container for squared distances + self.squared_distances = {} + + return + + + # + # Coupling-related methods + # + def couple_covalently(self, other): + """ Couple this group with another group """ + # do the coupling + if not other in self.covalently_coupled_groups: + self.covalently_coupled_groups.append(other) + + if not self in other.covalently_coupled_groups: + other.covalently_coupled_groups.append(self) + + return + + def couple_non_covalently(self, other): + """ Couple this group with another group """ + # do the coupling + if not other in self.non_covalently_coupled_groups: + self.non_covalently_coupled_groups.append(other) + + if not self in other.non_covalently_coupled_groups: + other.non_covalently_coupled_groups.append(self) + + return + + def get_covalently_coupled_groups(self): return self.covalently_coupled_groups + def get_non_covalently_coupled_groups(self): return self.non_covalently_coupled_groups + + + def share_determinants(self, others): + + # for each determinant type + for other in others: + if other == self: + continue + + for type in ['sidechain','backbone','coulomb']: + for g in other.determinants[type]: self.share_determinant(g,type) + + # recalculate pka values + self.calculate_total_pka() + other.calculate_total_pka() + + return + + + def share_determinant(self, new_determinant, type): + added = False + # first check if we already have a determinant with this label + for own_determinant in self.determinants[type]: + if own_determinant.group == new_determinant.group: + # if so, find the average value + avr = (own_determinant.value + new_determinant.value)/2.0 + own_determinant.value = avr + new_determinant.value = avr + added = True + + # otherwise we just add the determinant to our list + if not added: + self.determinants[type].append(Source.determinant.Determinant(new_determinant.group, + new_determinant.value)) + + return + + def make_covalently_coupled_line(self): + + # first check if there are any coupled groups at all + if len(self.covalently_coupled_groups) == 0: + return '' + + line = 'CCOUPL%5d'%self.atom.numb + + # extract and sort numbers of coupled groups + coupled = [] + for g in self.covalently_coupled_groups: + coupled.append(g.atom.numb) + coupled.sort() + + # write 'em out + for b in coupled: + line += '%5d'%b + line += '\n' + + return line + + def make_non_covalently_coupled_line(self): + # first check if there are any coupled groups at all + if len(self.non_covalently_coupled_groups) == 0: + return '' + + line = 'NCOUPL%5d'%self.atom.numb + + # extract and sort numbers of coupled groups + coupled = [] + for g in self.non_covalently_coupled_groups: + coupled.append(g.atom.numb) + coupled.sort() + + # write 'em out + for b in coupled: + line += '%5d'%b + line += '\n' + + + return line + + # + # Bookkeeping methods + # + def __eq__(self, other): + """ + Check if two groups should be considered identical + """ + if self.atom.type == 'atom': + # In case of protein atoms we trust the labels + return self.label==other.label + else: + # For heterogene atoms we also need to check the residue number + return self.label==other.label and self.atom.resNumb == other.atom.resNumb + + def __hash__(self): + """ Needed together with __eq__ - otherwise we can't make sets of groups """ + return id(self) + + def __iadd__(self, other): + if self.type != other.type: + raise Exception('Cannot add groups of different types (%s and %s)'%(self.type,other.type)) + + # add all values + self.pka_value += other.pka_value + self.Nmass += other.Nmass + self.Emass += other.Emass + self.Nlocl += other.Nlocl + self.Elocl += other.Elocl + self.buried += other.buried + # and add all determinants + for type in ['sidechain','backbone','coulomb']: + for determinant in other.determinants[type]: + self.add_determinant(determinant, type) + return self + + + def add_determinant(self, new_determinant, type): + """ Adds to current and creates non-present determinants""" + # first check if we already have a determinant with this label + for own_determinant in self.determinants[type]: + if own_determinant.group == new_determinant.group: + # if so, add the value + own_determinant.value += new_determinant.value + return + + # otherwise we just add the determinant to our list + self.determinants[type].append(Source.determinant.Determinant(new_determinant.group, + new_determinant.value)) + + return + + def set_determinant(self, new_determinant, type): + """ Overwrites current and creates non-present determinants""" + # first check if we already have a determinant with this label + for own_determinant in self.determinants[type]: + if own_determinant.group == new_determinant.group: + # if so, overwrite the value + own_determinant.value = new_determinant.value + return + + # otherwise we just add the determinant to our list + self.determinants[type].append(Source.determinant.Determinant(new_determinant.group, + new_determinant.value)) + + return + + def remove_determinants(self, labels): + """ removes all determinants with label in labels """ + for type in ['sidechain','backbone','coulomb']: + matches = list(filter(lambda d: d.label in labels, [d for d in self.determinants[type]])) + for m in matches: self.determinants[type].remove(m) + + return + + def __truediv__(self, value): + value = float(value) + # divide all values + self.pka_value /= value + self.Nmass /= value + self.Emass /= value + self.Nlocl /= value + self.Elocl /= value + self.buried /= value + # and all determinants + for type in ['sidechain','backbone','coulomb']: + for determinant in self.determinants[type]: + determinant.value /= value + return self + + def clone(self): + res = Group(self.atom) + res.type = self.type + res.residue_type = self.residue_type + res.model_pka = self.model_pka + res.coupled_titrating_group = self.coupled_titrating_group + res.covalently_coupled_groups = self.covalently_coupled_groups + res.non_covalently_coupled_groups = self.non_covalently_coupled_groups + res.titratable = self.titratable + res.charge = self.charge + return res + + def setup(self): + # set the charges + if self.type in self.parameters.charge.keys(): + self.charge = self.parameters.charge[self.type] + if self.residue_type in self.parameters.ions.keys(): + self.charge = self.parameters.ions[self.residue_type] + + #print('ION setup',self,self.residue_type, self.charge) + + # find the center and the interaction atoms + self.setup_atoms() + + # set the model pka value + self.titratable = False + if self.residue_type in self.parameters.model_pkas.keys(): + if not self.model_pka_set: + self.model_pka = self.parameters.model_pkas[self.residue_type] + # check if we should apply a custom model pka + key = '%s-%s'%(self.atom.resName.strip(), self.atom.name.strip()) + if key in self.parameters.custom_model_pkas.keys(): + self.model_pka = self.parameters.custom_model_pkas[key] + + self.model_pka_set = True + + if self.model_pka_set and not self.atom.cysteine_bridge: + self.titratable = True + + + return + + def setup_atoms(self): + # This method is overwritten for some types of groups + # set the center at the position of the main atom + self.set_center([self.atom]) + # set the main atom as interaction atom + self.set_interaction_atoms([self.atom], [self.atom]) + return + + def set_interaction_atoms(self, interaction_atoms_for_acids, interaction_atoms_for_bases): + [a.set_group_type(self.type) for a in interaction_atoms_for_acids+interaction_atoms_for_bases] + + self.interaction_atoms_for_acids = interaction_atoms_for_acids + self.interaction_atoms_for_bases = interaction_atoms_for_bases + + # check if all atoms have been identified + ok = True + for [expect, found, t] in [[expected_atoms_acid_interactions, self.interaction_atoms_for_acids, 'acid'], + [expected_atoms_base_interactions, self.interaction_atoms_for_bases, 'base']]: + if self.type in expect.keys(): + for e in expect[self.type].keys(): + if len([a for a in found if a.element==e]) != expect[self.type][e]: + ok = False + + if not ok: + print('Warning: Missing atoms or failed protonation for %s (%s) -- please check the structure'%(self.label, self.type)) + print(' %s'%self) + Na = sum([expected_atoms_acid_interactions[self.type][e] for e in expected_atoms_acid_interactions[self.type].keys()]) + Nb = sum([expected_atoms_base_interactions[self.type][e] for e in expected_atoms_base_interactions[self.type].keys()]) + + print(' Expected %d interaction atoms for acids, found:'%Na) + for i in range(len(self.interaction_atoms_for_acids)): + print(' %s'%self.interaction_atoms_for_acids[i]) + + print(' Expected %d interaction atoms for bases, found:'%Nb) + for i in range(len(self.interaction_atoms_for_bases)): + print(' %s'%self.interaction_atoms_for_bases[i]) + + + #return + + return + + def get_interaction_atoms(self, interacting_group): + if interacting_group.residue_type in self.parameters.base_list: + return self.interaction_atoms_for_bases + else: + return self.interaction_atoms_for_acids #default is acid interaction atoms - cf. 3.0 + + def set_center(self, atoms): + # reset center + self.x = 0.0; self.y = 0.0; self.z = 0.0 + + # find the average positon of atoms + for atom in atoms: + self.x += atom.x; self.y += atom.y; self.z += atom.z + + self.x /= float(len(atoms)) + self.y /= float(len(atoms)) + self.z /= float(len(atoms)) + + return + + def getDeterminantString(self, remove_penalised_group=False): + if self.coupled_titrating_group and remove_penalised_group: + return '' + + number_of_sidechain = len(self.determinants['sidechain']) + number_of_backbone = len(self.determinants['backbone']) + number_of_coulomb = len(self.determinants['coulomb']) + + number_of_lines = max(1, number_of_sidechain, number_of_backbone, number_of_coulomb) + str = "" + for line_number in range(number_of_lines): + str += "%s" % (self.label) + if line_number == 0: + str += " %6.2lf" %(self.pka_value) + if len(self.non_covalently_coupled_groups)>0: + str+='*' + else: + str+=' ' + + # if self.atom.cysteine_bridge: + # str += " BONDED " + # else: + str += " %4d%2s " % (int(100.0*self.buried), "%") + + str += " %6.2lf %4d" % (self.Emass, self.Nmass) + str += " %6.2lf %4d" % (self.Elocl, self.Nlocl) + else: + str += "%40s" % (" ") + + # add the determinants + for type in ['sidechain','backbone','coulomb']: + str += self.get_determinant_for_string(type,line_number) + + # adding end-of-line + str += "\n" + + str += "\n" + + return str + + def get_determinant_for_string(self, type, number): + if number >= len(self.determinants[type]): + empty_determinant = "%s%4d%2s" % ("XXX", 0, "X") + return "%8.2lf %s" % (0.0, empty_determinant) + else: + determinant = self.determinants[type][number] + return "%8.2lf %s" % (determinant.value, determinant.label) + + + def calculate_total_pka(self): + # if this is a cysteine involved in a di-sulphide bond + if self.atom.cysteine_bridge: + self.pka_value = 99.99 + return + + + self.pka_value = self.model_pka + self.Emass + self.Elocl + + for determinant_type in ['sidechain', 'backbone', 'coulomb']: + for determinant in self.determinants[determinant_type]: + self.pka_value += determinant.value + + return + + + + + def calculate_intrinsic_pka(self): + """ + Calculates the intrinsic pKa values from the desolvation determinants, back-bone hydrogen bonds, + and side-chain hydrogen bond to non-titratable residues + """ + back_bone = 0.0 + for determinant in self.determinants['backbone']: + value = determinant.value + back_bone += value + + side_chain = 0.0 + for determinant in self.determinants['sidechain']: + if determinant.label[0:3] not in ['ASP','GLU','LYS','ARG','HIS','CYS','TYR','C- ','N+ ']: + value = determinant.value + side_chain += value + + self.intrinsic_pKa = self.model_pka + self.Emass + self.Elocl + back_bone + side_chain + + return + + + + + + def getSummaryString(self, remove_penalised_group=False): + if self.coupled_titrating_group and remove_penalised_group: + return '' + + ligand_type = '' + if self.atom.type == 'hetatm': + ligand_type = self.type + + penalty = '' + if self.coupled_titrating_group: + penalty = ' NB: Discarded due to coupling with %s'%self.coupled_titrating_group.label + + str = " %9s %8.2lf %10.2lf %18s %s\n" % (self.label, + self.pka_value, + self.model_pka,ligand_type, + penalty) + + return str + + def __str__(self): + return 'Group (%s) for %s'%(self.type,self.atom) + + + + # + # Energy-related methods + # + + def calculate_folding_energy(self, parameters, pH=None, reference=None): + """ + returning the electrostatic energy of this residue at pH 'pH' + """ + if pH == None: + pH = parameters.pH + if reference == None: + reference = parameters.reference + + # If not titratable, the contribution is zero + + if not self.titratable: + return 0.00 + + # calculating the ddG(neutral --> low-pH) contribution + ddG_neutral = 0.00 + if reference == 'neutral' and self.charge > 0.00: + pka_prime = self.pka_value + for determinant in self.determinants['coulomb']: + if determinant.value > 0.00: + pka_prime -= determinant.value + ddG_neutral = -1.36*(pka_prime - self.model_pka) + + # calculating the ddG(low-pH --> pH) contribution + # folded + x = pH - self.pka_value + y = 10**x + Q_pro = math.log10(1+y) + + # unfolded + x = pH - self.model_pka + y = 10**x + Q_mod = math.log10(1+y) + + ddG_low = -1.36*(Q_pro - Q_mod) + ddG = ddG_neutral + ddG_low + + return ddG + + def calculate_charge(self, parmaeters, pH=7.0, state='folded'): + + if state == "unfolded": + x = self.charge * (self.model_pka - pH) + else: + x = self.charge * (self.pka_value - pH) + + y = 10**x + charge = self.charge*(y/(1.0+y)) + + return charge + + + +class COO_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'COO' + + def setup_atoms(self): + # Identify the two caroxyl oxygen atoms + the_oxygens = self.atom.get_bonded_elements('O') + + # set the center using the two oxygen carboxyl atoms + self.set_center(the_oxygens) + self.set_interaction_atoms(the_oxygens, the_oxygens) + return + + +class HIS_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'HIS' + + def setup_atoms(self): + # Find the atoms in the histidine ring + ring_atoms = Source.ligand.is_ring_member(self.atom) + if len(ring_atoms) != 5: + print('Warning: His group does not seem to contain a ring',self) + + # protonate ring + for r in ring_atoms: + my_protonator.protonate_atom(r) + + # set the center using the ring atoms + self.set_center(ring_atoms) + + # find the hydrogens on the ring-nitrogens + hydrogens = [] + nitrogens = [ra for ra in ring_atoms if ra.element == 'N'] + + for nitrogen in nitrogens: + hydrogens.extend(nitrogen.get_bonded_elements('H')) + + self.set_interaction_atoms(hydrogens+nitrogens, nitrogens) + + return + + +class CYS_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'CYS' + + +class TYR_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'TYR' + + +class LYS_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'LYS' + + +class ARG_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'ARG' + + def setup_atoms(self): + # set the center at the position of the main atom + self.set_center([self.atom]) + + # find all the hydrogens on the nitrogen atoms + nitrogens = self.atom.get_bonded_elements('N') + for n in nitrogens: + my_protonator.protonate_atom(n) + + hydrogens = [] + for nitrogen in nitrogens: + hydrogens.extend(nitrogen.get_bonded_elements('H')) + self.set_interaction_atoms(nitrogens+hydrogens, nitrogens) + + return + + +class ROH_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'ROH' + +class SER_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'SER' + +class AMD_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'AMD' + + def setup_atoms(self): + # Identify the oxygen and nitrogen amide atoms + the_oxygen = self.atom.get_bonded_elements('O') + the_nitrogen = self.atom.get_bonded_elements('N') + + # add protons to the nitrogen + my_protonator.protonate_atom(the_nitrogen[0]) + the_hydrogens = the_nitrogen[0].get_bonded_elements('H') + + # set the center using the oxygen and nitrogen amide atoms + self.set_center(the_oxygen+the_nitrogen) + + self.set_interaction_atoms(the_nitrogen+the_hydrogens,the_oxygen) + + return + + +class TRP_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'TRP' + + def setup_atoms(self): + # set the center at the position of the main atom + self.set_center([self.atom]) + + # find the hydrogen on the nitrogen atom + my_protonator.protonate_atom(self.atom) + the_hydrogen = self.atom.get_bonded_elements('H') + self.set_interaction_atoms(the_hydrogen+[self.atom], [self.atom]) + return + + +class Nterm_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'N+' + + +class Cterm_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'COO' # this is to deal with the COO-C- parameter unification. + + def setup_atoms(self): + # Identify the carbon and other oxygen carboxyl atoms + the_carbon = self.atom.get_bonded_elements('C') + the_other_oxygen = the_carbon[0].get_bonded_elements('O') + the_other_oxygen.remove(self.atom) + + # set the center and interaction atoms + the_oxygens = [self.atom]+ the_other_oxygen + self.set_center(the_oxygens) + self.set_interaction_atoms(the_oxygens, the_oxygens) + + + return + + +class BBN_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'BBN' + self.residue_type = 'BBN' + + + def setup_atoms(self): + # Identify the hydrogen + my_protonator.protonate_atom(self.atom) + the_hydrogen = self.atom.get_bonded_elements('H') + + # set the center using the nitrogen + self.set_center([self.atom]) + self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) + return + + +class BBC_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'BBC' + self.residue_type = 'BBC' + + def setup_atoms(self): + # Identify the oxygen + the_oxygen = self.atom.get_bonded_elements('O') + + # set the center using the nitrogen + self.set_center([self.atom]) + self.set_interaction_atoms(the_oxygen, the_oxygen) + return + + +class NAR_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'NAR' + self.residue_type = 'NAR' + print('Found NAR group:',atom) + return + + + def setup_atoms(self): + # Identify the hydrogen + my_protonator.protonate_atom(self.atom) + the_hydrogen = self.atom.get_bonded_elements('H') + + # set the center using the nitrogen + self.set_center([self.atom]) + self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) + return + + + + + +class NAM_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'NAM' + self.residue_type = 'NAM' + print('Found NAM group:',atom) + return + + + def setup_atoms(self): + # Identify the hydrogen + my_protonator.protonate_atom(self.atom) + the_hydrogen = self.atom.get_bonded_elements('H') + + # set the center using the nitrogen + self.set_center([self.atom]) + self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) + return + + + +class F_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'F' + self.residue_type = 'F' + print('Found F group:',atom) + return + +class Cl_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'Cl' + self.residue_type = 'Cl' + print('Found Cl group:',atom) + return + +class OH_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'OH' + self.residue_type = 'OH' + print('Found OH group:',atom) + return + + + def setup_atoms(self): + # Identify the hydrogen + my_protonator.protonate_atom(self.atom) + the_hydrogen = self.atom.get_bonded_elements('H') + + # set the center using the nitrogen + self.set_center([self.atom]) + self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) + return + +class OP_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'OP' + self.residue_type = 'OP' + print('Found OP group:',atom) + return + + + def setup_atoms(self): + # Identify the hydrogen + my_protonator.protonate_atom(self.atom) + #the_hydrogen = self.atom.get_bonded_elements('H') + + # set the center using the oxygen + self.set_center([self.atom]) + #self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) + self.set_interaction_atoms([self.atom], [self.atom]) + return + + +class O3_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'O3' + self.residue_type = 'O3' + print('Found O3 group:',atom) + return + + +class O2_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'O2' + self.residue_type = 'O2' + print('Found O2 group:',atom) + return + +class SH_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'SH' + self.residue_type = 'SH' + print('Found SH group:',atom) + return + + +class CG_group(Group): + """Guadinium group""" + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'CG' + self.residue_type = 'CG' + print('Found CG group:',atom) + return + + def setup_atoms(self): + # Identify the nitrogens + the_nitrogens = self.atom.get_bonded_elements('N') + + # set the center using the nitrogen + self.set_center([self.atom]) + + the_hydrogens = [] + for n in the_nitrogens: + my_protonator.protonate_atom(n) + the_hydrogens += n.get_bonded_elements('H') + self.set_interaction_atoms(the_hydrogens+the_nitrogens, the_nitrogens) + + return + +class C2N_group(Group): + """Amidinium group""" + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'C2N' + self.residue_type = 'C2N' + print('Found C2N group:',atom) + return + + def setup_atoms(self): + # Identify the nitrogens + the_nitrogens = self.atom.get_bonded_elements('N') + the_nitrogens = [n for n in the_nitrogens if len(n.get_bonded_heavy_atoms())==1] + + # set the center using the nitrogen + self.set_center([self.atom]) + + the_hydrogens = [] + for n in the_nitrogens: + my_protonator.protonate_atom(n) + the_hydrogens += n.get_bonded_elements('H') + + self.set_interaction_atoms(the_hydrogens+the_nitrogens, the_nitrogens) + return + +class OCO_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'OCO' + self.residue_type = 'OCO' + print('Found OCO group:',atom) + return + + def setup_atoms(self): + # Identify the two caroxyl oxygen atoms + the_oxygens = self.atom.get_bonded_elements('O') + + # set the center using the two oxygen carboxyl atoms + self.set_center(the_oxygens) + self.set_interaction_atoms(the_oxygens, the_oxygens) + return + + + +class N30_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'N30' + self.residue_type = 'N30' + print('Found N30 group:',atom) + return + + def setup_atoms(self): + # Identify the nitrogens + my_protonator.protonate_atom(self.atom) + the_hydrogens = self.atom.get_bonded_elements('H') + # set the center using the nitrogen + self.set_center([self.atom]) + self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) + return + +class N31_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'N31' + self.residue_type = 'N31' + print('Found N31 group:',atom) + return + + def setup_atoms(self): + # Identify the nitrogens + my_protonator.protonate_atom(self.atom) + the_hydrogens = self.atom.get_bonded_elements('H') + # set the center using the nitrogen + self.set_center([self.atom]) + self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) + return + +class N32_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'N32' + self.residue_type = 'N32' + print('Found N32 group:',atom) + return + + def setup_atoms(self): + # Identify the nitrogens + my_protonator.protonate_atom(self.atom) + the_hydrogens = self.atom.get_bonded_elements('H') + # set the center using the nitrogen + self.set_center([self.atom]) + self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) + return + +class N33_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'N33' + self.residue_type = 'N33' + print('Found N33 group:',atom) + return + + def setup_atoms(self): + # Identify the nitrogens + my_protonator.protonate_atom(self.atom) + the_hydrogens = self.atom.get_bonded_elements('H') + # set the center using the nitrogen + self.set_center([self.atom]) + self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) + return + +class NP1_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'NP1' + self.residue_type = 'NP1' + print('Found NP1 group:',atom) + return + + + def setup_atoms(self): + # Identify the nitrogens + my_protonator.protonate_atom(self.atom) + the_hydrogens = self.atom.get_bonded_elements('H') + # set the center using the nitrogen + self.set_center([self.atom]) + self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) + return + +class N1_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'N1' + self.residue_type = 'N1' + print('Found N1 group:',atom) + return + + + +class Ion_group(Group): + def __init__(self, atom): + Group.__init__(self,atom) + self.type = 'ION' + self.residue_type = atom.resName.strip() + print('Found ion group:',atom) + return + + +class non_titratable_ligand_group(Group): + def __init__(self, atom): + Group.__init__(self, atom) + self.type = 'LG' + self.residue_type = 'LG' +# print('Non-titratable ligand group',atom) + return + +class titratable_ligand_group(Group): + def __init__(self, atom): + Group.__init__(self, atom) + # set the charge and determine type (acid or base) + self.charge = atom.charge + if self.charge <0: + self.type = 'ALG' + self.residue_type = 'ALG' + elif self.charge > 0: + self.type = 'BLG' + self.residue_type = 'BLG' + else: + raise Exception('Unable to determine type of ligand group - charge not set?') + + + # check if marvin model pka has been calculated + # this is not true if we are reading an input file + if atom.marvin_pka: + self.model_pka = atom.marvin_pka + print('Titratable ligand group ',atom, self.model_pka, self.charge) + self.model_pka_set = True + + return + + +def is_group(parameters, atom): + atom.groups_extracted = 1 + + # check if this atom belongs to a protein group + protein_group = is_protein_group(parameters, atom) + if protein_group: return protein_group + + # check if this atom belongs to a ion group + ion_group = is_ion_group(parameters, atom) + if ion_group: return ion_group + + # check if this atom belongs to a ligand group + if parameters.ligand_typing == 'marvin': + ligand_group = is_ligand_group_by_marvin_pkas(parameters, atom) + elif parameters.ligand_typing == 'sybyl': + ligand_group = is_ligand_group_by_sybyl_types(parameters, atom) + elif parameters.ligand_typing == 'groups': + ligand_group = is_ligand_group_by_groups(parameters, atom) + else: + raise Exception('Unknown ligand typing method \'%s\''%parameters.ligand_typing) + + if ligand_group: return ligand_group + + + + return None + + +def is_protein_group(parameters,atom): + if atom.type != 'atom': + return None + + ### Check for termial groups + if atom.terminal == 'N+': + return Nterm_group(atom) + elif atom.terminal == 'C-': + return Cterm_group(atom) + + ### Backbone + if atom.type == 'atom' and atom.name == 'N': + # ignore proline backbone nitrogens + if atom.resName != 'PRO': + return BBN_group(atom) + if atom.type == 'atom' and atom.name == 'C': + # ignore C- carboxyl + if atom.count_bonded_elements('O') == 1: + return BBC_group(atom) + + ### Filters for side chains based on PDB protein atom names + key = '%s-%s'%(atom.resName, atom.name) + + if key in parameters.protein_group_mapping.keys(): + return eval('%s_group(atom)'%parameters.protein_group_mapping[key]) + + return None + +def is_ligand_group_by_sybyl_types(parameters, atom): + + + return None + + +def is_ligand_group_by_groups(parameters, atom): + ### Ligand group filters + if atom.type != 'hetatm': + return None + + my_protonator.protonate_atom(atom) + + if atom.sybyl_type == 'N.ar': + if len(atom.get_bonded_heavy_atoms())==2: + return NAR_group(atom) + + if atom.sybyl_type == 'N.am': + return NAM_group(atom) + + if atom.sybyl_type in ['N.3', 'N.4']: + heavy_bonded = atom.get_bonded_heavy_atoms() + if len(heavy_bonded) == 0: + return N30_group(atom) + elif len(heavy_bonded) == 1: + return N31_group(atom) + elif len(heavy_bonded) == 2: + return N32_group(atom) + elif len(heavy_bonded) == 3: + return N33_group(atom) + + if atom.sybyl_type == 'N.1': + return N1_group(atom) + + if atom.sybyl_type == 'N.pl3': + # make sure that this atom is not part of a guadinium or amidinium group + bonded_carbons = atom.get_bonded_elements('C') + if len(bonded_carbons) == 1: + bonded_nitrogens = bonded_carbons[0].get_bonded_elements('N') + if len(bonded_nitrogens) == 1: + return NP1_group(atom) + + + if atom.sybyl_type == 'C.2': + # Guadinium and amidinium groups + bonded_nitrogens = atom.get_bonded_elements('N') + npls = [n for n in bonded_nitrogens if (n.sybyl_type == 'N.pl3' and len(n.get_bonded_heavy_atoms())==1)] + if len(npls) == 2: + n_with_max_two_heavy_atom_bonds = [n for n in bonded_nitrogens if len(n.get_bonded_heavy_atoms())<3] + if len(n_with_max_two_heavy_atom_bonds) == 2: + return C2N_group(atom) + if len(bonded_nitrogens) == 3: + return CG_group(atom) + # carboxyl group + bonded_oxygens = atom.get_bonded_elements('O') + bonded_oxygens = [b for b in bonded_oxygens if 'O.co2' in b.sybyl_type] + if len(bonded_oxygens) == 2: + return OCO_group(atom) + + + if atom.sybyl_type == 'F': + return F_group(atom) + + if atom.sybyl_type == 'Cl': + return Cl_group(atom) + + if atom.sybyl_type == 'O.3': + if len(atom.get_bonded_heavy_atoms()) == 1: + # phosphate group + if atom.count_bonded_elements('P') == 1: + return OP_group(atom) + # hydroxyl group + else: + return OH_group(atom) + # other SP3 oxygen + else: + return O3_group(atom) + + if atom.sybyl_type == 'O.2': + return O2_group(atom) + + + if atom.sybyl_type == 'S.3': + # sulphide group + if len(atom.get_bonded_heavy_atoms()) == 1: + return SH_group(atom) + # other SP3 oxygen + #else: + # return S3_group(atom) + + + return None + + +def is_ligand_group_by_marvin_pkas(parameters, atom): + if atom.type != 'hetatm': + return None + + # calculate Marvin ligand pkas for this conformation container + # if not already done + if not atom.conformation_container.marvin_pkas_calculated: + lpka = Source.ligand_pka_values.ligand_pka_values(parameters) + lpka.get_marvin_pkas_for_molecular_container(atom.molecular_container, + min_pH=parameters.min_ligand_model_pka, + max_pH=parameters.max_ligand_model_pka) + + + if atom.marvin_pka: + return titratable_ligand_group(atom) + + # Special case of oxygen in carboxyl group not assigned a pka value by marvin + if atom.sybyl_type == 'O.co2': + atom.charge = -1.0 + other_oxygen = [o for o in atom.get_bonded_elements('C')[0].get_bonded_elements('O') if not o==atom][0] + atom.marvin_pka = other_oxygen.marvin_pka + return titratable_ligand_group(atom) + + + if atom.element in parameters.hydrogen_bonds.elements: + return non_titratable_ligand_group(atom) + + return None + + +def is_ion_group(parameters, atom): + + if atom.resName.strip() in parameters.ions.keys(): + return Ion_group(atom) + + return None diff --git a/Source/iterative.py b/Source/iterative.py new file mode 100644 index 0000000..c994867 --- /dev/null +++ b/Source/iterative.py @@ -0,0 +1,358 @@ +import math, time +import Source.lib as lib +from Source.determinant import Determinant +import Source.calculations + +# Some library functions for the interative pKa determinants + + +def addtoDeterminantList(group1, group2, distance, iterative_interactions, version): + """ + Adds 'iterative determinants' to list ..., [[R1, R2], [side-chain, coulomb], [A1, A2]], ... + Note, the sign is determined when the interaction is added to the iterative object! + Note, distance < coulomb_cutoff here + """ + + hbond_value = version.hydrogen_bond_interaction(group1, group2) + coulomb_value = version.electrostatic_interaction(group1, group2, distance) + + # adding the interaction to 'iterative_interactions' + if hbond_value or coulomb_value: + pair = [group1, group2] + + values = [hbond_value, coulomb_value] + while None in values: + values[values.index(None)] = 0.0 + + annihilation = [0., 0.] + interaction = [pair, values, annihilation] + iterative_interactions.append(interaction) + + return + + +def addIterativeAcidPair(object1, object2, interaction): + """ + Adding the Coulomb 'iterative' interaction (an acid pair): + the higher pKa is raised with QQ+HB + the lower pKa is lowered with HB + """ + values = interaction[1] + annihilation = interaction[2] + hbond_value = values[0] + coulomb_value = values[1] + diff = coulomb_value + 2*hbond_value + comp1 = object1.pKa_old + annihilation[0] + diff + comp2 = object2.pKa_old + annihilation[1] + diff + annihilation[0] = 0. + annihilation[1] = 0. + if comp1 > comp2: + # side-chain + determinant = [object2, hbond_value] + object1.determinants['sidechain'].append(determinant) + determinant = [object1, -hbond_value] + object2.determinants['sidechain'].append(determinant) + # Coulomb + determinant = [object2, coulomb_value] + object1.determinants['coulomb'].append(determinant) + annihilation[0] = -diff + else: + # side-chain + determinant = [object1, hbond_value] + object2.determinants['sidechain'].append(determinant) + determinant = [object2, -hbond_value] + object1.determinants['sidechain'].append(determinant) + # Coulomb + determinant = [object1, coulomb_value] + object2.determinants['coulomb'].append(determinant) + annihilation[1] = -diff + + +def addIterativeBasePair(object1, object2, interaction): + """ + Adding the Coulomb 'iterative' interaction (a base pair): + the lower pKa is lowered + """ + values = interaction[1] + annihilation = interaction[2] + hbond_value = values[0] + coulomb_value = values[1] + diff = coulomb_value + 2*hbond_value + diff = -diff + comp1 = object1.pKa_old + annihilation[0] + diff + comp2 = object2.pKa_old + annihilation[1] + diff + annihilation[0] = 0. + annihilation[1] = 0. + if comp1 < comp2: + # side-chain + determinant = [object2, -hbond_value] + object1.determinants['sidechain'].append(determinant) + determinant = [object1, hbond_value] + object2.determinants['sidechain'].append(determinant) + # Coulomb + determinant = [object2, -coulomb_value] + object1.determinants['coulomb'].append(determinant) + annihilation[0] = -diff + else: + # side-chain + determinant = [object1, -hbond_value] + object2.determinants['sidechain'].append(determinant) + determinant = [object2, hbond_value] + object1.determinants['sidechain'].append(determinant) + # Coulomb + determinant = [object1, -coulomb_value] + object2.determinants['coulomb'].append(determinant) + annihilation[1] = -diff + + +def addIterativeIonPair(object1, object2, interaction, version): + """ + Adding the Coulomb 'iterative' interaction (an acid-base pair): + the pKa of the acid is lowered & the pKa of the base is raised + """ + values = interaction[1] + annihilation = interaction[2] + hbond_value = values[0] + coulomb_value = values[1] + Q1 = object1.Q + Q2 = object2.Q + comp1 = object1.pKa_old + annihilation[0] + Q1*coulomb_value + comp2 = object2.pKa_old + annihilation[1] + Q2*coulomb_value + if object1.resName not in version.parameters.exclude_sidechain_interactions: + comp1 += Q1*hbond_value + if object2.resName not in version.parameters.exclude_sidechain_interactions: + comp2 += Q2*hbond_value + + if Q1 == -1.0 and comp1 < comp2: + add_term = True # pKa(acid) < pKa(base) + elif Q1 == 1.0 and comp1 > comp2: + add_term = True # pKa(base) > pKa(acid) + else: + add_term = False + + annihilation[0] = 0.00 + annihilation[1] = 0.00 + + if add_term == True: + + # Coulomb + if coulomb_value > 0.005: + # residue1 + interaction = [object2, Q1*coulomb_value] + annihilation[0] += -Q1*coulomb_value + object1.determinants['coulomb'].append(interaction) + # residue2 + interaction = [object1, Q2*coulomb_value] + annihilation[1] += -Q2*coulomb_value + object2.determinants['coulomb'].append(interaction) + + # Side-chain + if hbond_value > 0.005: + # residue1 + if object1.resName not in version.parameters.exclude_sidechain_interactions: + interaction = [object2, Q1*hbond_value] + annihilation[0] += -Q1*hbond_value + object1.determinants['sidechain'].append(interaction) + # residue2 + if object2.resName not in version.parameters.exclude_sidechain_interactions: + interaction = [object1, Q2*hbond_value] + annihilation[1] += -Q2*hbond_value + object2.determinants['sidechain'].append(interaction) + + +def addDeterminants(iterative_interactions, version, options=None): + """ + The iterative pKa scheme. Later it is all added in 'calculateTotalPKA' + """ + # --- setup --- + iteratives = [] + done_group = [] + + # creating iterative objects with references to their real group counterparts + for interaction in iterative_interactions: + pair = interaction[0] + for group in pair: + if group in done_group: + #print "done already" + """ do nothing - already have an iterative object for this group """ + else: + newIterative = Iterative(group) + iteratives.append(newIterative) + done_group.append(group) + + # Initialize iterative scheme + if options.verbose == True: + print("\n --- pKa iterations (%d groups, %d interactions) ---" % ( len(iteratives), len(iterative_interactions) )) + converged = False + iteration = 0 + # set non-iterative pka values as first step + for itres in iteratives: + itres.pKa_iter.append(itres.pKa_NonIterative) + + + # --- starting pKa iterations --- + while converged == False: + + # initialize pKa_new + iteration += 1 + for itres in iteratives: + itres.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} + itres.pKa_new = itres.pKa_NonIterative + + + # Adding interactions to temporary determinant container + for interaction in iterative_interactions: + pair = interaction[0] + values = interaction[1] + annihilation = interaction[2] + #print "len(interaction) = %d" % (len(interaction)) + object1, object2 = findIterative(pair, iteratives) + Q1 = object1.Q + Q2 = object2.Q + if Q1 < 0.0 and Q2 < 0.0: + """ both are acids """ + addIterativeAcidPair(object1, object2, interaction) + elif Q1 > 0.0 and Q2 > 0.0: + """ both are bases """ + addIterativeBasePair(object1, object2, interaction) + else: + """ one of each """ + addIterativeIonPair(object1, object2, interaction, version) + + + # Calculating pKa_new values + for itres in iteratives: + for type in ['sidechain','backbone','coulomb']: + for determinant in itres.determinants[type]: + itres.pKa_new += determinant[1] + + # Check convergence + converged = True + for itres in iteratives: + if itres.pKa_new == itres.pKa_old: + itres.converged = True + else: + itres.converged = False + converged = False + + # reset pKa_old & storing pKa_new in pKa_iter + for itres in iteratives: + itres.pKa_old = itres.pKa_new + itres.pKa_iter.append(itres.pKa_new) + + if iteration == 10: + print("did not converge in %d iterations" % (iteration)) + break + + # --- Iterations finished --- + + # printing pKa iterations + if options.verbose == True: + str = "%12s" % (" ") + for index in range(0, iteration+1 ): + str += "%8d" % (index) + print(str) + for itres in iteratives: + str = "%s " % (itres.label) + for pKa in itres.pKa_iter: + str += "%8.2lf" % (pKa) + if itres.converged == False: + str += " *" + print(str) + + # creating real determinants and adding them to group object + for itres in iteratives: + for type in ['sidechain','backbone','coulomb']: + for interaction in itres.determinants[type]: + #print('done',itres.group.label,interaction[0],interaction[1]) + value = interaction[1] + if value > 0.005 or value < -0.005: + g = interaction[0] + newDeterminant = Determinant(g, value) + itres.group.determinants[type].append(newDeterminant) + + + +def findIterative(pair, iteratives): + """ + Function to find the two 'iteratives' that corresponds to the groups in 'pair' + """ + for iterative in iteratives: + if iterative.group == pair[0]: + iterative0 = iterative + elif iterative.group == pair[1]: + iterative1 = iterative + + return iterative0, iterative1 + + + +class Iterative: + """ + Iterative class - pKa values and references of iterative groups + Note, this class has a fake determinant list, true determinants are + made after the iterations are finished. + """ + + def __init__(self, group): + """ + Contructer of the iterative object + """ + + #print "creating 'iterative object' for %s" % (group.label) + + self.label = group.label + self.atom = group.atom + self.resName = group.residue_type + self.Q = group.charge + self.pKa_old = None + self.pKa_new = None + self.pKa_iter = [] + self.pKa_NonIterative = 0.00 + self.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} + self.group = group + self.converged = True + + # Calculate the Non-Iterative part of pKa from the group object + # Side chain + side_chain = 0.00 + for determinant in group.determinants['sidechain']: + value = determinant.value + side_chain += value + + # Back bone + back_bone = 0.00 + for determinant in group.determinants['backbone']: + value = determinant.value + back_bone += value + + # Coulomb + coulomb = 0.00 + for determinant in group.determinants['coulomb']: + value = determinant.value + coulomb += value + + self.pKa_NonIterative = group.model_pka + self.pKa_NonIterative += group.Emass + self.pKa_NonIterative += group.Elocl + self.pKa_NonIterative += side_chain + self.pKa_NonIterative += back_bone + self.pKa_NonIterative += coulomb + + self.pKa_old = self.pKa_NonIterative + + + def __eq__(self, other): + """ + Check if two groups should be considered identical + """ + if self.atom.type == 'atom': + # In case of protein atoms we trust the labels + return self.label==other.label + else: + # For heterogene atoms we also need to check the residue number + return self.label==other.label and self.atom.resNumb == other.atom.resNumb + + def __hash__(self): + """ Needed together with __eq__ - otherwise we can't make sets of groups """ + return id(self) diff --git a/Source/lib.py b/Source/lib.py new file mode 100755 index 0000000..43b471d --- /dev/null +++ b/Source/lib.py @@ -0,0 +1,198 @@ +#!/usr/bin/python + +import string, sys, copy, math, os + + +# +# file I/O +# +def open_file_for_reading(filename): + if not os.path.isfile(filename): + raise Exception('Cannot find file %s' %filename) + + return open(filename,'r') + +def open_file_for_writing(filename): + res = open(filename,'w') + if not res: + raise Exception('Could not open %s'%filename) + return res + +# +# bookkeeping etc. +# +def conformation_sorter(conf): + model = int(conf[:-1]) + altloc = conf[-1:] + return model*100+ord(altloc) + +def split_atoms_into_molecules(atoms): + molecules = [] + + while len(atoms)>0: + initial_atom = atoms.pop() + molecules.append( make_molecule(initial_atom,atoms)) + + return molecules + +def make_molecule(atom, atoms): + bonded_atoms = [a for a in atoms if atom in a.bonded_atoms] + res_atoms = [atom,] + + for ba in bonded_atoms: + if ba in atoms: + atoms.remove(ba) + res_atoms.extend(make_molecule(ba, atoms)) + + return res_atoms + + +def make_grid(min,max,step): + x = min + while x <= max: + yield x + x += step + return + +def generate_combinations(interactions): + res = [[]] + for interaction in interactions: + res = make_combination(res, interaction) + res.remove([]) + + return res + + +def make_combination(combis, interaction): + res = [] + for combi in combis: + res.append(combi+[interaction]) + res.append(combi) + return res + + + +def loadOptions(): + """ + load the arguments parser with options + """ + from optparse import OptionParser + + # defining a 'usage' message + usage = "usage: %prog [options] filename" + + # creating a parser + parser = OptionParser(usage) + + # loading the parser + parser.add_option("-f", "--file", action="append", dest="filenames", + help="read data from , i.e. is added to arguments") + parser.add_option("-r", "--reference", dest="reference", default="neutral", + help="setting which reference to use for stability calculations [neutral/low-pH]") + parser.add_option("-c", "--chain", action="append", dest="chains", + help="creating the protein with only a specified chain, note, chains without ID are labeled 'A' [all]") + parser.add_option("-t", "--thermophile", action="append", dest="thermophiles", + help="defining a thermophile filename; usually used in 'alignment-mutations'") + parser.add_option("-a", "--alignment", action="append", dest="alignment", + help="alignment file connecting and [.pir]") + parser.add_option("-m", "--mutation", action="append", dest="mutations", + help="specifying mutation labels which is used to modify according to, e.g. N25R/N181D") + parser.add_option("-v", "--version", dest="version_label", default="Jan15", + help="specifying the sub-version of propka [Jan15/Dec19]") + parser.add_option("-p", "--parameters",dest="parameters", default="propka.cfg", + help="set the parameter file") + parser.add_option("-z", "--verbose", dest="verbose", action="store_true", default=True, + help="sleep during calculations") + parser.add_option("-q", "--quiet", dest="verbose", action="store_false", + help="sleep during calculations") + parser.add_option("-s", "--silent", dest="verbose", action="store_false", + help="not activated yet") + parser.add_option("--verbosity", dest="verbosity", action="store_const", + help="level of printout - not activated yet") + parser.add_option("-o", "--pH", dest="pH", type="float", default=7.0, + help="setting pH-value used in e.g. stability calculations [7.0]") + parser.add_option("-w", "--window", dest="window", nargs=3, type="float", default=(0.0, 14.0, 1.0), + help="setting the pH-window to show e.g. stability profiles [0.0, 14.0, 1.0]") + parser.add_option("-g", "--grid", dest="grid", nargs=3, type="float", default=(0.0, 14.0, 0.1), + help="setting the pH-grid to calculate e.g. stability related properties [0.0, 14.0, 0.1]") + parser.add_option("--mutator", dest="mutator", + help="setting approach for mutating [alignment/scwrl/jackal]") + parser.add_option("--mutator-option", dest="mutator_options", action="append", + help="setting property for mutator [e.g. type=\"side-chain\"]") + + parser.add_option("-d","--display-coupled-residues", dest="display_coupled_residues", action="store_true", + help="Displays alternative pKa values due to coupling of titratable groups") + parser.add_option("-l","--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file", action="store_true", + help="Reuses the ligand mol2 files allowing the user to alter ligand bond orders", default=False) + parser.add_option("-k","--keep-protons", dest="keep_protons", action="store_true", + help="Keep protons in input file", default=False) + parser.add_option("--protonate-all", dest="protonate_all", action="store_true", + help="Protonate all atoms (will not influence pKa calculation)", default=False) + + + # parsing and returning options and arguments + options, args = parser.parse_args() + + # adding specified filenames to arguments + if options.filenames: + for filename in options.filenames: + args.append(filename) + + # checking at early stage that there is at least one pdbfile to work with + if len(args) == 0: + print("Warning: no pdbfile provided") + #sys.exit(9) + + + + # done! + return options, args + + + + + +def makeTidyAtomLabel(name,element): + """ + Returns a 'tidier' atom label for printing the new pdbfile + """ + + if len(name)>4:# if longer than 4, just truncate the name + label=name[0:4] + elif len(name)==4:# if lenght is 4, otherwise use the name as it is + label = name + else: # if less than 4 characters long, insert white space as needed + if len(element)==1: + label = ' %-3s'%name + else: # The element shoul occupy the two first chars + label = '%-4s'%name + + return label + + + +def get_sorted_configurations(configuration_keys): + """ + extract and sort configurations + """ + configurations = list(configuration_keys) + configurations.sort(key=configuration_compare) + return configurations + +def configuration_compare(conf): + return 100*int(conf[1:-2]) + ord(conf[-1]) + + + + +def writeFile(filename, lines): + """ + Writes a new file + """ + file = open(filename, 'w') + + for line in lines: + file.write( "%s\n" % (line) ) + file.close() + + diff --git a/Source/ligand.py b/Source/ligand.py new file mode 100755 index 0000000..d745d51 --- /dev/null +++ b/Source/ligand.py @@ -0,0 +1,403 @@ +#!/usr/bin/python + +import sys, Source.calculations +from Source.vector_algebra import * + + +all_sybyl_types = [ + 'C.3', # carbon sp3 + 'H', # hydrogen + 'C.2', # carbon sp2 + 'H.spc', # hydrogen in Single Point Charge (SPC) water model + 'C.1', # carbon sp + 'H.t3p', # hydrogen in Transferable intermolecular Potential (TIP3P) water model + 'C.ar', # carbon aromatic + 'LP', # lone pair + 'C.cat', # carbocation (C+) used only in a guadinium group + 'Du', # dummy atom + 'N.3', # nitrogen sp3 + 'Du.C', # dummy carbon + 'N.2', # nitrogen sp2 + 'Any', # any atom + 'N.1', # nitrogen sp + 'Hal', # halogen + 'N.ar', # nitrogen aromatic + 'Het', # heteroatom = N, O, S, P + 'N.am', # nitrogen amide + 'Hev', # heavy atom (non hydrogen) + 'N.pl3', # nitrogen trigonal planar + 'Li', # lithium + 'N.4', # nitrogen sp3 positively charged + 'Na', # sodium + 'O.3', # oxygen sp3 + 'Mg', # magnesium + 'O.2', # oxygen sp2 + 'Al', # aluminum + 'O.co2', # oxygen in carboxylate and phosphate groups + 'Si', # silicon + 'O.spc', # oxygen in Single Point Charge (SPC) water model + 'K', # potassium + 'O.t3p', # oxygen in Transferable Intermolecular Potential (TIP3P) water model + 'Ca', # calcium + 'S.3', # sulfur sp3 + 'Cr.th', # chromium (tetrahedral) + 'S.2', # sulfur sp2 + 'Cr.oh', # chromium (octahedral) + 'S.O', # sulfoxide sulfur + 'Mn', # manganese + 'S.O2', # sulfone sulfur + 'Fe', # iron + 'P.3', # phosphorous sp3 + 'Co.oh', # cobalt (octahedral) + 'F', # fluorine + 'Cu', # copper + 'Cl', # chlorine + 'Zn', # zinc + 'Br', # bromine + 'Se', # selenium + 'I', # iodine + 'Mo', # molybdenum + 'Sn'] # tin + + +#propka_input_types = ['1P','1N','2P','2N'] +#for type in all_sybyl_types: +# temp = type.replace('.','') +# if len(temp)>3: +# temp = temp[0:3] +# propka_input_types.append(temp) +# +#for t in propka_input_types: +# print (t) + + +propka_input_types = [ + '1P', + '1N', + '2P', + '2N', + 'C3', + 'H', + 'C2', + 'Hsp', + 'C1', + 'Ht3', + 'Car', + 'LP', + 'Cca', + 'Du', + 'N3', + 'DuC', + 'N2', + 'Any', + 'N1', + 'Hal', + 'Nar', + 'Het', + 'Nam', + 'Hev', + 'Npl', + 'Li', + 'N4', + 'Na', + 'O3', + 'Mg', + 'O2', + 'Al', + 'Oco', + 'Si', + 'Osp', + 'K', + 'Ot3', + 'Ca', + 'S3', + 'Crt', + 'S2', + 'Cro', + 'SO', + 'Mn', + 'SO2', + 'Fe', + 'P3', + 'Coo', + 'F', + 'Cu', + 'Cl', + 'Zn', + 'Br', + 'Se', + 'I', + 'Mo', + 'Sn'] + + +max_C_double_bond = 1.3 +max_C_triple_bond = 1.2 + +max_C_double_bond_squared = max_C_double_bond*max_C_double_bond +max_C_triple_bond_squared = max_C_triple_bond*max_C_triple_bond + + + + + + +def assign_sybyl_type(atom): + # check if we already have assigned a name to this atom + if atom.sybyl_assigned: + #print(atom.name,'already assigned') + return + + # find some properties of the atom + ring_atoms = is_ring_member(atom) + aromatic = is_aromatic_ring(ring_atoms) + planar = is_planar(atom) + bonded_elements = {} + for i in range(len(atom.bonded_atoms)): + bonded_elements[i]=atom.bonded_atoms[i].element + + + + # Aromatic carbon/nitrogen + if aromatic: + for ra in ring_atoms: + if ra.element in ['C','N']: + set_type(ra, ra.element+'.ar') + return + + # check for amide + if atom.element in ['O','N','C']: + O=None + N=None + C=None + # oxygen, nitrogen + if atom.element in ['O','N']: + for b in atom.get_bonded_elements('C'): + for bb in b.bonded_atoms: + if (bb.element =='N' and atom.element == 'O'): + O=atom + C=b + N=bb + elif (bb.element =='O' and atom.element == 'N'): + N=atom + C=b + O=bb + # carbon + if atom.element == 'C': + nitrogens = atom.get_bonded_elements('N') + oxygens = atom.get_bonded_elements('O') + if len(nitrogens)==1 and len(oxygens)==1: + C = atom + N = nitrogens[0] + O = oxygens[0] + + + if C and N and O: + # make sure that the Nitrogen is not aromatic and that it has two heavy atom bonds + if not is_aromatic_ring(is_ring_member(N)) and len(N.get_bonded_heavy_atoms())==2: + set_type(N,'N.am') + set_type(C,'C.2') + set_type(O,'O.2') + return + + + if atom.element=='C': + # check for carboxyl + if len(atom.bonded_atoms)==3 and list(bonded_elements.values()).count('O')==2: + i1 = list(bonded_elements.values()).index('O') + i2 = list(bonded_elements.values()).index('O',i1+1) + if len(atom.bonded_atoms[i1].bonded_atoms)==1 and len(atom.bonded_atoms[i2].bonded_atoms)==1: + set_type(atom.bonded_atoms[i1],'O.co2-') + set_type(atom.bonded_atoms[i2],'O.co2') + set_type(atom,'C.2') + return + + + + # sp carbon + if len(atom.bonded_atoms)<=2: + for b in atom.bonded_atoms: + if Source.calculations.squared_distance(atom, b) < max_C_triple_bond_squared: + set_type(atom,'C.1') + set_type(b,b.element+'.1') + if atom.sybyl_assigned: + return + + # sp2 carbon + if planar: + set_type(atom,'C.2') + # check for N.pl3 + for b in atom.bonded_atoms: + if b.element=='N': + if len(b.bonded_atoms)<3 or is_planar(b): + set_type(b,'N.pl3') + return + + # sp3 carbon + set_type(atom, 'C.3') + return + + # Nitrogen + if atom.element == 'N': + # check for planar N + if len(atom.bonded_atoms)==1: + if is_planar(atom.bonded_atoms[0]): + set_type(atom,'N.pl3') + return + + if planar: + set_type(atom,'N.pl3') + return + + set_type(atom,'N.3') + return + + # Oxygen + if atom.element == 'O': + set_type(atom,'O.3') + + if len(atom.bonded_atoms) == 1: + # check for carboxyl + if atom.bonded_atoms[0].element == 'C': + the_carbon = atom.bonded_atoms[0] + if len(the_carbon.bonded_atoms)==3 and the_carbon.count_bonded_elements('O')==2: + [O1,O2] = the_carbon.get_bonded_elements('O') + + if len(O1.bonded_atoms)==1 and len(O2.bonded_atoms)==1: + set_type(O1,'O.co2-') + set_type(O2,'O.co2') + set_type(the_carbon,'C.2') + return + + # check for X=O + if Source.calculations.squared_distance(atom, atom.bonded_atoms[0]) < max_C_double_bond_squared: + set_type(atom,'O.2') + if atom.bonded_atoms[0].element=='C': + set_type(atom.bonded_atoms[0],'C.2') + return + + + # Sulphur + if atom.element == 'S': + # check for SO2 + if list(bonded_elements.values()).count('O')==2: + i1 = list(bonded_elements.values()).index('O') + i2 = list(bonded_elements.values()).index('O',i1+1) + set_type(atom.bonded_atoms[i1],'O.2') + set_type(atom.bonded_atoms[i2],'O.2') + set_type(atom,'S.o2') + return + + # check for SO4 + if list(bonded_elements.values()).count('O')==4: + no_O2 = 0 + for i in range(len(atom.bonded_atoms)): + if len(atom.bonded_atoms[i].bonded_atoms)==1 and no_O2<2: + set_type(atom.bonded_atoms[i],'O.2') + no_O2+=1 + else: + set_type(atom.bonded_atoms[i],'O.3') + + set_type(atom,'S.3') + + + return + + + # Phosphorus + if atom.element == 'P': + set_type(atom,'P.3') + + # check for phosphate group + bonded_oxygens = atom.get_bonded_elements('O') + for o in bonded_oxygens: set_type(o,'O.3') +# if len(bonded_oxygens)>=3: +# # find oxygens only bonded to current phosphorus +# bonded_oxygens_1 = [o for o in bonded_oxygens if len(o.get_bonded_heavy_atoms())==1] +# # find the closest oxygen ... +# closest_oxygen = min(bonded_oxygens_1, +# key= lambda o:Source.calculations.squared_distance(atom,o)) +# # ... and set it to O.2 +# set_type(closest_oxygen,'O.2') + + return + + + + element = atom.element.capitalize() + set_type(atom,element) + # print('Using element as type for %s'%atom.element) + + return + + +def is_ring_member(atom): + return identify_ring(atom,atom,0,[]) + +def identify_ring(this_atom, original_atom, number, past_atoms): + number+=1 + past_atoms=past_atoms+[this_atom] + return_atoms = [] + if number > 10: + return return_atoms + + for atom in this_atom.get_bonded_heavy_atoms(): + if atom == original_atom and number>2: + return past_atoms + + if atom not in past_atoms: + these_return_atoms = identify_ring(atom, original_atom, number, past_atoms) + if len(these_return_atoms) > 0: + if len(return_atoms)>len(these_return_atoms) or len(return_atoms)==0: + return_atoms = these_return_atoms + + return return_atoms + + + + +def set_type(atom,type): + #print(atom, '->',type) + atom.sybyl_type = type + atom.sybyl_assigned=True + return + + + + +def is_planar(atom): + """ Finds out if atom forms a plane together with its bonded atoms""" + atoms = [atom]+atom.bonded_atoms + return are_atoms_planar(atoms) + +def are_atoms_planar(atoms): + if len(atoms)==0: + return False + if len(atoms)<4: + return False + v1 = vector(atom1=atoms[0], atom2=atoms[1]) + v2 = vector(atom1=atoms[0], atom2=atoms[2]) + n = (v1**v2).rescale(1.0) + + margin = 0.20 + for b in atoms[3:]: + v = vector(atom1=atoms[0], atom2=b).rescale(1.0) + #print(atoms[0],abs(v*n) ) + if abs(v*n)>margin: + return False + + return True + +def is_aromatic_ring(atoms): + if len(atoms)<5: + return False + + for i in range(len(atoms)): + if not are_atoms_planar(atoms[i:]+atoms[:i]): + return False + + return True + + + + diff --git a/Source/ligand_pka_values.py b/Source/ligand_pka_values.py new file mode 100644 index 0000000..591a77c --- /dev/null +++ b/Source/ligand_pka_values.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python + +import Source.molecular_container, Source.calculations, Source.calculations, Source.parameters, Source.pdb, Source.lib, os, subprocess, sys + +class ligand_pka_values: + def __init__(self, parameters): + self.parameters = parameters + + # attempt to find Marvin executables in the path + self.molconvert = self.find_in_path('molconvert') + self.cxcalc = self.find_in_path('cxcalc') + print('Found Marvin executables:') + print(self.cxcalc) + print(self.molconvert) + + return + + + def find_in_path(self, program): + path = os.environ.get('PATH').split(os.pathsep) + + l = [i for i in filter(lambda loc: os.access(loc, os.F_OK), + map(lambda dir: os.path.join(dir, program),path))] + + if len(l) == 0: + print('Error: Could not find %s. Please make sure that it is found in the path.'%program) + sys.exit(-1) + + return l[0] + + def get_marvin_pkas_for_pdb_file(self, file, no_pkas=10, min_pH =-10, max_pH=20): + molecule = Source.molecular_container.Molecular_container(file) + self.get_marvin_pkas_for_molecular_container(molecule, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + return + + def get_marvin_pkas_for_molecular_container(self, molecule, no_pkas=10, min_pH =-10, max_pH=20): + for name in molecule.conformation_names: + filename = '%s_%s'%(molecule.name,name) + self.get_marvin_pkas_for_conformation_container(molecule.conformations[name], name=filename, reuse=molecule.options.reuse_ligand_mol2_file, + no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + + return + + def get_marvin_pkas_for_conformation_container(self, conformation, name = 'temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + conformation.marvin_pkas_calculated = True + self.get_marvin_pkas_for_atoms(conformation.get_heavy_ligand_atoms(), name=name, reuse=reuse, + no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + + return + + def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + # do one molecule at the time so we don't confuse marvin + molecules = Source.lib.split_atoms_into_molecules(atoms) + for i in range(len(molecules)): + filename = '%s_%d.mol2'%(name, i+1) + self.get_marvin_pkas_for_molecule(molecules[i], filename=filename, reuse=reuse, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + + return + + + def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + # print out structure unless we are using user-modified structure + if not reuse: + Source.pdb.write_mol2_for_atoms(atoms, filename) + # check that we actually have a file to work with + if not os.path.isfile(filename): + print('Warning: Didn\'t find a user-modified file \'%s\' - generating one'%filename) + Source.pdb.write_mol2_for_atoms(atoms, filename) + + + + # Marvin + # calculate pKa values + options = 'pka -a %d -b %d --min %f --max %f -d large'%(no_pkas, no_pkas, min_pH, max_pH) + (output,errors) = subprocess.Popen([self.cxcalc, filename]+options.split(), + stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() + + if len(errors)>0: + print('********************************************************************************************************') + print('* Warning: Marvin execution failed: *') + print('* %-100s *'%errors) + print('* *') + print('* Please edit the ligand mol2 file and re-run PropKa with the -l option: %29s *'%filename) + print('********************************************************************************************************') + sys.exit(-1) + + # extract calculated pkas + indices,pkas,types = self.extract_pkas(output) + + # store calculated pka values + for i in range(len(indices)): + atoms[indices[i]].marvin_pka = pkas[i] + atoms[indices[i]].charge = {'a':-1,'b':+1}[types[i]] + print('%s model pKa: %.2f'%(atoms[indices[i]],pkas[i])) + + return + + def extract_pkas(self, output): + # split output + [tags, values,empty_line] = output.decode().split('\n') + #print(tags) + #print(values) + tags = tags.split('\t') + values = values.split('\t') + + # format values + types = [tags[i][0] for i in range(1,len(tags)-1) if len(values)>i and values[i] != ''] + indices = [int(a)-1 for a in values[-1].split(',') if a !=''] + values = [float(v.replace(',','.')) for v in values[1:-1] if v != ''] + + if len(indices) != len(values) != len(types): + raise Exception('Lengths of atoms and pka values mismatch') + + return indices, values, types + + + + diff --git a/Source/molecular_container.py b/Source/molecular_container.py new file mode 100755 index 0000000..83812f7 --- /dev/null +++ b/Source/molecular_container.py @@ -0,0 +1,247 @@ +#!/usr/bin/python +# +# Molecular container for storing all contents of pdb files +# +# + +import os, Source.pdb, sys, Source.version, Source.output, Source.conformation_container, Source.group, Source.lib + +class Molecular_container: + def __init__(self, input_file, options=None): + # printing out header before parsing input + Source.output.printHeader() + + # set up some values + self.options = options + self.input_file = input_file + self.dir = os.path.split(input_file)[0] + self.file = os.path.split(input_file)[1] + self.name = self.file[0:self.file.rfind('.')] + input_file_extension = input_file[input_file.rfind('.'):] + + # set the version + if options: + parameters = Source.parameters.Parameters(self.options.parameters) + else: + parameters = Source.parameters.Parameters('propka.cfg') + try: + exec('self.version = Source.version.%s(parameters)'%parameters.version) + except: + raise Exception('Error: Version %s does not exist'%parameters.version) + + # read the input file + if input_file_extension[0:4] == '.pdb': + # input is a pdb file + # read in atoms and top up containers to make sure that all atoms are present in all conformations + [self.conformations, self.conformation_names] = Source.pdb.read_pdb(input_file, self.version.parameters,self) + if len(self.conformations)==0: + print('Error: The pdb file does not seems to contain any molecular conformations') + sys.exit(-1) + + self.top_up_conformations() + + # make a structure precheck + Source.pdb.protein_precheck(self.conformations, self.conformation_names) + + # set up atom bonding and protonation + self.version.setup_bonding_and_protonation(self) + + # Extract groups + self.extract_groups() + + # sort atoms + for name in self.conformation_names: + self.conformations[name].sort_atoms() + + # find coupled groups + self.find_covalently_coupled_groups() + + # write out the input file + filename = self.file.replace(input_file_extension,'.propka_input') + Source.pdb.write_input(self, filename) + + + elif input_file_extension == '.propka_input': + #input is a propka_input file + [self.conformations, self.conformation_names] = Source.pdb.read_input(input_file, self.version.parameters, self) + + # Extract groups - this merely sets up the groups found in the input file + self.extract_groups() + + # do some additional set up + self.additional_setup_when_reading_input_file() + + else: + print('Unrecognized input file:%s'%input_file) + sys.exit(-1) + + + return + def top_up_conformations(self): + """ Makes sure that all atoms are present in all conformations """ + for name in self.conformation_names: + if name!='1A' and (len(self.conformations[name]) < len(self.conformations['1A'])): + self.conformations[name].top_up(self.conformations['1A']) + + return + + def find_covalently_coupled_groups(self): + print('-'*103) + for name in self.conformation_names: + self.conformations[name].find_covalently_coupled_groups() + + return + + def find_non_covalently_coupled_groups(self): + print('-'*103) + for name in self.conformation_names: + self.conformations[name].find_non_covalently_coupled_groups(verbose=self.options.display_coupled_residues) + + return + + def extract_groups(self): + """ Identify the groups needed for pKa calculation """ + for name in self.conformation_names: + self.conformations[name].extract_groups() + + return + + def additional_setup_when_reading_input_file(self): + for name in self.conformation_names: + self.conformations[name].additional_setup_when_reading_input_file() + + return + + + def calculate_pka(self): + # calculate for each conformation + for name in self.conformation_names: + self.conformations[name].calculate_pka(self.version, self.options) + + # find non-covalently coupled groups + self.find_non_covalently_coupled_groups() + + # find the average of the conformations + self.average_of_conformations() + + # print out the conformation-average results + Source.output.printResult(self, 'AVR', self.version.parameters) + + return + + def average_of_conformations(self): + # make a new configuration to hold the average values + avr_conformation = Source.conformation_container.Conformation_container(name='average', + parameters=self.conformations[self.conformation_names[0]].parameters, + molecular_container=self) + + for group in self.conformations[self.conformation_names[0]].get_titratable_groups_and_cysteine_bridges(): + # new group to hold average values + avr_group = group.clone() + # sum up all groups ... + for name in self.conformation_names: + group_to_add = self.conformations[name].find_group(group) + if group_to_add: + avr_group += group_to_add + else: + print('Warning: Group %s could not be found in conformation %s.'%(group.atom.residue_label, name)) + # ... and store the average value + avr_group = avr_group / len(self.conformation_names) + avr_conformation.groups.append(avr_group) + + # store information on coupling in the average container + if len(list(filter(lambda c: c.non_covalently_coupled_groups, self.conformations.values()))): + avr_conformation.non_covalently_coupled_groups = True + + # store chain info + avr_conformation.chains = self.conformations[self.conformation_names[0]].chains + + self.conformations['AVR'] = avr_conformation + return + + def write_pka(self, filename=None, reference="neutral", direction="folding", options=None): + #for name in self.conformation_names: + # Source.output.writePKA(self, self.version.parameters, filename='%s_3.1_%s.pka'%(self.name, name), + # conformation=name,reference=reference, + # direction=direction, options=options) + + # write out the average conformation + filename=os.path.join('%s.pka'%(self.name)) + + # if the display_coupled_residues option is true, + # write the results out to an alternative pka file + if self.options.display_coupled_residues: + filename=os.path.join('%s_alt_state.pka'%(self.name)) + + if hasattr(self.version.parameters, 'output_file_tag') and len(self.version.parameters.output_file_tag)>0: + filename=os.path.join('%s_%s.pka'%(self.name,self.version.parameters.output_file_tag)) + + Source.output.writePKA(self, self.version.parameters, filename=filename, + conformation='AVR',reference=reference, + direction=direction, options=options) + + return + + def getFoldingProfile(self, conformation='AVR',reference="neutral", direction="folding", grid=[0., 14., 0.1], options=None): + # calculate stability profile + profile = [] + for ph in Source.lib.make_grid(*grid): + ddg = self.conformations[conformation].calculate_folding_energy( pH=ph, reference=reference) + #print(ph,ddg) + profile.append([ph, ddg]) + + # find optimum + opt =[None, 1e6] + for point in profile: + opt = min(opt, point, key=lambda v:v[1]) + + # find values within 80 % of optimum + range_80pct = [None, None] + values_within_80pct = [p[0] for p in profile if p[1]< 0.8*opt[1]] + if len(values_within_80pct)>0: + range_80pct = [min(values_within_80pct), max(values_within_80pct)] + + # find stability range + stability_range = [None, None] + stable_values = [p[0] for p in profile if p[1]< 0.0] + + if len(stable_values)>0: + stability_range = [min(stable_values), max(stable_values)] + + return profile, opt, range_80pct, stability_range + + + def getChargeProfile(self, conformation='AVR', grid=[0., 14., .1]): + charge_profile = [] + for ph in Source.lib.make_grid(*grid): + q_unfolded, q_folded = self.conformations[conformation].calculate_charge(self.version.parameters, pH=ph) + charge_profile.append([ph, q_unfolded, q_folded]) + + return charge_profile + + def getPI(self, conformation='AVR', grid=[0., 14., 1], iteration=0): + #print('staring',grid, iteration) + # search + charge_profile = self.getChargeProfile(conformation=conformation, grid=grid) + pi = [] + pi_folded = pi_unfolded = [None, 1e6,1e6] + for point in charge_profile: + pi_folded = min(pi_folded, point, key=lambda v:abs(v[2])) + pi_unfolded = min(pi_unfolded, point, key=lambda v:abs(v[1])) + + # If results are not good enough, do it again with a higher sampling resolution + pi_folded_value = pi_folded[0] + pi_unfolded_value = pi_unfolded[0] + step = grid[2] + if (pi_folded[2] > 0.01 or pi_unfolded[1] > 0.01) and iteration<4: + pi_folded_value, x = self.getPI(conformation=conformation, grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0], iteration=iteration+1) + x, pi_unfolded_value = self.getPI(conformation=conformation, grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0], iteration=iteration+1) + + + return pi_folded_value, pi_unfolded_value + + + +if __name__ == '__main__': + input_file = sys.argv[1] + mc = Molecular_container(input_file) diff --git a/Source/output.py b/Source/output.py new file mode 100644 index 0000000..16a97e1 --- /dev/null +++ b/Source/output.py @@ -0,0 +1,391 @@ +import sys +import Source.lib + + +def printHeader(): + """ + prints the header section + """ + str = "%s\n" % ( getPropkaHeader() ) + str += "%s\n" % ( getReferencesHeader() ) + str += "%s\n" % ( getWarningHeader() ) + + print(str) + + +def writePDB(protein, file=None, filename=None, include_hydrogens=False, options=None): + """ + Write the residue to the new pdbfile + """ + + if file == None: + # opening file if not given + if filename == None: + filename = "%s.pdb" % (protein.name) + file = open(filename, 'w') + print("writing pdbfile %s" % (filename)) + close_file = True + else: + # don't close the file, it was opened in a different place + close_file = False + + numb = 0 + for chain in protein.chains: + for residue in chain.residues: + if residue.resName not in ["N+ ", "C- "]: + for atom in residue.atoms: + if include_hydrogens == False and atom.name[0] == "H": + """ don't print """ + else: + numb += 1 + line = atom.makePDBLine(numb=numb) + line += "\n" + file.write(line) + + if close_file == True: + file.close() + + +def writePKA(protein, parameters, filename=None, conformation ='1A',reference="neutral", direction="folding", verbose=False, options=None): + """ + Write the pka-file based on the given protein + """ + verbose = True + if filename == None: + filename = "%s.pka" % (protein.name) + file = open(filename, 'w') + if verbose == True: + print("Writing %s" % (filename)) + + # writing propka header + str = "%s\n" % ( getPropkaHeader() ) + str += "%s\n" % ( getReferencesHeader() ) + str += "%s\n" % ( getWarningHeader() ) + + # writing pKa determinant section + str += getDeterminantSection(protein,conformation, parameters) + + # writing pKa summary section + str += getSummarySection(protein,conformation,parameters) + str += "%s\n" % ( getTheLine() ) + + # printing Folding Profile + str += getFoldingProfileSection(protein, conformation=conformation, reference=reference, direction=direction, window=[0., 14., 1.0], options=options) + + # printing Protein Charge Profile + str += getChargeProfileSection(protein, conformation=conformation) + + # now, writing the pka text to file + file.write(str) + + file.close() + + +def printTmProfile(protein, reference="neutral", window=[0., 14., 1.], Tm=[0.,0.], Tms=None, ref=None, verbose=False, options=None): + """ + prints Tm profile + """ + profile = protein.getTmProfile(reference=reference, grid=[0., 14., 0.1], Tms=Tms, ref=ref, options=options) + if profile == None: + str = "Could not determine Tm-profile\n" + else: + str = " suggested Tm-profile for %s\n" % (protein.name) + for (pH, Tm) in profile: + if pH >= window[0] and pH <= window[1] and (pH%window[2] < 0.01 or pH%window[2] > 0.99*window[2]): + str += "%6.2lf%10.2lf\n" % (pH, Tm) + print(str) + + +def printResult(protein, conformation, parameters): + """ + prints all resulting output from determinants and down + """ + printPKASection(protein, conformation, parameters) + + +def printPKASection(protein, conformation, parameters): + """ + prints out the pka-section of the result + """ + # geting the determinants section + str = getDeterminantSection(protein, conformation, parameters) + print(str) + + str = getSummarySection(protein,conformation,parameters) + print(str) + + +def getDeterminantSection(protein, conformation, parameters): + """ + prints out the pka-section of the result + """ + # getting the same order as in propka2.0 + str = "%s\n" % ( getDeterminantsHeader() ) + # printing determinants + for chain in protein.conformations[conformation].chains: + for residue_type in parameters.write_out_order: + groups = [g for g in protein.conformations[conformation].groups if g.atom.chainID == chain] + for group in groups: + if group.residue_type == residue_type: + str += "%s" % ( group.getDeterminantString(parameters.remove_penalised_group) ) + + # Add a warning in case of coupled residues + if protein.conformations[conformation].non_covalently_coupled_groups and not protein.options.display_coupled_residues: + str += 'Coupled residues (marked *) were detected. Please rerun PropKa with the --display-coupled-residues \nor -d option for detailed information.\n' + + return str + + +def getSummarySection(protein, conformation, parameters): + """ + prints out the pka-section of the result + """ + str = "%s\n" % ( getSummaryHeader() ) + # printing pKa summary + for residue_type in parameters.write_out_order: + for group in protein.conformations[conformation].groups: + if group.residue_type == residue_type: + str += "%s" % ( group.getSummaryString(parameters.remove_penalised_group) ) + + return str + + +def getFoldingProfileSection(protein, conformation='AVR', direction="folding", reference="neutral", window=[0., 14., 1.0], verbose=False, options=None): + """ + returns the protein-folding-profile section + """ + str = getTheLine() + str += "\n" + str += "Free energy of %9s (kcal/mol) as a function of pH (using %s reference)\n" % (direction, reference) + + profile, [pH_opt, dG_opt], [dG_min, dG_max], [pH_min, pH_max] = protein.getFoldingProfile(conformation=conformation, + reference=reference, + direction=direction, grid=[0., 14., 0.1], options=options) + if profile == None: + str += "Could not determine folding profile\n" + else: + for (pH, dG) in profile: + if pH >= window[0] and pH <= window[1] and (pH%window[2] < 0.05 or pH%window[2] > 0.95): + str += "%6.2lf%10.2lf\n" % (pH, dG) + str += "\n" + + if pH_opt == None or dG_opt == None: + str += "Could not determine pH optimum\n" + else: + str += "The pH of optimum stability is %4.1lf for which the free energy is%6.1lf kcal/mol at 298K\n" % (pH_opt, dG_opt) + + if dG_min == None or dG_max == None: + str += "Could not determine pH values where the free energy is within 80 %s of minimum\n" % ("%") + else: + str += "The free energy is within 80 %s of maximum at pH %4.1lf to %4.1lf\n" % ("%", dG_min, dG_max) + + if pH_min == None or pH_max == None: + str += "Could not determine the pH-range where the free energy is negative\n\n" + else: + str += "The free energy is negative in the range %4.1lf - %4.1lf\n\n" % (pH_min, pH_max) + + + return str + + + +def getChargeProfileSection(protein, conformation='AVR', options=None): + """ + returns the protein-folding-profile section + """ + str = "Protein charge of folded and unfolded state as a function of pH\n" + + profile = protein.getChargeProfile(conformation=conformation,grid=[0., 14., 1.]) + if profile == None: + str += "Could not determine charge profile\n" + else: + str += "%6s%10s%8s\n" % ("pH", "unfolded", "folded") + for (pH, Q_mod, Q_pro) in profile: + str += "%6.2lf%10.2lf%8.2lf\n" % (pH, Q_mod, Q_pro) + + + pI_pro, pI_mod = protein.getPI(conformation=conformation) + if pI_pro == None or pI_mod == None: + str += "Could not determine the pI\n\n" + else: + str += "The pI is %5.2lf (folded) and %5.2lf (unfolded)\n" % (pI_pro, pI_mod) + + + return str + + +def writeJackalScapFile(mutationData=None, filename="1xxx_scap.list", options=None): + """ + writing a scap file for, i.e., generating a mutated protein + """ + file = open(filename, 'w') + + for chainID, code1, resNumb, code2 in mutationData: + str = "%s, %d, %s\n" % (chainID, resNumb, code2) + file.write(str) + file.close() + + +def writeScwrlSequenceFile(sequence, filename="x-ray.seq", options=None): + """ + writing a scwrl sequence file for, e.g., generating a mutated protein + """ + file = open(filename, 'w') + + start = 0 + while len(sequence[start:]) > 60: + file.write( "%s\n" % (sequence[start:start+60]) ) + start += 60 + file.write( "%s\n" % (sequence[start:]) ) + + file.close() + + + +# --- various header text --- # + + +def getPropkaHeader(): + """ + Creates the header + """ + from datetime import date + today = date.today() + + + + str = "propka3.1 %93s\n" % (today) + str += "-------------------------------------------------------------------------------------------------------\n" + str += "-- --\n" + str += "-- PROPKA: A PROTEIN PKA PREDICTOR --\n" + str += "-- --\n" + str += "-- VERSION 1.0, 04/25/2004, IOWA CITY --\n" + str += "-- BY HUI LI --\n" + str += "-- --\n" + str += "-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN --\n" + str += "-- BY DELPHINE C. BAS AND DAVID M. ROGERS --\n" + str += "-- --\n" + str += "-- VERSION 3.0, 01/06/2011, COPENHAGEN --\n" + str += "-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD --\n" + str += "-- --\n" + str += "-- VERSION 3.1, 07/01/2011, COPENHAGEN --\n" + str += "-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON --\n" + str += "-------------------------------------------------------------------------------------------------------\n" + str += "\n" + + + + return str + + +def getReferencesHeader(): + """ + Returns the 'references' part in output file + """ + + str = "" + str += "-------------------------------------------------------------------------------------------------------\n" + str += " References:\n" + str += "\n" + str += " Very Fast Empirical Prediction and Rationalization of Protein pKa Values\n" + str += " Hui Li, Andrew D. Robertson and Jan H. Jensen\n" + str += " PROTEINS: Structure, Function, and Bioinformatics 61:704-721 (2005)\n" + str += " \n" + str += " Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand Complexes\n" + str += " Delphine C. Bas, David M. Rogers and Jan H. Jensen\n" + str += " PROTEINS: Structure, Function, and Bioinformatics 73:765-783 (2008)\n" + str += " \n" + str += " PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical pKa predictions\n" + str += " Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, and Jan H. Jensen\n" + str += " Journal of Chemical Theory and Computation, 7(2):525-537 (2011)\n" + str += " \n" + str += " Improved Treatment of Ligands and Coupling Effects in Empirical Calculation\n" + str += " and Rationalization of pKa Values\n" + str += " Chresten R. Sondergaard, Mats H.M. Olsson, Michal Rostkowski, and Jan H. Jensen\n" + str += " Journal of Chemical Theory and Computation, (2011)\n" + str += " \n" + str += "-------------------------------------------------------------------------------------------------------\n" + + return str + + +def getWarningHeader(): + """ + Returns the 'warning' part in output file + """ + + str = "" + + return str + + +def getDeterminantsHeader(): + """ + Creates the Determinant header + """ + str = "" + str += "--------- ----- ------ --------------------- -------------- -------------- --------------\n" + str += " DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC \n" + str += " RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION \n" + str += "--------- ----- ------ --------- --------- -------------- -------------- --------------\n" + + return str + + +def getSummaryHeader(): + """ + returns the summary header + """ + str = getTheLine() + str += "\n" + str += "SUMMARY OF THIS PREDICTION\n" + str += " Group pKa model-pKa ligand atom-type" + + return str + + +def getTheLine(): + """ + draw the line - Johnny Cash would have been proud - or actually Aerosmith! + """ + str = "" + for i in range(0, 104): + str += "-" + + return str + + +# Interaction maps +def make_interaction_map(name, list, interaction): + """ Print out an interaction map named 'name' of the groups in 'list' + based on the function 'interaction' """ + + # return an empty string, if the list is empty + if len(list)==0: + return '' + + # for long list, use condensed formatting + if len(list)>10: + res = 'Condensed form:\n' + for i in range(len(list)): + for j in range(i,len(list)): + if interaction(list[i],list[j]): + res += 'Coupling: %9s - %9s\n'%(list[i].label,list[j].label) + return res + + # Name and map header + res = '%s\n%12s'%(name,'') + for g in list: + res += '%9s | '%g.label + + # do the map + for g1 in list: + res += '\n%-12s'%(g1.label) + for g2 in list: + tag = '' + if interaction(g1, g2): + tag = ' X ' + res += '%10s| '%tag + + return res + diff --git a/Source/parameters.py b/Source/parameters.py new file mode 100644 index 0000000..80d50f1 --- /dev/null +++ b/Source/parameters.py @@ -0,0 +1,529 @@ +import math +import Source.lib as lib +import sys, os + + +# names and types of all key words in configuration file +matrices = ['interaction_matrix'] + +pair_wise_matrices = ['sidechain_cutoffs'] + +number_dictionaries = ['VanDerWaalsVolume','charge','model_pkas','ions', + 'valence_electrons','custom_model_pkas'] + +list_dictionaries = ['backbone_NH_hydrogen_bond','backbone_CO_hydrogen_bond'] + +string_dictionaries = ['protein_group_mapping'] + +string_lists = ['ignore_residues','angular_dependent_sidechain_interactions', + 'acid_list','base_list','exclude_sidechain_interactions', + 'backbone_reorganisation_list','write_out_order'] + +distances = ['desolv_cutoff','buried_cutoff','coulomb_cutoff1','coulomb_cutoff2'] + +parameters = ['Nmin','Nmax','desolvationSurfaceScalingFactor','desolvationPrefactor', + 'desolvationAllowance','coulomb_diel','COO_HIS_exception','OCO_HIS_exception', + 'CYS_HIS_exception','CYS_CYS_exception','min_ligand_model_pka','max_ligand_model_pka', + 'include_H_in_interactions','coupling_max_number_of_bonds', + 'min_bond_distance_for_hydrogen_bonds','coupling_penalty', + 'shared_determinants','common_charge_centre','hide_penalised_group', 'remove_penalised_group', + 'max_intrinsic_pKa_diff','min_interaction_energy','max_free_energy_diff','min_swap_pka_shift', + 'min_pka','max_pka','sidechain_interaction'] + +strings = ['version','output_file_tag','ligand_typing','pH','reference'] + + + + +class Parameters: + def __init__(self, parameter_file): + + self.set_up_data_structures() + self.read_parameters(parameter_file) + + #self.print_interaction_parameters() + #self.print_interaction_parameters_latex() + #####self.print_interactions_latex() + #sys.exit(0) + + + return + + + def read_parameters(self, file): + # try to locate the parameters file + try: + path = os.path.dirname(__file__) + ifile = os.path.join(path,'../'+file) + input = lib.open_file_for_reading(ifile) + except: + input = lib.open_file_for_reading(file) + + for line in input: + self.parse_line(line) + + return + + + def parse_line(self, line): + # first, remove comments + comment_pos = line.find('#') + if comment_pos != -1: + line = line[:comment_pos] + + # split the line into words + words = line.split() + if len(words) == 0: + return + + # parse the words + if len(words)==3 and words[0] in number_dictionaries: + self.parse_to_number_dictionary(words) + elif len(words)==2 and words[0] in string_lists: + self.parse_to_string_list(words) + elif len(words)==2 and words[0] in distances: + self.parse_distance(words) + elif len(words)==2 and words[0] in parameters: + self.parse_parameter(words) + elif len(words)==2 and words[0] in strings: + self.parse_string(words) + elif len(words)>2 and words[0] in list_dictionaries: + self.parse_to_list_dictionary(words) + elif words[0] in matrices+pair_wise_matrices: + self.parse_to_matrix(words) + elif len(words)==3 and words[0] in string_dictionaries: + self.parse_to_string_dictionary(words) + + + #print(words) + + return + + + def parse_to_number_dictionary(self, words): + exec('self.%s[\'%s\'] = %s'%tuple(words)) + return + + def parse_to_string_dictionary(self, words): + exec('self.%s[\'%s\'] = \'%s\''%tuple(words)) + return + + def parse_to_list_dictionary(self, words): + exec('if not \'%s\' in self.%s.keys(): self.%s[\'%s\'] = []'%(words[1],words[0],words[0],words[1])) + for word in words[2:]: + exec('self.%s[\'%s\'].append(%s)'%(words[0],words[1],word)) + + return + + def parse_to_string_list(self, words): + exec('self.%s.append(\'%s\')'%tuple(words)) + return + + def parse_to_matrix(self, words): + exec('self.%s.add(%s)'%(words[0],tuple(words[1:]))) + return + + def parse_distance(self, words): + # float check needed + exec('self.%s = %s'%tuple(words)) + exec('self.%s_squared = pow(%s,2)'%tuple(words)) + return + + def parse_parameter(self, words): + exec('self.%s = %s'%tuple(words)) + return + + def parse_string(self, words): + #print('self.%s = \'%s\''%tuple(words)) + exec('self.%s = \'%s\''%tuple(words)) + return + + + def set_up_data_structures(self): + for key_word in number_dictionaries+list_dictionaries+string_dictionaries: + exec('self.%s = {}'%key_word) + for key_word in string_lists: + exec('self.%s = []'%key_word) + for key_word in strings: + exec('self.%s = \'\''%key_word) + for key_word in matrices: + exec('self.%s = Interaction_matrix(\'%s\')'%(key_word,key_word)) + for key_word in pair_wise_matrices: + exec('self.%s =Pair_wise_matrix(\'%s\')'%(key_word,key_word)) + + return + + def print_interaction_parameters(self): + print('--------------- Model pKa values ----------------------') + for k in self.model_pkas.keys(): + print('%3s %8.2f'%(k,self.model_pkas[k])) + + print('') + print('--------------- Interactions --------------------------') + agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + + map = { + 'CG' :['ARG'], + 'C2N':['ARG'], + 'N30':['N+','LYS'], + 'N31':['N+','LYS'], + 'N32':['N+','LYS'], + 'N33':['N+','LYS'] , + 'NAR':['HIS'], + 'OCO':['COO'], + 'OP' :[],#['TYR','SER'], + 'SH' :['CYS'] , + 'NP1':[], + 'OH' :['ROH'], + 'O3' :[] , + 'CL' :[], + 'F' :[], + 'NAM':['AMD'], + 'N1' :[], + 'O2' :[]} + + + for g1 in agroups: + for g2 in lgroups: + + interaction = '%3s %3s %1s %4s %4s'%(g1,g2, + self.interaction_matrix[g1][g2], + self.sidechain_cutoffs.get_value(g1,g2)[0], + self.sidechain_cutoffs.get_value(g1,g2)[1]) + + map_interaction = '' + if g2 in map: + for m in map[g2]: + map_interaction += '|%3s %3s %1s %4s %4s'%(g1,m, + self.interaction_matrix[g1][m], + self.sidechain_cutoffs.get_value(g1,m)[0], + self.sidechain_cutoffs.get_value(g1,m)[1]) + if self.interaction_matrix[g1][m] != self.interaction_matrix[g1][g2]: + map_interaction += '* ' + if self.sidechain_cutoffs.get_value(g1,m)[0] != self.sidechain_cutoffs.get_value(g1,g2)[0] or \ + self.sidechain_cutoffs.get_value(g1,m)[1] != self.sidechain_cutoffs.get_value(g1,g2)[1]: + map_interaction += '! ' + else: + map_interaction += ' ' + if len(map[g2])==0 and (self.sidechain_cutoffs.get_value(g1,g2)[0] !=3 or self.sidechain_cutoffs.get_value(g1,g2)[1] != 4): + map_interaction += '? ' + + print(interaction,map_interaction ) + + if g1==g2: + break + print('-') + + print('--------------- Exceptions ----------------------------') + print('COO-HIS',self.COO_HIS_exception) + print('OCO-HIS',self.OCO_HIS_exception) + print('CYS-HIS',self.CYS_HIS_exception) + print('CYS-CYS',self.CYS_CYS_exception) + + + print('--------------- Mapping -------------------------------') + print(""" +Titratable: +CG ARG +C2N ARG +N30 N+/LYS +N31 N+/LYS +N32 N+/LYS +N33 N+/LYS +NAR HIS +OCO COO +OP TYR/SER? +SH CYS + +Non-titratable: +NP1 AMD? +OH ROH +O3 ? +CL +F +NAM +N1 +O2 +""") + return + + + + + + + def print_interaction_parameters_latex(self): +# print('--------------- Model pKa values ----------------------') +# for k in self.model_pkas.keys(): +# print('%3s %8.2f'%(k,self.model_pkas[k])) + +# print('') +# print('--------------- Interactions --------------------------') + agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + + map = { + 'CG' :['ARG'], + 'C2N':['ARG'], + 'N30':['N+','LYS'], + 'N31':['N+','LYS'], + 'N32':['N+','LYS'], + 'N33':['N+','LYS'] , + 'NAR':['HIS'], + 'OCO':['COO'], + 'OP' :[],#['TYR','SER'], + 'SH' :['CYS'] , + 'NP1':['AMD'], + 'OH' :['ROH'], + 'O3' :[] , + 'CL' :[], + 'F' :[], + 'NAM':[], + 'N1' :[], + 'O2' :[]} + + + s = """ +\\begin{longtable}{lllll} +\\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.} +\\label{tab:ligand_interaction_parameters}\\\\ + +\\toprule +Group1 & Group2 & Interaction & c1 &c2 \\\\ +\\midrule +\\endfirsthead + +\\multicolumn{5}{l}{\\emph{continued from the previous page}}\\\\ +\\toprule +Group1 & Group2 & Interaction & c1 &c2 \\\\ +\\midrule +\\endhead + +\\midrule +\\multicolumn{5}{r}{\\emph{continued on the next page}}\\\\ +\\endfoot + +\\bottomrule +\\endlastfoot + +"""%(self.sidechain_cutoffs.default) + for g1 in agroups: + for g2 in lgroups: + if self.interaction_matrix[g1][g2]=='-': + continue + if self.sidechain_cutoffs.get_value(g1,g2)==self.sidechain_cutoffs.default: + continue + + + s+= '%3s & %3s & %1s & %4s & %4s\\\\ \n'%(g1,g2, + self.interaction_matrix[g1][g2], + self.sidechain_cutoffs.get_value(g1,g2)[0], + self.sidechain_cutoffs.get_value(g1,g2)[1]) + + if g1==g2: + break + + s += ' \\end{longtable}\n' + print(s) + return + + def print_interactions_latex(self): + agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + + + s = """ +\\begin{longtable}{%s} +\\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.} +\\label{tab:ligand_interaction_parameters}\\\\ + +\\toprule +Group1 & Group2 & Interaction & c1 &c2 \\\\ +\\midrule +\\endfirsthead + +\\multicolumn{5}{l}{\\emph{continued from the previous page}}\\\\ +\\toprule +Group1 & Group2 & Interaction & c1 &c2 \\\\ +\\midrule +\\endhead + +\\midrule +\\multicolumn{5}{r}{\\emph{continued on the next page}}\\\\ +\\endfoot + +\\bottomrule +\\endlastfoot + +"""%('l'*len(agroups),self.sidechain_cutoffs.default) + for g1 in agroups: + for g2 in agroups: + + s+= '%3s & %3s & %1s & %4s & %4s\\\\ \n'%(g1,g2, + self.interaction_matrix[g1][g2], + self.sidechain_cutoffs.get_value(g1,g2)[0], + self.sidechain_cutoffs.get_value(g1,g2)[1]) + + if g1==g2: + break + + s += ' \\end{longtable}\n' + print(s) + return + + + + +class Interaction_matrix: + def __init__(self, name): + self.name = name + self.ordered_keys = [] + self.dictionary = {} + return + + def add(self,words): + new_group = words[0] + self.ordered_keys.append(new_group) + + if not new_group in self.dictionary.keys(): + self.dictionary[new_group] = {} + + for i in range(len(self.ordered_keys)): + group = self.ordered_keys[i] + if len(words)>i+1: + try: + exec('self.value = %s'%words[i+1]) + except: + self.value = words[i+1] + self.dictionary[group][new_group] = self.value + self.dictionary[new_group][group] = self.value + + + return + + def get_value(self, item1, item2): + try: + return self.dictionary[item1][item2] + except: + return None + + def __getitem__(self, group): + if group not in self.dictionary.keys(): + raise Exception('%s not found in interaction matrix %s'%(group,self.name)) + return self.dictionary[group] + + + def keys(self): + return self.dictionary.keys() + + def __str__(self): + s = ' ' + for k1 in self.ordered_keys: + s+='%3s '%k1 + s+='\n' + for k1 in self.ordered_keys: + s+='%3s '%k1 + for k2 in self.ordered_keys: + s+='%3s '%self[k1][k2] + s+='\n' + + return s +# ks = ['COO', 'SER', 'ARG', 'LYS', 'HIS', 'AMD', 'CYS', 'TRP','ROH','TYR','N+','CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + +# p = '' +# n=0 +# for i in range(len(ks)): +# for j in range(i,len(ks)): +# if not [0.0,0.0]==self[ks[i]][ks[j]]: +# if not [3.0,4.0]==self[ks[i]][ks[j]]: +# p+='sidechain_cutoff %3s %3s %s\n'%(ks[i],ks[j],self[ks[i]][ks[j]]) +# n+=1 + +# print('total',n,len(ks)) +# return p + + + +class Pair_wise_matrix: + def __init__(self, name): + self.name = name + self.dictionary = {} + self.default = [0.0, 0.0] + return + + def add(self,words): + # assign the default value + if len(words)==3 and words[0]=='default': + self.default = [float(words[1]), float(words[2])] + return + + # assign non-default values + g1 = words[0] + g2 = words[1] + v = [float(words[2]), float(words[3])] + + self.insert(g1,g2,v) + self.insert(g2,g1,v) + + return + + def insert(self, k1,k2,v): + + if k1 in self.dictionary.keys() and k2 in self.dictionary[k1].keys(): + if k1!=k2: + print ('Warning: Paramter value for %s, %s defined more than once'%(k1,k2)) + + if not k1 in self.dictionary: + self.dictionary[k1] = {} + + self.dictionary[k1][k2] =v + + return + + def get_value(self, item1, item2): + + try: + return self.dictionary[item1][item2] + except: + return self.default + + def __getitem__(self, group): + if group not in self.dictionary.keys(): + raise Exception('%s not found in interaction matrix %s'%(group,self.name)) + return self.dictionary[group] + + + def keys(self): + return self.dictionary.keys() + + def __str__(self): + s='' + for k1 in self.keys(): + for k2 in self[k1].keys(): + s += '%s %s %s\n'%(k1,k2,self[k1][k2]) + + return s + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Source/pdb.py b/Source/pdb.py new file mode 100644 index 0000000..d0a3bfa --- /dev/null +++ b/Source/pdb.py @@ -0,0 +1,323 @@ +import string, sys, copy, Source.lib +from Source.atom import Atom +from Source.conformation_container import Conformation_container + +expected_atom_numbers = {'ALA':5, + 'ARG':11, + 'ASN':8, + 'ASP':8, + 'CYS':6, + 'GLY':4, + 'GLN':9, + 'GLU':9, + 'HIS':10, + 'ILE':8, + 'LEU':8, + 'LYS':9, + 'MET':8, + 'PHE':11, + 'PRO':7, + 'SER':6, + 'THR':7, + 'TRP':14, + 'TYR':12, + 'VAL':7} + + +def read_pdb(pdb_file, parameters, molecule): + conformations = {} + + # read in all atoms in the file + lines = get_atom_lines_from_pdb(pdb_file, ignore_residues = parameters.ignore_residues, keep_protons = molecule.options.keep_protons, chains=molecule.options.chains) + for (name, atom) in lines: + if not name in conformations.keys(): + conformations[name] = Conformation_container(name=name, parameters=parameters, molecular_container=molecule) + conformations[name].add_atom(atom) + + # make a sorted list of conformation names + names = sorted(conformations.keys(), key=Source.lib.conformation_sorter) + + return [conformations, names] + +def protein_precheck(conformations, names): + + for name in names: + atoms = conformations[name].atoms + + res_ids = [] + [res_ids.append(resid_from_atom(a)) for a in atoms if not res_ids.count(resid_from_atom(a))] + + for res_id in res_ids: + res_atoms = [a for a in atoms if resid_from_atom(a) == res_id and a.element != 'H'] + resname = res_atoms[0].resName + residue_label = '%3s%5s'%(resname, res_id) + + # ignore ligand residues + if resname not in expected_atom_numbers: + continue + + # check for c-terminal + if 'C-' in [a.terminal for a in res_atoms]: + if len(res_atoms) != expected_atom_numbers[resname]+1: + print('Warning: Unexpected number (%d) of atoms in residue %s in conformation %s'%(len(res_atoms),residue_label, name)) + continue + + # check number of atoms in residue + if len(res_atoms) != expected_atom_numbers[resname]: + print('Warning: Unexpected number (%d) of atoms in residue %s in conformation %s'%(len(res_atoms),residue_label, name)) + + return + +def resid_from_atom(a): + return '%4d %s %s'%(a.resNumb,a.chainID,a.icode) + + +def get_atom_lines_from_pdb(pdb_file, ignore_residues = [], keep_protons=False, tags = ['ATOM ', 'HETATM'], chains=None): + + lines = Source.lib.open_file_for_reading(pdb_file).readlines() + nterm_residue = 'next_residue' + old_residue = None + terminal = None + model = 1 + + + for line in lines: + tag = line[0:6] + + # set the model number + if tag == 'MODEL ': + model = int(line[6:]) + nterm_residue = 'next_residue' + + if tag == 'TER ': + nterm_residue = 'next_residue' + + if tag in tags: + alt_conf_tag = line[16] + residue_name = line[12:16] + residue_number = line[22:26] + + # check if we want this residue + if line[17:20] in ignore_residues: + continue + if chains and line[21] not in chains: + continue + + # set the Nterm residue number - nessecary because we may need to + # identify more than one N+ group for structures with alt_conf tags + if nterm_residue == 'next_residue' and tag == 'ATOM ': + # make sure that we reached a new residue - nessecary if OXT is not the last atom in + # the previous residue + if old_residue != residue_number: + nterm_residue = residue_number + old_residue = None + + + # Identify the configuration + # convert digits to letters + if alt_conf_tag in '123456789': + alt_conf_tag = chr(ord(alt_conf_tag)+16) + if alt_conf_tag == ' ': + alt_conf_tag = 'A' + conformation = '%d%s'%(model, alt_conf_tag) + + # set the terminal + if tag == 'ATOM ': + if residue_name.strip() == 'N' and nterm_residue == residue_number: + terminal = 'N+' + if residue_name.strip() in ['OXT','O\'\'']: + terminal = 'C-' + nterm_residue = 'next_residue' + old_residue = residue_number + # and yield the atom + atom = Atom(line=line) + atom.terminal = terminal + #if keep_protons: + # atom.is_protonated = True + if not (atom.element == 'H' and not keep_protons): #ignore hydrogen + yield (conformation, atom) + + terminal = None + + return + + +def write_pdb(conformation, filename): + write_pdb_for_atoms(conformation.atoms, filename) + return + +def write_pdb_for_atoms(atoms, filename, make_conect_section=False): + out = Source.lib.open_file_for_writing(filename) + + for atom in atoms: + out.write(atom.make_pdb_line()) + + if make_conect_section: + for atom in atoms: + out.write(atom.make_conect_line()) + + + out.close() + + return + + + +def write_mol2_for_atoms(atoms, filename): + + header = '@MOLECULE\n\n%d %d\nSMALL\nUSER_CHARGES\n' + + atoms_section = '@ATOM\n' + for i in range(len(atoms)): + atoms_section += atoms[i].make_mol2_line(i+1) + + + bonds_section = '@BOND\n' + id = 1 + for i in range(len(atoms)): + for j in range(i+1,len(atoms)): + if atoms[i] in atoms[j].bonded_atoms: + type = get_bond_order(atoms[i],atoms[j]) + bonds_section += '%7d %7d %7d %7s\n'%(id, i+1, j+1, type) + id+=1 + + substructure_section = '@SUBSTRUCTURE\n\n' + if len(atoms)>0: + substructure_section = '@SUBSTRUCTURE\n%-7d %10s %7d\n'%(atoms[0].resNumb,atoms[0].resName,atoms[0].numb) + + out = Source.lib.open_file_for_writing(filename) + out.write(header%(len(atoms),id-1)) + out.write(atoms_section) + out.write(bonds_section) + out.write(substructure_section) + out.close() + + return + +def get_bond_order(atom1, atom2): + type = '1' + pi_electrons1 = atom1.number_of_pi_electrons_in_double_and_triple_bonds + pi_electrons2 = atom2.number_of_pi_electrons_in_double_and_triple_bonds + + if '.ar' in atom1.sybyl_type: + pi_electrons1 -=1 + if '.ar' in atom2.sybyl_type: + pi_electrons2 -=1 + + if pi_electrons1 > 0 and pi_electrons2 > 0: + type = '%d'%(min(pi_electrons1, pi_electrons2)+1) + + if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type: + type = 'ar' + + + return type + + + +def write_input(molecular_container, filename): + out = Source.lib.open_file_for_writing(filename) + + for conformation_name in molecular_container.conformation_names: + out.write('MODEL %s\n'%conformation_name) + # write atoms + for atom in molecular_container.conformations[conformation_name].atoms: + out.write(atom.make_input_line()) + # write bonds + for atom in molecular_container.conformations[conformation_name].atoms: + out.write(atom.make_conect_line()) + # write covalently coupled groups + for group in molecular_container.conformations[conformation_name].groups: + out.write(group.make_covalently_coupled_line()) + # write non-covalently coupled groups + for group in molecular_container.conformations[conformation_name].groups: + out.write(group.make_non_covalently_coupled_line()) + out.write('ENDMDL\n') + + out.close() + + return + + +def read_input(input_file, parameters,molecule): + conformations = {} + + # read in all atoms in the input file + lines = get_atom_lines_from_input(input_file) + for (name, atom) in lines: + if not name in conformations.keys(): + conformations[name] = Conformation_container(name=name, parameters=parameters, molecular_container=molecule) + conformations[name].add_atom(atom) + + # make a sorted list of conformation names + names = sorted(conformations.keys(), key=Source.lib.conformation_sorter) + + return [conformations, names] + + + +def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']): + lines = Source.lib.open_file_for_reading(input_file).readlines() + conformation = '' + + atoms = {} + numbers = [] + + for line in lines: + tag = line[0:6] + + # set the conformation + if tag == 'MODEL ': + conformation = line[6:].strip() + + # found an atom - save it + if tag in tags: + atom = Atom(line=line) + atom.get_input_parameters() + atom.groups_extracted = 1 + atom.is_protonated = True + atoms[atom.numb] = atom + numbers.append(atom.numb) + + # found bonding information - apply it + if tag == 'CONECT' and len(line)>14: + conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] + center_atom = atoms[int(conect_numbers[0])] + for n in conect_numbers[1:]: + b = atoms[int(n)] + # remember to check for cysteine bridges + if center_atom.element == 'S' and b.element == 'S': + center_atom.cysteine_bridge = True + b.cysteine_bridge = True + # set up bonding + if not b in center_atom.bonded_atoms: + center_atom.bonded_atoms.append(b) + if not center_atom in b.bonded_atoms: + b.bonded_atoms.append(center_atom) + + # found info on covalent coupling + if tag == 'CCOUPL' and len(line)>14: + conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] + center_atom = atoms[int(conect_numbers[0])] + for n in conect_numbers[1:]: + cg = atoms[int(n)] + center_atom.group.couple_covalently(cg.group) + + # found info on non-covalent coupling + if tag == 'NCOUPL' and len(line)>14: + conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] + center_atom = atoms[int(conect_numbers[0])] + for n in conect_numbers[1:]: + cg = atoms[int(n)] + center_atom.group.couple_non_covalently(cg.group) + + # this conformation is done - yield the atoms + if tag == 'ENDMDL': + for n in numbers: + yield (conformation, atoms[n]) + # prepare for next conformation + atoms = {} + numbers = [] + + + return diff --git a/Source/protein_bonds.dat b/Source/protein_bonds.dat new file mode 100644 index 0000000..fc6f5d8 --- /dev/null +++ b/Source/protein_bonds.dat @@ -0,0 +1,568 @@ +(dp0 +S'CYS' +p1 +(dp2 +S'CB' +p3 +(lp4 +S'CA' +p5 +aS'SG' +p6 +asg5 +(lp7 +g3 +asg6 +(lp8 +g3 +aS'SG' +p9 +assS'GLN' +p10 +(dp11 +S'CB' +p12 +(lp13 +S'CA' +p14 +aS'CG' +p15 +asg14 +(lp16 +g12 +asg15 +(lp17 +g12 +aS'CD' +p18 +asg18 +(lp19 +g15 +aS'OE1' +p20 +aS'NE2' +p21 +asg21 +(lp22 +g18 +asg20 +(lp23 +g18 +assS'HIS' +p24 +(dp25 +S'CD2' +p26 +(lp27 +S'CG' +p28 +aS'NE2' +p29 +asS'CB' +p30 +(lp31 +S'CA' +p32 +ag28 +asg32 +(lp33 +g30 +asg28 +(lp34 +g30 +aS'ND1' +p35 +ag26 +asS'CE1' +p36 +(lp37 +g35 +ag29 +asg35 +(lp38 +g28 +ag36 +asg29 +(lp39 +g26 +ag36 +assS'ASN' +p40 +(dp41 +S'CB' +p42 +(lp43 +S'CA' +p44 +aS'CG' +p45 +asg44 +(lp46 +g42 +asS'ND2' +p47 +(lp48 +g45 +asg45 +(lp49 +g42 +aS'OD1' +p50 +ag47 +asg50 +(lp51 +g45 +assS'VAL' +p52 +(dp53 +S'CG1' +p54 +(lp55 +S'CB' +p56 +asg56 +(lp57 +S'CA' +p58 +ag54 +aS'CG2' +p59 +asg58 +(lp60 +g56 +asg59 +(lp61 +g56 +assS'LYS' +p62 +(dp63 +S'CB' +p64 +(lp65 +S'CA' +p66 +aS'CG' +p67 +asg66 +(lp68 +g64 +asg67 +(lp69 +g64 +aS'CD' +p70 +asS'CE' +p71 +(lp72 +g70 +aS'NZ' +p73 +asg70 +(lp74 +g67 +ag71 +asg73 +(lp75 +g71 +assS'ILE' +p76 +(dp77 +S'CG1' +p78 +(lp79 +S'CB' +p80 +aS'CD1' +p81 +asg80 +(lp82 +S'CA' +p83 +ag78 +aS'CG2' +p84 +asg83 +(lp85 +g80 +asg84 +(lp86 +g80 +asg81 +(lp87 +g78 +assS'PRO' +p88 +(dp89 +S'CB' +p90 +(lp91 +S'CA' +p92 +aS'CG' +p93 +asg92 +(lp94 +g90 +asS'CD' +p95 +(lp96 +S'N' +p97 +ag93 +asg93 +(lp98 +g90 +ag95 +asg97 +(lp99 +g95 +assS'THR' +p100 +(dp101 +S'CB' +p102 +(lp103 +S'CA' +p104 +aS'OG1' +p105 +aS'CG2' +p106 +asg104 +(lp107 +g102 +asg106 +(lp108 +g102 +asg105 +(lp109 +g102 +assS'PHE' +p110 +(dp111 +S'CD2' +p112 +(lp113 +S'CG' +p114 +aS'CE2' +p115 +asS'CB' +p116 +(lp117 +S'CA' +p118 +ag114 +asg118 +(lp119 +g116 +asg114 +(lp120 +g116 +aS'CD1' +p121 +ag112 +asS'CZ' +p122 +(lp123 +S'CE1' +p124 +ag115 +asg121 +(lp125 +g114 +ag124 +asg124 +(lp126 +g121 +ag122 +asg115 +(lp127 +g112 +ag122 +assS'ALA' +p128 +(dp129 +S'CB' +p130 +(lp131 +S'CA' +p132 +asg132 +(lp133 +g130 +assS'MET' +p134 +(dp135 +S'CB' +p136 +(lp137 +S'CA' +p138 +aS'CG' +p139 +asg138 +(lp140 +g136 +asg139 +(lp141 +g136 +aS'SD' +p142 +asS'CE' +p143 +(lp144 +g142 +asg142 +(lp145 +g139 +ag143 +assS'ASP' +p146 +(dp147 +S'CB' +p148 +(lp149 +S'CA' +p150 +aS'CG' +p151 +asg150 +(lp152 +g148 +asg151 +(lp153 +g148 +aS'OD1' +p154 +aS'OD2' +p155 +asg155 +(lp156 +g151 +asg154 +(lp157 +g151 +assS'LEU' +p158 +(dp159 +S'CB' +p160 +(lp161 +S'CA' +p162 +aS'CG' +p163 +asg162 +(lp164 +g160 +asg163 +(lp165 +g160 +aS'CD1' +p166 +aS'CD2' +p167 +asg166 +(lp168 +g163 +asg167 +(lp169 +g163 +assS'ARG' +p170 +(dp171 +S'CB' +p172 +(lp173 +S'CA' +p174 +aS'CG' +p175 +asg174 +(lp176 +g172 +asg175 +(lp177 +g172 +aS'CD' +p178 +asS'NE' +p179 +(lp180 +g178 +aS'CZ' +p181 +asg178 +(lp182 +g175 +ag179 +asg181 +(lp183 +g179 +aS'NH1' +p184 +aS'NH2' +p185 +asg184 +(lp186 +g181 +asg185 +(lp187 +g181 +assS'TRP' +p188 +(dp189 +S'CZ2' +p190 +(lp191 +S'CE2' +p192 +aS'CH2' +p193 +asS'CB' +p194 +(lp195 +S'CA' +p196 +aS'CG' +p197 +asg196 +(lp198 +g194 +asg197 +(lp199 +g194 +aS'CD1' +p200 +aS'CD2' +p201 +asg193 +(lp202 +g190 +aS'CZ3' +p203 +asg192 +(lp204 +g201 +aS'NE1' +p205 +ag190 +asS'CE3' +p206 +(lp207 +g201 +ag203 +asg200 +(lp208 +g197 +ag205 +asg201 +(lp209 +g197 +ag192 +ag206 +asg203 +(lp210 +g206 +ag193 +asg205 +(lp211 +g200 +ag192 +assS'GLU' +p212 +(dp213 +S'OE2' +p214 +(lp215 +S'CD' +p216 +asS'CA' +p217 +(lp218 +S'CB' +p219 +asS'CG' +p220 +(lp221 +g219 +ag216 +asg216 +(lp222 +g220 +aS'OE1' +p223 +ag214 +asg219 +(lp224 +g217 +ag220 +asg223 +(lp225 +g216 +assS'TYR' +p226 +(dp227 +S'CD2' +p228 +(lp229 +S'CG' +p230 +aS'CE2' +p231 +asS'OH' +p232 +(lp233 +S'CZ' +p234 +asS'CB' +p235 +(lp236 +S'CA' +p237 +ag230 +asg237 +(lp238 +g235 +asg230 +(lp239 +g235 +aS'CD1' +p240 +ag228 +asg234 +(lp241 +S'CE1' +p242 +ag231 +ag232 +asg240 +(lp243 +g230 +ag242 +asg242 +(lp244 +g240 +ag234 +asg231 +(lp245 +g228 +ag234 +assS'SER' +p246 +(dp247 +S'OG' +p248 +(lp249 +S'CB' +p250 +asg250 +(lp251 +S'CA' +p252 +ag248 +asg252 +(lp253 +g250 +ass. \ No newline at end of file diff --git a/Source/protonate.py b/Source/protonate.py new file mode 100755 index 0000000..92ab0f9 --- /dev/null +++ b/Source/protonate.py @@ -0,0 +1,443 @@ +#!/usr/bin/python + +from Source.vector_algebra import * +import Source.bonds, Source.pdb, Source.atom + +class Protonate: + """ Protonates atoms using VSEPR theory """ + + def __init__(self, verbose=False): + self.verbose=verbose + + self.valence_electrons = {'H': 1, + 'He':2, + 'Li':1, + 'Be':2, + 'B': 3, + 'C': 4, + 'N': 5, + 'O': 6, + 'F': 7, + 'Ne':8, + 'Na':1, + 'Mg':2, + 'Al':3, + 'Si':4, + 'P': 5, + 'S': 6, + 'Cl':7, + 'Ar':8, + 'K': 1, + 'Ca':2, + 'Sc':2, + 'Ti':2, + 'Va':2, + 'Cr':1, + 'Mn':2, + 'Fe':2, + 'Co':2, + 'Ni':2, + 'Cu':1, + 'Zn':2, + 'Ga':3, + 'Ge':4, + 'As':5, + 'Se':6, + 'Br':7, + 'Kr':8} + + + + + + self.standard_charges= {'ARG-NH1':1.0, + 'ASP-OD2':-1.0, + 'GLU-OE2':-1.0, + 'HIS-ND1':1.0, + 'LYS-NZ':1.0, + 'N+':1.0, + 'C-':-1.0} + + + self.sybyl_charges = {'N.pl3':+1, + 'N.3':+1, + 'N.4':+1, + 'N.ar':+1, + 'O.co2-':-1} + + + self.bond_lengths = {'C':1.09, + 'N':1.01, + 'O':0.96, + 'F':0.92, + 'Cl':1.27, + 'Br':1.41, + 'I':1.61, + 'S':1.35} + + + # protonation_methods[steric_number] = method + self.protonation_methods = {4:self.tetrahedral, + 3:self.trigonal} + + + return + + + + + def protonate(self, molecules): + """ Will protonate all atoms in the molecular container """ + + self.display('----- Protonation started -----') + # Remove all currently present hydrogen atoms + self.remove_all_hydrogen_atoms(molecules) + + # protonate all atoms + for name in molecules.conformation_names: + non_H_atoms = molecules.conformations[name].get_non_hydrogen_atoms() + + for atom in non_H_atoms: + self.protonate_atom(atom) + + # fix hydrogen names + #self.set_proton_names(non_H_atoms) + + return + + + def remove_all_hydrogen_atoms(self, molecular_container): + for name in molecular_container.conformation_names: + molecular_container.conformations[name].atoms = molecular_container.conformations[name].get_non_hydrogen_atoms() + return + + + def set_charge(self, atom): + # atom is a protein atom + if atom.type=='atom': + key = '%3s-%s'%(atom.resName, atom.name) + if atom.terminal: + self.display(atom.terminal) + key=atom.terminal + if key in list(self.standard_charges.keys()): + atom.charge = self.standard_charges[key] + self.display('Charge', atom, atom.charge) + atom.charge_set = True + # atom is a ligand atom + elif atom.type=='hetatm': + if atom.sybyl_type in list(self.sybyl_charges.keys()): + atom.charge = self.sybyl_charges[atom.sybyl_type] + atom.sybyl_type = atom.sybyl_type.replace('-','') + atom.charge_set = True + + return + + def protonate_atom(self, atom): + if atom.is_protonated: return + if atom.element == 'H': return + + self.set_charge(atom) + self.set_number_of_protons_to_add(atom) + self.set_steric_number_and_lone_pairs(atom) + self.add_protons(atom) + atom.is_protonated = True + return + + def set_proton_names(self, heavy_atoms): + + for heavy_atom in heavy_atoms: + i = 1 + for bonded in heavy_atom.bonded_atoms: + + if bonded.element == 'H': + bonded.name+='%d'%i + i+=1 + + + return + + + def set_number_of_protons_to_add(self, atom): + self.display('*'*10) + self.display('Setting number of protons to add for',atom) + atom.number_of_protons_to_add = 8 + self.display(' %4d'%8) + atom.number_of_protons_to_add -= self.valence_electrons[atom.element] + self.display('Valence eletrons: %4d'%-self.valence_electrons[atom.element]) + atom.number_of_protons_to_add -= len(atom.bonded_atoms) + self.display('Number of bonds: %4d'%- len(atom.bonded_atoms)) + atom.number_of_protons_to_add -= atom.number_of_pi_electrons_in_double_and_triple_bonds + self.display('Pi electrons: %4d'%-atom.number_of_pi_electrons_in_double_and_triple_bonds) + atom.number_of_protons_to_add += int(atom.charge) + self.display('Charge: %4.1f'%atom.charge) + + self.display('-'*10) + self.display(atom.number_of_protons_to_add) + + return + + def set_steric_number_and_lone_pairs(self, atom): + + # If we already did this, there is no reason to do it again + if atom.steric_number_and_lone_pairs_set: + return + + self.display('='*10) + self.display('Setting steric number and lone pairs for',atom) + + # costumly set the N backbone atoms up for peptide bond trigonal planer shape + #if atom.name == 'N' and len(atom.bonded_atoms) == 2: + # atom.steric_number = 3 + # atom.number_of_lone_pairs = 0 + # self.display 'Peptide bond: steric number is %d and number of lone pairs is %s'%(atom.steric_number, + # atom.number_of_lone_pairs) + # return + + + atom.steric_number = 0 + + self.display('%65s: %4d'%('Valence electrons',self.valence_electrons[atom.element])) + atom.steric_number += self.valence_electrons[atom.element] + + self.display('%65s: %4d'%('Number of bonds',len(atom.bonded_atoms))) + atom.steric_number += len(atom.bonded_atoms) + + self.display('%65s: %4d'%('Number of hydrogen atoms to add',atom.number_of_protons_to_add)) + atom.steric_number += atom.number_of_protons_to_add + + self.display('%65s: %4d'%('Number of pi-electrons in double and triple bonds(-)',atom.number_of_pi_electrons_in_double_and_triple_bonds)) + atom.steric_number -= atom.number_of_pi_electrons_in_double_and_triple_bonds + + self.display('%65s: %4d'%('Number of pi-electrons in conjugated double and triple bonds(-)',atom.number_of_pi_electrons_in_conjugate_double_and_triple_bonds)) + atom.steric_number -= atom.number_of_pi_electrons_in_conjugate_double_and_triple_bonds + + self.display('%65s: %4d'%('Number of donated co-ordinated bonds',0)) + atom.steric_number += 0 + + self.display('%65s: %4.1f'%('Charge(-)',atom.charge)) + atom.steric_number -= atom.charge + + atom.steric_number = math.floor(atom.steric_number/2.0) + + atom.number_of_lone_pairs = atom.steric_number - len(atom.bonded_atoms) - atom.number_of_protons_to_add + + self.display('-'*70) + self.display('%65s: %4d'%('Steric number',atom.steric_number)) + self.display('%65s: %4d'%('Number of lone pairs',atom.number_of_lone_pairs)) + + atom.steric_number_and_lone_pairs_set = True + + return + + + def add_protons(self, atom): + # decide which method to use + self.display('PROTONATING',atom) + if atom.steric_number in list(self.protonation_methods.keys()): + self.protonation_methods[atom.steric_number](atom) + else: + print('Warning: Do not have a method for protonating',atom,'(steric number: %d)'%atom.steric_number) + + return + + + def trigonal(self, atom): + self.display('TRIGONAL - %d bonded atoms'%(len(atom.bonded_atoms))) + rot_angle = math.radians(120.0) + + c = vector(atom1 = atom) + + # 0 bonds + if len(atom.bonded_atoms) == 0: + pass + + # 1 bond + if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0: + # Add another atom with the right angle to the first one + a = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) + # use plane of bonded trigonal atom - e.g. arg + + self.set_steric_number_and_lone_pairs(atom.bonded_atoms[0]) + if atom.bonded_atoms[0].steric_number == 3 and len(atom.bonded_atoms[0].bonded_atoms)>1: + # use other atoms bonded to the neighbour to establish the plane, if possible + other_atom_indices = [] + for i in range(len(atom.bonded_atoms[0].bonded_atoms)): + if atom.bonded_atoms[0].bonded_atoms[i] != atom: + other_atom_indices.append(i) + + + v1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) + v2 = vector(atom1 = atom.bonded_atoms[0], + atom2 = atom.bonded_atoms[0].bonded_atoms[other_atom_indices[0]]) + + axis = v1**v2 + + # this is a trick to make sure that the order of atoms doesn't influence + # the final postions of added protons + if len(other_atom_indices)>1: + v3 = vector(atom1 = atom.bonded_atoms[0], + atom2 = atom.bonded_atoms[0].bonded_atoms[other_atom_indices[1]]) + + axis2 = v1**v3 + + if axis * axis2>0: + axis = axis+axis2 + else: + axis = axis-axis2 + + else: + axis = a.orthogonal() + + a = rotate_vector_around_an_axis(rot_angle, axis, a) + a = self.set_bond_distance(a, atom.element) + self.add_proton(atom, c+a) + + # 2 bonds + if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0: + # Add another atom with the right angle to the first two + a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) + a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) + + new_a = -a1 - a2 + new_a = self.set_bond_distance(new_a, atom.element) + self.add_proton(atom, c+new_a) + + + return + + + def tetrahedral(self, atom): + self.display('TETRAHEDRAL - %d bonded atoms'%(len(atom.bonded_atoms))) + rot_angle = math.radians(109.5) + + # sanity check + # if atom.number_of_protons_to_add + len(atom.bonded_atoms) != 4: + # self.display 'Error: Attempting tetrahedral structure with %d bonds'%(atom.number_of_protons_to_add + + # len(atom.bonded_atoms)) + + c = vector(atom1 = atom) + + # 0 bonds + if len(atom.bonded_atoms) == 0: + pass + + # 1 bond + if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0: + # Add another atom with the right angle to the first one + a = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) + axis = a.orthogonal() + a = rotate_vector_around_an_axis(rot_angle, axis, a) + a = self.set_bond_distance(a, atom.element) + self.add_proton(atom, c+a) + + # 2 bonds + if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0: + # Add another atom with the right angle to the first two + a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) + a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) + + axis = a1 + a2 + + new_a = rotate_vector_around_an_axis(math.radians(90), axis, -a1) + new_a = self.set_bond_distance(new_a, atom.element) + self.add_proton(atom, c+new_a) + + # 3 bonds + if len(atom.bonded_atoms) == 3 and atom.number_of_protons_to_add > 0: + a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) + a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) + a3 = vector(atom1 = atom, atom2 = atom.bonded_atoms[2]).rescale(1.0) + + new_a = -a1-a2-a3 + new_a = self.set_bond_distance(new_a, atom.element) + self.add_proton(atom, c+new_a) + + return + + + def add_proton(self, atom, position): + # Create the new proton + new_H = Source.atom.Atom() + new_H.setProperty(numb = None, + name = 'H%s'%atom.name[1:], + resName = atom.resName, + chainID = atom.chainID, + resNumb = atom.resNumb, + x = round(position.x,3), # round of to three digimal points + y = round(position.y,3), # to avoid round-off differences + z = round(position.z,3), # when input file + occ = None, + beta = None) + new_H.element = 'H' + new_H.type = atom.type + + new_H.bonded_atoms = [atom] + new_H.charge = 0 + new_H.steric_number = 0 + new_H.number_of_lone_pairs = 0 + new_H.number_of_protons_to_add = 0 + new_H.number_of_pi_electrons_in_double_and_triple_bonds = 0 + new_H.is_protonates = True + + atom.bonded_atoms.append(new_H) + atom.number_of_protons_to_add -=1 + atom.conformation_container.add_atom(new_H) + + # update names of all protons on this atom + new_H.residue_label = "%-3s%4d%2s" % (new_H.name,new_H.resNumb, new_H.chainID) + no_protons = atom.count_bonded_elements('H') + if no_protons > 1: + i = 1 + for proton in atom.get_bonded_elements('H'): + proton.name = 'H%s%d'%(atom.name[1:],i) + proton.residue_label = "%-3s%4d%2s" % (proton.name,proton.resNumb, proton.chainID) + i+=1 + + + self.display('added',new_H, 'to',atom) + return + + def set_bond_distance(self, a, element): + d = 1.0 + if element in list(self.bond_lengths.keys()): + d = self.bond_lengths[element] + else: + print('WARNING: Bond length for %s not found, using the standard value of %f'%(element, d)) + + a = a.rescale(d) + + return a + + def display(self,*text): + if self.verbose: + s = '' + for t in text: + s+='%s '%t + print(s) + return + + +if __name__ == '__main__': + import protein, pdb, sys,os + arguments = sys.argv + if len(arguments) != 2: + print('Usage: protonate.py ') + sys.exit(0) + + filename = arguments[1] + if not os.path.isfile(filename): + print('Error: Could not find \"%s\"'%filename) + sys.exit(1) + + + p = Protonate() + pdblist = pdb.readPDB(filename) + my_protein = protein.Protein(pdblist,'test.pdb') + + p.remove_all_hydrogen_atoms_from_protein(my_protein) + my_protein.writePDB('before_protonation.pdb') + + p.protonate_protein(my_protein) + + ## write out protonated file + my_protein.writePDB('protonated.pdb') diff --git a/Source/vector_algebra.py b/Source/vector_algebra.py new file mode 100644 index 0000000..0603ce9 --- /dev/null +++ b/Source/vector_algebra.py @@ -0,0 +1,315 @@ +import math + +class vector: + """ Vector """ + def __init__(self, xi=0.0, yi=0.0, zi=0.0, atom1 = 0, atom2 = 0): + self.x = xi + self.y = yi + self.z = zi + + if atom1: + # make vector pointing to atom1 + self.x = atom1.x + self.y = atom1.y + self.z = atom1.z + + if atom2: + # make inter-atomic vector (atom1 -> atom2) + self.x = atom2.x - self.x + self.y = atom2.y - self.y + self.z = atom2.z - self.z + + return + + def __add__(self, other): + return vector(self.x + other.x, + self.y + other.y, + self.z + other.z) + + def __sub__(self, other): + return vector(self.x - other.x, + self.y - other.y, + self.z - other.z) + + def __mul__(self, other): + """ Dot product, scalar and matrix multiplication """ + + if isinstance(other,vector): + return self.x * other.x + self.y * other.y + self.z * other.z + elif isinstance(other, matrix4x4): + return vector( + xi = other.a11*self.x + other.a12*self.y + other.a13*self.z + other.a14*1.0, + yi = other.a21*self.x + other.a22*self.y + other.a23*self.z + other.a24*1.0, + zi = other.a31*self.x + other.a32*self.y + other.a33*self.z + other.a34*1.0 + ) + elif type(other) in [int, float]: + return vector(self.x * other, self.y * other, self.z * other) + else: + print('%s not supported'%type(other)) + raise TypeError + + def __rmul__(self,other): + return self.__mul__(other) + + + def __pow__(self, other): + """ Cross product """ + return vector(self.y * other.z - self.z * other.y, + self.z * other.x - self.x * other.z, + self.x * other.y - self.y * other.x) + + + def __neg__(self): + res = vector(xi = -self.x, + yi = -self.y, + zi = -self.z) + return res + + def sq_length(self): + return self.x * self.x + self.y * self.y + self.z * self.z + + def length(self): + return math.sqrt(self.sq_length()) + + def __str__(self): + return '%10.4f %10.4f %10.4f'%(self.x, self.y, self.z) + + def __repr__(self): + return '' + + def orthogonal(self): + """ Returns a vector orthogonal to self """ + res = vector(self.y, -self.x, 0) + + if abs(self.y) < abs(self.z): + res = vector(self.z, 0, -self.x) + + return res + + def rescale(self, new_length): + """ Rescale vector to new length while preserving direction """ + frac = new_length/(self.length()) + res = vector(xi = self.x*frac, + yi = self.y*frac, + zi = self.z*frac) + return res + + +class matrix4x4: + def __init__(self, + a11i=0.0, a12i=0.0, a13i=0.0, a14i=0.0, + a21i=0.0, a22i=0.0, a23i=0.0, a24i=0.0, + a31i=0.0, a32i=0.0, a33i=0.0, a34i=0.0, + a41i=0.0, a42i=0.0, a43i=0.0, a44i=0.0): + + self.a11 = a11i + self.a12 = a12i + self.a13 = a13i + self.a14 = a14i + + self.a21 = a21i + self.a22 = a22i + self.a23 = a23i + self.a24 = a24i + + self.a31 = a31i + self.a32 = a32i + self.a33 = a33i + self.a34 = a34i + + self.a41 = a41i + self.a42 = a42i + self.a43 = a43i + self.a44 = a44i + + return + + + + + +# methods working on vectors + + +def angle(a, b): + dot = a * b + return math.acos(dot / (a.length() * b.length())) + + +def angle_degrees(a,b): + return math.degrees(angle(a, b)) + + +def signed_angle_around_axis(a,b, axis): + na = a**axis + nb = b**axis + + v = angle(na,nb) + + d = b*(a**axis) + + if d < 0: + v =-v + + return v + +def signed_angle_degrees(a,b): + return 180/math.pi * signed_angle(a, b) + + +def rotate_vector_around_an_axis(theta, axis, v): + #print "# 1. rotate space about the z-axis so that the rotation axis lies in the xz-plane" + gamma = 0.0 + if axis.y != 0: + if axis.x != 0: + gamma = -axis.x/abs(axis.x)*math.asin(axis.y/(math.sqrt(axis.x*axis.x + axis.y*axis.y))) + else: + gamma = math.pi/2.0 + + Rz = rotate_atoms_around_z_axis(gamma) + v = Rz * v + axis = Rz * axis + + #print "# 2. rotate space about the y-axis so that the rotation axis lies along the z-axis" + beta = 0.0 + if axis.x != 0: + beta = -axis.x/abs(axis.x)*math.acos(axis.z/math.sqrt(axis.x*axis.x + axis.z*axis.z)) + Ry = rotate_atoms_around_y_axis(beta) + v = Ry * v + axis = Ry *axis + + #print "# 3. perform the desired rotation by theta about the z-axis" + Rz = rotate_atoms_around_z_axis(theta) + v = Rz * v + + #print "# 4. apply the inverse of step 2." + Ry = rotate_atoms_around_y_axis(-beta) + v = Ry * v + + #print "# 5. apply the inverse of step 1." + Rz = rotate_atoms_around_z_axis(-gamma) + v = Rz * v + + return v + +def rotate_atoms_around_z_axis(angle): + Rz = matrix4x4( + a11i = math.cos(angle), a12i = -math.sin(angle), a13i = 0.0, a14i = 0.0, + a21i = math.sin(angle), a22i = math.cos(angle), a23i = 0.0, a24i = 0.0, + a31i = 0.0 , a32i = 0.0 , a33i = 1.0, a34i = 0.0, + a41i = 0.0 , a42i = 0.0 , a43i = 0.0, a44i = 1.0 + ) + + return Rz + + +def rotate_atoms_around_y_axis(angle): + Ry = matrix4x4( + a11i = math.cos(angle), a12i = 0.0, a13i = math.sin(angle), a14i = 0.0, + a21i = 0.0 , a22i = 1.0, a23i = 0.0 , a24i = 0.0, + a31i = -math.sin(angle), a32i = 0.0, a33i = math.cos(angle), a34i = 0.0, + a41i = 0.0 , a42i = 0.0, a43i = 0.0 , a44i = 1.0 + ) + + return Ry + + + +class multi_vector: + def __init__(self, atom1=0, atom2=0): + self.vectors = [] + self.keys = [] + + # store vectors for all configurations of atoms + if atom1!=0: + self.keys = lib.get_sorted_configurations(atom1.configurations.keys()) + if atom2!=0: + keys2 = lib.get_sorted_configurations(atom2.configurations.keys()) + if self.keys != keys2: + raise 'Cannot make multi vector: Atomic configurations mismatch for\n %s\n %s\n'%(atom1,atom2) + for key in self.keys: + atom1.setConfiguration(key) + if atom2!=0: + atom2.setConfiguration(key) + v = vector(atom1=atom1, atom2=atom2) + self.vectors.append(v) + #print(key,v) + return + + def __getattribute__(self,name): + try: + return object.__getattribute__(self, name) + except AttributeError: + return self.do_job(name) + + def __str__(self): + res = '' + for i in range(len(self.keys)): + res += '%s %s\n'%(self.keys[i], self.vectors[i]) + return res + + + def do_job(self, job): + #print(job) + self.res = multi_vector() + for i in range(len(self.vectors)): + self.res.vectors.append(eval('self.vectors[%d].%s()'%(i,job))) + self.res.keys.append(self.keys[i]) + return self.get_result + + def get_result(self): + return self.res + + def generic_operation(self, operation, other): + if self.keys != other.keys: + raise 'Incompatable keys' + + self.res = multi_vector() + for i in range(len(self.vectors)): + self.res.vectors.append(eval('self.vectors[%d] %s other.vectors[%d]'%(i,operation,i))) + self.res.keys.append(self.keys[i]) + return + + def __add__(self, other): + self.generic_operation('+',other) + return self.res + + def __sub__(self, other): + self.generic_operation('-',other) + return self.res + + def __mul__(self, other): + self.generic_operation('*',other) + return self.res + + def __pow__(self, other): + self.generic_operation('**',other) + return self.res + + def generic_self_operation(self, operation): + return + + def __neg__(self): + self.generic_operation('*',-1.0) + return self.res + + def rescale(self, new_length): + self.res = multi_vector() + for i in range(len(self.vectors)): + self.res.vectors.append(self.vectors[i].rescale(new_length)) + self.res.keys.append(self.keys[i]) + return self.res + + +def rotate_multi_vector_around_an_axis(theta, axis, v): + """ both axis ans v must be multi_vectors """ + + if axis.keys != v.keys: + raise 'Incompatible keys in rotate multi_vector' + + res = multi_vector() + for i in range(len(v.keys)): + res.vectors.append(rotate_vector_around_an_axis(theta, axis.vectors[i], v.vectors[i])) + res.keys.append(v.keys[i]) + + return res diff --git a/Source/version.py b/Source/version.py new file mode 100644 index 0000000..5a5f603 --- /dev/null +++ b/Source/version.py @@ -0,0 +1,212 @@ +import math +import Source.lib as lib +import sys, os +import Source.calculations as calculations +import Source.parameters + + +class version: + def __init__(self,parameters): + self.parameters = parameters + return + + # desolvation + def calculate_desolvation(self, group): + return self.desolvation_model(self.parameters, group) + + def calculatePairWeight(self, Nmass1, Nmass2): + return self.weight_pair_method(self.parameters, Nmass1, Nmass2) + + # side chains + def hydrogen_bond_interaction(self, group1, group2): + return self.hydrogen_bond_interaction_model(group1, group2, self) + + def calculateSideChainEnergy(self, distance, dpka_max, cutoff, weight, f_angle): + return self.sidechain_interaction_model(distance, dpka_max, cutoff, f_angle) # weight is ignored in 3.0 Sep07 + + # coulomb + def electrostatic_interaction(self, group1, group2, distance): + return self.electrostatic_interaction_model(group1, group2, distance, self) + + def calculateCoulombEnergy(self, distance, weight): + return self.coulomb_interaction_model(distance, weight, self.parameters) + + def checkCoulombPair(self, group1, group2, distance): + return self.check_coulomb_pair_method(self.parameters, group1, group2, distance) + + # backbone re-organisation + def calculateBackBoneReorganization(self, conformation): + return self.backbone_reorganisation_method(self.parameters, conformation) + + # exceptions + def checkExceptions(self, group1, group2): + return self.exception_check_method(self, group1, group2) + + def setup_bonding_and_protonation(self, molecular_container): + return self.molecular_preparation_method(self.parameters, molecular_container) + + def setup_bonding(self, molecular_container): + return self.prepare_bonds(self.parameters, molecular_container) + + + +class version_A(version): + def __init__(self, parameters): + # set the calculation rutines used in this version + version.__init__(self, parameters) + + # atom naming, bonding, and protonation + self.molecular_preparation_method = Source.calculations.setup_bonding_and_protonation + self.prepare_bonds = Source.calculations.setup_bonding + + + # desolvation related methods + self.desolvation_model = calculations.radial_volume_desolvation + self.weight_pair_method = calculations.calculatePairWeight + + # side chain methods + self.sidechain_interaction_model = Source.calculations.HydrogenBondEnergy + self.hydrogen_bond_interaction_model = Source.calculations.hydrogen_bond_interaction + + # colomb methods + self.electrostatic_interaction_model = Source.calculations.electrostatic_interaction + self.check_coulomb_pair_method = Source.calculations.checkCoulombPair + self.coulomb_interaction_model = Source.calculations.CoulombEnergy + + #backbone + self.backbone_interaction_model = Source.calculations.HydrogenBondEnergy + self.backbone_reorganisation_method = Source.calculations.BackBoneReorganization + + # exception methods + self.exception_check_method = Source.calculations.checkExceptions + return + + def get_hydrogen_bond_parameters(self, atom1, atom2): + dpka_max = self.parameters.sidechain_interaction + cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type) + return [dpka_max, cutoff] + + def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): + if backbone_atom.group_type == 'BBC': + if atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys(): + [v,c1,c2] = self.parameters.backbone_CO_hydrogen_bond[atom.group_type] + return [v,[c1,c2]] + + if backbone_atom.group_type == 'BBN': + if atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys(): + [v,c1,c2] = self.parameters.backbone_NH_hydrogen_bond[atom.group_type] + return [v,[c1,c2]] + + return None + + + + +class simple_hb(version_A): + def __init__(self, parameters): + # set the calculation rutines used in this version + version_A.__init__(self, parameters) + print('Using simple hb model') + return + + def get_hydrogen_bond_parameters(self, atom1, atom2): + return self.parameters.hydrogen_bonds.get_value(atom1.element, atom2.element) + + + def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): + return self.parameters.hydrogen_bonds.get_value(backbone_atom.element, atom.element) + + + + +class element_based_ligand_interactions(version_A): + def __init__(self, parameters): + # set the calculation rutines used in this version + version_A.__init__(self, parameters) + print('Using detailed SC model!') + return + + def get_hydrogen_bond_parameters(self, atom1, atom2): + if not 'hetatm' in [atom1.type, atom2.type]: + # this is a protein-protein interaction + dpka_max = self.parameters.sidechain_interaction.get_value(atom1.group_type, atom2.group_type) + cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type) + return [dpka_max, cutoff] + + # at least one ligand atom is involved in this interaction + # make sure that we are using the heavy atoms for finding paramters + elements = [] + for a in [atom1, atom2]: + if a.element == 'H': elements.append(a.bonded_atoms[0].element) + else: elements.append(a.element) + + return self.parameters.hydrogen_bonds.get_value(elements[0], elements[1]) + + + def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): + if atom.type == 'atom': + # this is a backbone-protein interaction + if backbone_atom.group_type == 'BBC' and\ + atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys(): + [v,c1,c2] = self.parameters.backbone_CO_hydrogen_bond[atom.group_type] + return [v,[c1,c2]] + + if backbone_atom.group_type == 'BBN' and\ + atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys(): + [v,c1,c2] = self.parameters.backbone_NH_hydrogen_bond[atom.group_type] + return [v,[c1,c2]] + else: + # this is a backbone-ligand interaction + # make sure that we are using the heavy atoms for finding paramters + elements = [] + for a in [backbone_atom, atom]: + if a.element == 'H': elements.append(a.bonded_atoms[0].element) + else: elements.append(a.element) + + res = self.parameters.hydrogen_bonds.get_value(elements[0], elements[1]) + if not res: + print('Could not determine backbone interaction parameters for:', + backbone_atom,atom) + + return + + return None + + + +class propka30(version): + def __init__(self, parameters): + # set the calculation rutines used in this version + version.__init__(self, parameters) + + # atom naming, bonding, and protonation + self.molecular_preparation_method = Source.calculations.setup_bonding_and_protonation_30_style + + # desolvation related methods + self.desolvation_model = calculations.radial_volume_desolvation + self.weight_pair_method = calculations.calculatePairWeight + + # side chain methods + self.sidechain_interaction_model = Source.calculations.HydrogenBondEnergy + + # colomb methods + self.check_coulomb_pair_method = Source.calculations.checkCoulombPair + self.coulomb_interaction_model = Source.calculations.CoulombEnergy + + #backbone + self.backbone_reorganisation_method = Source.calculations.BackBoneReorganization + + # exception methods + self.exception_check_method = Source.calculations.checkExceptions + + + return + + def get_hydrogen_bond_parameters(self, atom1, atom2): + dpka_max = self.parameters.sidechain_interaction.get_value(atom1.group_type, atom2.group_type) + cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type) + return [dpka_max, cutoff] + + + + diff --git a/propka.cfg b/propka.cfg new file mode 100644 index 0000000..a12e38a --- /dev/null +++ b/propka.cfg @@ -0,0 +1,398 @@ +# PropKa configuration file + +version version_A + +# Model pKa values +model_pkas C- 3.20 +model_pkas ASP 3.80 +model_pkas GLU 4.50 +model_pkas HIS 6.50 +model_pkas CYS 9.00 +model_pkas TYR 10.00 +model_pkas LYS 10.50 +model_pkas ARG 12.50 +#model_pkas SER 14.20 Jack Kyte: Structure in Protein Chemistry, 1995, Garland Publishing, Inc New York and London +model_pkas N+ 8.00 +model_pkas CG 11.50 +model_pkas C2N 11.50 +model_pkas N30 10.00 +model_pkas N31 10.00 +model_pkas N32 10.00 +model_pkas N33 10.00 +model_pkas NAR 5.00 +model_pkas OCO 4.50 +model_pkas SH 10.00 +model_pkas OP 6.00 + +# Custom ligand pKa values +# P. Acharya, P. Cheruku, S. Chatterjee, S. Acharya, and, J. Chattopadhyaya: +# Measurement of Nucleobase pKa Values in Model Mononucleotides +# Shows RNA-RNA Duplexes To Be More Stable than DNA-DNA Duplexes +# Journal of the American Chemical Society 2004 126 (9), 2862-2869 +# +custom_model_pkas DA-N1 3.82 +custom_model_pkas DA-N3 3.82 +custom_model_pkas DA-N7 3.82 +custom_model_pkas DA-OP1 1.00 +custom_model_pkas DA-OP2 1.00 + +custom_model_pkas DG-N1 9.59 +custom_model_pkas DG-N3 9.59 +custom_model_pkas DG-N7 9.59 +custom_model_pkas DG-OP1 1.00 +custom_model_pkas DG-OP2 1.00 + +custom_model_pkas DC-N3 4.34 +custom_model_pkas DC-OP1 1.00 +custom_model_pkas DC-OP2 1.00 + +custom_model_pkas DT-N3 10.12 +custom_model_pkas DT-OP1 1.00 +custom_model_pkas DT-OP2 1.00 + + +# protein group mapping +protein_group_mapping ASP-CG COO +protein_group_mapping GLU-CD COO +protein_group_mapping HIS-CG HIS +protein_group_mapping CYS-SG CYS +protein_group_mapping TYR-OH TYR +protein_group_mapping LYS-NZ LYS +protein_group_mapping ARG-CZ ARG +#protein_group_mapping SER-OG SER +protein_group_mapping THR-OG1 ROH +protein_group_mapping SER-OG ROH# +protein_group_mapping ASN-CG AMD +protein_group_mapping GLN-CD AMD +protein_group_mapping TRP-NE1 TRP + + +# matrix for propka interactions +# 'N' non-iterative interaction +# 'I' iterative interaction +# '-' no interaction + #CYS +interaction_matrix CYS I#N+ +interaction_matrix N+ N I#HIS +interaction_matrix HIS I N I#LYS +interaction_matrix LYS N N N I#AMD +interaction_matrix AMD N - N - -#COO +interaction_matrix COO I N I N N I#ARG +interaction_matrix ARG N N N N - N I#TRP +interaction_matrix TRP N - - - - N - -#ROH +interaction_matrix ROH N - - - - N - - -#TYR +interaction_matrix TYR N I I I N N N N N I#SER +interaction_matrix SER N N N N N N I N N N I #CG +interaction_matrix CG N N N N - N I - - N I I#C2N +interaction_matrix C2N N N N N - N I - - N I I I#N30 +interaction_matrix N30 N I N N - N N - - I N I I I#N31 +interaction_matrix N31 N I N N - N N - - I N I I I I#N32 +interaction_matrix N32 N I N N - N N - - I N I I I I I#N33 +interaction_matrix N33 N I N N - N N - - I N I I I I I I#NAR +interaction_matrix NAR I N I I N I N - - I N N N N N N N I#OCO +interaction_matrix OCO I N I N N I N N N N N N N N N N N I I#NP1 +interaction_matrix NP1 N - N - - N - - - N N - - - - - - N N -#OH +interaction_matrix OH N - - - - N - - - N N - - - - - - - N - -#O3 +interaction_matrix O3 N - N - - N - - - N N - - - - - - N N - - -#CL +interaction_matrix CL N - N - - N - - - N N - - - - - - N N - - - -#F +interaction_matrix F N - N - - N - - - N N - - - - - - N N - - - - -#NAM +interaction_matrix NAM N - N - - N - - - N N - - - - - - N N - - - - - -#N1 +interaction_matrix N1 N - N - - N - - - N N - - - - - - N N - - - - - - -#O2 +interaction_matrix O2 N - N - - N - - - N N - - - - - - N N - - - - - - - -#OP +interaction_matrix OP I N I N N I N N N N N N N N N N N I I N N N N N N N N I#SH +interaction_matrix SH I N N N N N N N N N N I I I I I I N N N N N N N N N N N I + +# Cutoff values for side chain interactions +# default value +sidechain_cutoffs default 3.0 4.0 +# COO +sidechain_cutoffs COO COO 2.5 3.5 +Sidechain_cutoffs COO SER 2.65 3.65 +sidechain_cutoffs COO ARG 1.85 2.85 +sidechain_cutoffs COO LYS 2.85 3.85 +sidechain_cutoffs COO HIS 2.0 3.0 +sidechain_cutoffs COO AMD 2.0 3.0 +sidechain_cutoffs COO TRP 2.0 3.0 +sidechain_cutoffs COO ROH 2.65 3.65 +sidechain_cutoffs COO TYR 2.65 3.65 +sidechain_cutoffs COO N+ 2.85 3.85 +sidechain_cutoffs COO CG 1.85 2.85 +sidechain_cutoffs COO C2N 1.85 2.85 +sidechain_cutoffs COO N30 2.85 3.85 +sidechain_cutoffs COO N31 2.85 3.85 +sidechain_cutoffs COO N32 2.85 3.85 +sidechain_cutoffs COO N33 2.85 3.85 +sidechain_cutoffs COO NAR 2.0 3.0 +sidechain_cutoffs COO OCO 2.5 3.5 +sidechain_cutoffs COO OH 2.65 3.65 +sidechain_cutoffs COO NAM 2.0 3.0 +# SER +sidechain_cutoffs SER SER 3.5 4.5 +sidechain_cutoffs SER ARG 2.5 4.0 +sidechain_cutoffs SER HIS 2.0 3.0 +sidechain_cutoffs SER AMD 2.5 3.5 +sidechain_cutoffs SER CYS 3.5 4.5 +sidechain_cutoffs SER TRP 2.5 3.5 +sidechain_cutoffs SER ROH 3.5 4.5 +sidechain_cutoffs SER CG 2.5 4.0 +sidechain_cutoffs SER C2N 2.5 4.0 +sidechain_cutoffs SER NAR 2.0 3.0 +sidechain_cutoffs SER OH 3.5 4.5 +sidechain_cutoffs SER SH 3.5 4.5 +sidechain_cutoffs SER TYR 3.5 4.5 +sidechain_cutoffs SER N+ 3.0 4.5 +sidechain_cutoffs SER NAM 2.5 3.5 +# ARG +sidechain_cutoffs ARG CYS 2.5 4.0 +sidechain_cutoffs ARG TYR 2.5 4.0 +sidechain_cutoffs ARG OCO 1.85 2.85 +sidechain_cutoffs ARG SH 2.5 4.0 +# HIS +sidechain_cutoffs HIS AMD 2.0 3.0 +sidechain_cutoffs HIS TYR 2.0 3.0 +sidechain_cutoffs HIS OCO 2.0 3.0 +# CYS +sidechain_cutoffs CYS CYS 3.0 5.0 +sidechain_cutoffs CYS TRP 2.5 3.5 +sidechain_cutoffs CYS ROH 3.5 4.5 +sidechain_cutoffs CYS AMD 2.5 3.5 +sidechain_cutoffs CYS TYR 3.5 4.5 +sidechain_cutoffs CYS N+ 3.0 4.5 +sidechain_cutoffs CYS CG 2.5 4.0 +sidechain_cutoffs CYS C2N 2.5 4.0 +sidechain_cutoffs CYS N30 3.0 4.5 +sidechain_cutoffs CYS N31 3.0 4.5 +sidechain_cutoffs CYS N32 3.0 4.5 +sidechain_cutoffs CYS N33 3.0 4.5 +sidechain_cutoffs CYS OH 3.5 4.5 +sidechain_cutoffs CYS NAM 2.5 3.5 +sidechain_cutoffs CYS SH 3.0 5.0 +# TYR +sidechain_cutoffs TYR TYR 3.5 4.5 +sidechain_cutoffs TYR N+ 3.0 4.5 +sidechain_cutoffs TYR AMD 2.5 3.5 +sidechain_cutoffs TYR TRP 2.5 3.5 +sidechain_cutoffs TYR ROH 3.5 4.5 +sidechain_cutoffs TYR CG 2.5 4.0 +sidechain_cutoffs TYR C2N 2.5 4.0 +sidechain_cutoffs TYR OCO 2.65 3.65 +sidechain_cutoffs TYR NAR 2.0 3.0 +sidechain_cutoffs TYR OH 3.5 4.5 +sidechain_cutoffs TYR NAM 2.5 3.5 +sidechain_cutoffs TYR SH 3.5 4.5 +# N+ +sidechain_cutoffs N+ OCO 2.85 3.85 +sidechain_cutoffs N+ SH 3.0 4.5 +# LYS +sidechain_cutoffs LYS OCO 2.85 3.85 +# OCO +sidechain_cutoffs OCO OCO 2.5 3.5 +sidechain_cutoffs OCO TRP 2.0 3.0 +sidechain_cutoffs OCO ROH 2.65 3.65 +sidechain_cutoffs OCO AMD 2.0 3.0 +sidechain_cutoffs OCO CG 1.85 2.85 +sidechain_cutoffs OCO C2N 1.85 2.85 +sidechain_cutoffs OCO N30 2.85 3.85 +sidechain_cutoffs OCO N31 2.85 3.85 +sidechain_cutoffs OCO N32 2.85 3.85 +sidechain_cutoffs OCO N33 2.85 3.85 +sidechain_cutoffs OCO NAR 2.0 3.0 +sidechain_cutoffs OCO OH 2.65 3.65 +sidechain_cutoffs OCO NAM 2.0 3.0 +# NAR +sidechain_cutoffs NAR AMD 2.0 3.0 +# SH +sidechain_cutoffs SH ROH 3.5 4.5 +sidechain_cutoffs SH TRP 2.5 3.5 +sidechain_cutoffs SH AMD 2.5 3.5 +sidechain_cutoffs SH NAM 2.5 3.5 +sidechain_cutoffs SH CG 2.5 4.0 +sidechain_cutoffs SH C2N 2.5 4.0 +sidechain_cutoffs SH OH 3.5 4.5 +sidechain_cutoffs SH SH 3.0 5.0 + + + +# Maximal interaction energies for side chains +sidechain_interaction 0.85 + +# Angular dependent sidechain interactions +angular_dependent_sidechain_interactions HIS +angular_dependent_sidechain_interactions ARG +angular_dependent_sidechain_interactions AMD +angular_dependent_sidechain_interactions TRP + +# exception interaction values +COO_HIS_exception 1.60 +OCO_HIS_exception 1.60 +CYS_HIS_exception 1.60 +CYS_CYS_exception 3.60 + +# Coulomb interaction parameters +coulomb_cutoff1 4.0 +coulomb_cutoff2 10.0 +coulomb_diel 80.0 + +# Backbone hydrogen bond parameters +backbone_NH_hydrogen_bond COO -0.85 2.00 3.00 +#backbone_NH_hydrogen_bond C- -0.85 2.00 3.00 +backbone_NH_hydrogen_bond CYS -0.85 3.00 4.00 +backbone_NH_hydrogen_bond TYR -0.85 2.20 3.20 +backbone_NH_hydrogen_bond OCO -0.85 2.00 3.50 +backbone_NH_hydrogen_bond NAR -0.85 2.00 3.50 + +backbone_CO_hydrogen_bond HIS 0.85 2.00 3.00 +backbone_CO_hydrogen_bond OCO 0.85 3.00 4.00 +backbone_CO_hydrogen_bond CG 0.85 2.00 4.00 +backbone_CO_hydrogen_bond C2N 0.85 2.00 4.00 +backbone_CO_hydrogen_bond N30 0.85 2.00 4.00 +backbone_CO_hydrogen_bond N31 0.85 2.00 4.00 +backbone_CO_hydrogen_bond N32 0.85 2.00 4.00 +backbone_CO_hydrogen_bond N33 0.85 2.00 4.00 +backbone_CO_hydrogen_bond NAR 0.85 2.00 3.50 + +# Group charges +charge COO -1 +charge HIS +1 +charge CYS -1 +charge TYR -1 +charge LYS +1 +charge ARG +1 +charge N+ +1 +charge C- -1 +charge OCO -1 +charge SER -1 +charge CG +1 +charge C2N +1 +charge N30 +1 +charge N31 +1 +charge N32 +1 +charge N33 +1 +charge NAR +1 +charge SH -1 +charge OP -1 + +# list of acids +acid_list ASP +acid_list GLU +acid_list CYS +acid_list TYR +acid_list SER +acid_list C- +acid_list OCO +acid_list OP +acid_list SH + +# list of bases +base_list ARG +base_list LYS +base_list HIS +base_list N+ +base_list CG +base_list C2N +base_list N30 +base_list N31 +base_list N32 +base_list N33 +base_list NAR + +# list of groups used in backbone reorganisation calculations +backbone_reorganisation_list ASP +backbone_reorganisation_list GLU + +# Residues that should be ignored +ignore_residues HOH +ignore_residues H2O +ignore_residues HOH +ignore_residues SO4 +ignore_residues PO4 +ignore_residues PEG +ignore_residues EPE +#ignore_residues NAG +ignore_residues TRS + +# Relative Van der Waals volume parameters for the radial volume model +# Radii adopted from Bondi, A. (1964). "Van der Waals Volumes and Radii". J. Phys. Chem. 68 (3): 441-51 +VanDerWaalsVolume C 1.40 # radius: 1.70, volume: 20.58 all 'C' and 'CA' atoms +VanDerWaalsVolume C4 2.64 # 38.79 hydrodphobic carbon atoms + unidentified atoms +VanDerWaalsVolume N 1.06 # radius: 1.55, volume: 15.60 all nitrogen atoms +VanDerWaalsVolume O 1.00 # radius: 1.52, volume: 14.71 all oxygen atoms +VanDerWaalsVolume S 1.66 # radius: 1.80, volume: 24.43 all sulphur atoms +VanDerWaalsVolume F 0.90 # raidus: 1.47, volume: 13.30 for fluorine +VanDerWaalsVolume Cl 1.53 # radius: 1.75, volume: 22.44 for chlorine +VanDerWaalsVolume P 1.66 # radius: 1.80, volume: 24.42 for phosphorus + +# Other desolvation parameters +desolvationSurfaceScalingFactor 0.25 +desolvationPrefactor -13.0 +desolvationAllowance 0.0 +desolv_cutoff 20.0 +buried_cutoff 15.0 +Nmin 280 +Nmax 560 + +# Ligand groups +ligand_typing groups +min_bond_distance_for_hydrogen_bonds 4 + +# covalent coupling +coupling_max_number_of_bonds 3 +shared_determinants 0 +common_charge_centre 0 +remove_penalised_group 1 + +# non-covalent coupling +max_intrinsic_pKa_diff 2.0 +min_interaction_energy 0.5 +max_free_energy_diff 1.0 +min_swap_pka_shift 1.0 +min_pka 0.0 +max_pka 10.0 +pH variable +reference neutral + +# ions +ions 1P 1 # generic charged atoms +ions 2P 2 +ions 1N -1 +ions 2N -2 + +ions MG 2 #Magnesium Ion +ions CA 2 #Calcium Ion +ions ZN 2 #Zinc Ion +ions NA 1 #Sodium Ion +ions CL -1 #Chloride Ion +ions MN 2 #Manganese (ii) Ion +ions K 1 #Potassium Ion +ions CD 2 #Cadmium Ion +ions FE 3 #Fe (iii) Ion +ions SR 2 #Strontium Ion +ions CU 2 #Copper (ii) Ion +ions IOD -1 #Iodide Ion +ions HG 2 #Mercury (ii) Ion +ions BR -1 #Bromide Ion +ions CO 2 #Cobalt (ii) Ion +ions NI 2 #Nickel (ii) Ion +ions FE2 2 #Fe (ii) Ion + +# write out order of residues +write_out_order ASP +write_out_order GLU +write_out_order C- +write_out_order HIS +write_out_order CYS +write_out_order TYR +write_out_order LYS +write_out_order ARG +write_out_order SER +write_out_order N+ +write_out_order CG +write_out_order C2N +write_out_order N30 +write_out_order N31 +write_out_order N32 +write_out_order N33 +write_out_order NAR +write_out_order OCO +write_out_order SH +write_out_order OP diff --git a/propka.py b/propka.py new file mode 100755 index 0000000..ffbfaeb --- /dev/null +++ b/propka.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3.1 + +import string,re,sys,os,math +import Source.lib, Source.molecular_container + +def main(): + """ + Reads in structure files, calculates pKa values, and prints pKa files + """ + # loading options, flaggs and arguments + options, pdbfiles = Source.lib.loadOptions() + + for pdbfile in pdbfiles: + my_molecule = Source.molecular_container.Molecular_container(pdbfile, options) + my_molecule.calculate_pka() + my_molecule.write_pka() + + +if __name__ == '__main__': + #import cProfile + #cProfile.run('main()',sort=1) + main() +