diff --git a/propka/bonds.py b/propka/bonds.py index 850c2ed..4d58dec 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -3,6 +3,7 @@ from __future__ import division from __future__ import print_function import pickle,sys,os,math,propka.calculations +import json import pkg_resources from propka.lib import info, warning @@ -28,11 +29,9 @@ class bondmaker: self.max_sq_distance = max(list(self.distances_squared.values())+[self.default_dist_squared]) # protein bonding data - self.data_file_name = pkg_resources.resource_filename(__name__, 'protein_bonds.dat') - - data = open(self.data_file_name,'rb') - self.protein_bonds = pickle.load(data) - data.close() + self.data_file_name = pkg_resources.resource_filename(__name__, 'protein_bonds.json') + with open(self.data_file_name,'rt') as json_file: + self.protein_bonds = json.load(json_file) self.intra_residue_backbone_bonds = {'N': ['CA'], diff --git a/propka/protein_bonds.dat b/propka/protein_bonds.dat deleted file mode 100644 index fc6f5d8..0000000 --- a/propka/protein_bonds.dat +++ /dev/null @@ -1,568 +0,0 @@ -(dp0 -S'CYS' -p1 -(dp2 -S'CB' -p3 -(lp4 -S'CA' -p5 -aS'SG' -p6 -asg5 -(lp7 -g3 -asg6 -(lp8 -g3 -aS'SG' -p9 -assS'GLN' -p10 -(dp11 -S'CB' -p12 -(lp13 -S'CA' -p14 -aS'CG' -p15 -asg14 -(lp16 -g12 -asg15 -(lp17 -g12 -aS'CD' -p18 -asg18 -(lp19 -g15 -aS'OE1' -p20 -aS'NE2' -p21 -asg21 -(lp22 -g18 -asg20 -(lp23 -g18 -assS'HIS' -p24 -(dp25 -S'CD2' -p26 -(lp27 -S'CG' -p28 -aS'NE2' -p29 -asS'CB' -p30 -(lp31 -S'CA' -p32 -ag28 -asg32 -(lp33 -g30 -asg28 -(lp34 -g30 -aS'ND1' -p35 -ag26 -asS'CE1' -p36 -(lp37 -g35 -ag29 -asg35 -(lp38 -g28 -ag36 -asg29 -(lp39 -g26 -ag36 -assS'ASN' -p40 -(dp41 -S'CB' -p42 -(lp43 -S'CA' -p44 -aS'CG' -p45 -asg44 -(lp46 -g42 -asS'ND2' -p47 -(lp48 -g45 -asg45 -(lp49 -g42 -aS'OD1' -p50 -ag47 -asg50 -(lp51 -g45 -assS'VAL' -p52 -(dp53 -S'CG1' -p54 -(lp55 -S'CB' -p56 -asg56 -(lp57 -S'CA' -p58 -ag54 -aS'CG2' -p59 -asg58 -(lp60 -g56 -asg59 -(lp61 -g56 -assS'LYS' -p62 -(dp63 -S'CB' -p64 -(lp65 -S'CA' -p66 -aS'CG' -p67 -asg66 -(lp68 -g64 -asg67 -(lp69 -g64 -aS'CD' -p70 -asS'CE' -p71 -(lp72 -g70 -aS'NZ' -p73 -asg70 -(lp74 -g67 -ag71 -asg73 -(lp75 -g71 -assS'ILE' -p76 -(dp77 -S'CG1' -p78 -(lp79 -S'CB' -p80 -aS'CD1' -p81 -asg80 -(lp82 -S'CA' -p83 -ag78 -aS'CG2' -p84 -asg83 -(lp85 -g80 -asg84 -(lp86 -g80 -asg81 -(lp87 -g78 -assS'PRO' -p88 -(dp89 -S'CB' -p90 -(lp91 -S'CA' -p92 -aS'CG' -p93 -asg92 -(lp94 -g90 -asS'CD' -p95 -(lp96 -S'N' -p97 -ag93 -asg93 -(lp98 -g90 -ag95 -asg97 -(lp99 -g95 -assS'THR' -p100 -(dp101 -S'CB' -p102 -(lp103 -S'CA' -p104 -aS'OG1' -p105 -aS'CG2' -p106 -asg104 -(lp107 -g102 -asg106 -(lp108 -g102 -asg105 -(lp109 -g102 -assS'PHE' -p110 -(dp111 -S'CD2' -p112 -(lp113 -S'CG' -p114 -aS'CE2' -p115 -asS'CB' -p116 -(lp117 -S'CA' -p118 -ag114 -asg118 -(lp119 -g116 -asg114 -(lp120 -g116 -aS'CD1' -p121 -ag112 -asS'CZ' -p122 -(lp123 -S'CE1' -p124 -ag115 -asg121 -(lp125 -g114 -ag124 -asg124 -(lp126 -g121 -ag122 -asg115 -(lp127 -g112 -ag122 -assS'ALA' -p128 -(dp129 -S'CB' -p130 -(lp131 -S'CA' -p132 -asg132 -(lp133 -g130 -assS'MET' -p134 -(dp135 -S'CB' -p136 -(lp137 -S'CA' -p138 -aS'CG' -p139 -asg138 -(lp140 -g136 -asg139 -(lp141 -g136 -aS'SD' -p142 -asS'CE' -p143 -(lp144 -g142 -asg142 -(lp145 -g139 -ag143 -assS'ASP' -p146 -(dp147 -S'CB' -p148 -(lp149 -S'CA' -p150 -aS'CG' -p151 -asg150 -(lp152 -g148 -asg151 -(lp153 -g148 -aS'OD1' -p154 -aS'OD2' -p155 -asg155 -(lp156 -g151 -asg154 -(lp157 -g151 -assS'LEU' -p158 -(dp159 -S'CB' -p160 -(lp161 -S'CA' -p162 -aS'CG' -p163 -asg162 -(lp164 -g160 -asg163 -(lp165 -g160 -aS'CD1' -p166 -aS'CD2' -p167 -asg166 -(lp168 -g163 -asg167 -(lp169 -g163 -assS'ARG' -p170 -(dp171 -S'CB' -p172 -(lp173 -S'CA' -p174 -aS'CG' -p175 -asg174 -(lp176 -g172 -asg175 -(lp177 -g172 -aS'CD' -p178 -asS'NE' -p179 -(lp180 -g178 -aS'CZ' -p181 -asg178 -(lp182 -g175 -ag179 -asg181 -(lp183 -g179 -aS'NH1' -p184 -aS'NH2' -p185 -asg184 -(lp186 -g181 -asg185 -(lp187 -g181 -assS'TRP' -p188 -(dp189 -S'CZ2' -p190 -(lp191 -S'CE2' -p192 -aS'CH2' -p193 -asS'CB' -p194 -(lp195 -S'CA' -p196 -aS'CG' -p197 -asg196 -(lp198 -g194 -asg197 -(lp199 -g194 -aS'CD1' -p200 -aS'CD2' -p201 -asg193 -(lp202 -g190 -aS'CZ3' -p203 -asg192 -(lp204 -g201 -aS'NE1' -p205 -ag190 -asS'CE3' -p206 -(lp207 -g201 -ag203 -asg200 -(lp208 -g197 -ag205 -asg201 -(lp209 -g197 -ag192 -ag206 -asg203 -(lp210 -g206 -ag193 -asg205 -(lp211 -g200 -ag192 -assS'GLU' -p212 -(dp213 -S'OE2' -p214 -(lp215 -S'CD' -p216 -asS'CA' -p217 -(lp218 -S'CB' -p219 -asS'CG' -p220 -(lp221 -g219 -ag216 -asg216 -(lp222 -g220 -aS'OE1' -p223 -ag214 -asg219 -(lp224 -g217 -ag220 -asg223 -(lp225 -g216 -assS'TYR' -p226 -(dp227 -S'CD2' -p228 -(lp229 -S'CG' -p230 -aS'CE2' -p231 -asS'OH' -p232 -(lp233 -S'CZ' -p234 -asS'CB' -p235 -(lp236 -S'CA' -p237 -ag230 -asg237 -(lp238 -g235 -asg230 -(lp239 -g235 -aS'CD1' -p240 -ag228 -asg234 -(lp241 -S'CE1' -p242 -ag231 -ag232 -asg240 -(lp243 -g230 -ag242 -asg242 -(lp244 -g240 -ag234 -asg231 -(lp245 -g228 -ag234 -assS'SER' -p246 -(dp247 -S'OG' -p248 -(lp249 -S'CB' -p250 -asg250 -(lp251 -S'CA' -p252 -ag248 -asg252 -(lp253 -g250 -ass. \ No newline at end of file diff --git a/propka/protein_bonds.json b/propka/protein_bonds.json new file mode 100644 index 0000000..b9cd50f --- /dev/null +++ b/propka/protein_bonds.json @@ -0,0 +1,21 @@ +{ + "CYS": { "CB": [ "CA", "SG" ], "CA": [ "CB" ], "SG": [ "CB", "SG" ] }, + "GLN": { "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CG": [ "CB", "CD" ], "CD": [ "CG", "OE1", "NE2" ], "NE2": [ "CD" ], "OE1": [ "CD" ] }, + "HIS": { "CD2": [ "CG", "NE2" ], "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CG": [ "CB", "ND1", "CD2" ], "CE1": [ "ND1", "NE2" ], "ND1": [ "CG", "CE1" ], "NE2": [ "CD2", "CE1" ] }, + "ASN": { "CB": [ "CA", "CG" ], "CA": [ "CB" ], "ND2": [ "CG" ], "CG": [ "CB", "OD1", "ND2" ], "OD1": [ "CG" ] }, + "VAL": { "CG1": [ "CB" ], "CB": [ "CA", "CG1", "CG2" ], "CA": [ "CB" ], "CG2": [ "CB" ] }, + "LYS": { "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CG": [ "CB", "CD" ], "CE": [ "CD", "NZ" ], "CD": [ "CG", "CE" ], "NZ": [ "CE" ] }, + "ILE": { "CG1": [ "CB", "CD1" ], "CB": [ "CA", "CG1", "CG2" ], "CA": [ "CB" ], "CG2": [ "CB" ], "CD1": [ "CG1" ] }, + "PRO": { "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CD": [ "N", "CG" ], "CG": [ "CB", "CD" ], "N": [ "CD" ] }, + "THR": { "CB": [ "CA", "OG1", "CG2" ], "CA": [ "CB" ], "CG2": [ "CB" ], "OG1": [ "CB" ] }, + "PHE": { "CD2": [ "CG", "CE2" ], "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CG": [ "CB", "CD1", "CD2" ], "CZ": [ "CE1", "CE2" ], "CD1": [ "CG", "CE1" ], "CE1": [ "CD1", "CZ" ], "CE2": [ "CD2", "CZ" ] }, + "ALA": { "CB": [ "CA" ], "CA": [ "CB" ] }, + "MET": { "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CG": [ "CB", "SD" ], "CE": [ "SD" ], "SD": [ "CG", "CE" ] }, + "ASP": { "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CG": [ "CB", "OD1", "OD2" ], "OD2": [ "CG" ], "OD1": [ "CG" ] }, + "LEU": { "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CG": [ "CB", "CD1", "CD2" ], "CD1": [ "CG" ], "CD2": [ "CG" ] }, + "ARG": { "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CG": [ "CB", "CD" ], "NE": [ "CD", "CZ" ], "CD": [ "CG", "NE" ], "CZ": [ "NE", "NH1", "NH2" ], "NH1": [ "CZ" ], "NH2": [ "CZ" ] }, + "TRP": { "CZ2": [ "CE2", "CH2" ], "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CG": [ "CB", "CD1", "CD2" ], "CH2": [ "CZ2", "CZ3" ], "CE2": [ "CD2", "NE1", "CZ2" ], "CE3": [ "CD2", "CZ3" ], "CD1": [ "CG", "NE1" ], "CD2": [ "CG", "CE2", "CE3" ], "CZ3": [ "CE3", "CH2" ], "NE1": [ "CD1", "CE2" ] }, + "GLU": { "OE2": [ "CD" ], "CA": [ "CB" ], "CG": [ "CB", "CD" ], "CD": [ "CG", "OE1", "OE2" ], "CB": [ "CA", "CG" ], "OE1": [ "CD" ] }, + "TYR": { "CD2": [ "CG", "CE2" ], "OH": [ "CZ" ], "CB": [ "CA", "CG" ], "CA": [ "CB" ], "CG": [ "CB", "CD1", "CD2" ], "CZ": [ "CE1", "CE2", "OH" ], "CD1": [ "CG", "CE1" ], "CE1": [ "CD1", "CZ" ], "CE2": [ "CD2", "CZ" ] }, + "SER": { "OG": [ "CB" ], "CB": [ "CA", "OG" ], "CA": [ "CB" ] } +} \ No newline at end of file