diff --git a/propka/molecular_container.py b/propka/molecular_container.py index 74bf566..092989d 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -1,145 +1,140 @@ -#!/usr/bin/python -# -# Molecular container for storing all contents of pdb files -# -# -from __future__ import division -from __future__ import print_function - -import os, sys - -import propka.pdb, propka.version, propka.output, propka.conformation_container, propka.group, propka.lib +"""Molecular container for storing all contents of PDB files.""" +import os +import sys +import propka.pdb +import propka.version +import propka.output +import propka.group +import propka.lib +from propka.conformation_container import ConformationContainer from propka.lib import info, warning + +# TODO - these are constants whose origins are a little murky +UNK_PI_CUTOFF = 0.01 +# Maximum number of iterations for finding PI +MAX_ITERATION = 4 + + class Molecular_container: + """Container for storing molecular contents of PDB files.""" + def __init__(self, input_file, options=None): + """Initialize molecular container. + + Args: + input_file: molecular input file + options: options object + """ # printing out header before parsing input propka.output.printHeader() - # set up some values self.options = options self.input_file = input_file + # TODO - replace this indelicate os.path code with pathlib self.dir = os.path.split(input_file)[0] self.file = os.path.split(input_file)[1] self.name = self.file[0:self.file.rfind('.')] input_file_extension = input_file[input_file.rfind('.'):] - # set the version if options: parameters = propka.parameters.Parameters(self.options.parameters) else: parameters = propka.parameters.Parameters('propka.cfg') try: - exec('self.version = propka.version.%s(parameters)'%parameters.version) + version_class = getattr(propka.version, parameters.version) + self.version = version_class(parameters) except: - raise Exception('Error: Version %s does not exist'%parameters.version) - + errstr = 'Error: Version %s does not exist' % parameters.version + raise Exception(errstr) # read the input file if input_file_extension[0:4] == '.pdb': - # input is a pdb file - # read in atoms and top up containers to make sure that all atoms are present in all conformations - [self.conformations, self.conformation_names] = propka.pdb.read_pdb(input_file, self.version.parameters,self) - if len(self.conformations)==0: + # input is a pdb file. read in atoms and top up containers to make + # sure that all atoms are present in all conformations + [self.conformations, self.conformation_names] \ + = propka.pdb.read_pdb(input_file, self.version.parameters, self) + if len(self.conformations) == 0: info('Error: The pdb file does not seems to contain any molecular conformations') sys.exit(-1) - self.top_up_conformations() - # make a structure precheck - propka.pdb.protein_precheck(self.conformations, self.conformation_names) - + propka.pdb.protein_precheck(self.conformations, + self.conformation_names) # set up atom bonding and protonation self.version.setup_bonding_and_protonation(self) - # Extract groups self.extract_groups() - # sort atoms for name in self.conformation_names: self.conformations[name].sort_atoms() - # find coupled groups self.find_covalently_coupled_groups() - # write out the input file - filename = self.file.replace(input_file_extension,'.propka_input') + filename = self.file.replace(input_file_extension, '.propka_input') propka.pdb.write_input(self, filename) - - elif input_file_extension == '.propka_input': #input is a propka_input file - [self.conformations, self.conformation_names] = propka.pdb.read_input(input_file, self.version.parameters, self) - - # Extract groups - this merely sets up the groups found in the input file + [self.conformations, self.conformation_names] \ + = propka.pdb.read_input(input_file, self.version.parameters, + self) + # Extract groups - this merely sets up the groups found in the + # input file self.extract_groups() - # do some additional set up self.additional_setup_when_reading_input_file() - else: info('Unrecognized input file:%s' % input_file) sys.exit(-1) - - return def top_up_conformations(self): - """ Makes sure that all atoms are present in all conformations """ + """Makes sure that all atoms are present in all conformations.""" for name in self.conformation_names: - if name!='1A' and (len(self.conformations[name]) < len(self.conformations['1A'])): + if name != '1A' and (len(self.conformations[name]) \ + < len(self.conformations['1A'])): self.conformations[name].top_up(self.conformations['1A']) - return - def find_covalently_coupled_groups(self): + """Find covalently coupled groups.""" info('-' * 103) for name in self.conformation_names: self.conformations[name].find_covalently_coupled_groups() - return - def find_non_covalently_coupled_groups(self): + """Find non-covalently coupled groups.""" info('-' * 103) + verbose = self.options.display_coupled_residues for name in self.conformation_names: - self.conformations[name].find_non_covalently_coupled_groups(verbose=self.options.display_coupled_residues) - - return + self.conformations[name].find_non_covalently_coupled_groups(verbose=verbose) def extract_groups(self): - """ Identify the groups needed for pKa calculation """ + """Identify the groups needed for pKa calculation.""" for name in self.conformation_names: self.conformations[name].extract_groups() - return - def additional_setup_when_reading_input_file(self): + """Additional setup.""" for name in self.conformation_names: self.conformations[name].additional_setup_when_reading_input_file() - return - - def calculate_pka(self): + """Calculate pKa values.""" # calculate for each conformation for name in self.conformation_names: self.conformations[name].calculate_pka(self.version, self.options) - # find non-covalently coupled groups self.find_non_covalently_coupled_groups() - # find the average of the conformations self.average_of_conformations() - # print out the conformation-average results propka.output.printResult(self, 'AVR', self.version.parameters) - return - def average_of_conformations(self): + """Generate an average of conformations.""" + parameters = self.conformations[self.conformation_names[0]].parameters # make a new configuration to hold the average values - avr_conformation = propka.conformation_container.ConformationContainer(name='average', - parameters=self.conformations[self.conformation_names[0]].parameters, - molecular_container=self) - + avr_conformation = ConformationContainer(name='average', + parameters=parameters, + molecular_container=self) container = self.conformations[self.conformation_names[0]] for group in container.get_groups_for_calculations(): # new group to hold average values @@ -150,104 +145,133 @@ class Molecular_container: if group_to_add: avr_group += group_to_add else: - warning('Group %s could not be found in conformation %s.' % (group.atom.residue_label, name)) + str_ = 'Group %s could not be found in conformation %s.' \ + % (group.atom.residue_label, name) + warning(str_) # ... and store the average value avr_group = avr_group / len(self.conformation_names) avr_conformation.groups.append(avr_group) - # store information on coupling in the average container - if len(list(filter(lambda c: c.non_covalently_coupled_groups, self.conformations.values()))): + if len(list(filter(lambda c: c.non_covalently_coupled_groups, + self.conformations.values()))): avr_conformation.non_covalently_coupled_groups = True - # store chain info avr_conformation.chains = self.conformations[self.conformation_names[0]].chains - self.conformations['AVR'] = avr_conformation - return - def write_pka(self, filename=None, reference="neutral", direction="folding", options=None): - #for name in self.conformation_names: - # propka.output.writePKA(self, self.version.parameters, filename='%s_3.1_%s.pka'%(self.name, name), - # conformation=name,reference=reference, - # direction=direction, options=options) + def write_pka(self, filename=None, reference="neutral", + direction="folding", options=None): + """Write pKa information to a file. + Args: + filename: file to write to + reference: reference state + direction: folding vs. unfolding + options: options object + """ # write out the average conformation - filename=os.path.join('%s.pka'%(self.name)) - - # if the display_coupled_residues option is true, - # write the results out to an alternative pka file + filename = os.path.join('%s.pka' % (self.name)) + # if the display_coupled_residues option is true, write the results out + # to an alternative pka file if self.options.display_coupled_residues: - filename=os.path.join('%s_alt_state.pka'%(self.name)) - - if hasattr(self.version.parameters, 'output_file_tag') and len(self.version.parameters.output_file_tag)>0: - filename=os.path.join('%s_%s.pka'%(self.name,self.version.parameters.output_file_tag)) - + filename = os.path.join('%s_alt_state.pka' % (self.name)) + if hasattr(self.version.parameters, 'output_file_tag') \ + and len(self.version.parameters.output_file_tag) > 0: + filename = os.path.join('%s_%s.pka' % (self.name, + self.version.parameters.output_file_tag)) propka.output.writePKA(self, self.version.parameters, filename=filename, - conformation='AVR',reference=reference, + conformation='AVR', reference=reference, direction=direction, options=options) - return + def get_folding_profile(self, conformation='AVR', reference="neutral", + grid=[0., 14., 0.1]): + """Get a folding profile. - def getFoldingProfile(self, conformation='AVR',reference="neutral", direction="folding", grid=[0., 14., 0.1], options=None): + Args: + conformation: conformation to select + reference: reference state + direction: folding direction (folding) + grid: the grid of pH values [min, max, step_size] + options: options object + Returns: + TODO - figure out what these are + 1. profile + 2. opt + 3. range_80pct + 4. stability_range + """ # calculate stability profile profile = [] for ph in propka.lib.make_grid(*grid): - ddg = self.conformations[conformation].calculate_folding_energy( ph=ph, reference=reference) - #info(ph,ddg) + conf = self.conformations[conformation] + ddg = conf.calculate_folding_energy(ph=ph, reference=reference) profile.append([ph, ddg]) - # find optimum - opt =[None, 1e6] + opt = [None, 1e6] for point in profile: - opt = min(opt, point, key=lambda v:v[1]) - + opt = min(opt, point, key=lambda v: v[1]) # find values within 80 % of optimum range_80pct = [None, None] - values_within_80pct = [p[0] for p in profile if p[1]< 0.8*opt[1]] - if len(values_within_80pct)>0: + values_within_80pct = [p[0] for p in profile if p[1] < 0.8*opt[1]] + if len(values_within_80pct) > 0: range_80pct = [min(values_within_80pct), max(values_within_80pct)] - # find stability range stability_range = [None, None] - stable_values = [p[0] for p in profile if p[1]< 0.0] - - if len(stable_values)>0: + stable_values = [p[0] for p in profile if p[1] < 0.0] + if len(stable_values) > 0: stability_range = [min(stable_values), max(stable_values)] - return profile, opt, range_80pct, stability_range + def get_charge_profile(self, conformation='AVR', grid=[0., 14., .1]): + """Get charge profile for conformation as function of pH. - def getChargeProfile(self, conformation='AVR', grid=[0., 14., .1]): + Args: + conformation: conformation to test + grid: grid of pH values [min, max, step] + Returns: + list of charge state values + """ charge_profile = [] for ph in propka.lib.make_grid(*grid): - q_unfolded, q_folded = self.conformations[conformation].calculate_charge(self.version.parameters, ph=ph) + conf = self.conformations[conformation] + q_unfolded, q_folded = conf.calculate_charge(self.version.parameters, + ph=ph) charge_profile.append([ph, q_unfolded, q_folded]) - return charge_profile - def getPI(self, conformation='AVR', grid=[0., 14., 1], iteration=0): - #info('staring',grid, iteration) - # search - charge_profile = self.getChargeProfile(conformation=conformation, grid=grid) - pi = [] - pi_folded = pi_unfolded = [None, 1e6,1e6] - for point in charge_profile: - pi_folded = min(pi_folded, point, key=lambda v:abs(v[2])) - pi_unfolded = min(pi_unfolded, point, key=lambda v:abs(v[1])) + def get_pi(self, conformation='AVR', grid=[0., 14., 1], iteration=0): + """Get the isoelectric points for folded and unfolded states. - # If results are not good enough, do it again with a higher sampling resolution - pi_folded_value = pi_folded[0] + Args: + conformation: conformation to test + grid: grid of pH values [min, max, step] + iteration: iteration number of process + Returns: + 1. Folded state PI + 2. Unfolded state PI + """ + charge_profile = self.get_charge_profile(conformation=conformation, + grid=grid) + pi_folded = pi_unfolded = [None, 1e6, 1e6] + for point in charge_profile: + pi_folded = min(pi_folded, point, key=lambda v: abs(v[2])) + pi_unfolded = min(pi_unfolded, point, key=lambda v: abs(v[1])) + # If results are not good enough, do it again with a higher sampling + # resolution + pi_folded_value = pi_folded[0] pi_unfolded_value = pi_unfolded[0] step = grid[2] - if (pi_folded[2] > 0.01 or pi_unfolded[1] > 0.01) and iteration<4: - pi_folded_value, x = self.getPI(conformation=conformation, grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0], iteration=iteration+1) - x, pi_unfolded_value = self.getPI(conformation=conformation, grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0], iteration=iteration+1) - - + # TODO - need to warn if maximum number of iterations is exceeded + if (pi_folded[2] > UNK_PI_CUTOFF or pi_unfolded[1] > UNK_PI_CUTOFF) \ + and iteration < MAX_ITERATION: + pi_folded_value, _ = self.get_pi(conformation=conformation, + grid=[pi_folded[0]-step, + pi_folded[0]+step, + step/10.0], + iteration=iteration+1) + _, pi_unfolded_value = self.get_pi(conformation=conformation, + grid=[pi_unfolded[0]-step, + pi_unfolded[0]+step, + step/10.0], + iteration=iteration+1) return pi_folded_value, pi_unfolded_value - - - -if __name__ == '__main__': - input_file = sys.argv[1] - mc = Molecular_container(input_file) diff --git a/propka/output.py b/propka/output.py index 6e15d32..93c0e05 100644 --- a/propka/output.py +++ b/propka/output.py @@ -76,10 +76,10 @@ def writePKA(protein, parameters, filename=None, conformation ='1A',reference="n str += "%s\n" % ( getTheLine() ) # printing Folding Profile - str += getFoldingProfileSection(protein, conformation=conformation, reference=reference, direction=direction, window=[0., 14., 1.0], options=options) + str += get_folding_profileSection(protein, conformation=conformation, reference=reference, window=[0., 14., 1.0]) # printing Protein Charge Profile - str += getChargeProfileSection(protein, conformation=conformation) + str += get_charge_profileSection(protein, conformation=conformation) # now, writing the pka text to file file.write(str) @@ -156,7 +156,7 @@ def getSummarySection(protein, conformation, parameters): return str -def getFoldingProfileSection(protein, conformation='AVR', direction="folding", reference="neutral", window=[0., 14., 1.0], verbose=False, options=None): +def get_folding_profileSection(protein, conformation='AVR', direction="folding", reference="neutral", window=[0., 14., 1.0], verbose=False, options=None): """ returns the protein-folding-profile section """ @@ -164,9 +164,9 @@ def getFoldingProfileSection(protein, conformation='AVR', direction="folding", r str += "\n" str += "Free energy of %9s (kcal/mol) as a function of pH (using %s reference)\n" % (direction, reference) - profile, [pH_opt, dG_opt], [dG_min, dG_max], [pH_min, pH_max] = protein.getFoldingProfile(conformation=conformation, + profile, [pH_opt, dG_opt], [dG_min, dG_max], [pH_min, pH_max] = protein.get_folding_profile(conformation=conformation, reference=reference, - direction=direction, grid=[0., 14., 0.1], options=options) + grid=[0., 14., 0.1]) if profile == None: str += "Could not determine folding profile\n" else: @@ -195,13 +195,13 @@ def getFoldingProfileSection(protein, conformation='AVR', direction="folding", r -def getChargeProfileSection(protein, conformation='AVR', options=None): +def get_charge_profileSection(protein, conformation='AVR', options=None): """ returns the protein-folding-profile section """ str = "Protein charge of folded and unfolded state as a function of pH\n" - profile = protein.getChargeProfile(conformation=conformation,grid=[0., 14., 1.]) + profile = protein.get_charge_profile(conformation=conformation,grid=[0., 14., 1.]) if profile == None: str += "Could not determine charge profile\n" else: @@ -210,7 +210,7 @@ def getChargeProfileSection(protein, conformation='AVR', options=None): str += "%6.2lf%10.2lf%8.2lf\n" % (pH, Q_mod, Q_pro) - pI_pro, pI_mod = protein.getPI(conformation=conformation) + pI_pro, pI_mod = protein.get_pi(conformation=conformation) if pI_pro == None or pI_mod == None: str += "Could not determine the pI\n\n" else: