From 3a5bae5b41a9d6eb9ca18ca6c4aa13bc38998464 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 16:39:59 -0700 Subject: [PATCH] De-lint ligand_pka_values.py --- propka/group.py | 8 +- propka/ligand_pka_values.py | 207 ++++++++++++++++++++++++------------ 2 files changed, 142 insertions(+), 73 deletions(-) diff --git a/propka/group.py b/propka/group.py index 49e2552..fa746bf 100644 --- a/propka/group.py +++ b/propka/group.py @@ -2,7 +2,7 @@ import math import propka.ligand import propka.protonate -from propka.ligand_pka_values import ligand_pka_values +from propka.ligand_pka_values import LigandPkaValues from propka.determinant import Determinant from propka.lib import info, warning @@ -1368,10 +1368,10 @@ def is_ligand_group_by_marvin_pkas(parameters, atom): # if not already done # TODO - double-check testing coverage of these functions. if not atom.conformation_container.marvin_pkas_calculated: - lpka = ligand_pka_values(parameters) + lpka = LigandPkaValues(parameters) lpka.get_marvin_pkas_for_molecular_container(atom.molecular_container, - min_pH=parameters.min_ligand_model_pka, - max_pH=parameters.max_ligand_model_pka) + min_ph=parameters.min_ligand_model_pka, + max_ph=parameters.max_ligand_model_pka) if atom.marvin_pka: return TitratableLigandGroup(atom) # Special case of oxygen in carboxyl group not assigned a pka value by marvin diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index 512b7cd..7831cb2 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -1,15 +1,25 @@ -#!/usr/bin/env python - -from __future__ import division -from __future__ import print_function +"""Ligand pKa values""" +import os +import subprocess +import sys +import propka.molecular_container +import propka.calculations +import propka.parameters +import propka.pdb +import propka.lib from propka.lib import info, warning -import propka.molecular_container, propka.calculations, propka.calculations, propka.parameters, propka.pdb, propka.lib, os, subprocess, sys -class ligand_pka_values: +class LigandPkaValues: + """Ligand pKa value class.""" + def __init__(self, parameters): - self.parameters = parameters + """Initialize object with parameters. + Args: + parameters: parameters + """ + self.parameters = parameters # attempt to find Marvin executables in the path self.molconvert = self.find_in_path('molconvert') self.cxcalc = self.find_in_path('cxcalc') @@ -17,69 +27,130 @@ class ligand_pka_values: info(self.cxcalc) info(self.molconvert) - return + @staticmethod + def find_in_path(program): + """Find a program in the system path. - - def find_in_path(self, program): + Args: + program: program to find + Returns: + location of program + """ path = os.environ.get('PATH').split(os.pathsep) - - l = [i for i in filter(lambda loc: os.access(loc, os.F_OK), - map(lambda dir: os.path.join(dir, program),path))] - - if len(l) == 0: - info('Error: Could not find %s. Please make sure that it is found in the path.' % program) + locs = [i for i in filter(lambda loc: os.access(loc, os.F_OK), \ + map(lambda dir: os.path.join(dir, program), path))] + if len(locs) == 0: + str_ = "'Error: Could not find %s." % program + str_ += ' Please make sure that it is found in the path.' + info(str_) sys.exit(-1) + return locs[0] - return l[0] + def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10, max_ph=20): + """Use Marvin executables to get pKas for a PDB file. - def get_marvin_pkas_for_pdb_file(self, file, no_pkas=10, min_pH =-10, max_pH=20): - molecule = propka.molecular_container.Molecular_container(file) - self.get_marvin_pkas_for_molecular_container(molecule, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) - return + Args: + pdbfile: PDB file + num_pkas: number of pKas to get + min_ph: minimum pH value + max_ph: maximum pH value + """ + molecule = propka.molecular_container.Molecular_container(pdbfile) + self.get_marvin_pkas_for_molecular_container(molecule, + num_pkas=num_pkas, + min_ph=min_ph, + max_ph=max_ph) - def get_marvin_pkas_for_molecular_container(self, molecule, no_pkas=10, min_pH =-10, max_pH=20): + def get_marvin_pkas_for_molecular_container(self, molecule, num_pkas=10, min_ph=-10, max_ph=20): + """Use Marvin executables to calculate pKas for a molecular container. + + Args: + molecule: molecular container + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ for name in molecule.conformation_names: - filename = '%s_%s'%(molecule.name,name) - self.get_marvin_pkas_for_conformation_container(molecule.conformations[name], name=filename, reuse=molecule.options.reuse_ligand_mol2_file, - no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + filename = '%s_%s' % (molecule.name, name) + self.get_marvin_pkas_for_conformation_container(molecule.conformations[name], + name=filename, + reuse=molecule.\ + options.reuse_ligand_mol2_file, + num_pkas=num_pkas, + min_ph=min_ph, + max_ph=max_ph) - return + def get_marvin_pkas_for_conformation_container(self, conformation, + name='temp', reuse=False, + num_pkas=10, min_ph=-10, + max_ph=20): + """Use Marvin executables to calculate pKas for a conformation container. - def get_marvin_pkas_for_conformation_container(self, conformation, name = 'temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + Args: + conformation: conformation container + name: filename + reuse: flag to reuse the structure files + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ conformation.marvin_pkas_calculated = True - self.get_marvin_pkas_for_atoms(conformation.get_heavy_ligand_atoms(), name=name, reuse=reuse, - no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + self.get_marvin_pkas_for_atoms(conformation.get_heavy_ligand_atoms(), + name=name, reuse=reuse, + num_pkas=num_pkas, min_ph=min_ph, + max_ph=max_ph) - return + def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, + num_pkas=10, min_ph=-10, max_ph=20): + """Use Marvin executables to calculate pKas for a list of atoms. - def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + Args: + atoms: list of atoms + name: filename + reuse: flag to reuse the structure files + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ # do one molecule at the time so we don't confuse marvin molecules = propka.lib.split_atoms_into_molecules(atoms) - for i in range(len(molecules)): + for i, molecule in enumerate(molecules): filename = '%s_%d.mol2'%(name, i+1) - self.get_marvin_pkas_for_molecule(molecules[i], filename=filename, reuse=reuse, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + self.get_marvin_pkas_for_molecule(molecule, filename=filename, + reuse=reuse, num_pkas=num_pkas, + min_ph=min_ph, max_ph=max_ph) - return + def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', + reuse=False, num_pkas=10, min_ph=-10, + max_ph=20): + """Use Marvin executables to calculate pKas for a molecule. - - def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + Args: + molecule: the molecule + name: filename + reuse: flag to reuse the structure files + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ # print out structure unless we are using user-modified structure if not reuse: propka.pdb.write_mol2_for_atoms(atoms, filename) # check that we actually have a file to work with if not os.path.isfile(filename): - warning('Didn\'t find a user-modified file \'%s\' - generating one' % filename) + errstr = "Didn't find a user-modified file '%s' - generating one" \ + % filename + warning(errstr) propka.pdb.write_mol2_for_atoms(atoms, filename) - - - - # Marvin - # calculate pKa values - options = 'pka -a %d -b %d --min %f --max %f -d large'%(no_pkas, no_pkas, min_pH, max_pH) - (output,errors) = subprocess.Popen([self.cxcalc, filename]+options.split(), - stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() - - if len(errors)>0: + # Marvin calculate pKa values + options = 'pka -a %d -b %d --min %f --max %f -d large' % (num_pkas, + num_pkas, + min_ph, + max_ph) + (output, errors) = subprocess.Popen([self.cxcalc, filename]+options.split(), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE).communicate() + if len(errors) > 0: info('********************************************************************************************************') info('* Warning: Marvin execution failed: *') info('* %-100s *' % errors) @@ -87,36 +158,34 @@ class ligand_pka_values: info('* Please edit the ligand mol2 file and re-run PropKa with the -l option: %29s *' % filename) info('********************************************************************************************************') sys.exit(-1) - # extract calculated pkas - indices,pkas,types = self.extract_pkas(output) - + indices, pkas, types = self.extract_pkas(output) # store calculated pka values - for i in range(len(indices)): - atoms[indices[i]].marvin_pka = pkas[i] - atoms[indices[i]].charge = {'a':-1,'b':+1}[types[i]] - info('%s model pKa: %.2f' % (atoms[indices[i]], pkas[i])) + for i, index in enumerate(indices): + atoms[index].marvin_pka = pkas[i] + atoms[index].charge = {'a': -1, 'b': 1}[types[i]] + info('%s model pKa: %.2f' % (atoms[index], pkas[i])) - return + @staticmethod + def extract_pkas(output): + """Extract pKa value from output. - def extract_pkas(self, output): + Args: + output: output string to parse + Returns: + 1. Indices + 2. Values + 3. Types + """ # split output - [tags, values,empty_line] = output.decode().split('\n') - #info(tags) - #info(values) + [tags, values, _] = output.decode().split('\n') tags = tags.split('\t') values = values.split('\t') - # format values - types = [tags[i][0] for i in range(1,len(tags)-1) if len(values)>i and values[i] != ''] - indices = [int(a)-1 for a in values[-1].split(',') if a !=''] - values = [float(v.replace(',','.')) for v in values[1:-1] if v != ''] - + types = [tags[i][0] for i in range(1, len(tags)-1) if len(values) > i \ + and values[i] != ''] + indices = [int(a)-1 for a in values[-1].split(',') if a != ''] + values = [float(v.replace(',', '.')) for v in values[1:-1] if v != ''] if len(indices) != len(values) != len(types): raise Exception('Lengths of atoms and pka values mismatch') - return indices, values, types - - - -