diff --git a/propka/__init__.py b/propka/__init__.py index 8d2447f..ecfacd3 100644 --- a/propka/__init__.py +++ b/propka/__init__.py @@ -15,6 +15,6 @@ predictions." Journal of Chemical Theory and Computation 7, no. 2 (2011): 525-53 """ __all__ = ["atom", "bonds", "calculations", "conformation_container", "coupled_groups", "determinant", "determinants", "group", - "hybrid36", "iterative", "lib", "ligand_pka_values", "ligand", - "molecular_container", "output", "parameters", "pdb", "protonate", - "run", "vector_algebra", "version"] + "hybrid36", "iterative", "input", "lib", "ligand_pka_values", + "ligand", "molecular_container", "output", "parameters", + "protonate", "run", "vector_algebra", "version"] diff --git a/propka/atom.py b/propka/atom.py index 3bb3f12..fcd6825 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -1,7 +1,7 @@ """Atom class - contains all atom information found in the PDB file""" import string -from . import hybrid36 from propka.lib import make_tidy_atom_label +from . import hybrid36 # Format strings that get used in multiple places (or are very complex) @@ -25,7 +25,7 @@ STR_FMT = ( "({r.chain_id:1s}) [{r.x:>8.3f} {r.y:>8.3f} {r.z:>8.3f}] {r.element:s}") -class Atom(object): +class Atom: """Atom class - contains all atom information found in the PDB file""" def __init__(self, line=None): diff --git a/propka/conformation_container.py b/propka/conformation_container.py index d44e30c..d1e57ed 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -36,7 +36,6 @@ class ConformationContainer: self.groups = [] self.chains = [] self.current_iter_item = 0 - # TODO - what is marvin_pkas_calculated? self.marvin_pkas_calculated = False self.non_covalently_coupled_groups = False diff --git a/propka/input.py b/propka/input.py index e8c7f07..675a770 100644 --- a/propka/input.py +++ b/propka/input.py @@ -1,5 +1,8 @@ """Input routines.""" +from pathlib import Path from pkg_resources import resource_filename +from propka.lib import protein_precheck +from propka.output import write_propka from propka.atom import Atom from propka.conformation_container import ConformationContainer from propka.group import initialize_atom_group @@ -27,6 +30,67 @@ def open_file_for_reading(input_file): return file_ +def read_molecule_file(input_file, mol_container): + """Read input file (PDB or PROPKA) for a molecular container + + Args + input_file: input file to read + mol_container: MolecularContainer object + Returns + updated MolecularContainer object + Raises + ValuError if invalid input given + """ + input_path = Path(input_file) + mol_container.name = input_path.stem + input_file_extension = input_path.suffix + if input_file_extension.lower() == '.pdb': + # input is a pdb file. read in atoms and top up containers to make + # sure that all atoms are present in all conformations + conformations, conformation_names = read_pdb( + input_path, mol_container.version.parameters, mol_container) + if len(conformations) == 0: + str_ = ('Error: The pdb file does not seems to contain any ' + 'molecular conformations') + raise ValueError(str_) + mol_container.conformations = conformations + mol_container.conformation_names = conformation_names + mol_container.top_up_conformations() + # make a structure precheck + protein_precheck( + mol_container.conformations, mol_container.conformation_names) + # set up atom bonding and protonation + mol_container.version.setup_bonding_and_protonation(mol_container) + # Extract groups + mol_container.extract_groups() + # sort atoms + for name in mol_container.conformation_names: + mol_container.conformations[name].sort_atoms() + # find coupled groups + mol_container.find_covalently_coupled_groups() + # write out the input file + # TODO - figure out why this I/O has to happen here + output_path = Path(input_path.name.replace( + input_file_extension, '.propka_input')) + write_propka(mol_container, output_path) + elif input_file_extension.lower() == '.propka_input': + # input is a propka_input file + conformations, conformation_names = read_propka( + input_file, mol_container.version.parameters, mol_container) + mol_container.conformations = conformations + mol_container.conformation_names = conformation_names + # Extract groups - this merely sets up the groups found in the + # input file + mol_container.extract_groups() + # do some additional set up + mol_container.additional_setup_when_reading_input_file() + else: + str_ = "Unknown input file type {0!s} for file {1!s}".format( + input_file_extension, input_path) + raise ValueError(str_) + return mol_container + + def read_parameter_file(input_file, parameters): """Read a parameter file. @@ -47,7 +111,6 @@ def read_parameter_file(input_file, parameters): return parameters - def conformation_sorter(conf): """TODO - figure out what this function does.""" model = int(conf[:-1]) @@ -121,7 +184,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, terminal = None -def read_input(input_file, parameters, molecule): +def read_propka(input_file, parameters, molecule): """Read PROPKA input file for molecular container. Args: @@ -235,5 +298,3 @@ def read_pdb(pdb_file, parameters, molecule): # make a sorted list of conformation names names = sorted(conformations.keys(), key=conformation_sorter) return [conformations, names] - - diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index ab7fa28..023845d 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -2,11 +2,8 @@ import os import subprocess import sys -import propka.molecular_container -import propka.calculations -import propka.parameters from propka.output import write_mol2_for_atoms -from propka.lib import info, warning +from propka.lib import info, warning, split_atoms_into_molecules class LigandPkaValues: @@ -47,17 +44,17 @@ class LigandPkaValues: sys.exit(-1) return locs[0] - def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10, - max_ph=20): + def get_marvin_pkas_for_pdb_file( + self, molecule, parameters, num_pkas=10, min_ph=-10, max_ph=20): """Use Marvin executables to get pKas for a PDB file. Args: pdbfile: PDB file + molecule: MolecularContainer object num_pkas: number of pKas to get min_ph: minimum pH value max_ph: maximum pH value """ - molecule = propka.molecular_container.Molecular_container(pdbfile) self.get_marvin_pkas_for_molecular_container( molecule, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) @@ -110,7 +107,7 @@ class LigandPkaValues: max_ph: maximum pH value """ # do one molecule at the time so we don't confuse marvin - molecules = propka.lib.split_atoms_into_molecules(atoms) + molecules = split_atoms_into_molecules(atoms) for i, molecule in enumerate(molecules): filename = '{0:s}_{1:d}.mol2'.format(name, i+1) self.get_marvin_pkas_for_molecule( @@ -196,4 +193,3 @@ class LigandPkaValues: if len(indices) != len(values) != len(types): raise Exception('Lengths of atoms and pka values mismatch') return indices, values, types - diff --git a/propka/molecular_container.py b/propka/molecular_container.py index ae82fb9..43ad372 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -1,12 +1,8 @@ """Molecular container for storing all contents of PDB files.""" import os -import sys import propka.version -from propka.input import read_pdb, read_input, read_parameter_file -from propka.parameters import Parameters -from propka.output import write_input from propka.conformation_container import ConformationContainer -from propka.lib import info, warning, protein_precheck, make_grid +from propka.lib import info, warning, make_grid # TODO - these are constants whose origins are a little murky @@ -15,36 +11,26 @@ UNK_PI_CUTOFF = 0.01 MAX_ITERATION = 4 -class Molecular_container: +class MolecularContainer: """Container for storing molecular contents of PDB files. TODO - this class name does not conform to PEP8 but has external use. We should deprecate and change eventually. """ - def __init__(self, input_file, options=None): + def __init__(self, parameters, options=None): """Initialize molecular container. Args: - input_file: molecular input file + parameters: Parameters() object options: options object """ # printing out header before parsing input propka.output.print_header() - # set up some values + self.conformation_names = [] + self.conformations = {} self.options = options - self.input_file = input_file - # TODO - replace this indelicate os.path code with pathlib - self.dir = os.path.split(input_file)[0] - self.file = os.path.split(input_file)[1] - self.name = self.file[0:self.file.rfind('.')] - input_file_extension = input_file[input_file.rfind('.'):] - parameters = Parameters() - if options: - parameters = read_parameter_file( - self.options.parameters, parameters) - else: - parameters = read_parameter_file('propka.cfg', parameters) + self.name = None try: version_class = getattr(propka.version, parameters.version) self.version = version_class(parameters) @@ -53,44 +39,6 @@ class Molecular_container: errstr = 'Error: Version {0:s} does not exist'.format( parameters.version) raise Exception(errstr) - # read the input file - if input_file_extension[0:4] == '.pdb': - # input is a pdb file. read in atoms and top up containers to make - # sure that all atoms are present in all conformations - [self.conformations, self.conformation_names] = ( - read_pdb(input_file, self.version.parameters, self)) - if len(self.conformations) == 0: - info('Error: The pdb file does not seems to contain any ' - 'molecular conformations') - sys.exit(-1) - self.top_up_conformations() - # make a structure precheck - protein_precheck( - self.conformations, self.conformation_names) - # set up atom bonding and protonation - self.version.setup_bonding_and_protonation(self) - # Extract groups - self.extract_groups() - # sort atoms - for name in self.conformation_names: - self.conformations[name].sort_atoms() - # find coupled groups - self.find_covalently_coupled_groups() - # write out the input file - filename = self.file.replace(input_file_extension, '.propka_input') - write_input(self, filename) - elif input_file_extension == '.propka_input': - #input is a propka_input file - [self.conformations, self.conformation_names] = read_input( - input_file, self.version.parameters, self) - # Extract groups - this merely sets up the groups found in the - # input file - self.extract_groups() - # do some additional set up - self.additional_setup_when_reading_input_file() - else: - info('Unrecognized input file:{0:s}'.format(input_file)) - sys.exit(-1) def top_up_conformations(self): """Makes sure that all atoms are present in all conformations.""" diff --git a/propka/output.py b/propka/output.py index a72114e..2ee5f89 100644 --- a/propka/output.py +++ b/propka/output.py @@ -582,7 +582,7 @@ def write_mol2_for_atoms(atoms, filename): out.write(substructure_section) out.close() -def write_input(molecular_container, filename): +def write_propka(molecular_container, filename): """Write PROPKA input file for molecular container. Args: diff --git a/propka/parameters.py b/propka/parameters.py index b708fce..5b26dee 100644 --- a/propka/parameters.py +++ b/propka/parameters.py @@ -1,6 +1,4 @@ """Holds parameters and settings.""" -import pkg_resources -import propka.lib as lib from propka.lib import info, warning @@ -346,7 +344,7 @@ O2 'N1', 'O2', 'OP', 'SH'] lines = [ "", - "\\begin{longtable}{{{0:s}}}".format('l'*len(agroups)), + "\\begin{{longtable}}{{{0:s}}}".format('l'*len(agroups)), ("\\caption{{Ligand interaction parameters. For interactions not " "listed, the default value of {0:s} is applied.}}").format( str(self.sidechain_cutoffs.default)), diff --git a/propka/run.py b/propka/run.py index e851309..633e2c5 100644 --- a/propka/run.py +++ b/propka/run.py @@ -1,7 +1,9 @@ """Entry point for PROPKA script.""" import logging from propka.lib import loadOptions -from propka.molecular_container import Molecular_container +from propka.input import read_parameter_file, read_molecule_file +from propka.parameters import Parameters +from propka.molecular_container import MolecularContainer _LOGGER = logging.getLogger("PROPKA") @@ -13,8 +15,10 @@ def main(optargs=None): optargs = optargs if optargs is not None else [] options = loadOptions(*optargs) pdbfiles = options.filenames + parameters = read_parameter_file(options.parameters, Parameters()) for pdbfile in pdbfiles: - my_molecule = Molecular_container(pdbfile, options) + my_molecule = MolecularContainer(parameters, options) + my_molecule = read_molecule_file(pdbfile, my_molecule) my_molecule.calculate_pka() my_molecule.write_pka() @@ -33,9 +37,11 @@ def single(pdbfile, optargs=None): optargs = optargs if optargs is not None else [] options = loadOptions(*optargs) pdbfile = options.filenames.pop(0) + parameters = read_parameter_file(options.parameters, Parameters()) if len(options.filenames) > 0: _LOGGER.warning("Ignoring filenames: {0:s}".format(options.filenames)) - my_molecule = Molecular_container(pdbfile, options) + my_molecule = MolecularContainer(parameters, options) + my_molecule = read_molecule_file(pdbfile, my_molecule) my_molecule.calculate_pka() my_molecule.write_pka() return my_molecule diff --git a/scripts/propka31.py b/scripts/propka31.py index d3459c8..8a90114 100755 --- a/scripts/propka31.py +++ b/scripts/propka31.py @@ -11,7 +11,9 @@ is the same as the module name; that's why the new script is called propka31.) """ from propka.lib import loadOptions -from propka.molecular_container import Molecular_container +from propka.input import read_parameter_file, read_molecule_file +from propka.parameters import Parameters +from propka.molecular_container import MolecularContainer def main(): @@ -19,9 +21,11 @@ def main(): # loading options, flaggs and arguments options = loadOptions([]) pdbfiles = options.filenames + parameters = read_parameter_file(options.parameters, Parameters()) for pdbfile in pdbfiles: - my_molecule = Molecular_container(pdbfile, options) + my_molecule = MolecularContainer(parameters, options) + my_molecule = read_molecule_file(pdbfile, my_molecule) my_molecule.calculate_pka() my_molecule.write_pka() diff --git a/tests/test_basic_regression.py b/tests/test_basic_regression.py index cd8036e..9a63854 100644 --- a/tests/test_basic_regression.py +++ b/tests/test_basic_regression.py @@ -5,8 +5,10 @@ import re from pathlib import Path import pytest from numpy.testing import assert_almost_equal -import propka.lib -import propka.molecular_container +from propka.parameters import Parameters +from propka.molecular_container import MolecularContainer +from propka.input import read_parameter_file, read_molecule_file +from propka.lib import loadOptions _LOGGER = logging.getLogger(__name__) @@ -64,15 +66,16 @@ def run_propka(options, pdb_path, tmp_path): tmp_path: path for working directory """ options += [str(pdb_path)] - args = propka.lib.loadOptions(options) + args = loadOptions(options) try: _LOGGER.warning( "Working in tmpdir {0:s} because of PROPKA file output; " "need to fix this.".format(str(tmp_path))) cwd = Path.cwd() os.chdir(tmp_path) - molecule = propka.molecular_container.Molecular_container( - str(pdb_path), args) + parameters = read_parameter_file(args.parameters, Parameters()) + molecule = MolecularContainer(parameters, args) + molecule = read_molecule_file(str(pdb_path), molecule) molecule.calculate_pka() molecule.write_pka() finally: