Resolve cyclic import problem.

Fixes https://github.com/jensengroup/propka-3.1/issues/49

Moved I/O into a higher level of the code; should make
issue https://github.com/jensengroup/propka-3.1/issues/51
easier to resolve
This commit is contained in:
Nathan Baker
2020-05-30 12:01:30 -07:00
parent b597a6f257
commit 84846aad8c
11 changed files with 107 additions and 92 deletions

View File

@@ -15,6 +15,6 @@ predictions." Journal of Chemical Theory and Computation 7, no. 2 (2011): 525-53
""" """
__all__ = ["atom", "bonds", "calculations", "conformation_container", __all__ = ["atom", "bonds", "calculations", "conformation_container",
"coupled_groups", "determinant", "determinants", "group", "coupled_groups", "determinant", "determinants", "group",
"hybrid36", "iterative", "lib", "ligand_pka_values", "ligand", "hybrid36", "iterative", "input", "lib", "ligand_pka_values",
"molecular_container", "output", "parameters", "pdb", "protonate", "ligand", "molecular_container", "output", "parameters",
"run", "vector_algebra", "version"] "protonate", "run", "vector_algebra", "version"]

View File

@@ -1,7 +1,7 @@
"""Atom class - contains all atom information found in the PDB file""" """Atom class - contains all atom information found in the PDB file"""
import string import string
from . import hybrid36
from propka.lib import make_tidy_atom_label from propka.lib import make_tidy_atom_label
from . import hybrid36
# Format strings that get used in multiple places (or are very complex) # Format strings that get used in multiple places (or are very complex)
@@ -25,7 +25,7 @@ STR_FMT = (
"({r.chain_id:1s}) [{r.x:>8.3f} {r.y:>8.3f} {r.z:>8.3f}] {r.element:s}") "({r.chain_id:1s}) [{r.x:>8.3f} {r.y:>8.3f} {r.z:>8.3f}] {r.element:s}")
class Atom(object): class Atom:
"""Atom class - contains all atom information found in the PDB file""" """Atom class - contains all atom information found in the PDB file"""
def __init__(self, line=None): def __init__(self, line=None):

View File

@@ -36,7 +36,6 @@ class ConformationContainer:
self.groups = [] self.groups = []
self.chains = [] self.chains = []
self.current_iter_item = 0 self.current_iter_item = 0
# TODO - what is marvin_pkas_calculated?
self.marvin_pkas_calculated = False self.marvin_pkas_calculated = False
self.non_covalently_coupled_groups = False self.non_covalently_coupled_groups = False

View File

@@ -1,5 +1,8 @@
"""Input routines.""" """Input routines."""
from pathlib import Path
from pkg_resources import resource_filename from pkg_resources import resource_filename
from propka.lib import protein_precheck
from propka.output import write_propka
from propka.atom import Atom from propka.atom import Atom
from propka.conformation_container import ConformationContainer from propka.conformation_container import ConformationContainer
from propka.group import initialize_atom_group from propka.group import initialize_atom_group
@@ -27,6 +30,67 @@ def open_file_for_reading(input_file):
return file_ return file_
def read_molecule_file(input_file, mol_container):
    """Read input file (PDB or PROPKA) for a molecular container.

    The file type is selected from the file extension, compared
    case-insensitively: ``.pdb`` files are parsed, set up and then written
    back out as a ``.propka_input`` file in the current working directory;
    ``.propka_input`` files are read directly.

    Args:
        input_file:  input file to read
        mol_container:  MolecularContainer object
    Returns:
        updated MolecularContainer object
    Raises:
        ValueError if invalid input given
    """
    input_path = Path(input_file)
    mol_container.name = input_path.stem
    input_file_extension = input_path.suffix
    extension = input_file_extension.lower()
    if extension == '.pdb':
        # input is a pdb file. read in atoms and top up containers to make
        # sure that all atoms are present in all conformations
        conformations, conformation_names = read_pdb(
            input_path, mol_container.version.parameters, mol_container)
        if not conformations:
            str_ = ('Error: The pdb file does not seems to contain any '
                    'molecular conformations')
            raise ValueError(str_)
        mol_container.conformations = conformations
        mol_container.conformation_names = conformation_names
        mol_container.top_up_conformations()
        # make a structure precheck
        protein_precheck(
            mol_container.conformations, mol_container.conformation_names)
        # set up atom bonding and protonation
        mol_container.version.setup_bonding_and_protonation(mol_container)
        # Extract groups
        mol_container.extract_groups()
        # sort atoms
        for name in mol_container.conformation_names:
            mol_container.conformations[name].sort_atoms()
        # find coupled groups
        mol_container.find_covalently_coupled_groups()
        # write out the input file
        # TODO - figure out why this I/O has to happen here
        # Only the final suffix is swapped: a plain str.replace() would also
        # rewrite any earlier occurrence of the extension text inside the
        # file name (e.g. "my.pdbfile.pdb").
        output_path = Path(input_path.name).with_suffix('.propka_input')
        write_propka(mol_container, output_path)
    elif extension == '.propka_input':
        # input is a propka_input file
        conformations, conformation_names = read_propka(
            input_file, mol_container.version.parameters, mol_container)
        mol_container.conformations = conformations
        mol_container.conformation_names = conformation_names
        # Extract groups - this merely sets up the groups found in the
        # input file
        mol_container.extract_groups()
        # do some additional set up
        mol_container.additional_setup_when_reading_input_file()
    else:
        str_ = "Unknown input file type {0!s} for file {1!s}".format(
            input_file_extension, input_path)
        raise ValueError(str_)
    return mol_container
def read_parameter_file(input_file, parameters): def read_parameter_file(input_file, parameters):
"""Read a parameter file. """Read a parameter file.
@@ -47,7 +111,6 @@ def read_parameter_file(input_file, parameters):
return parameters return parameters
def conformation_sorter(conf): def conformation_sorter(conf):
"""TODO - figure out what this function does.""" """TODO - figure out what this function does."""
model = int(conf[:-1]) model = int(conf[:-1])
@@ -121,7 +184,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
terminal = None terminal = None
def read_input(input_file, parameters, molecule): def read_propka(input_file, parameters, molecule):
"""Read PROPKA input file for molecular container. """Read PROPKA input file for molecular container.
Args: Args:
@@ -235,5 +298,3 @@ def read_pdb(pdb_file, parameters, molecule):
# make a sorted list of conformation names # make a sorted list of conformation names
names = sorted(conformations.keys(), key=conformation_sorter) names = sorted(conformations.keys(), key=conformation_sorter)
return [conformations, names] return [conformations, names]

View File

@@ -2,11 +2,8 @@
import os import os
import subprocess import subprocess
import sys import sys
import propka.molecular_container
import propka.calculations
import propka.parameters
from propka.output import write_mol2_for_atoms from propka.output import write_mol2_for_atoms
from propka.lib import info, warning from propka.lib import info, warning, split_atoms_into_molecules
class LigandPkaValues: class LigandPkaValues:
@@ -47,17 +44,17 @@ class LigandPkaValues:
sys.exit(-1) sys.exit(-1)
return locs[0] return locs[0]
def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10, def get_marvin_pkas_for_pdb_file(
max_ph=20): self, molecule, parameters, num_pkas=10, min_ph=-10, max_ph=20):
"""Use Marvin executables to get pKas for a PDB file. """Use Marvin executables to get pKas for a PDB file.
Args: Args:
pdbfile: PDB file pdbfile: PDB file
molecule: MolecularContainer object
num_pkas: number of pKas to get num_pkas: number of pKas to get
min_ph: minimum pH value min_ph: minimum pH value
max_ph: maximum pH value max_ph: maximum pH value
""" """
molecule = propka.molecular_container.Molecular_container(pdbfile)
self.get_marvin_pkas_for_molecular_container( self.get_marvin_pkas_for_molecular_container(
molecule, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) molecule, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph)
@@ -110,7 +107,7 @@ class LigandPkaValues:
max_ph: maximum pH value max_ph: maximum pH value
""" """
# do one molecule at the time so we don't confuse marvin # do one molecule at the time so we don't confuse marvin
molecules = propka.lib.split_atoms_into_molecules(atoms) molecules = split_atoms_into_molecules(atoms)
for i, molecule in enumerate(molecules): for i, molecule in enumerate(molecules):
filename = '{0:s}_{1:d}.mol2'.format(name, i+1) filename = '{0:s}_{1:d}.mol2'.format(name, i+1)
self.get_marvin_pkas_for_molecule( self.get_marvin_pkas_for_molecule(
@@ -196,4 +193,3 @@ class LigandPkaValues:
if len(indices) != len(values) != len(types): if len(indices) != len(values) != len(types):
raise Exception('Lengths of atoms and pka values mismatch') raise Exception('Lengths of atoms and pka values mismatch')
return indices, values, types return indices, values, types

View File

@@ -1,12 +1,8 @@
"""Molecular container for storing all contents of PDB files.""" """Molecular container for storing all contents of PDB files."""
import os import os
import sys
import propka.version import propka.version
from propka.input import read_pdb, read_input, read_parameter_file
from propka.parameters import Parameters
from propka.output import write_input
from propka.conformation_container import ConformationContainer from propka.conformation_container import ConformationContainer
from propka.lib import info, warning, protein_precheck, make_grid from propka.lib import info, warning, make_grid
# TODO - these are constants whose origins are a little murky # TODO - these are constants whose origins are a little murky
@@ -15,36 +11,26 @@ UNK_PI_CUTOFF = 0.01
MAX_ITERATION = 4 MAX_ITERATION = 4
class Molecular_container: class MolecularContainer:
"""Container for storing molecular contents of PDB files. """Container for storing molecular contents of PDB files.
TODO - this class name does not conform to PEP8 but has external use. TODO - this class name does not conform to PEP8 but has external use.
We should deprecate and change eventually. We should deprecate and change eventually.
""" """
def __init__(self, input_file, options=None): def __init__(self, parameters, options=None):
"""Initialize molecular container. """Initialize molecular container.
Args: Args:
input_file: molecular input file parameters: Parameters() object
options: options object options: options object
""" """
# printing out header before parsing input # printing out header before parsing input
propka.output.print_header() propka.output.print_header()
# set up some values self.conformation_names = []
self.conformations = {}
self.options = options self.options = options
self.input_file = input_file self.name = None
# TODO - replace this indelicate os.path code with pathlib
self.dir = os.path.split(input_file)[0]
self.file = os.path.split(input_file)[1]
self.name = self.file[0:self.file.rfind('.')]
input_file_extension = input_file[input_file.rfind('.'):]
parameters = Parameters()
if options:
parameters = read_parameter_file(
self.options.parameters, parameters)
else:
parameters = read_parameter_file('propka.cfg', parameters)
try: try:
version_class = getattr(propka.version, parameters.version) version_class = getattr(propka.version, parameters.version)
self.version = version_class(parameters) self.version = version_class(parameters)
@@ -53,44 +39,6 @@ class Molecular_container:
errstr = 'Error: Version {0:s} does not exist'.format( errstr = 'Error: Version {0:s} does not exist'.format(
parameters.version) parameters.version)
raise Exception(errstr) raise Exception(errstr)
# read the input file
if input_file_extension[0:4] == '.pdb':
# input is a pdb file. read in atoms and top up containers to make
# sure that all atoms are present in all conformations
[self.conformations, self.conformation_names] = (
read_pdb(input_file, self.version.parameters, self))
if len(self.conformations) == 0:
info('Error: The pdb file does not seems to contain any '
'molecular conformations')
sys.exit(-1)
self.top_up_conformations()
# make a structure precheck
protein_precheck(
self.conformations, self.conformation_names)
# set up atom bonding and protonation
self.version.setup_bonding_and_protonation(self)
# Extract groups
self.extract_groups()
# sort atoms
for name in self.conformation_names:
self.conformations[name].sort_atoms()
# find coupled groups
self.find_covalently_coupled_groups()
# write out the input file
filename = self.file.replace(input_file_extension, '.propka_input')
write_input(self, filename)
elif input_file_extension == '.propka_input':
#input is a propka_input file
[self.conformations, self.conformation_names] = read_input(
input_file, self.version.parameters, self)
# Extract groups - this merely sets up the groups found in the
# input file
self.extract_groups()
# do some additional set up
self.additional_setup_when_reading_input_file()
else:
info('Unrecognized input file:{0:s}'.format(input_file))
sys.exit(-1)
def top_up_conformations(self): def top_up_conformations(self):
"""Makes sure that all atoms are present in all conformations.""" """Makes sure that all atoms are present in all conformations."""

View File

@@ -582,7 +582,7 @@ def write_mol2_for_atoms(atoms, filename):
out.write(substructure_section) out.write(substructure_section)
out.close() out.close()
def write_input(molecular_container, filename): def write_propka(molecular_container, filename):
"""Write PROPKA input file for molecular container. """Write PROPKA input file for molecular container.
Args: Args:

View File

@@ -1,6 +1,4 @@
"""Holds parameters and settings.""" """Holds parameters and settings."""
import pkg_resources
import propka.lib as lib
from propka.lib import info, warning from propka.lib import info, warning
@@ -346,7 +344,7 @@ O2
'N1', 'O2', 'OP', 'SH'] 'N1', 'O2', 'OP', 'SH']
lines = [ lines = [
"", "",
"\\begin{longtable}{{{0:s}}}".format('l'*len(agroups)), "\\begin{{longtable}}{{{0:s}}}".format('l'*len(agroups)),
("\\caption{{Ligand interaction parameters. For interactions not " ("\\caption{{Ligand interaction parameters. For interactions not "
"listed, the default value of {0:s} is applied.}}").format( "listed, the default value of {0:s} is applied.}}").format(
str(self.sidechain_cutoffs.default)), str(self.sidechain_cutoffs.default)),

View File

@@ -1,7 +1,9 @@
"""Entry point for PROPKA script.""" """Entry point for PROPKA script."""
import logging import logging
from propka.lib import loadOptions from propka.lib import loadOptions
from propka.molecular_container import Molecular_container from propka.input import read_parameter_file, read_molecule_file
from propka.parameters import Parameters
from propka.molecular_container import MolecularContainer
_LOGGER = logging.getLogger("PROPKA") _LOGGER = logging.getLogger("PROPKA")
@@ -13,8 +15,10 @@ def main(optargs=None):
optargs = optargs if optargs is not None else [] optargs = optargs if optargs is not None else []
options = loadOptions(*optargs) options = loadOptions(*optargs)
pdbfiles = options.filenames pdbfiles = options.filenames
parameters = read_parameter_file(options.parameters, Parameters())
for pdbfile in pdbfiles: for pdbfile in pdbfiles:
my_molecule = Molecular_container(pdbfile, options) my_molecule = MolecularContainer(parameters, options)
my_molecule = read_molecule_file(pdbfile, my_molecule)
my_molecule.calculate_pka() my_molecule.calculate_pka()
my_molecule.write_pka() my_molecule.write_pka()
@@ -33,9 +37,11 @@ def single(pdbfile, optargs=None):
optargs = optargs if optargs is not None else [] optargs = optargs if optargs is not None else []
options = loadOptions(*optargs) options = loadOptions(*optargs)
pdbfile = options.filenames.pop(0) pdbfile = options.filenames.pop(0)
parameters = read_parameter_file(options.parameters, Parameters())
if len(options.filenames) > 0: if len(options.filenames) > 0:
_LOGGER.warning("Ignoring filenames: {0:s}".format(options.filenames)) _LOGGER.warning("Ignoring filenames: {0:s}".format(options.filenames))
my_molecule = Molecular_container(pdbfile, options) my_molecule = MolecularContainer(parameters, options)
my_molecule = read_molecule_file(pdbfile, my_molecule)
my_molecule.calculate_pka() my_molecule.calculate_pka()
my_molecule.write_pka() my_molecule.write_pka()
return my_molecule return my_molecule

View File

@@ -11,7 +11,9 @@ is the same as the module name; that's why the new script is called
propka31.) propka31.)
""" """
from propka.lib import loadOptions from propka.lib import loadOptions
from propka.molecular_container import Molecular_container from propka.input import read_parameter_file, read_molecule_file
from propka.parameters import Parameters
from propka.molecular_container import MolecularContainer
def main(): def main():
@@ -19,9 +21,11 @@ def main():
# loading options, flaggs and arguments # loading options, flaggs and arguments
options = loadOptions([]) options = loadOptions([])
pdbfiles = options.filenames pdbfiles = options.filenames
parameters = read_parameter_file(options.parameters, Parameters())
for pdbfile in pdbfiles: for pdbfile in pdbfiles:
my_molecule = Molecular_container(pdbfile, options) my_molecule = MolecularContainer(parameters, options)
my_molecule = read_molecule_file(pdbfile, my_molecule)
my_molecule.calculate_pka() my_molecule.calculate_pka()
my_molecule.write_pka() my_molecule.write_pka()

View File

@@ -5,8 +5,10 @@ import re
from pathlib import Path from pathlib import Path
import pytest import pytest
from numpy.testing import assert_almost_equal from numpy.testing import assert_almost_equal
import propka.lib from propka.parameters import Parameters
import propka.molecular_container from propka.molecular_container import MolecularContainer
from propka.input import read_parameter_file, read_molecule_file
from propka.lib import loadOptions
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
@@ -64,15 +66,16 @@ def run_propka(options, pdb_path, tmp_path):
tmp_path: path for working directory tmp_path: path for working directory
""" """
options += [str(pdb_path)] options += [str(pdb_path)]
args = propka.lib.loadOptions(options) args = loadOptions(options)
try: try:
_LOGGER.warning( _LOGGER.warning(
"Working in tmpdir {0:s} because of PROPKA file output; " "Working in tmpdir {0:s} because of PROPKA file output; "
"need to fix this.".format(str(tmp_path))) "need to fix this.".format(str(tmp_path)))
cwd = Path.cwd() cwd = Path.cwd()
os.chdir(tmp_path) os.chdir(tmp_path)
molecule = propka.molecular_container.Molecular_container( parameters = read_parameter_file(args.parameters, Parameters())
str(pdb_path), args) molecule = MolecularContainer(parameters, args)
molecule = read_molecule_file(str(pdb_path), molecule)
molecule.calculate_pka() molecule.calculate_pka()
molecule.write_pka() molecule.write_pka()
finally: finally: