Remove cyclic import based on I/O in pdb.py.

Partially addresses https://github.com/jensengroup/propka-3.1/issues/49
2020-05-30 10:00:31 -07:00
parent 397d5e10aa
commit b597a6f257
6 changed files with 173 additions and 168 deletions
--- a/propka/input.py
+++ b/propka/input.py
@@ -1,96 +1,58 @@
-"""Read and parse PDB-like input files."""
+"""Input routines."""
-import propka.lib
+from pkg_resources import resource_filename
 from propka.lib import warning
 from propka.atom import Atom
 from propka.group import initialize_atom_group
 from propka.conformation_container import ConformationContainer
 from propka.group import initialize_atom_group
-EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
+def open_file_for_reading(input_file):
-                         'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
+    """Open file or file-like stream for reading.
                         'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
                         'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
-
+    TODO - convert this to a context manager
 def read_pdb(pdb_file, parameters, molecule):
    """Parse a PDB file.
    Args:
-        pdb_file:  file to read
+        input_file: path to file or file-like object. If file-like object,
-        parameters:  parameters to guide parsing
+        then will attempt fseek(0).
        molecule:  molecular container
    Returns:
        list with elements:
            1. list of conformations
            2. list of names
    """
    conformations = {}
    # read in all atoms in the file
    lines = get_atom_lines_from_pdb(
        pdb_file, ignore_residues=parameters.ignore_residues,
        keep_protons=molecule.options.keep_protons,
        chains=molecule.options.chains)
    for (name, atom) in lines:
        if not name in conformations.keys():
            conformations[name] = ConformationContainer(
                name=name, parameters=parameters, molecular_container=molecule)
        conformations[name].add_atom(atom)
    # make a sorted list of conformation names
    names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
    return [conformations, names]
 def protein_precheck(conformations, names):
    """Check protein for correct number of atoms, etc.
    Args:
        names:  conformation names to check
    """
    for name in names:
        atoms = conformations[name].atoms
        # Group the atoms by their residue:
        atoms_by_residue = {}
        for atom in atoms:
            if atom.element != 'H':
                res_id = resid_from_atom(atom)
    try:
-                    atoms_by_residue[res_id].append(atom)
+        input_file.fseek(0)
-                except KeyError:
+        return input_file
-                    atoms_by_residue[res_id] = [atom]
+    except AttributeError:
-        for res_id, res_atoms in atoms_by_residue.items():
+        pass
-            res_name = res_atoms[0].res_name
+
-            residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
+    try:
-            # ignore ligand residues
+        file_ = open(input_file, 'rt')
-            if res_name not in EXPECTED_ATOM_NUMBERS:
+    except:
-                continue
+        raise IOError('Cannot find file {0:s}'.format(input_file))
-            # check for c-terminal
+    return file_
            if 'C-' in [a.terminal for a in res_atoms]:
                if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
                    str_ = ("Unexpected number ({num:d}) of atoms in residue "
                            "{res:s} in conformation {conf:s}".format(
                                num=len(res_atoms), res=residue_label,
                                conf=name))
                    warning(str_)
                continue
            # check number of atoms in residue
            if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
                str_ = ("Unexpected number ({num:d}) of atoms in residue "
                        "{res:s} in conformation {conf:s}".format(
                            num=len(res_atoms), res=residue_label,
                            conf=name))
                warning(str_)
-def resid_from_atom(atom):
+def read_parameter_file(input_file, parameters):
-    """Return string with atom residue information.
+    """Read a parameter file.
    Args:
-        atom:  atom to generate string for
+        input_file:  input file to read
-    Returns
+        parameters:  Parameters object
-        string
+    Returns:
        updated Parameters object
    """
-    return '{0:>4d} {1:s} {2:s}'.format(
+    # try to locate the parameter file
-        atom.res_num, atom.chain_id, atom.icode)
+    try:
        ifile = resource_filename(__name__, input_file)
        input_ = open_file_for_reading(ifile)
    except (IOError, FileNotFoundError, ValueError):
        input_ = open_file_for_reading(input_file)
    for line in input_:
        parameters.parse_line(line)
    return parameters
 def conformation_sorter(conf):
    """Return an integer sort key for a conformation name.

    Every character of the name except the last is parsed as an integer
    (called the model in this code) and the last character (called the
    altloc) contributes its code point.

    Args:
        conf:  conformation name, e.g. "1A"
    Returns:
        100 * model + ord(altloc)
    """
    model_text, altloc = conf[:-1], conf[-1:]
    return 100 * int(model_text) + ord(altloc)
 def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
@@ -104,7 +66,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
        tags:  tags of lines that include atoms
        chains:  list of chains
    """
-    lines = propka.lib.open_file_for_reading(pdb_file).readlines()
+    lines = open_file_for_reading(pdb_file).readlines()
    nterm_residue = 'next_residue'
    old_residue = None
    terminal = None
@@ -179,7 +141,7 @@ def read_input(input_file, parameters, molecule):
                molecular_container=molecule)
        conformations[name].add_atom(atom)
    # make a sorted list of conformation names
-    names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
+    names = sorted(conformations.keys(), key=conformation_sorter)
    return [conformations, names]
@@ -192,7 +154,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM  ', 'HETATM']):
    Yields:
        conformation container, list of atoms
    """
-    lines = propka.lib.open_file_for_reading(input_file).readlines()
+    lines = open_file_for_reading(input_file).readlines()
    conformation = ''
    atoms = {}
    numbers = []
@@ -246,3 +208,32 @@ def get_atom_lines_from_input(input_file, tags=['ATOM  ', 'HETATM']):
            # prepare for next conformation
            atoms = {}
            numbers = []
 def read_pdb(pdb_file, parameters, molecule):
    """Build conformation containers from the atoms of a PDB file.

    Args:
        pdb_file:  file to read
        parameters:  parameters to guide parsing; ``ignore_residues`` is
            read from it and it is handed to every new container
        molecule:  molecular container; ``options.keep_protons`` and
            ``options.chains`` are read from it
    Returns:
        list with elements:
            1. dict mapping conformation name to its container
            2. list of conformation names sorted with conformation_sorter
    """
    atom_stream = get_atom_lines_from_pdb(
        pdb_file, ignore_residues=parameters.ignore_residues,
        keep_protons=molecule.options.keep_protons,
        chains=molecule.options.chains)
    containers = {}
    for conf_name, atom in atom_stream:
        # the first atom seen for a conformation creates its container
        if conf_name not in containers:
            containers[conf_name] = ConformationContainer(
                name=conf_name, parameters=parameters,
                molecular_container=molecule)
        containers[conf_name].add_atom(atom)
    ordered_names = sorted(containers, key=conformation_sorter)
    return [containers, ordered_names]
--- a/propka/lib.py
+++ b/propka/lib.py
@@ -11,56 +11,63 @@ _STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s"))
 _LOGGER.addHandler(_STDOUT_HANDLER)
-def open_file_for_reading(input_file):
+EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
-    """Open file or file-like stream for reading.
+                         'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
                         'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
                         'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
-    TODO - convert this to a context manager
+
 def protein_precheck(conformations, names):
    """Check protein for correct number of atoms, etc.
    Args:
-        input_file: path to file or file-like object. If file-like object,
+        names:  conformation names to check
        then will attempt fseek(0).
    """
    for name in names:
        atoms = conformations[name].atoms
        # Group the atoms by their residue:
        atoms_by_residue = {}
        for atom in atoms:
            if atom.element != 'H':
                res_id = resid_from_atom(atom)
                try:
-        input_file.fseek(0)
+                    atoms_by_residue[res_id].append(atom)
-        return input_file
+                except KeyError:
-    except AttributeError:
+                    atoms_by_residue[res_id] = [atom]
-        pass
+        for res_id, res_atoms in atoms_by_residue.items():
-
+            res_name = res_atoms[0].res_name
-    try:
+            residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
-        file_ = open(input_file, 'rt')
+            # ignore ligand residues
-    except:
+            if res_name not in EXPECTED_ATOM_NUMBERS:
-        raise IOError('Cannot find file {0:s}'.format(input_file))
+                continue
-    return file_
+            # check for c-terminal
            if 'C-' in [a.terminal for a in res_atoms]:
                if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
                    str_ = ("Unexpected number ({num:d}) of atoms in residue "
                            "{res:s} in conformation {conf:s}".format(
                                num=len(res_atoms), res=residue_label,
                                conf=name))
                    warning(str_)
                continue
            # check number of atoms in residue
            if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
                str_ = ("Unexpected number ({num:d}) of atoms in residue "
                        "{res:s} in conformation {conf:s}".format(
                            num=len(res_atoms), res=residue_label,
                            conf=name))
                warning(str_)
-def open_file_for_writing(input_file):
+def resid_from_atom(atom):
-    """Open file or file-like stream for writing.
+    """Return string with atom residue information.
    TODO - convert this to a context manager.
    Args:
-        input_file: path to file or file-like object. If file-like object,
+        atom:  atom to generate string for
-        then will attempt to get file mode.
+    Returns
        string
    """
-    try:
+    return '{0:>4d} {1:s} {2:s}'.format(
-        mode = input_file.mode
+        atom.res_num, atom.chain_id, atom.icode)
        if not ("w" in mode or "a" in mode or "+" in mode):
            raise IOError("File/stream not open for writing")
        return input_file
    except AttributeError:
        pass
    try:
        file_ = open(input_file, 'wt')
    except FileNotFoundError:
        raise Exception('Could not open {0:s}'.format(input_file))
    return file_
 def conformation_sorter(conf):
    """Compute the sort key used to order conformation names.

    Args:
        conf:  conformation name whose leading characters form an integer
            and whose final character is a single label, e.g. "2B"
    Returns:
        the leading integer times 100 plus the code point of the final
        character
    """
    leading = conf[:-1]
    trailing = conf[-1:]
    model = int(leading)
    return ord(trailing) + model * 100
 def split_atoms_into_molecules(atoms):
@@ -354,19 +361,6 @@ def configuration_compare(conf):
    return 100*int(conf[1:-2]) + ord(conf[-1])
 def write_file(filename, lines):
    """Write lines to a file, appending a newline to each.

    Args:
        filename:  name of file, passed to open_file_for_writing
        lines:  lines to write to file
    """
    file_ = open_file_for_writing(filename)
    try:
        for line in lines:
            file_.write("{0:s}\n".format(line))
    finally:
        # close the stream even if formatting or writing a line fails
        file_.close()
 def _args_to_str(arg_list):
    """Summarize list of arguments in string.
--- a/propka/ligand_pka_values.py
+++ b/propka/ligand_pka_values.py
@@ -5,7 +5,6 @@ import sys
 import propka.molecular_container
 import propka.calculations
 import propka.parameters
 import propka.pdb
 from propka.output import write_mol2_for_atoms
 from propka.lib import info, warning
--- a/propka/molecular_container.py
+++ b/propka/molecular_container.py
@@ -2,10 +2,11 @@
 import os
 import sys
 import propka.version
-from propka.pdb import read_input
+from propka.input import read_pdb, read_input, read_parameter_file
 from propka.parameters import Parameters
 from propka.output import write_input
 from propka.conformation_container import ConformationContainer
-from propka.lib import info, warning, make_grid
+from propka.lib import info, warning, protein_precheck, make_grid
 # TODO - these are constants whose origins are a little murky
@@ -38,11 +39,12 @@ class Molecular_container:
        self.file = os.path.split(input_file)[1]
        self.name = self.file[0:self.file.rfind('.')]
        input_file_extension = input_file[input_file.rfind('.'):]
-        # set the version
+        parameters = Parameters()
        if options:
-            parameters = propka.parameters.Parameters(self.options.parameters)
+            parameters = read_parameter_file(
                self.options.parameters, parameters)
        else:
-            parameters = propka.parameters.Parameters('propka.cfg')
+            parameters = read_parameter_file('propka.cfg', parameters)
        try:
            version_class = getattr(propka.version, parameters.version)
            self.version = version_class(parameters)
@@ -56,15 +58,15 @@ class Molecular_container:
            # input is a pdb file. read in atoms and top up containers to make
            # sure that all atoms are present in all conformations
            [self.conformations, self.conformation_names] = (
-                propka.pdb.read_pdb(input_file, self.version.parameters, self))
+                read_pdb(input_file, self.version.parameters, self))
            if len(self.conformations) == 0:
                info('Error: The pdb file does not seems to contain any '
                     'molecular conformations')
                sys.exit(-1)
            self.top_up_conformations()
            # make a structure precheck
-            propka.pdb.protein_precheck(self.conformations,
+            protein_precheck(
-                                        self.conformation_names)
+                self.conformations, self.conformation_names)
            # set up atom bonding and protonation
            self.version.setup_bonding_and_protonation(self)
            # Extract groups
@@ -79,9 +81,8 @@ class Molecular_container:
            write_input(self, filename)
        elif input_file_extension == '.propka_input':
            #input is a propka_input file
-            [self.conformations, self.conformation_names] = (
+            [self.conformations, self.conformation_names] = read_input(
-                propka.pdb.read_input(input_file, self.version.parameters,
+                input_file, self.version.parameters, self)
                                      self))
            # Extract groups - this merely sets up the groups found in the
            # input file
            self.extract_groups()
--- a/propka/output.py
+++ b/propka/output.py
@@ -1,6 +1,42 @@
 """Output routines."""
 from datetime import date
-from propka.lib import info, open_file_for_writing
+from propka.lib import info
 def open_file_for_writing(input_file):
    """Open file or file-like stream for writing.

    TODO - convert this to a context manager.

    Args:
        input_file: path to file or file-like object. An object with a
            ``mode`` attribute is treated as an already-open stream and is
            returned unchanged once its mode has been checked; anything
            else is passed to ``open`` in text-write mode.
    Returns:
        the stream that was passed in, or a newly opened file object
    Raises:
        IOError:  the stream's mode contains none of "w", "a" or "+"
        Exception:  ``open`` reported that the path was not found
    """
    try:
        mode = input_file.mode
    except AttributeError:
        # no mode attribute: fall through and treat input_file as a path
        pass
    else:
        if not ("w" in mode or "a" in mode or "+" in mode):
            raise IOError("File/stream not open for writing")
        return input_file
    try:
        return open(input_file, 'wt')
    except FileNotFoundError as err:
        # !s instead of :s so path-like objects can be formatted too
        raise Exception(
            'Could not open {0!s}'.format(input_file)) from err
 def write_file(filename, lines):
    """Writes a new file, one entry of ``lines`` per output line.

    Args:
        filename:  name of file, passed to open_file_for_writing
        lines:  lines to write to file; a newline is added after each
    """
    file_ = open_file_for_writing(filename)
    try:
        for line in lines:
            file_.write("{0:s}\n".format(line))
    finally:
        # make sure the stream is closed when a write or format fails
        file_.close()
 def print_header():
--- a/propka/parameters.py
+++ b/propka/parameters.py
@@ -35,7 +35,7 @@ STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference']
 class Parameters:
    """PROPKA parameter class."""
-    def __init__(self, parameter_file):
+    def __init__(self):
        """Initialize parameter class.
        Args:
@@ -52,22 +52,6 @@ class Parameters:
        self.CYS_CYS_exception = None
        # These functions set up remaining data structures implicitly
        self.set_up_data_structures()
        self.read_parameters(parameter_file)
    def read_parameters(self, file_):
        """Parse every line of a parameter file into this object.

        The name is first resolved as a resource of this module's package;
        if opening that path fails, the name is opened as given.

        Args:
            file_:  file to read
        """
        try:
            packaged_path = pkg_resources.resource_filename(__name__, file_)
            stream = lib.open_file_for_reading(packaged_path)
        except (IOError, FileNotFoundError, ValueError):
            # fall back to the name exactly as the caller supplied it
            stream = lib.open_file_for_reading(file_)
        for text in stream:
            self.parse_line(text)
    def parse_line(self, line):
        """Parse parameter file line."""