From b597a6f25752cf3b5f19fe101c30115088cf779f Mon Sep 17 00:00:00 2001
From: Nathan Baker <nathanandrewbaker@gmail.com>
Date: Sat, 30 May 2020 10:00:31 -0700
Subject: [PATCH] Remove cyclic import based on I/O in pdb.py.

Partially addresses https://github.com/jensengroup/propka-3.1/issues/49
---
 propka/{pdb.py => input.py}   | 159 ++++++++++++++++------------------
 propka/lib.py                 | 102 ++++++++++------------
 propka/ligand_pka_values.py   |   1 -
 propka/molecular_container.py |  23 ++---
 propka/output.py              |  38 +++++++-
 propka/parameters.py          |  18 +---
 6 files changed, 173 insertions(+), 168 deletions(-)
 rename propka/{pdb.py => input.py} (72%)

diff --git a/propka/pdb.py b/propka/input.py
similarity index 72%
rename from propka/pdb.py
rename to propka/input.py
index f1eaf16..e8c7f07 100644
--- a/propka/pdb.py
+++ b/propka/input.py
@@ -1,96 +1,58 @@
-"""Read and parse PDB-like input files."""
-import propka.lib
-from propka.lib import warning
+"""Input routines."""
+from pkg_resources import resource_filename
 from propka.atom import Atom
-from propka.group import initialize_atom_group
 from propka.conformation_container import ConformationContainer
+from propka.group import initialize_atom_group
 
 
-EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
-                         'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
-                         'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
-                         'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
+def open_file_for_reading(input_file):
+    """Open file or file-like stream for reading.
 
-
-def read_pdb(pdb_file, parameters, molecule):
-    """Parse a PDB file.
+    TODO - convert this to a context manager
 
     Args:
-        pdb_file:  file to read
-        parameters:  parameters to guide parsing
-        molecule:  molecular container
+        input_file: path to file or file-like object. A file-like object
+        is rewound with seek(0) and returned as-is.
+    """
+    try:
+        input_file.seek(0)
+        return input_file
+    except AttributeError:
+        pass  # no seek(): not a stream, so treat input_file as a path
+
+    try:
+        file_ = open(input_file, 'rt')
+    except Exception as err:
+        raise IOError('Cannot find file {0!s}'.format(input_file)) from err
+    return file_
+
+
+def read_parameter_file(input_file, parameters):
+    """Read a parameter file and feed each line to the Parameters object.
+
+    Args:
+        input_file:  name or path of the parameter file to read
+        parameters:  Parameters object; parse_line() is called per line
     Returns:
-        list with elements:
-            1. list of conformations
-            2. list of names
+        updated Parameters object
     """
-    conformations = {}
-    # read in all atoms in the file
-    lines = get_atom_lines_from_pdb(
-        pdb_file, ignore_residues=parameters.ignore_residues,
-        keep_protons=molecule.options.keep_protons,
-        chains=molecule.options.chains)
-    for (name, atom) in lines:
-        if not name in conformations.keys():
-            conformations[name] = ConformationContainer(
-                name=name, parameters=parameters, molecular_container=molecule)
-        conformations[name].add_atom(atom)
-    # make a sorted list of conformation names
-    names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
-    return [conformations, names]
+    try:  # look inside the package first, then fall back to the raw path
+        ifile = resource_filename(__name__, input_file)
+        input_ = open_file_for_reading(ifile)
+    except (IOError, FileNotFoundError, ValueError):
+        input_ = open_file_for_reading(input_file)
+    with input_:
+        for line in input_:
+            parameters.parse_line(line)
+    return parameters
 
 
-def protein_precheck(conformations, names):
-    """Check protein for correct number of atoms, etc.
 
-    Args:
-        names:  conformation names to check
-    """
-    for name in names:
-        atoms = conformations[name].atoms
-        # Group the atoms by their residue:
-        atoms_by_residue = {}
-        for atom in atoms:
-            if atom.element != 'H':
-                res_id = resid_from_atom(atom)
-                try:
-                    atoms_by_residue[res_id].append(atom)
-                except KeyError:
-                    atoms_by_residue[res_id] = [atom]
-        for res_id, res_atoms in atoms_by_residue.items():
-            res_name = res_atoms[0].res_name
-            residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
-            # ignore ligand residues
-            if res_name not in EXPECTED_ATOM_NUMBERS:
-                continue
-            # check for c-terminal
-            if 'C-' in [a.terminal for a in res_atoms]:
-                if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
-                    str_ = ("Unexpected number ({num:d}) of atoms in residue "
-                            "{res:s} in conformation {conf:s}".format(
-                                num=len(res_atoms), res=residue_label,
-                                conf=name))
-                    warning(str_)
-                continue
-            # check number of atoms in residue
-            if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
-                str_ = ("Unexpected number ({num:d}) of atoms in residue "
-                        "{res:s} in conformation {conf:s}".format(
-                            num=len(res_atoms), res=residue_label,
-                            conf=name))
-                warning(str_)
-
-
-def resid_from_atom(atom):
-    """Return string with atom residue information.
-
-    Args:
-        atom:  atom to generate string for
-    Returns
-        string
-    """
-    return '{0:>4d} {1:s} {2:s}'.format(
-        atom.res_num, atom.chain_id, atom.icode)
+def conformation_sorter(conf):
+    """Sort key for names like '1A': model number * 100 + ord(altloc)."""
+    model = int(conf[:-1])  # all but the last character, parsed as an integer
+    altloc = conf[-1:]  # last character
+    return model*100+ord(altloc)
 
 
 def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
@@ -104,7 +66,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
         tags:  tags of lines that include atoms
         chains:  list of chains
     """
-    lines = propka.lib.open_file_for_reading(pdb_file).readlines()
+    lines = open_file_for_reading(pdb_file).readlines()
     nterm_residue = 'next_residue'
     old_residue = None
     terminal = None
@@ -179,7 +141,7 @@ def read_input(input_file, parameters, molecule):
                 molecular_container=molecule)
         conformations[name].add_atom(atom)
     # make a sorted list of conformation names
-    names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
+    names = sorted(conformations.keys(), key=conformation_sorter)
     return [conformations, names]
 
 
@@ -192,7 +154,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM  ', 'HETATM']):
     Yields:
         conformation container, list of atoms
     """
-    lines = propka.lib.open_file_for_reading(input_file).readlines()
+    lines = open_file_for_reading(input_file).readlines()
     conformation = ''
     atoms = {}
     numbers = []
@@ -246,3 +208,32 @@ def get_atom_lines_from_input(input_file, tags=['ATOM  ', 'HETATM']):
             # prepare for next conformation
             atoms = {}
             numbers = []
+
+def read_pdb(pdb_file, parameters, molecule):
+    """Parse a PDB file.
+
+    Args:
+        pdb_file:  file to read
+        parameters:  parameters to guide parsing
+        molecule:  molecular container
+    Returns:
+        list with elements:
+            1. dict of conformation containers keyed by conformation name
+            2. sorted list of conformation names
+    """
+    conformations = {}
+    # read in all atoms in the file
+    lines = get_atom_lines_from_pdb(
+        pdb_file, ignore_residues=parameters.ignore_residues,
+        keep_protons=molecule.options.keep_protons,
+        chains=molecule.options.chains)
+    for (name, atom) in lines:
+        if name not in conformations:
+            conformations[name] = ConformationContainer(
+                name=name, parameters=parameters, molecular_container=molecule)
+        conformations[name].add_atom(atom)
+    # make a sorted list of conformation names
+    names = sorted(conformations, key=conformation_sorter)
+    return [conformations, names]
+
+
diff --git a/propka/lib.py b/propka/lib.py
index 48eab4e..518f7e6 100644
--- a/propka/lib.py
+++ b/propka/lib.py
@@ -11,56 +11,63 @@ _STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s"))
 _LOGGER.addHandler(_STDOUT_HANDLER)
 
 
-def open_file_for_reading(input_file):
-    """Open file or file-like stream for reading.
+EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
+                         'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
+                         'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
+                         'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
 
-    TODO - convert this to a context manager
+
+def protein_precheck(conformations, names):
+    """Check protein for correct number of atoms, etc.
 
     Args:
-        input_file: path to file or file-like object. If file-like object,
-        then will attempt fseek(0).
+        names:  conformation names to check
     """
-    try:
-        input_file.fseek(0)
-        return input_file
-    except AttributeError:
-        pass
-
-    try:
-        file_ = open(input_file, 'rt')
-    except:
-        raise IOError('Cannot find file {0:s}'.format(input_file))
-    return file_
+    for name in names:
+        atoms = conformations[name].atoms
+        # Group the atoms by their residue:
+        atoms_by_residue = {}
+        for atom in atoms:
+            if atom.element != 'H':
+                res_id = resid_from_atom(atom)
+                try:
+                    atoms_by_residue[res_id].append(atom)
+                except KeyError:
+                    atoms_by_residue[res_id] = [atom]
+        for res_id, res_atoms in atoms_by_residue.items():
+            res_name = res_atoms[0].res_name
+            residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
+            # ignore ligand residues
+            if res_name not in EXPECTED_ATOM_NUMBERS:
+                continue
+            # check for c-terminal
+            if 'C-' in [a.terminal for a in res_atoms]:
+                if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
+                    str_ = ("Unexpected number ({num:d}) of atoms in residue "
+                            "{res:s} in conformation {conf:s}".format(
+                                num=len(res_atoms), res=residue_label,
+                                conf=name))
+                    warning(str_)
+                continue
+            # check number of atoms in residue
+            if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
+                str_ = ("Unexpected number ({num:d}) of atoms in residue "
+                        "{res:s} in conformation {conf:s}".format(
+                            num=len(res_atoms), res=residue_label,
+                            conf=name))
+                warning(str_)
 
 
-def open_file_for_writing(input_file):
-    """Open file or file-like stream for writing.
-
-    TODO - convert this to a context manager.
+def resid_from_atom(atom):
+    """Return string with atom residue information.
 
     Args:
-        input_file: path to file or file-like object. If file-like object,
-        then will attempt to get file mode.
+        atom:  atom to generate string for
+    Returns
+        string
     """
-    try:
-        mode = input_file.mode
-        if not ("w" in mode or "a" in mode or "+" in mode):
-            raise IOError("File/stream not open for writing")
-        return input_file
-    except AttributeError:
-        pass
-    try:
-        file_ = open(input_file, 'wt')
-    except FileNotFoundError:
-        raise Exception('Could not open {0:s}'.format(input_file))
-    return file_
-
-
-def conformation_sorter(conf):
-    """TODO - figure out what this function does."""
-    model = int(conf[:-1])
-    altloc = conf[-1:]
-    return model*100+ord(altloc)
+    return '{0:>4d} {1:s} {2:s}'.format(
+        atom.res_num, atom.chain_id, atom.icode)
 
 
 def split_atoms_into_molecules(atoms):
@@ -354,19 +361,6 @@ def configuration_compare(conf):
     return 100*int(conf[1:-2]) + ord(conf[-1])
 
 
-def write_file(filename, lines):
-    """Writes a new file.
-
-    Args:
-        filename:  name of file
-        lines:  lines to write to file
-    """
-    file_ = open_file_for_writing(filename)
-    for line in lines:
-        file_.write("{0:s}\n".format(line))
-    file_.close()
-
-
 def _args_to_str(arg_list):
     """Summarize list of arguments in string.
 
diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py
index abfc3e4..ab7fa28 100644
--- a/propka/ligand_pka_values.py
+++ b/propka/ligand_pka_values.py
@@ -5,7 +5,6 @@ import sys
 import propka.molecular_container
 import propka.calculations
 import propka.parameters
-import propka.pdb
 from propka.output import write_mol2_for_atoms
 from propka.lib import info, warning
 
diff --git a/propka/molecular_container.py b/propka/molecular_container.py
index d99c2c0..ae82fb9 100644
--- a/propka/molecular_container.py
+++ b/propka/molecular_container.py
@@ -2,10 +2,11 @@
 import os
 import sys
 import propka.version
-from propka.pdb import read_input
+from propka.input import read_pdb, read_input, read_parameter_file
+from propka.parameters import Parameters
 from propka.output import write_input
 from propka.conformation_container import ConformationContainer
-from propka.lib import info, warning, make_grid
+from propka.lib import info, warning, protein_precheck, make_grid
 
 
 # TODO - these are constants whose origins are a little murky
@@ -38,11 +39,12 @@ class Molecular_container:
         self.file = os.path.split(input_file)[1]
         self.name = self.file[0:self.file.rfind('.')]
         input_file_extension = input_file[input_file.rfind('.'):]
-        # set the version
+        parameters = Parameters()
         if options:
-            parameters = propka.parameters.Parameters(self.options.parameters)
+            parameters = read_parameter_file(
+                self.options.parameters, parameters)
         else:
-            parameters = propka.parameters.Parameters('propka.cfg')
+            parameters = read_parameter_file('propka.cfg', parameters)
         try:
             version_class = getattr(propka.version, parameters.version)
             self.version = version_class(parameters)
@@ -56,15 +58,15 @@ class Molecular_container:
             # input is a pdb file. read in atoms and top up containers to make
             # sure that all atoms are present in all conformations
             [self.conformations, self.conformation_names] = (
-                propka.pdb.read_pdb(input_file, self.version.parameters, self))
+                read_pdb(input_file, self.version.parameters, self))
             if len(self.conformations) == 0:
                 info('Error: The pdb file does not seems to contain any '
                      'molecular conformations')
                 sys.exit(-1)
             self.top_up_conformations()
             # make a structure precheck
-            propka.pdb.protein_precheck(self.conformations,
-                                        self.conformation_names)
+            protein_precheck(
+                self.conformations, self.conformation_names)
             # set up atom bonding and protonation
             self.version.setup_bonding_and_protonation(self)
             # Extract groups
@@ -79,9 +81,8 @@ class Molecular_container:
             write_input(self, filename)
         elif input_file_extension == '.propka_input':
             #input is a propka_input file
-            [self.conformations, self.conformation_names] = (
-                propka.pdb.read_input(input_file, self.version.parameters,
-                                      self))
+            [self.conformations, self.conformation_names] = read_input(
+                input_file, self.version.parameters, self)
             # Extract groups - this merely sets up the groups found in the
             # input file
             self.extract_groups()
diff --git a/propka/output.py b/propka/output.py
index 61f8a68..a72114e 100644
--- a/propka/output.py
+++ b/propka/output.py
@@ -1,6 +1,42 @@
 """Output routines."""
 from datetime import date
-from propka.lib import info, open_file_for_writing
+from propka.lib import info
+
+
+def open_file_for_writing(input_file):
+    """Open file or file-like stream for writing.
+
+    TODO - convert this to a context manager.
+
+    Args:
+        input_file: path to file or file-like object. A file-like object
+        must report a writable ``mode`` and is returned as-is.
+    """
+    try:
+        mode = input_file.mode
+        if not any(flag in mode for flag in "wax+"):
+            raise IOError("File/stream not open for writing")
+        return input_file
+    except AttributeError:
+        pass  # no ``mode`` attribute: treat input_file as a path
+    try:
+        file_ = open(input_file, 'wt')
+    except FileNotFoundError as err:
+        raise Exception('Could not open {0:s}'.format(input_file)) from err
+    return file_
+
+
+def write_file(filename, lines):
+    """Write lines to a file, appending a newline to each.
+
+    Args:
+        filename:  name of file (or an already-open writable file object)
+        lines:  lines to write to file
+    """
+    # the with-block closes the file even if a write raises
+    with open_file_for_writing(filename) as file_:
+        for line in lines:
+            file_.write("{0:s}\n".format(line))
 
 
 def print_header():
diff --git a/propka/parameters.py b/propka/parameters.py
index 960cb81..b708fce 100644
--- a/propka/parameters.py
+++ b/propka/parameters.py
@@ -35,7 +35,7 @@ STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference']
 class Parameters:
     """PROPKA parameter class."""
 
-    def __init__(self, parameter_file):
+    def __init__(self):
         """Initialize parameter class.
 
         Args:
@@ -52,22 +52,6 @@ class Parameters:
         self.CYS_CYS_exception = None
         # These functions set up remaining data structures implicitly
         self.set_up_data_structures()
-        self.read_parameters(parameter_file)
-
-    def read_parameters(self, file_):
-        """Read parameters from file.
-
-        Args:
-            file_:  file to read
-        """
-        # try to locate the parameters file
-        try:
-            ifile = pkg_resources.resource_filename(__name__, file_)
-            input_ = lib.open_file_for_reading(ifile)
-        except (IOError, FileNotFoundError, ValueError):
-            input_ = lib.open_file_for_reading(file_)
-        for line in input_:
-            self.parse_line(line)
 
     def parse_line(self, line):
         """Parse parameter file line."""