From b597a6f25752cf3b5f19fe101c30115088cf779f Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 30 May 2020 10:00:31 -0700 Subject: [PATCH] Remove cyclic import based on I/O in pdb.py. Partially addresses https://github.com/jensengroup/propka-3.1/issues/49 --- propka/{pdb.py => input.py} | 159 ++++++++++++++++------------------ propka/lib.py | 102 ++++++++++------------ propka/ligand_pka_values.py | 1 - propka/molecular_container.py | 23 ++--- propka/output.py | 38 +++++++- propka/parameters.py | 18 +--- 6 files changed, 173 insertions(+), 168 deletions(-) rename propka/{pdb.py => input.py} (72%) diff --git a/propka/pdb.py b/propka/input.py similarity index 72% rename from propka/pdb.py rename to propka/input.py index f1eaf16..e8c7f07 100644 --- a/propka/pdb.py +++ b/propka/input.py @@ -1,96 +1,58 @@ -"""Read and parse PDB-like input files.""" -import propka.lib -from propka.lib import warning +"""Input routines.""" +from pkg_resources import resource_filename from propka.atom import Atom -from propka.group import initialize_atom_group from propka.conformation_container import ConformationContainer +from propka.group import initialize_atom_group -EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6, - 'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8, - 'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7, - 'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7} +def open_file_for_reading(input_file): + """Open file or file-like stream for reading. - -def read_pdb(pdb_file, parameters, molecule): - """Parse a PDB file. + TODO - convert this to a context manager Args: - pdb_file: file to read - parameters: parameters to guide parsing - molecule: molecular container + input_file: path to file or file-like object. If file-like object, + then will attempt fseek(0). + """ + try: + input_file.fseek(0) + return input_file + except AttributeError: + pass + + try: + file_ = open(input_file, 'rt') + except: + raise IOError('Cannot find file {0:s}'.format(input_file)) + return file_ + + +def read_parameter_file(input_file, parameters): + """Read a parameter file. + + Args: + input_file: input file to read + parameters: Parameters object Returns: - list with elements: - 1. list of conformations - 2. list of names + updated Parameters object """ - conformations = {} - # read in all atoms in the file - lines = get_atom_lines_from_pdb( - pdb_file, ignore_residues=parameters.ignore_residues, - keep_protons=molecule.options.keep_protons, - chains=molecule.options.chains) - for (name, atom) in lines: - if not name in conformations.keys(): - conformations[name] = ConformationContainer( - name=name, parameters=parameters, molecular_container=molecule) - conformations[name].add_atom(atom) - # make a sorted list of conformation names - names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) - return [conformations, names] + # try to locate the parameter file + try: + ifile = resource_filename(__name__, input_file) + input_ = open_file_for_reading(ifile) + except (IOError, FileNotFoundError, ValueError): + input_ = open_file_for_reading(input_file) + for line in input_: + parameters.parse_line(line) + return parameters -def protein_precheck(conformations, names): - """Check protein for correct number of atoms, etc. - Args: - names: conformation names to check - """ - for name in names: - atoms = conformations[name].atoms - # Group the atoms by their residue: - atoms_by_residue = {} - for atom in atoms: - if atom.element != 'H': - res_id = resid_from_atom(atom) - try: - atoms_by_residue[res_id].append(atom) - except KeyError: - atoms_by_residue[res_id] = [atom] - for res_id, res_atoms in atoms_by_residue.items(): - res_name = res_atoms[0].res_name - residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id) - # ignore ligand residues - if res_name not in EXPECTED_ATOM_NUMBERS: - continue - # check for c-terminal - if 'C-' in [a.terminal for a in res_atoms]: - if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1: - str_ = ("Unexpected number ({num:d}) of atoms in residue " - "{res:s} in conformation {conf:s}".format( - num=len(res_atoms), res=residue_label, - conf=name)) - warning(str_) - continue - # check number of atoms in residue - if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]: - str_ = ("Unexpected number ({num:d}) of atoms in residue " - "{res:s} in conformation {conf:s}".format( - num=len(res_atoms), res=residue_label, - conf=name)) - warning(str_) - - -def resid_from_atom(atom): - """Return string with atom residue information. - - Args: - atom: atom to generate string for - Returns - string - """ - return '{0:>4d} {1:s} {2:s}'.format( - atom.res_num, atom.chain_id, atom.icode) +def conformation_sorter(conf): + """TODO - figure out what this function does.""" + model = int(conf[:-1]) + altloc = conf[-1:] + return model*100+ord(altloc) def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, @@ -104,7 +66,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, tags: tags of lines that include atoms chains: list of chains """ - lines = propka.lib.open_file_for_reading(pdb_file).readlines() + lines = open_file_for_reading(pdb_file).readlines() nterm_residue = 'next_residue' old_residue = None terminal = None @@ -179,7 +141,7 @@ def read_input(input_file, parameters, molecule): molecular_container=molecule) conformations[name].add_atom(atom) # make a sorted list of conformation names - names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) + names = sorted(conformations.keys(), key=conformation_sorter) return [conformations, names] @@ -192,7 +154,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']): Yields: conformation container, list of atoms """ - lines = propka.lib.open_file_for_reading(input_file).readlines() + lines = open_file_for_reading(input_file).readlines() conformation = '' atoms = {} numbers = [] @@ -246,3 +208,32 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']): # prepare for next conformation atoms = {} numbers = [] + +def read_pdb(pdb_file, parameters, molecule): + """Parse a PDB file. + + Args: + pdb_file: file to read + parameters: parameters to guide parsing + molecule: molecular container + Returns: + list with elements: + 1. list of conformations + 2. list of names + """ + conformations = {} + # read in all atoms in the file + lines = get_atom_lines_from_pdb( + pdb_file, ignore_residues=parameters.ignore_residues, + keep_protons=molecule.options.keep_protons, + chains=molecule.options.chains) + for (name, atom) in lines: + if not name in conformations.keys(): + conformations[name] = ConformationContainer( + name=name, parameters=parameters, molecular_container=molecule) + conformations[name].add_atom(atom) + # make a sorted list of conformation names + names = sorted(conformations.keys(), key=conformation_sorter) + return [conformations, names] + + diff --git a/propka/lib.py b/propka/lib.py index 48eab4e..518f7e6 100644 --- a/propka/lib.py +++ b/propka/lib.py @@ -11,56 +11,63 @@ _STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s")) _LOGGER.addHandler(_STDOUT_HANDLER) -def open_file_for_reading(input_file): - """Open file or file-like stream for reading. +EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6, + 'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8, + 'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7, + 'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7} - TODO - convert this to a context manager + +def protein_precheck(conformations, names): + """Check protein for correct number of atoms, etc. Args: - input_file: path to file or file-like object. If file-like object, - then will attempt fseek(0). + names: conformation names to check """ - try: - input_file.fseek(0) - return input_file - except AttributeError: - pass - - try: - file_ = open(input_file, 'rt') - except: - raise IOError('Cannot find file {0:s}'.format(input_file)) - return file_ + for name in names: + atoms = conformations[name].atoms + # Group the atoms by their residue: + atoms_by_residue = {} + for atom in atoms: + if atom.element != 'H': + res_id = resid_from_atom(atom) + try: + atoms_by_residue[res_id].append(atom) + except KeyError: + atoms_by_residue[res_id] = [atom] + for res_id, res_atoms in atoms_by_residue.items(): + res_name = res_atoms[0].res_name + residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id) + # ignore ligand residues + if res_name not in EXPECTED_ATOM_NUMBERS: + continue + # check for c-terminal + if 'C-' in [a.terminal for a in res_atoms]: + if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1: + str_ = ("Unexpected number ({num:d}) of atoms in residue " + "{res:s} in conformation {conf:s}".format( + num=len(res_atoms), res=residue_label, + conf=name)) + warning(str_) + continue + # check number of atoms in residue + if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]: + str_ = ("Unexpected number ({num:d}) of atoms in residue " + "{res:s} in conformation {conf:s}".format( + num=len(res_atoms), res=residue_label, + conf=name)) + warning(str_) -def open_file_for_writing(input_file): - """Open file or file-like stream for writing. - - TODO - convert this to a context manager. +def resid_from_atom(atom): + """Return string with atom residue information. Args: - input_file: path to file or file-like object. If file-like object, - then will attempt to get file mode. + atom: atom to generate string for + Returns + string """ - try: - mode = input_file.mode - if not ("w" in mode or "a" in mode or "+" in mode): - raise IOError("File/stream not open for writing") - return input_file - except AttributeError: - pass - try: - file_ = open(input_file, 'wt') - except FileNotFoundError: - raise Exception('Could not open {0:s}'.format(input_file)) - return file_ - - -def conformation_sorter(conf): - """TODO - figure out what this function does.""" - model = int(conf[:-1]) - altloc = conf[-1:] - return model*100+ord(altloc) + return '{0:>4d} {1:s} {2:s}'.format( + atom.res_num, atom.chain_id, atom.icode) def split_atoms_into_molecules(atoms): @@ -354,19 +361,6 @@ def configuration_compare(conf): return 100*int(conf[1:-2]) + ord(conf[-1]) -def write_file(filename, lines): - """Writes a new file. - - Args: - filename: name of file - lines: lines to write to file - """ - file_ = open_file_for_writing(filename) - for line in lines: - file_.write("{0:s}\n".format(line)) - file_.close() - - def _args_to_str(arg_list): """Summarize list of arguments in string. diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index abfc3e4..ab7fa28 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -5,7 +5,6 @@ import sys import propka.molecular_container import propka.calculations import propka.parameters -import propka.pdb from propka.output import write_mol2_for_atoms from propka.lib import info, warning diff --git a/propka/molecular_container.py b/propka/molecular_container.py index d99c2c0..ae82fb9 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -2,10 +2,11 @@ import os import sys import propka.version -from propka.pdb import read_input +from propka.input import read_pdb, read_input, read_parameter_file +from propka.parameters import Parameters from propka.output import write_input from propka.conformation_container import ConformationContainer -from propka.lib import info, warning, make_grid +from propka.lib import info, warning, protein_precheck, make_grid # TODO - these are constants whose origins are a little murky @@ -38,11 +39,12 @@ class Molecular_container: self.file = os.path.split(input_file)[1] self.name = self.file[0:self.file.rfind('.')] input_file_extension = input_file[input_file.rfind('.'):] - # set the version + parameters = Parameters() if options: - parameters = propka.parameters.Parameters(self.options.parameters) + parameters = read_parameter_file( + self.options.parameters, parameters) else: - parameters = propka.parameters.Parameters('propka.cfg') + parameters = read_parameter_file('propka.cfg', parameters) try: version_class = getattr(propka.version, parameters.version) self.version = version_class(parameters) @@ -56,15 +58,15 @@ class Molecular_container: # input is a pdb file. read in atoms and top up containers to make # sure that all atoms are present in all conformations [self.conformations, self.conformation_names] = ( - propka.pdb.read_pdb(input_file, self.version.parameters, self)) + read_pdb(input_file, self.version.parameters, self)) if len(self.conformations) == 0: info('Error: The pdb file does not seems to contain any ' 'molecular conformations') sys.exit(-1) self.top_up_conformations() # make a structure precheck - propka.pdb.protein_precheck(self.conformations, - self.conformation_names) + protein_precheck( + self.conformations, self.conformation_names) # set up atom bonding and protonation self.version.setup_bonding_and_protonation(self) # Extract groups @@ -79,9 +81,8 @@ class Molecular_container: write_input(self, filename) elif input_file_extension == '.propka_input': #input is a propka_input file - [self.conformations, self.conformation_names] = ( - propka.pdb.read_input(input_file, self.version.parameters, - self)) + [self.conformations, self.conformation_names] = read_input( + input_file, self.version.parameters, self) # Extract groups - this merely sets up the groups found in the # input file self.extract_groups() diff --git a/propka/output.py b/propka/output.py index 61f8a68..a72114e 100644 --- a/propka/output.py +++ b/propka/output.py @@ -1,6 +1,42 @@ """Output routines.""" from datetime import date -from propka.lib import info, open_file_for_writing +from propka.lib import info + + +def open_file_for_writing(input_file): + """Open file or file-like stream for writing. + + TODO - convert this to a context manager. + + Args: + input_file: path to file or file-like object. If file-like object, + then will attempt to get file mode. + """ + try: + mode = input_file.mode + if not ("w" in mode or "a" in mode or "+" in mode): + raise IOError("File/stream not open for writing") + return input_file + except AttributeError: + pass + try: + file_ = open(input_file, 'wt') + except FileNotFoundError: + raise Exception('Could not open {0:s}'.format(input_file)) + return file_ + + +def write_file(filename, lines): + """Writes a new file. + + Args: + filename: name of file + lines: lines to write to file + """ + file_ = open_file_for_writing(filename) + for line in lines: + file_.write("{0:s}\n".format(line)) + file_.close() def print_header(): diff --git a/propka/parameters.py b/propka/parameters.py index 960cb81..b708fce 100644 --- a/propka/parameters.py +++ b/propka/parameters.py @@ -35,7 +35,7 @@ STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference'] class Parameters: """PROPKA parameter class.""" - def __init__(self, parameter_file): + def __init__(self): """Initialize parameter class. Args: @@ -52,22 +52,6 @@ class Parameters: self.CYS_CYS_exception = None # These functions set up remaining data structures implicitly self.set_up_data_structures() - self.read_parameters(parameter_file) - - def read_parameters(self, file_): - """Read parameters from file. - - Args: - file_: file to read - """ - # try to locate the parameters file - try: - ifile = pkg_resources.resource_filename(__name__, file_) - input_ = lib.open_file_for_reading(ifile) - except (IOError, FileNotFoundError, ValueError): - input_ = lib.open_file_for_reading(file_) - for line in input_: - self.parse_line(line) def parse_line(self, line): """Parse parameter file line."""