De-lint propka.py.

2020-05-25 11:54:41 -07:00
parent 47226bb4b0
commit b3e2685e94
1 changed files with 171 additions and 156 deletions
--- a/propka/pdb.py
+++ b/propka/pdb.py
@@ -1,132 +1,136 @@
-
-from __future__ import division
-from __future__ import print_function
-
-import string, sys, copy
-
+"""PDB parsing functionality."""
 import propka.lib
-from propka.lib import info, warning
-
+from propka.lib import warning
 from propka.atom import Atom
 from propka.conformation_container import ConformationContainer

-expected_atom_numbers = {'ALA':5,
-                         'ARG':11,
-                         'ASN':8,
-                         'ASP':8,
-                         'CYS':6,
-                         'GLY':4,
-                         'GLN':9,
-                         'GLU':9,
-                         'HIS':10,
-                         'ILE':8,
-                         'LEU':8,
-                         'LYS':9,
-                         'MET':8,
-                         'PHE':11,
-                         'PRO':7,
-                         'SER':6,
-                         'THR':7,
-                         'TRP':14,
-                         'TYR':12,
-                         'VAL':7}
+
+EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
+                         'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
+                         'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
+                         'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}


 def read_pdb(pdb_file, parameters, molecule):
-    conformations = {}
+    """Parse a PDB file.

+    Args:
+        pdb_file:  file to read
+        parameters:  parameters to guide parsing
+        molecule:  molecular container
+    Returns:
+        list with elements:
+            1. list of conformations
+            2. list of names
+    """
+    conformations = {}
    # read in all atoms in the file
-    lines = get_atom_lines_from_pdb(pdb_file, ignore_residues = parameters.ignore_residues, keep_protons = molecule.options.keep_protons, chains=molecule.options.chains)
+    lines = get_atom_lines_from_pdb(pdb_file,
+                                    ignore_residues=parameters.ignore_residues,
+                                    keep_protons=molecule.options.keep_protons,
+                                    chains=molecule.options.chains)
    for (name, atom) in lines:
        if not name in conformations.keys():
-            conformations[name] = ConformationContainer(name=name, parameters=parameters, molecular_container=molecule)
+            conformations[name] = ConformationContainer(name=name,
+                                                        parameters=parameters,
+                                                        molecular_container=molecule)
        conformations[name].add_atom(atom)
-
    # make a sorted list of conformation names
    names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
-
    return [conformations, names]

-def protein_precheck(conformations, names):

+def protein_precheck(conformations, names):
+    """Check protein for correct number of atoms, etc.
+
+    Args:
+        names:  conformation names to check
+    """
    for name in names:
        atoms = conformations[name].atoms
-
        # Group the atoms by their residue:
        atoms_by_residue = {}
-        for a in atoms:
-            if a.element != 'H':
-                res_id = resid_from_atom(a)
+        for atom in atoms:
+            if atom.element != 'H':
+                res_id = resid_from_atom(atom)
                try:
-                    atoms_by_residue[res_id].append(a)
+                    atoms_by_residue[res_id].append(atom)
                except KeyError:
-                    atoms_by_residue[res_id] = [a]
-
+                    atoms_by_residue[res_id] = [atom]
        for res_id, res_atoms in atoms_by_residue.items():
            res_name = res_atoms[0].res_name
            residue_label = '%3s%5s'%(res_name, res_id)
-
            # ignore ligand residues
-            if res_name not in expected_atom_numbers:
+            if res_name not in EXPECTED_ATOM_NUMBERS:
                continue
-
            # check for c-terminal
            if 'C-' in [a.terminal for a in res_atoms]:
-                if len(res_atoms) != expected_atom_numbers[res_name]+1:
-                    warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name))
+                if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
+                    str_ = ("Unexpected number (%d) of atoms in residue %s "
+                            "in conformation %s" % (len(res_atoms),
+                                                    residue_label, name))
+                    warning(str_)
                continue
-
            # check number of atoms in residue
-            if len(res_atoms) != expected_atom_numbers[res_name]:
-                warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name))
-
-    return
-
-def resid_from_atom(a):
-    return '%4d %s %s'%(a.res_num,a.chain_id,a.icode)
+            if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
+                str_ = ('Unexpected number (%d) of atoms in residue %s '
+                        'in conformation %s' % (len(res_atoms),
+                                                residue_label, name))
+                warning(str_)


-def get_atom_lines_from_pdb(pdb_file, ignore_residues = [], keep_protons=False, tags = ['ATOM  ', 'HETATM'], chains=None):
+def resid_from_atom(atom):
+    """Return string with atom residue information.

+    Args:
+        atom:  atom to generate string for
+    Returns
+        string
+    """
+    return '%4d %s %s' % (atom.res_num, atom.chain_id, atom.icode)
+
+
+def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
+                            tags=['ATOM  ', 'HETATM'], chains=None):
+    """Get atom lines from PDB file.
+
+    Args:
+        pdb_file:  PDB file to parse
+        ignore_residues:  list of residues to ignore
+        keep_protons:  bool to keep/ignore protons
+        tags:  tags of lines that include atoms
+        chains:  list of chains
+    """
    lines = propka.lib.open_file_for_reading(pdb_file).readlines()
    nterm_residue = 'next_residue'
    old_residue = None
    terminal = None
    model = 1
-
-
    for line in lines:
        tag = line[0:6]
-
        # set the model number
        if tag == 'MODEL ':
            model = int(line[6:])
            nterm_residue = 'next_residue'
-
        if tag == 'TER   ':
            nterm_residue = 'next_residue'
-
        if tag in tags:
            alt_conf_tag = line[16]
-            residue_name = line[12:16]
-            residue_number = line[22:26]
-
+            residue_name = line[12: 16]
+            residue_number = line[22: 26]
            # check if we want this residue
-            if line[17:20] in ignore_residues:
+            if line[17: 20] in ignore_residues:
                continue
            if chains and line[21] not in chains:
                continue
-
            # set the Nterm residue number - nessecary because we may need to
            # identify more than one N+ group for structures with alt_conf tags
            if nterm_residue == 'next_residue' and tag == 'ATOM  ':
-                # make sure that we reached a new residue - nessecary if OXT is not the last atom in
-                # the previous residue
+                # make sure that we reached a new residue - nessecary if OXT is
+                # not the last atom inthe previous residue
                if old_residue != residue_number:
                    nterm_residue = residue_number
                    old_residue = None
-
-
            # Identify the configuration
            # convert digits to letters
            if alt_conf_tag in '123456789':
@@ -134,106 +138,116 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues = [], keep_protons=False,
            if alt_conf_tag == ' ':
                alt_conf_tag = 'A'
            conformation = '%d%s'%(model, alt_conf_tag)
-
            # set the terminal
            if  tag == 'ATOM  ':
-                if residue_name.strip() == 'N' and nterm_residue == residue_number:
+                if (residue_name.strip() == 'N'
+                        and nterm_residue == residue_number):
                    terminal = 'N+'
-                if  residue_name.strip() in ['OXT','O\'\'']:
+                if  residue_name.strip() in ['OXT', 'O\'\'']:
                    terminal = 'C-'
                    nterm_residue = 'next_residue'
                    old_residue = residue_number
            # and yield the atom
            atom = Atom(line=line)
            atom.terminal = terminal
-            #if keep_protons:
-            #    atom.is_protonated = True
            if not (atom.element == 'H' and not keep_protons): #ignore hydrogen
                yield (conformation, atom)
-
            terminal = None

-    return
-

 def write_pdb(conformation, filename):
+    """Write PDB conformation to a file.
+
+    Args:
+        conformation:  conformation container
+        filename:  filename for output
+    """
    write_pdb_for_atoms(conformation.atoms, filename)
-    return
+

 def write_pdb_for_atoms(atoms, filename, make_conect_section=False):
-    out = propka.lib.open_file_for_writing(filename)
+    """Write out PDB file for atoms.

+    Args:
+        atoms:  list of atoms
+        filename:  name of file
+        make_conect_section:  generate a CONECT PDB section
+    """
+    out = propka.lib.open_file_for_writing(filename)
    for atom in atoms:
        out.write(atom.make_pdb_line())
-
    if make_conect_section:
        for atom in atoms:
            out.write(atom.make_conect_line())
-
-
    out.close()

-    return
-
-

 def write_mol2_for_atoms(atoms, filename):
+    """Write out MOL2 file for atoms.

+    Args:
+        atoms:  list of atoms
+        filename:  name of file
+    """
+    # TODO - header needs to be converted to format string
    header = '@<TRIPOS>MOLECULE\n\n%d %d\nSMALL\nUSER_CHARGES\n'
-
    atoms_section = '@<TRIPOS>ATOM\n'
-    for i in range(len(atoms)):
-        atoms_section += atoms[i].make_mol2_line(i+1)
-
-
+    for i, atom in enumerate(atoms):
+        atoms_section += atom.make_mol2_line(i+1)
    bonds_section = '@<TRIPOS>BOND\n'
-    id = 1
-    for i in range(len(atoms)):
-        for j in range(i+1,len(atoms)):
-            if atoms[i] in atoms[j].bonded_atoms:
-                type = get_bond_order(atoms[i],atoms[j])
-                bonds_section += '%7d %7d %7d %7s\n'%(id, i+1, j+1, type)
-                id+=1
-
+    id_ = 1
+    for i, atom1 in enumerate(atoms):
+        for j, atom2 in enumerate(atoms, i+1):
+            if atom1 in atom2.bonded_atoms:
+                type_ = get_bond_order(atom1, atom2)
+                bonds_section += '%7d %7d %7d %7s\n' % (id_, i+1, j+1, type_)
+                id_ += 1
    substructure_section = '@<TRIPOS>SUBSTRUCTURE\n\n'
-    if len(atoms)>0:
-        substructure_section = '@<TRIPOS>SUBSTRUCTURE\n%-7d %10s %7d\n'%(atoms[0].res_num,atoms[0].res_name,atoms[0].numb)
-
+    if len(atoms) > 0:
+        substructure_section = ('@<TRIPOS>SUBSTRUCTURE\n%-7d %10s %7d\n'
+                                % (atoms[0].res_num, atoms[0].res_name,
+                                   atoms[0].numb))
    out = propka.lib.open_file_for_writing(filename)
-    out.write(header%(len(atoms),id-1))
+    out.write(header % (len(atoms), id_-1))
    out.write(atoms_section)
    out.write(bonds_section)
    out.write(substructure_section)
    out.close()

-    return

 def get_bond_order(atom1, atom2):
-    type = '1'
+    """Get the order of a bond between two atoms.
+
+    Args:
+        atom1:  first atom in bond
+        atom2:  second atom in bond
+    Returns:
+        string with bond type
+    """
+    type_ = '1'
    pi_electrons1 = atom1.num_pi_elec_2_3_bonds
    pi_electrons2 = atom2.num_pi_elec_2_3_bonds
-
    if '.ar' in atom1.sybyl_type:
-        pi_electrons1 -=1
+        pi_electrons1 -= 1
    if '.ar' in atom2.sybyl_type:
-        pi_electrons2 -=1
-
+        pi_electrons2 -= 1
    if pi_electrons1 > 0 and pi_electrons2 > 0:
-        type = '%d'%(min(pi_electrons1, pi_electrons2)+1)
-
+        type_ = '%d' % (min(pi_electrons1, pi_electrons2)+1)
    if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type:
-        type = 'ar'
-
-
-    return type
-
+        type_ = 'ar'
+    return type_


 def write_input(molecular_container, filename):
-    out = propka.lib.open_file_for_writing(filename)
+    """Write PROPKA input file for molecular container.

+    Args:
+        molecular_container:  molecular container
+        filename:  output file name
+    """
+    out = propka.lib.open_file_for_writing(filename)
    for conformation_name in molecular_container.conformation_names:
-        out.write('MODEL %s\n'%conformation_name)
+        out.write('MODEL %s\n' % conformation_name)
        # write atoms
        for atom in molecular_container.conformations[conformation_name].atoms:
            out.write(atom.make_input_line())
@@ -247,43 +261,51 @@ def write_input(molecular_container, filename):
        for group in molecular_container.conformations[conformation_name].groups:
            out.write(group.make_non_covalently_coupled_line())
        out.write('ENDMDL\n')
-
    out.close()

-    return

+def read_input(input_file, parameters, molecule):
+    """Read PROPKA input file for molecular container.

-def read_input(input_file, parameters,molecule):
+    Args:
+        input_file:  input file
+        parameters:  parameters for parsing/setup
+        molecule:  molecular container
+    Returns:
+        list with [conformations, names of conformations]
+    """
    conformations = {}
-
    # read in all atoms in the input file
    lines = get_atom_lines_from_input(input_file)
    for (name, atom) in lines:
        if not name in conformations.keys():
-            conformations[name] = ConformationContainer(name=name, parameters=parameters, molecular_container=molecule)
+            conformations[name] = ConformationContainer(
+                name=name, parameters=parameters,
+                molecular_container=molecule)
        conformations[name].add_atom(atom)
-
    # make a sorted list of conformation names
    names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
-
    return [conformations, names]


+def get_atom_lines_from_input(input_file, tags=['ATOM  ', 'HETATM']):
+    """Get atom lines from a PROPKA input file.

-def get_atom_lines_from_input(input_file, tags = ['ATOM  ','HETATM']):
+    Args:
+        input_file:  input file
+        tags:  tags defining atom lines
+    Yields:
+        conformation container, list of atoms
+    """
    lines = propka.lib.open_file_for_reading(input_file).readlines()
    conformation = ''
-
    atoms = {}
    numbers = []
-
    for line in lines:
        tag = line[0:6]
-
        # set the conformation
        if tag == 'MODEL ':
            conformation = line[6:].strip()
-
        # found an atom - save it
        if tag in tags:
            atom = Atom(line=line)
@@ -292,46 +314,39 @@ def get_atom_lines_from_input(input_file, tags = ['ATOM  ','HETATM']):
            atom.is_protonated = True
            atoms[atom.numb] = atom
            numbers.append(atom.numb)
-
        # found bonding information - apply it
-        if tag == 'CONECT' and len(line)>14:
+        if tag == 'CONECT' and len(line) > 14:
            conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)]
            center_atom = atoms[int(conect_numbers[0])]
-            for n in conect_numbers[1:]:
-                b = atoms[int(n)]
+            for num in conect_numbers[1:]:
+                bond_atom = atoms[int(num)]
                # remember to check for cysteine bridges
-                if center_atom.element == 'S' and b.element == 'S':
+                if center_atom.element == 'S' and bond_atom.element == 'S':
                    center_atom.cysteine_bridge = True
-                        b.cysteine_bridge = True
+                    bond_atom.cysteine_bridge = True
                # set up bonding
-                if not b in center_atom.bonded_atoms:
-                    center_atom.bonded_atoms.append(b)
-                if not center_atom in b.bonded_atoms:
-                    b.bonded_atoms.append(center_atom)
-
+                if not bond_atom in center_atom.bonded_atoms:
+                    center_atom.bonded_atoms.append(bond_atom)
+                if not center_atom in bond_atom.bonded_atoms:
+                    bond_atom.bonded_atoms.append(center_atom)
        # found info on covalent coupling
-        if tag == 'CCOUPL' and len(line)>14:
+        if tag == 'CCOUPL' and len(line) > 14:
            conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)]
            center_atom = atoms[int(conect_numbers[0])]
-            for n in conect_numbers[1:]:
-                cg = atoms[int(n)]
-                center_atom.group.couple_covalently(cg.group)
-
+            for num in conect_numbers[1:]:
+                cov_atom = atoms[int(num)]
+                center_atom.group.couple_covalently(cov_atom.group)
        # found info on non-covalent coupling
-        if tag == 'NCOUPL' and len(line)>14:
+        if tag == 'NCOUPL' and len(line) > 14:
            conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)]
            center_atom = atoms[int(conect_numbers[0])]
-            for n in conect_numbers[1:]:
-                cg = atoms[int(n)]
-                center_atom.group.couple_non_covalently(cg.group)
-
+            for num in conect_numbers[1:]:
+                cov_atom = atoms[int(num)]
+                center_atom.group.couple_non_covalently(cov_atom.group)
        # this conformation is done - yield the atoms
        if tag == 'ENDMDL':
-            for n in numbers:
-                yield (conformation, atoms[n])
+            for num in numbers:
+                yield (conformation, atoms[num])
            # prepare for next conformation
            atoms = {}
            numbers = []
-
-
-    return