diff --git a/propka/pdb.py b/propka/pdb.py index a362189..6d76839 100644 --- a/propka/pdb.py +++ b/propka/pdb.py @@ -1,132 +1,136 @@ - -from __future__ import division -from __future__ import print_function - -import string, sys, copy - +"""PDB parsing functionality.""" import propka.lib -from propka.lib import info, warning - +from propka.lib import warning from propka.atom import Atom from propka.conformation_container import ConformationContainer -expected_atom_numbers = {'ALA':5, - 'ARG':11, - 'ASN':8, - 'ASP':8, - 'CYS':6, - 'GLY':4, - 'GLN':9, - 'GLU':9, - 'HIS':10, - 'ILE':8, - 'LEU':8, - 'LYS':9, - 'MET':8, - 'PHE':11, - 'PRO':7, - 'SER':6, - 'THR':7, - 'TRP':14, - 'TYR':12, - 'VAL':7} + +EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6, + 'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8, + 'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7, + 'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7} def read_pdb(pdb_file, parameters, molecule): - conformations = {} + """Parse a PDB file. + Args: + pdb_file: file to read + parameters: parameters to guide parsing + molecule: molecular container + Returns: + list with elements: + 1. list of conformations + 2. list of names + """ + conformations = {} # read in all atoms in the file - lines = get_atom_lines_from_pdb(pdb_file, ignore_residues = parameters.ignore_residues, keep_protons = molecule.options.keep_protons, chains=molecule.options.chains) + lines = get_atom_lines_from_pdb(pdb_file, + ignore_residues=parameters.ignore_residues, + keep_protons=molecule.options.keep_protons, + chains=molecule.options.chains) for (name, atom) in lines: if not name in conformations.keys(): - conformations[name] = ConformationContainer(name=name, parameters=parameters, molecular_container=molecule) + conformations[name] = ConformationContainer(name=name, + parameters=parameters, + molecular_container=molecule) conformations[name].add_atom(atom) - # make a sorted list of conformation names names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) - return [conformations, names] -def protein_precheck(conformations, names): +def protein_precheck(conformations, names): + """Check protein for correct number of atoms, etc. + + Args: + names: conformation names to check + """ for name in names: atoms = conformations[name].atoms - # Group the atoms by their residue: atoms_by_residue = {} - for a in atoms: - if a.element != 'H': - res_id = resid_from_atom(a) + for atom in atoms: + if atom.element != 'H': + res_id = resid_from_atom(atom) try: - atoms_by_residue[res_id].append(a) + atoms_by_residue[res_id].append(atom) except KeyError: - atoms_by_residue[res_id] = [a] - + atoms_by_residue[res_id] = [atom] for res_id, res_atoms in atoms_by_residue.items(): res_name = res_atoms[0].res_name residue_label = '%3s%5s'%(res_name, res_id) - # ignore ligand residues - if res_name not in expected_atom_numbers: + if res_name not in EXPECTED_ATOM_NUMBERS: continue - # check for c-terminal if 'C-' in [a.terminal for a in res_atoms]: - if len(res_atoms) != expected_atom_numbers[res_name]+1: - warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name)) + if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1: + str_ = ("Unexpected number (%d) of atoms in residue %s " + "in conformation %s" % (len(res_atoms), + residue_label, name)) + warning(str_) continue - # check number of atoms in residue - if len(res_atoms) != expected_atom_numbers[res_name]: - warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name)) - - return - -def resid_from_atom(a): - return '%4d %s %s'%(a.res_num,a.chain_id,a.icode) + if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]: + str_ = ('Unexpected number (%d) of atoms in residue %s ' + 'in conformation %s' % (len(res_atoms), + residue_label, name)) + warning(str_) -def get_atom_lines_from_pdb(pdb_file, ignore_residues = [], keep_protons=False, tags = ['ATOM ', 'HETATM'], chains=None): +def resid_from_atom(atom): + """Return string with atom residue information. + Args: + atom: atom to generate string for + Returns + string + """ + return '%4d %s %s' % (atom.res_num, atom.chain_id, atom.icode) + + +def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, + tags=['ATOM ', 'HETATM'], chains=None): + """Get atom lines from PDB file. + + Args: + pdb_file: PDB file to parse + ignore_residues: list of residues to ignore + keep_protons: bool to keep/ignore protons + tags: tags of lines that include atoms + chains: list of chains + """ lines = propka.lib.open_file_for_reading(pdb_file).readlines() nterm_residue = 'next_residue' old_residue = None terminal = None model = 1 - - for line in lines: tag = line[0:6] - # set the model number if tag == 'MODEL ': model = int(line[6:]) nterm_residue = 'next_residue' - if tag == 'TER ': nterm_residue = 'next_residue' - if tag in tags: alt_conf_tag = line[16] - residue_name = line[12:16] - residue_number = line[22:26] - + residue_name = line[12: 16] + residue_number = line[22: 26] # check if we want this residue - if line[17:20] in ignore_residues: + if line[17: 20] in ignore_residues: continue if chains and line[21] not in chains: continue - # set the Nterm residue number - nessecary because we may need to # identify more than one N+ group for structures with alt_conf tags if nterm_residue == 'next_residue' and tag == 'ATOM ': - # make sure that we reached a new residue - nessecary if OXT is not the last atom in - # the previous residue + # make sure that we reached a new residue - nessecary if OXT is + # not the last atom inthe previous residue if old_residue != residue_number: nterm_residue = residue_number old_residue = None - - # Identify the configuration # convert digits to letters if alt_conf_tag in '123456789': @@ -134,106 +138,116 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues = [], keep_protons=False, if alt_conf_tag == ' ': alt_conf_tag = 'A' conformation = '%d%s'%(model, alt_conf_tag) - # set the terminal if tag == 'ATOM ': - if residue_name.strip() == 'N' and nterm_residue == residue_number: + if (residue_name.strip() == 'N' + and nterm_residue == residue_number): terminal = 'N+' - if residue_name.strip() in ['OXT','O\'\'']: + if residue_name.strip() in ['OXT', 'O\'\'']: terminal = 'C-' nterm_residue = 'next_residue' old_residue = residue_number # and yield the atom atom = Atom(line=line) atom.terminal = terminal - #if keep_protons: - # atom.is_protonated = True if not (atom.element == 'H' and not keep_protons): #ignore hydrogen yield (conformation, atom) - terminal = None - return - def write_pdb(conformation, filename): + """Write PDB conformation to a file. + + Args: + conformation: conformation container + filename: filename for output + """ write_pdb_for_atoms(conformation.atoms, filename) - return + def write_pdb_for_atoms(atoms, filename, make_conect_section=False): - out = propka.lib.open_file_for_writing(filename) + """Write out PDB file for atoms. + Args: + atoms: list of atoms + filename: name of file + make_conect_section: generate a CONECT PDB section + """ + out = propka.lib.open_file_for_writing(filename) for atom in atoms: out.write(atom.make_pdb_line()) - if make_conect_section: for atom in atoms: out.write(atom.make_conect_line()) - - out.close() - return - - def write_mol2_for_atoms(atoms, filename): + """Write out MOL2 file for atoms. + Args: + atoms: list of atoms + filename: name of file + """ + # TODO - header needs to be converted to format string header = '@MOLECULE\n\n%d %d\nSMALL\nUSER_CHARGES\n' - atoms_section = '@ATOM\n' - for i in range(len(atoms)): - atoms_section += atoms[i].make_mol2_line(i+1) - - + for i, atom in enumerate(atoms): + atoms_section += atom.make_mol2_line(i+1) bonds_section = '@BOND\n' - id = 1 - for i in range(len(atoms)): - for j in range(i+1,len(atoms)): - if atoms[i] in atoms[j].bonded_atoms: - type = get_bond_order(atoms[i],atoms[j]) - bonds_section += '%7d %7d %7d %7s\n'%(id, i+1, j+1, type) - id+=1 - + id_ = 1 + for i, atom1 in enumerate(atoms): + for j, atom2 in enumerate(atoms, i+1): + if atom1 in atom2.bonded_atoms: + type_ = get_bond_order(atom1, atom2) + bonds_section += '%7d %7d %7d %7s\n' % (id_, i+1, j+1, type_) + id_ += 1 substructure_section = '@SUBSTRUCTURE\n\n' - if len(atoms)>0: - substructure_section = '@SUBSTRUCTURE\n%-7d %10s %7d\n'%(atoms[0].res_num,atoms[0].res_name,atoms[0].numb) - + if len(atoms) > 0: + substructure_section = ('@SUBSTRUCTURE\n%-7d %10s %7d\n' + % (atoms[0].res_num, atoms[0].res_name, + atoms[0].numb)) out = propka.lib.open_file_for_writing(filename) - out.write(header%(len(atoms),id-1)) + out.write(header % (len(atoms), id_-1)) out.write(atoms_section) out.write(bonds_section) out.write(substructure_section) out.close() - return def get_bond_order(atom1, atom2): - type = '1' + """Get the order of a bond between two atoms. + + Args: + atom1: first atom in bond + atom2: second atom in bond + Returns: + string with bond type + """ + type_ = '1' pi_electrons1 = atom1.num_pi_elec_2_3_bonds pi_electrons2 = atom2.num_pi_elec_2_3_bonds - if '.ar' in atom1.sybyl_type: - pi_electrons1 -=1 + pi_electrons1 -= 1 if '.ar' in atom2.sybyl_type: - pi_electrons2 -=1 - + pi_electrons2 -= 1 if pi_electrons1 > 0 and pi_electrons2 > 0: - type = '%d'%(min(pi_electrons1, pi_electrons2)+1) - + type_ = '%d' % (min(pi_electrons1, pi_electrons2)+1) if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type: - type = 'ar' - - - return type - + type_ = 'ar' + return type_ def write_input(molecular_container, filename): - out = propka.lib.open_file_for_writing(filename) + """Write PROPKA input file for molecular container. + Args: + molecular_container: molecular container + filename: output file name + """ + out = propka.lib.open_file_for_writing(filename) for conformation_name in molecular_container.conformation_names: - out.write('MODEL %s\n'%conformation_name) + out.write('MODEL %s\n' % conformation_name) # write atoms for atom in molecular_container.conformations[conformation_name].atoms: out.write(atom.make_input_line()) @@ -247,43 +261,51 @@ def write_input(molecular_container, filename): for group in molecular_container.conformations[conformation_name].groups: out.write(group.make_non_covalently_coupled_line()) out.write('ENDMDL\n') - out.close() - return +def read_input(input_file, parameters, molecule): + """Read PROPKA input file for molecular container. -def read_input(input_file, parameters,molecule): + Args: + input_file: input file + parameters: parameters for parsing/setup + molecule: molecular container + Returns: + list with [conformations, names of conformations] + """ conformations = {} - # read in all atoms in the input file lines = get_atom_lines_from_input(input_file) for (name, atom) in lines: if not name in conformations.keys(): - conformations[name] = ConformationContainer(name=name, parameters=parameters, molecular_container=molecule) + conformations[name] = ConformationContainer( + name=name, parameters=parameters, + molecular_container=molecule) conformations[name].add_atom(atom) - # make a sorted list of conformation names names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) - return [conformations, names] +def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']): + """Get atom lines from a PROPKA input file. -def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']): + Args: + input_file: input file + tags: tags defining atom lines + Yields: + conformation container, list of atoms + """ lines = propka.lib.open_file_for_reading(input_file).readlines() conformation = '' - atoms = {} numbers = [] - for line in lines: tag = line[0:6] - # set the conformation if tag == 'MODEL ': conformation = line[6:].strip() - # found an atom - save it if tag in tags: atom = Atom(line=line) @@ -292,46 +314,39 @@ def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']): atom.is_protonated = True atoms[atom.numb] = atom numbers.append(atom.numb) - # found bonding information - apply it - if tag == 'CONECT' and len(line)>14: + if tag == 'CONECT' and len(line) > 14: conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] center_atom = atoms[int(conect_numbers[0])] - for n in conect_numbers[1:]: - b = atoms[int(n)] + for num in conect_numbers[1:]: + bond_atom = atoms[int(num)] # remember to check for cysteine bridges - if center_atom.element == 'S' and b.element == 'S': - center_atom.cysteine_bridge = True - b.cysteine_bridge = True + if center_atom.element == 'S' and bond_atom.element == 'S': + center_atom.cysteine_bridge = True + bond_atom.cysteine_bridge = True # set up bonding - if not b in center_atom.bonded_atoms: - center_atom.bonded_atoms.append(b) - if not center_atom in b.bonded_atoms: - b.bonded_atoms.append(center_atom) - + if not bond_atom in center_atom.bonded_atoms: + center_atom.bonded_atoms.append(bond_atom) + if not center_atom in bond_atom.bonded_atoms: + bond_atom.bonded_atoms.append(center_atom) # found info on covalent coupling - if tag == 'CCOUPL' and len(line)>14: + if tag == 'CCOUPL' and len(line) > 14: conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] center_atom = atoms[int(conect_numbers[0])] - for n in conect_numbers[1:]: - cg = atoms[int(n)] - center_atom.group.couple_covalently(cg.group) - + for num in conect_numbers[1:]: + cov_atom = atoms[int(num)] + center_atom.group.couple_covalently(cov_atom.group) # found info on non-covalent coupling - if tag == 'NCOUPL' and len(line)>14: + if tag == 'NCOUPL' and len(line) > 14: conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] center_atom = atoms[int(conect_numbers[0])] - for n in conect_numbers[1:]: - cg = atoms[int(n)] - center_atom.group.couple_non_covalently(cg.group) - + for num in conect_numbers[1:]: + cov_atom = atoms[int(num)] + center_atom.group.couple_non_covalently(cov_atom.group) # this conformation is done - yield the atoms if tag == 'ENDMDL': - for n in numbers: - yield (conformation, atoms[n]) + for num in numbers: + yield (conformation, atoms[num]) # prepare for next conformation atoms = {} numbers = [] - - - return