"""Read and write molecular structure files.

Covers PDB files, mol2 files and the MODEL/ENDMDL based input-file format
that ``write_input`` produces and ``read_input`` consumes.
"""
import copy
import string
import sys

import Source.lib
from Source.atom import Atom
from Source.conformation_container import Conformation_container

# Number of non-hydrogen atoms expected in each standard amino-acid residue.
# protein_precheck() allows one additional atom for a C-terminal residue.
expected_atom_numbers = {'ALA': 5,
                         'ARG': 11,
                         'ASN': 8,
                         'ASP': 8,
                         'CYS': 6,
                         'GLY': 4,
                         'GLN': 9,
                         'GLU': 9,
                         'HIS': 10,
                         'ILE': 8,
                         'LEU': 8,
                         'LYS': 9,
                         'MET': 8,
                         'PHE': 11,
                         'PRO': 7,
                         'SER': 6,
                         'THR': 7,
                         'TRP': 14,
                         'TYR': 12,
                         'VAL': 7}

# The record name is taken from the first six columns of every line
# (``line[0:6]``), so each literal compared against it has to be exactly six
# characters long, padded with blanks where the name itself is shorter.
_ATOM_TAG = 'ATOM  '
_HETATM_TAG = 'HETATM'
_TER_TAG = 'TER   '
_DEFAULT_ATOM_TAGS = (_ATOM_TAG, _HETATM_TAG)

# Marker meaning "the next ATOM residue we meet starts a new chain".
_NEXT_RESIDUE = 'next_residue'


def _collect_conformations(named_atoms, parameters, molecule):
    """Group ``(conformation name, atom)`` pairs into conformation containers.

    Returns a two-element list: a dict mapping each conformation name to its
    Conformation_container, and the list of conformation names sorted with
    ``Source.lib.conformation_sorter``.
    """
    conformations = {}
    for name, atom in named_atoms:
        if name not in conformations:
            conformations[name] = Conformation_container(
                name=name, parameters=parameters, molecular_container=molecule)
        conformations[name].add_atom(atom)

    names = sorted(conformations.keys(), key=Source.lib.conformation_sorter)
    return [conformations, names]


def read_pdb(pdb_file, parameters, molecule):
    """Read all atoms of a PDB file and sort them into conformations.

    Arguments:
        pdb_file: the PDB file, handed to ``get_atom_lines_from_pdb``.
        parameters: object providing ``ignore_residues``; also passed on to
            every Conformation_container that is created.
        molecule: molecular container providing ``options.keep_protons`` and
            ``options.chains``; also passed on to every container.

    Returns:
        ``[conformations, names]`` - the dict of containers keyed by
        conformation name and the sorted list of those names.
    """
    lines = get_atom_lines_from_pdb(
        pdb_file,
        ignore_residues=parameters.ignore_residues,
        keep_protons=molecule.options.keep_protons,
        chains=molecule.options.chains)
    return _collect_conformations(lines, parameters, molecule)


def protein_precheck(conformations, names):
    """Warn about residues whose heavy-atom count differs from the expected.

    For every conformation in ``names`` the non-hydrogen atoms of each
    residue are counted and compared with ``expected_atom_numbers``.
    Residues whose name is not in that table (e.g. ligands) are ignored, and
    a residue holding a 'C-' terminal atom is expected to have one atom more.
    Mismatches are reported with ``print``; nothing is returned.
    """
    for name in names:
        # Group the heavy atoms per residue in one pass, remembering the
        # order in which the residues first appear.
        res_ids = []
        heavy_atoms = {}
        for atom in conformations[name].atoms:
            res_id = resid_from_atom(atom)
            if res_id not in heavy_atoms:
                heavy_atoms[res_id] = []
                res_ids.append(res_id)
            if atom.element != 'H':
                heavy_atoms[res_id].append(atom)

        for res_id in res_ids:
            res_atoms = heavy_atoms[res_id]
            # A residue made of hydrogens only has nothing to count (and no
            # first heavy atom to take the residue name from).
            if not res_atoms:
                continue

            resname = res_atoms[0].resName
            # ignore ligand residues
            if resname not in expected_atom_numbers:
                continue

            expected = expected_atom_numbers[resname]
            # a C-terminal residue carries one extra atom
            if 'C-' in [a.terminal for a in res_atoms]:
                expected += 1

            if len(res_atoms) != expected:
                residue_label = '%3s%5s'%(resname, res_id)
                print('Warning: Unexpected number (%d) of atoms in residue %s in conformation %s'%(len(res_atoms),residue_label, name))


def resid_from_atom(a):
    """Return a residue identifier built from the atom's residue number,
    chain ID and insertion code."""
    return '%4d %s %s'%(a.resNumb,a.chainID,a.icode)


def get_atom_lines_from_pdb(pdb_file, ignore_residues=(), keep_protons=False,
                            tags=_DEFAULT_ATOM_TAGS, chains=None):
    """Generate ``(conformation name, Atom)`` pairs from a PDB file.

    Arguments:
        pdb_file: passed to ``Source.lib.open_file_for_reading``.
        ignore_residues: residue names (columns 18-20) to skip.
        keep_protons: when false, atoms whose element is 'H' are dropped.
        tags: six-character record names that are treated as atom records.
        chains: if given and non-empty, only atoms whose chain identifier
            (column 22) is contained in it are kept.

    Yields:
        Tuples ``(conformation, atom)``. The conformation name is the model
        number followed by the alternate-location letter (a blank becomes
        'A', digits 1-9 are mapped to letters A-I). ``atom.terminal`` is set
        to 'N+' for the 'N' atom of the first ATOM residue after the start of
        the file, a MODEL record, a TER record or a C-terminus, to 'C-' for
        'OXT' / "O''" atoms, and to None otherwise.
    """
    # NOTE(review): the handle is not closed here because it is not visible
    # from this file whether open_file_for_reading always returns a stream
    # owned by this function - confirm before adding a close().
    lines = Source.lib.open_file_for_reading(pdb_file).readlines()

    nterm_residue = _NEXT_RESIDUE
    old_residue = None
    model = 1

    for line in lines:
        tag = line[0:6]

        # set the model number
        if tag == 'MODEL ':
            model = int(line[6:])
            nterm_residue = _NEXT_RESIDUE

        if tag == _TER_TAG:
            nterm_residue = _NEXT_RESIDUE

        if tag not in tags:
            continue

        alt_conf_tag = line[16]
        atom_name = line[12:16].strip()
        residue_number = line[22:26]

        # check if we want this residue
        if line[17:20] in ignore_residues:
            continue
        if chains and line[21] not in chains:
            continue

        is_atom_record = tag == _ATOM_TAG

        # set the Nterm residue number - necessary because we may need to
        # identify more than one N+ group for structures with alt_conf tags
        if nterm_residue == _NEXT_RESIDUE and is_atom_record:
            # make sure that we reached a new residue - necessary if OXT is
            # not the last atom in the previous residue
            if old_residue != residue_number:
                nterm_residue = residue_number
                old_residue = None

        # Identify the configuration
        # convert digits to letters ('1' -> 'A', '2' -> 'B', ...)
        if alt_conf_tag in '123456789':
            alt_conf_tag = chr(ord(alt_conf_tag)+16)
        if alt_conf_tag == ' ':
            alt_conf_tag = 'A'
        conformation = '%d%s'%(model, alt_conf_tag)

        # set the terminal
        terminal = None
        if is_atom_record:
            if atom_name == 'N' and nterm_residue == residue_number:
                terminal = 'N+'
            if atom_name in ['OXT', 'O\'\'']:
                terminal = 'C-'
                nterm_residue = _NEXT_RESIDUE
                old_residue = residue_number

        # and yield the atom
        atom = Atom(line=line)
        atom.terminal = terminal

        # ignore hydrogens unless they were asked for
        if atom.element != 'H' or keep_protons:
            yield (conformation, atom)


def write_pdb(conformation, filename):
    """Write the atoms of ``conformation`` to ``filename`` as PDB lines."""
    write_pdb_for_atoms(conformation.atoms, filename)


def write_pdb_for_atoms(atoms, filename, make_conect_section=False):
    """Write one PDB line per atom to ``filename``.

    If ``make_conect_section`` is true, the CONECT line of every atom is
    appended after all atom lines.
    """
    out = Source.lib.open_file_for_writing(filename)
    try:
        for atom in atoms:
            out.write(atom.make_pdb_line())

        if make_conect_section:
            for atom in atoms:
                out.write(atom.make_conect_line())
    finally:
        # release the file even if producing a line fails
        out.close()


def write_mol2_for_atoms(atoms, filename):
    """Write ``atoms`` to ``filename`` in mol2 format.

    The file consists of a @MOLECULE header with the atom and bond counts,
    an @ATOM section (atoms numbered from 1 in list order), a @BOND section
    with one entry per bonded pair, and a @SUBSTRUCTURE section based on the
    first atom (left empty when there are no atoms).
    """
    header = '@MOLECULE\n\n%d %d\nSMALL\nUSER_CHARGES\n'

    atom_lines = ['@ATOM\n']
    for index, atom in enumerate(atoms):
        atom_lines.append(atom.make_mol2_line(index+1))

    bond_lines = ['@BOND\n']
    bond_id = 1
    for i in range(len(atoms)):
        for j in range(i+1, len(atoms)):
            if atoms[i] in atoms[j].bonded_atoms:
                bond_type = get_bond_order(atoms[i], atoms[j])
                bond_lines.append('%7d %7d %7d %7s\n'%(bond_id, i+1, j+1, bond_type))
                bond_id += 1

    substructure_section = '@SUBSTRUCTURE\n\n'
    if len(atoms) > 0:
        substructure_section = '@SUBSTRUCTURE\n%-7d %10s %7d\n'%(atoms[0].resNumb,atoms[0].resName,atoms[0].numb)

    out = Source.lib.open_file_for_writing(filename)
    try:
        # bond_id is one past the last bond written
        out.write(header%(len(atoms), bond_id-1))
        out.write(''.join(atom_lines))
        out.write(''.join(bond_lines))
        out.write(substructure_section)
    finally:
        out.close()


def get_bond_order(atom1, atom2):
    """Return the mol2 bond type between two atoms as a string.

    The result is 'ar' when both atoms have an aromatic ('.ar') sybyl type.
    Otherwise it is '1' unless both atoms have pi electrons left (after
    subtracting one for an aromatic atom), in which case it is the smaller
    of the two pi-electron counts plus one.
    """
    aromatic1 = '.ar' in atom1.sybyl_type
    aromatic2 = '.ar' in atom2.sybyl_type

    pi_electrons1 = atom1.number_of_pi_electrons_in_double_and_triple_bonds
    pi_electrons2 = atom2.number_of_pi_electrons_in_double_and_triple_bonds
    if aromatic1:
        pi_electrons1 -= 1
    if aromatic2:
        pi_electrons2 -= 1

    bond_type = '1'
    if pi_electrons1 > 0 and pi_electrons2 > 0:
        bond_type = '%d'%(min(pi_electrons1, pi_electrons2)+1)

    if aromatic1 and aromatic2:
        bond_type = 'ar'

    return bond_type


def write_input(molecular_container, filename):
    """Write every conformation of ``molecular_container`` to ``filename``.

    Each conformation is written as a ``MODEL <name>`` line, the input line
    of every atom, the CONECT line of every atom, the covalent-coupling line
    of every group, the non-covalent-coupling line of every group, and a
    closing ``ENDMDL`` line.
    """
    out = Source.lib.open_file_for_writing(filename)
    try:
        for conformation_name in molecular_container.conformation_names:
            conformation = molecular_container.conformations[conformation_name]
            out.write('MODEL %s\n'%conformation_name)
            # write atoms
            for atom in conformation.atoms:
                out.write(atom.make_input_line())
            # write bonds
            for atom in conformation.atoms:
                out.write(atom.make_conect_line())
            # write covalently coupled groups
            for group in conformation.groups:
                out.write(group.make_covalently_coupled_line())
            # write non-covalently coupled groups
            for group in conformation.groups:
                out.write(group.make_non_covalently_coupled_line())
            out.write('ENDMDL\n')
    finally:
        # release the file even if producing a line fails
        out.close()


def read_input(input_file, parameters, molecule):
    """Read all atoms of an input file and sort them into conformations.

    Arguments:
        input_file: the input file, handed to ``get_atom_lines_from_input``.
        parameters: passed on to every Conformation_container created.
        molecule: passed on to every container as its molecular container.

    Returns:
        ``[conformations, names]`` - the dict of containers keyed by
        conformation name and the sorted list of those names.
    """
    lines = get_atom_lines_from_input(input_file)
    return _collect_conformations(lines, parameters, molecule)


def _serial_number_fields(line):
    """Cut the part of ``line`` after the six-character tag into consecutive
    five-character fields, leaving out the final character of the line."""
    return [line[i:i+5] for i in range(6, len(line)-1, 5)]


def get_atom_lines_from_input(input_file, tags=_DEFAULT_ATOM_TAGS):
    """Generate ``(conformation name, Atom)`` pairs from an input file.

    Atom records are collected per conformation; CONECT records add mutual
    bonds between the collected atoms (flagging sulphur-sulphur bonds as
    cysteine bridges), CCOUPL / NCOUPL records couple the atoms' groups
    covalently / non-covalently. The atoms of a conformation are yielded, in
    the order they were read, once its ENDMDL record is reached.

    Arguments:
        input_file: passed to ``Source.lib.open_file_for_reading``.
        tags: six-character record names that are treated as atom records.
    """
    # NOTE(review): the handle is not closed here because it is not visible
    # from this file whether open_file_for_reading always returns a stream
    # owned by this function - confirm before adding a close().
    lines = Source.lib.open_file_for_reading(input_file).readlines()

    conformation = ''
    atoms = {}
    numbers = []

    for line in lines:
        tag = line[0:6]

        # set the conformation
        if tag == 'MODEL ':
            conformation = line[6:].strip()

        # found an atom - save it
        if tag in tags:
            atom = Atom(line=line)
            atom.get_input_parameters()
            atom.groups_extracted = 1
            atom.is_protonated = True
            atoms[atom.numb] = atom
            numbers.append(atom.numb)

        # found bonding information - apply it
        if tag == 'CONECT' and len(line) > 14:
            fields = _serial_number_fields(line)
            center_atom = atoms[int(fields[0])]
            for field in fields[1:]:
                bonded = atoms[int(field)]
                # remember to check for cysteine bridges
                if center_atom.element == 'S' and bonded.element == 'S':
                    center_atom.cysteine_bridge = True
                    bonded.cysteine_bridge = True
                # set up bonding in both directions, without duplicates
                if bonded not in center_atom.bonded_atoms:
                    center_atom.bonded_atoms.append(bonded)
                if center_atom not in bonded.bonded_atoms:
                    bonded.bonded_atoms.append(center_atom)

        # found info on covalent coupling
        if tag == 'CCOUPL' and len(line) > 14:
            fields = _serial_number_fields(line)
            center_atom = atoms[int(fields[0])]
            for field in fields[1:]:
                coupled = atoms[int(field)]
                center_atom.group.couple_covalently(coupled.group)

        # found info on non-covalent coupling
        if tag == 'NCOUPL' and len(line) > 14:
            fields = _serial_number_fields(line)
            center_atom = atoms[int(fields[0])]
            for field in fields[1:]:
                coupled = atoms[int(field)]
                center_atom.group.couple_non_covalently(coupled.group)

        # this conformation is done - yield the atoms
        if tag == 'ENDMDL':
            for number in numbers:
                yield (conformation, atoms[number])
            # prepare for next conformation
            atoms = {}
            numbers = []