"""Tests for PROPKA""" import logging import os import re import json from pathlib import Path import pytest from pytest import approx from propka.parameters import Parameters from propka.molecular_container import MolecularContainer from propka.input import read_parameter_file, read_molecule_file from propka.lib import loadOptions from typing import List _LOGGER = logging.getLogger(__name__) # Number of decimal places for maximum tolerable error. Set by number of # decimal places in pKa output as well as need to make unmodified code work # on WSL Ubuntu 18.04 MAX_ERR_DECIMALS = 2 MAX_ERR_ABS = 10**-MAX_ERR_DECIMALS # This directory TEST_DIR = Path("tests") # Location for test PDBs PDB_DIR = Path("pdb") # Location for results for comparing output (allow running from tests/ and # ../tests/) RESULTS_DIR = Path("tests/results") if not RESULTS_DIR.is_dir(): _LOGGER.warning("Switching to sub-directory") RESULTS_DIR = Path("results") def get_test_dirs(): """Get locations of test files. Returns: dictionary with test file locations. """ path_dict = {} for key, path in [("pdbs", PDB_DIR), ("results", RESULTS_DIR)]: test_path = TEST_DIR / path if test_path.is_dir(): path_dict[key] = test_path else: test_path = path if test_path.is_dir(): path_dict[key] = test_path else: errstr = ( "Can't find {0:s} test files in {1:s}".format( key, [TEST_DIR / path, path])) raise FileNotFoundError(errstr) return path_dict def run_propka(options, pdb_path, tmp_path): """Run PROPKA software. Args: options: list of PROPKA options pdb_path: path to PDB file tmp_path: path for working directory """ options += [str(pdb_path)] args = loadOptions(options) try: _LOGGER.warning( "Working in tmpdir {0:s} because of PROPKA file output; " "need to fix this.".format(str(tmp_path))) cwd = Path.cwd() os.chdir(tmp_path) parameters = read_parameter_file(args.parameters, Parameters()) molecule = MolecularContainer(parameters, args) molecule = read_molecule_file(str(pdb_path), molecule) molecule.calculate_pka() molecule.write_pka() finally: os.chdir(cwd) def parse_pka(pka_path: Path) -> dict: """Parse testable data from a .pka file into a dictionary. """ pka_list: List[float] = [] data: dict = {"pKa": pka_list} with open(pka_path, "rt") as pka_file: at_pka = False for line in pka_file: if at_pka: if line.startswith("---"): at_pka = False else: m = re.search(r'\d+\.\d+', line[13:]) assert m is not None pka_list.append(float(m.group())) elif "model-pKa" in line: at_pka = True else: m = re.match( r"The pI is *(\d+\.\d+) .folded. and *(\d+\.\d+) .unfolded.", line) if m is not None: data["pI_folded"] = float(m.group(1)) data["pI_unfolded"] = float(m.group(2)) return data def compare_output(pdb, tmp_path, ref_path): """Compare results of test with reference. Args: pdb: PDB filename stem tmp_path: temporary directory ref_path: path with reference results Raises: ValueError if results disagree. """ with open(ref_path, "rt") as ref_file: if ref_path.name.endswith(".json"): ref_data = json.load(ref_file) else: ref_data = {"pKa": [float(line) for line in ref_file]} test_data = parse_pka(tmp_path / f"{pdb}.pka") for key in ref_data: assert test_data[key] == approx(ref_data[key], abs=MAX_ERR_ABS), key @pytest.mark.parametrize("pdb, options", [ pytest.param('sample-issue-140', [], id="sample-issue-140: no options"), pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"), pytest.param('1HPX', [], id="1HPX: no options"), pytest.param('4DFR', [], id="4DFR: no options"), pytest.param('3SGB', [], id="3SGB: no options"), pytest.param('3SGB-subset', [ "--titrate_only", "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"], id="3SGB: --titrate_only"), pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")]) def test_regression(pdb, options, tmp_path): """Basic regression test of PROPKA functionality.""" path_dict = get_test_dirs() for ext in ["json", "dat"]: ref_path = path_dict["results"] / f"{pdb}.{ext}" if ref_path.is_file(): ref_path = ref_path.resolve() break else: _LOGGER.warning("Missing results file for comparison: {0:s}".format( str(ref_path))) ref_path = None pdb_path = path_dict["pdbs"] / ("{0:s}.pdb".format(pdb)) if pdb_path.is_file(): pdb_path = pdb_path.resolve() else: errstr = "Missing PDB file: {0:s}".format(pdb_path) raise FileNotFoundError(errstr) tmp_path = Path(tmp_path).resolve() run_propka(options, pdb_path, tmp_path) if ref_path is not None: compare_output(pdb, tmp_path, ref_path)