Merge pull request #37 from Electrostatics/nathan/argpase

Fix error when using PROPKA with PDB2PQR
This commit is contained in:
Nathan Baker
2020-05-23 08:04:30 -07:00
committed by GitHub
24 changed files with 185 additions and 214 deletions

4
.gitignore vendored
View File

@@ -1,5 +1,5 @@
# Compiled python files # Compiled python files
*.py[cod] *.py[cod]
# PIP stuff
*.egg-info

View File

@@ -36,10 +36,16 @@ directory:
python setup.py install --user --install-scripts ~/bin python setup.py install --user --install-scripts ~/bin
For the purposes of testing or development, you may prefer to install PROPKA as
an editable module via PIP by running
```
pip install -e .
```
from within a virtual environment (e.g., via [virtualenv](https://pypi.org/project/virtualenv/)).
## Requirements ## Requirements
* Python 2.7 or higher or Python 3.1 or higher * Python 3.1 or higher
## Getting started ## Getting started
@@ -56,7 +62,8 @@ Calculate using pdb file
## Testing (for developers) ## Testing (for developers)
Please run `Tests/runtest.py/` after changes before pushing commits. Please see [`tests/README.md`](tests/README.md) for testing instructions.
Please run these tests after making changes to the code and _before_ pushing commits.
## References / Citations ## References / Citations

View File

View File

@@ -1,57 +0,0 @@
import unittest
import propka.hybrid36 as hybrid36
class Hybrid36Test(unittest.TestCase):
    """Unit tests for ``hybrid36.decode``.

    NOTE(review): presumably this is the hybrid-36 atom/residue numbering
    scheme, judging by the module name -- confirm against propka.hybrid36.
    """

    def testDecode(self):
        """Check that each known input string decodes to its expected integer."""
        test_values = {
            "99999": 99999,
            "A0000": 100000,
            "0": 0,
            "9": 9,
            "A": 10,
            " ZZZZY": 43770014,
            "ZZZZZ": 43770015,  # ZZZZZ - A0000 + 100000
            "a0000": 43770016,
            "zzzzz": 87440031,
            "zzzzy": 87440030,
            "99": 99,
            "A0": 100,
            "ZZ": 1035,
            "zz": 1971,
            "-99999": -99999,
            "-A0000": -100000,
            "-0": 0,
            "-9": -9,
            "-A": -10,
            "-ZZZZY": -43770014,
            "-ZZZZZ": -43770015,  # ZZZZZ - A0000 + 100000
            "-a0000": -43770016,
            "-zzzzz": -87440031,
            "-zzzzy": -87440030,
            "-99": -99,
            "-A0": -100,
            "-ZZ": -1035,
            "-zz": -1971,
            "PROPKA": 954495146,
            "A001Z": 100071,
            "B0000": 1779616,
        }
        # dict.items() exists on both Python 2 and Python 3; dict.iteritems()
        # was removed in Python 3 and raised AttributeError here.
        for k, v in test_values.items():
            self.assertEqual(hybrid36.decode(k), v)

    def testErrors(self):
        """Check that rejected inputs raise ValueError mentioning the input."""
        test_values = [
            "99X99",
            "X9-99",
            "XYZa",
            "",
            "-",
            "!NotOk",
        ]
        for v in test_values:
            with self.assertRaises(ValueError) as e:
                hybrid36.decode(v)
            # The error message is expected to quote the offending input.
            self.assertIn(v, str(e.exception))

View File

@@ -1,80 +0,0 @@
import os
import re
from subprocess import call
import sys
import unittest
import logging
# This error tolerance was chosen to make Ubuntu 18.04 pass under Windows
# Subsystem Linux.
ACCEPTABLE_ERROR = 0.001
# Setting this up as a direct translation of the original runtest.py script
# that will be run as part of 'python setup.py test'. This takes on the
# order of 10s to run.
class SystemTest(unittest.TestCase):
    """
    Run the program and compare against reference results.
    """

    def test_pka_calc(self):
        """Run ``scripts/propka31.py`` on each test PDB and check its pKa values.

        For every structure the script's stdout is captured in
        ``<test_dir>/<pdb>.out``.  Every line after the one containing
        "model-pKa" contributes its first decimal number, which is compared
        (as a relative error) with the next line of
        ``<test_dir>/results/<pdb>.dat``.  Scanning stops at the first line
        without a decimal number.  The test fails if any relative error
        exceeds ACCEPTABLE_ERROR.
        """
        pdbs = ['1FTJ-Chain-A',
                '1HPX',
                '4DFR']
        test_dir = os.path.dirname(__file__)
        base_dir = os.path.dirname(test_dir)
        executable = os.path.join(base_dir, "scripts", "propka31.py")
        # NOTE(review): passing env= replaces the child's whole environment,
        # so the subprocess sees only PYTHONPATH.
        env = { "PYTHONPATH" : base_dir }
        # Raw string: "\." is an invalid escape in a normal string literal.
        # Compiled once because it is reused for every output line.
        value_pattern = re.compile(r'([0-9]+\.[0-9]+)')
        for pdb in pdbs:
            input_filename = os.path.join(test_dir, "pdb", pdb + ".pdb")
            output_filename = os.path.join(test_dir, pdb + ".out")
            # Context managers guarantee the handles are closed even when the
            # subprocess call or a float conversion raises.
            with open(output_filename, "w") as output_file:
                call([sys.executable, executable, input_filename],
                     stdout=output_file, env=env)
            # Check pka predictions.
            ref_filename = os.path.join(test_dir, "results", pdb + ".dat")
            errors = []
            with open(ref_filename) as ref, open(output_filename) as output:
                atpka = False
                for line in output:
                    if not atpka:
                        # Start testing pka values.
                        if "model-pKa" in line:
                            atpka = True
                        continue
                    m = value_pattern.search(line)
                    if not m:
                        break
                    expected_value = float(ref.readline())
                    value = float(m.group(0))
                    # Relative error; a reference value of exactly zero would
                    # raise ZeroDivisionError here.
                    value_error = (value-expected_value)/expected_value
                    if abs(value_error) > ACCEPTABLE_ERROR:
                        logging.error(value_error)
                        identity = line[:m.start()].strip()
                        errors.append("%12s %8.2f %8.2f" %
                                      (identity, expected_value, value))
            # Remove the files named after the PDB from the current working
            # directory.
            os.remove("%s.pka" % pdb)
            os.remove("%s.propka_input" % pdb)
            if errors:
                error_header = " Group Expected Calculated\n"
                self.fail("Unexpected pKa values:\n" + error_header +
                          "\n".join(errors))

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python
""" Run test for test pdbs """
from __future__ import division
from __future__ import print_function
from subprocess import call
import os, re
import sys
if __name__ == "__main__":
    # A list of input structures and command-line arguments to be passed in
    # to PROPKA for each:
    pdbs = [('1FTJ-Chain-A', []),
            ('1HPX', []),
            ('4DFR', []),
            ('3SGB', []),
            ('3SGB-subset', ['--titrate_only', 'E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139']),
            ('1HPX-warn', ['--quiet']),
            ]
    # Raw string: "\." is an invalid escape in a normal string literal.
    # Compiled once because it is reused for every line of every file.
    value_pattern = re.compile(r'([0-9]+\.[0-9]+)')
    for pdb, args in pdbs:
        print('')
        print('RUNNING '+pdb)
        # Run pka calculation; stdout and stderr both go to <pdb>.out.
        # The context manager closes the log file before it is referenced in
        # the error message below.
        cmd = [sys.executable, '../scripts/propka31.py','pdb/%s.pdb' % pdb] + args
        with open(pdb + '.out', 'w') as fh:
            ret = call(cmd, stdout=fh, stderr=fh)
        if ret != 0:
            print(" ERR:")
            print(" Failed to execute PROPKA on %s" % pdb)
            print(" See: %s.out" % pdb)
            sys.exit(1)
        # Test pka predictions
        result_file = 'results/%s.dat' % pdb
        if not os.path.isfile(result_file):
            print(" ERR:")
            print(" file not found: %s" % result_file)
            sys.exit(1)
        # Both files are closed on every exit path, including sys.exit().
        with open(result_file,'r') as result, open(pdb+'.pka', 'r') as pka_file:
            atpka = False
            for line in pka_file:
                if not atpka:
                    # Skip everything up to and including the header line.
                    if "model-pKa" in line:
                        # test pka
                        atpka = True
                    continue
                if "-" in line:
                    # done testing
                    atpka = False
                    continue
                expected_value = float(result.readline())
                m = value_pattern.search(line)
                value = float(m.group(0))
                # Exact comparison: the reference holds the printed values.
                if value != expected_value:
                    print(" ERR:")
                    print(line)
                    print(" %s should be: %s" % (value, expected_value))
                    sys.exit(1)

View File

@@ -185,8 +185,12 @@ def build_parser(parser=None):
group.add_argument("-p", "--parameters", dest="parameters", group.add_argument("-p", "--parameters", dest="parameters",
default=pkg_resources.resource_filename(__name__, "propka.cfg"), default=pkg_resources.resource_filename(__name__, "propka.cfg"),
help="set the parameter file [%(default)s]") help="set the parameter file [%(default)s]")
try:
group.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], group.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="logging level verbosity", default="INFO") help="logging level verbosity", default="INFO")
except argparse.ArgumentError:
# It is possible that --log-level has already been set by APBS
pass
group.add_argument("-o", "--pH", dest="pH", type=float, default=7.0, group.add_argument("-o", "--pH", dest="pH", type=float, default=7.0,
help="setting pH-value used in e.g. stability calculations [7.0]") help="setting pH-value used in e.g. stability calculations [7.0]")
group.add_argument("-w", "--window", dest="window", nargs=3, type=float, group.add_argument("-w", "--window", dest="window", nargs=3, type=float,
@@ -210,17 +214,21 @@ def build_parser(parser=None):
"ligand bond orders")) "ligand bond orders"))
group.add_argument("-k", "--keep-protons", dest="keep_protons", action="store_true", group.add_argument("-k", "--keep-protons", dest="keep_protons", action="store_true",
help="Keep protons in input file", default=False) help="Keep protons in input file", default=False)
group.add_argument("-q", "--quiet", action="store_const", const="WARNING",
dest="log_level", help="supress non-warning messages")
group.add_argument("--protonate-all", dest="protonate_all", action="store_true", group.add_argument("--protonate-all", dest="protonate_all", action="store_true",
help="Protonate all atoms (will not influence pKa calculation)", help="Protonate all atoms (will not influence pKa calculation)",
default=False) default=False)
return parser return parser
def loadOptions(*args): def loadOptions(args):
""" """
Load the arguments parser with options. Note that verbosity is set as soon Load the arguments parser with options. Note that verbosity is set as soon
as this function is invoked. as this function is invoked.
Arguments:
args: list of arguments
Returns: Returns:
argparse namespace argparse namespace
""" """
@@ -235,7 +243,7 @@ def loadOptions(*args):
# command line # command line
options = parser.parse_args() options = parser.parse_args()
else: else:
options = parser.parse_args(list(args)) options = parser.parse_args(args)
# adding specified filenames to arguments # adding specified filenames to arguments
options.filenames.append(options.input_pdb) options.filenames.append(options.input_pdb)

1
requirements.txt Normal file
View File

@@ -0,0 +1 @@
pytest

View File

@@ -56,5 +56,6 @@ See http://propka.org/ for the PROPKA web server.
], ],
}, },
zip_safe=True, zip_safe=True,
test_suite="Tests", python_requires='>=3',
test_suite="tests",
) )

15
tests/README.md Normal file
View File

@@ -0,0 +1,15 @@
# Testing PROPKA
These tests assume that PROPKA is installed as a module on your system.
If you are running in a virtual environment and want to make changes to your
code, module installation can be accomplished by
```
pip install -e .
```
from the top level of the PROPKA source directory.
Once PROPKA is available as a module, the tests can be run by
```
python -m pytest tests
```
from either the top-level directory or the `tests` subdirectory.

144
tests/regression_test.py Normal file
View File

@@ -0,0 +1,144 @@
"""Tests for PROPKA 3.1"""
import logging
import os
import re
from pathlib import Path
import pytest
import pandas as pd
import propka.lib
import propka.molecular_container
# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)
# Maximum error set by number of decimal places in pKa output as well as need
# to make unmodified code work on WSL Ubuntu 18.04
MAX_ERR = 0.01
# This directory (a relative path, so it is resolved against the current
# working directory)
TEST_DIR = Path("tests")
# Location for test PDBs
PDB_DIR = Path("pdb")
# Location for results for comparing output (allow running from tests/ and ../tests/)
RESULTS_DIR = Path("tests/results")
if not RESULTS_DIR.is_dir():
    # "tests/results" is not a directory relative to the current working
    # directory, so fall back to plain "results".  This check runs (and may
    # log) once, at import time.
    _LOGGER.warning("Switching to sub-directory")
    RESULTS_DIR = Path("results")
# Arguments to add to all tests
# NOTE(review): no use of DEFAULT_ARGS is visible in this part of the file.
DEFAULT_ARGS = []
def get_test_dirs():
    """Locate the directories holding the test inputs and reference results.

    Each directory is looked up first under TEST_DIR and then as given; the
    first candidate that is an existing directory is used.

    Returns:
        dictionary mapping "pdbs" and "results" to the directories found.
    Raises:
        FileNotFoundError if neither candidate exists for one of the keys.
    """
    locations = {}
    for label, relative in (("pdbs", PDB_DIR), ("results", RESULTS_DIR)):
        candidates = [TEST_DIR / relative, relative]
        # The generator is lazy, so the second candidate is only probed when
        # the first one is not a directory.
        found = next((option for option in candidates if option.is_dir()), None)
        if found is None:
            raise FileNotFoundError(
                "Can't find %s test files in %s" % (label, candidates))
        locations[label] = found
    return locations
def run_propka(options, pdb_path, tmp_path):
    """Run PROPKA software.

    The calculation is performed with ``tmp_path`` as the current working
    directory; the previous working directory is restored afterwards, whether
    or not the calculation succeeds.

    Args:
        options:  list of PROPKA options (left unmodified)
        pdb_path:  path to PDB file
        tmp_path:  path for working directory
    """
    # Build a new list instead of using "+=", which would append the PDB path
    # to the caller's list in place.
    options = list(options) + [str(pdb_path)]
    args = propka.lib.loadOptions(options)
    # Record the working directory before entering the try block so that the
    # finally clause can always refer to it.
    cwd = Path.cwd()
    try:
        _LOGGER.warning("Working in tmpdir %s because of PROPKA file output; need to fix this.",
                        tmp_path)
        os.chdir(tmp_path)
        molecule = propka.molecular_container.Molecular_container(str(pdb_path), args)
        molecule.calculate_pka()
        molecule.write_pka()
    finally:
        os.chdir(cwd)
def compare_output(pdb, tmp_path, ref_path):
    """Compare results of test with reference.

    The reference file holds one floating-point value per line.  The test
    values are read from ``<tmp_path>/<pdb>.pka``: on every line between a
    line containing "model-pKa" and the next line starting with "---", the
    first decimal number is taken.

    Args:
        pdb:  PDB filename stem
        tmp_path:  temporary directory
        ref_path:  path with reference results
    Raises:
        ValueError if results disagree, if the number of values differs from
            the reference, or if a line in the pKa section has no decimal
            number.
    """
    with open(ref_path, "rt") as ref_file:
        ref_data = [float(line) for line in ref_file]
    # Compiled once; it is applied to every line of the pKa section.
    value_pattern = re.compile(r'([0-9]+\.[0-9]+)')
    test_data = []
    pka_path = Path(tmp_path) / ("%s.pka" % pdb)
    with open(pka_path, "rt") as pka_file:
        at_pka = False
        for line in pka_file:
            if not at_pka:
                # Still looking for the header that opens a pKa section.
                if "model-pKa" in line:
                    at_pka = True
            elif line.startswith("---"):
                # A separator line closes the section.
                at_pka = False
            else:
                match = value_pattern.search(line)
                if match is None:
                    # Report the offending line rather than failing with an
                    # AttributeError on None.
                    raise ValueError("Can't find pKa value in line %r of %s"
                                     % (line, pka_path))
                test_data.append(float(match.group(0)))
    if len(ref_data) != len(test_data):
        errstr = ("Number of values in test (%d) differs from reference (%d)"
                  % (len(test_data), len(ref_data)))
        raise ValueError(errstr)
    # A maximum absolute difference needs no DataFrame; plain Python keeps
    # this helper free of third-party requirements.
    max_err = max((abs(ref - test) for ref, test in zip(ref_data, test_data)),
                  default=0.0)
    if max_err > MAX_ERR:
        errstr = "Error in test (%g) exceeds maximum (%g)" % (max_err, MAX_ERR)
        raise ValueError(errstr)
@pytest.mark.parametrize(
    "pdb, options",
    [
        pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
        pytest.param('1HPX', [], id="1HPX: no options"),
        pytest.param('4DFR', [], id="4DFR: no options"),
        pytest.param('3SGB', [], id="3SGB: no options"),
        pytest.param(
            '3SGB-subset',
            ["--titrate_only",
             "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
            id="3SGB: --titrate_only"),
        pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet"),
    ],
)
def test_regression(pdb, options, tmp_path):
    """Run PROPKA on one structure and compare against stored results.

    A missing reference file only produces a warning (the comparison is then
    skipped); a missing PDB file raises FileNotFoundError.
    """
    dirs = get_test_dirs()

    # Reference results are optional: remember whether they exist.
    reference = dirs["results"] / ("%s.dat" % pdb)
    have_reference = reference.is_file()
    if have_reference:
        reference = reference.resolve()
    else:
        _LOGGER.warning("Missing results file for comparison: %s", reference)

    # The input structure is mandatory.
    structure = dirs["pdbs"] / ("%s.pdb" % pdb)
    if not structure.is_file():
        raise FileNotFoundError("Missing PDB file: %s" % structure)
    structure = structure.resolve()

    workdir = Path(tmp_path).resolve()
    run_propka(options, structure, workdir)
    if have_reference:
        compare_output(pdb, workdir, reference)