Merge pull request #37 from Electrostatics/nathan/argpase

Fix error when using PROPKA with PDB2PQR
2020-05-23 08:04:30 -07:00
parent 6ae2908c4b a49015c297
commit 6f603ab17c
24 changed files with 185 additions and 214 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
 # Compiled python files
 *.py[cod]
-
+# PIP stuff
-
+*.egg-info
--- a/README.md
+++ b/README.md
@@ -36,10 +36,16 @@ directory:
    python setup.py install --user --install-scripts ~/bin
 For the purposes of testing or development, you may prefer to install PROPKA as
 an editable module via PIP by running
 ```
 pip install -e .
 ```
 from within a virtual environment (e.g., via [virtualenv](https://pypi.org/project/virtualenv/)).
 ## Requirements
-* Python 2.7 or higher or Python 3.1 or higher 
+* Python 3.1 or higher 
 ## Getting started
@@ -56,7 +62,8 @@ Calculate using pdb file
 ## Testing (for developers)
-Please run `Tests/runtest.py/` after changes before pushing commits.
+Please see [`tests/README.md`](tests/README.md) for testing instructions.
 Please run these tests after making changes to the code and _before_ pushing commits.
 ## References / Citations
--- a/Tests/init.py
+++ b/Tests/init.py
--- a/Tests/hybrid36.py
+++ b/Tests/hybrid36.py
@@ -1,57 +0,0 @@
 import unittest
 import propka.hybrid36 as hybrid36
 class Hybrid36Test(unittest.TestCase):
    def testDecode(self):
        test_values = {
            "99999": 99999,
            "A0000": 100000,
            "0": 0,
            "9": 9,
            "A": 10,
            "  ZZZZY": 43770014,
            "ZZZZZ": 43770015, # ZZZZZ - A0000 + 100000
            "a0000": 43770016,
            "zzzzz": 87440031,
            "zzzzy": 87440030,
            "99": 99,
            "A0": 100,
            "ZZ": 1035,
            "zz": 1971,
            "-99999": -99999,
            "-A0000": -100000,
            "-0": 0,
            "-9": -9,
            "-A": -10,
            "-ZZZZY": -43770014,
            "-ZZZZZ": -43770015, # ZZZZZ - A0000 + 100000
            "-a0000": -43770016,
            "-zzzzz": -87440031,
            "-zzzzy": -87440030,
            "-99": -99,
            "-A0": -100,
            "-ZZ": -1035,
            "-zz": -1971,
            "PROPKA": 954495146,
            "A001Z": 100071,
            "B0000": 1779616,
        }
        for k, v in test_values.iteritems():
            self.assertEqual(hybrid36.decode(k), v)
    def testErrors(self):
        test_values = [
            "99X99",
            "X9-99",
            "XYZa",
            "",
            "-",
            "!NotOk",
        ]
        for v in test_values:
            with self.assertRaises(ValueError) as e:
                hybrid36.decode(v)
            self.assertTrue(v in str(e.exception))
--- a/Tests/pkacalc_test.py
+++ b/Tests/pkacalc_test.py
@@ -1,80 +0,0 @@
 import os
 import re
 from subprocess import call
 import sys
 import unittest
 import logging
 # This error tolerance was chosen to make Ubuntu 18.04 pass under Windows 
 # Subsystem Linux.
 ACCEPTABLE_ERROR = 0.001
 # Setting this up as a direct translation of the original runtest.py script
 # that will be run as part of 'python setup.py test'. This takes on the
 # order of 10s to run.
 class SystemTest(unittest.TestCase):
    """
    Run the program and compare against reference results.
    """
    def test_pka_calc(self):
        pdbs = ['1FTJ-Chain-A',
                '1HPX',
                '4DFR']
        test_dir = os.path.dirname(__file__)
        base_dir = os.path.dirname(test_dir)
        executable = os.path.join(base_dir, "scripts", "propka31.py")
        env = { "PYTHONPATH" : base_dir }
        for pdb in pdbs:
            input_filename = os.path.join(test_dir, "pdb", pdb + ".pdb")
            output_filename = os.path.join(test_dir, pdb + ".out")
            output_file = open(output_filename, "w")
            call([sys.executable, executable, input_filename],
                    stdout=output_file, env=env)
            output_file.close()
            # Check pka predictions.
            ref = open(os.path.join(test_dir, "results", pdb + ".dat"))
            output = open(output_filename)
            atpka = False
            errors = []
            for line in output:
                if not atpka:
                    # Start testing pka values.
                    if "model-pKa" in line:
                        atpka = True
                    continue
                m = re.search('([0-9]+\.[0-9]+)', line)
                if not m:
                    break
                expected_value = float(ref.readline())
                value = float(m.group(0))
                value_error = (value-expected_value)/expected_value
                if abs(value_error) > ACCEPTABLE_ERROR:
                    logging.error(value_error)
                    identity = line[:m.start()].strip()
                    errors.append("%12s  %8.2f    %8.2f" %
                            (identity, expected_value, value))
            os.remove("%s.pka" % pdb)
            os.remove("%s.propka_input" % pdb)
            ref.close()
            output.close()
            if errors:
                error_header = "       Group  Expected  Calculated\n"
                self.fail("Unexpected pKa values:\n" + error_header +
                          "\n".join(errors))
--- a/Tests/runtest.py
+++ b/Tests/runtest.py
@@ -1,68 +0,0 @@
 #!/usr/bin/env python
 """ Run test for test pdbs """
 from __future__ import division
 from __future__ import print_function
 from subprocess import call
 import os, re
 import sys
 if __name__ == "__main__":
    # A list of input structures and command-line arguments to be passed in
    # to PROPKA for each:
    pdbs = [('1FTJ-Chain-A', []),
            ('1HPX', []),
            ('4DFR', []),
            ('3SGB', []),
            ('3SGB-subset', ['--titrate_only', 'E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139']),
            ('1HPX-warn', ['--quiet']),
           ]
    for pdb, args in pdbs:
        print('')
        print('RUNNING '+pdb)
        # Run pka calculation
        fh = open(pdb + '.out', 'w')
        cmd = [sys.executable, '../scripts/propka31.py','pdb/%s.pdb' % pdb] + args
        ret = call(cmd, stdout=fh, stderr=fh)
        if ret != 0:
            print(" ERR:")
            print(" Failed to execute PROPKA on %s" % pdb)
            print(" See: %s.out" % pdb)
            sys.exit(1)
        # Test pka predictions
        result_file = 'results/%s.dat' % pdb
        if not os.path.isfile(result_file):
            print(" ERR:")
            print(" file not found: %s" % result_file)
            sys.exit(1)
        result = open(result_file,'r')
        atpka = False
        for line in open(pdb+'.pka', 'r').readlines():
            if not atpka:
                if "model-pKa" in line:
                    # test pka
                    atpka = True
                    continue
                else:
                    continue
            if "-" in line:
                # done testing
                atpka = False
                continue
            expected_value = float(result.readline())
            m = re.search('([0-9]+\.[0-9]+)', line)
            value = float(m.group(0))
            if value != expected_value:
                print(" ERR:")
                print(line)
                print(" %s should be: %s" % (value, expected_value))
                sys.exit(1)
--- a/propka/lib.py
+++ b/propka/lib.py
@@ -185,8 +185,12 @@ def build_parser(parser=None):
    group.add_argument("-p", "--parameters", dest="parameters",
                       default=pkg_resources.resource_filename(__name__, "propka.cfg"),
                       help="set the parameter file [%(default)s]")
    try:
        group.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
                           help="logging level verbosity", default="INFO")
    except argparse.ArgumentError:
        # It is possible that --log-level has already been set by APBS
        pass
    group.add_argument("-o", "--pH", dest="pH", type=float, default=7.0,
                       help="setting pH-value used in e.g. stability calculations [7.0]")
    group.add_argument("-w", "--window", dest="window", nargs=3, type=float,
@@ -210,17 +214,21 @@ def build_parser(parser=None):
                             "ligand bond orders"))
    group.add_argument("-k", "--keep-protons", dest="keep_protons", action="store_true",
                       help="Keep protons in input file", default=False)
    group.add_argument("-q", "--quiet", action="store_const", const="WARNING",
                       dest="log_level", help="supress non-warning messages")
    group.add_argument("--protonate-all", dest="protonate_all", action="store_true",
                       help="Protonate all atoms (will not influence pKa calculation)",
                       default=False)
    return parser
-def loadOptions(*args):
+def loadOptions(args):
    """
    Load the arguments parser with options. Note that verbosity is set as soon
    as this function is invoked.
    Arguments:
        args:  list of arguments
    Returns:
        argparse namespace
    """
@@ -235,7 +243,7 @@ def loadOptions(*args):
        # command line
        options = parser.parse_args()
    else:
-        options = parser.parse_args(list(args))
+        options = parser.parse_args(args)
    # adding specified filenames to arguments
    options.filenames.append(options.input_pdb)
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1 @@
 pytest
--- a/setup.py
+++ b/setup.py
@@ -56,5 +56,6 @@ See http://propka.org/ for the PROPKA web server.
            ],
        },
      zip_safe=True,
-      test_suite="Tests",
+      python_requires='>=3',
      test_suite="tests",
 )
--- a/tests/.gitignore
+++ b/tests/.gitignore
--- a/tests/README.md
+++ b/tests/README.md
@@ -0,0 +1,15 @@
 # Testing PROPKA
 These tests assume that PROPKA is installed as a module on your system.
 If you are running in a virtual environment and want to make changes to your
 code, module installation accomplished by
 ```
 pip install -e .
 ```
 from the top level of the PROPKA source directory.
 Once PROPKA is available as a module, the tests can be run by
 ```
 python -m pytest tests
 ```
 either in the top-level directory or `tests` subdirectory.
--- a/tests/pdb/1FTJ-Chain-A.pdb
+++ b/tests/pdb/1FTJ-Chain-A.pdb
--- a/tests/pdb/1HPX-warn.pdb
+++ b/tests/pdb/1HPX-warn.pdb
--- a/tests/pdb/1HPX.pdb
+++ b/tests/pdb/1HPX.pdb
--- a/tests/pdb/3SGB-subset.pdb
+++ b/tests/pdb/3SGB-subset.pdb
--- a/tests/pdb/3SGB.pdb
+++ b/tests/pdb/3SGB.pdb
--- a/tests/pdb/4DFR.pdb
+++ b/tests/pdb/4DFR.pdb
--- a/tests/regression_test.py
+++ b/tests/regression_test.py
@@ -0,0 +1,144 @@
 """Tests for PROPKA 3.1"""
 import logging
 import os
 import re
 from pathlib import Path
 import pytest
 import pandas as pd
 import propka.lib
 import propka.molecular_container
 _LOGGER = logging.getLogger(__name__)
 # Maximum error set by number of decimal places in pKa output as well as need
 # to make unmodified code work on WSL Ubuntu 18.04
 MAX_ERR = 0.01
 # This directory
 TEST_DIR = Path("tests")
 # Location for test PDBs
 PDB_DIR = Path("pdb")
 # Location for results for comparing output (allow running from tests/ and ../tests/)
 RESULTS_DIR = Path("tests/results")
 if not RESULTS_DIR.is_dir():
    _LOGGER.warning("Switching to sub-directory")
    RESULTS_DIR = Path("results")
 # Arguments to add to all tests
 DEFAULT_ARGS = []
 def get_test_dirs():
    """Get locations of test files.
    Returns:
        dictionary with test file locations.
    """
    path_dict = {}
    for key, path in [("pdbs", PDB_DIR), ("results", RESULTS_DIR)]:
        test_path = TEST_DIR / path
        if test_path.is_dir():
            path_dict[key] = test_path
        else:
            test_path = path
            if test_path.is_dir():
                path_dict[key] = test_path
            else:
                errstr = "Can't find %s test files in %s" % (key, [TEST_DIR / path, path])
                raise FileNotFoundError(errstr)
    return path_dict
 def run_propka(options, pdb_path, tmp_path):
    """Run PROPKA software.
    Args:
        options:  list of PROPKA options
        pdb_path:  path to PDB file
        tmp_path:  path for working directory
    """
    options += [str(pdb_path)]
    args = propka.lib.loadOptions(options)
    try:
        _LOGGER.warning("Working in tmpdir %s because of PROPKA file output; need to fix this.",
                        tmp_path)
        cwd = Path.cwd()
        os.chdir(tmp_path)
        molecule = propka.molecular_container.Molecular_container(str(pdb_path), args)
        molecule.calculate_pka()
        molecule.write_pka()
    except Exception as err:
        raise err
    finally:
        os.chdir(cwd)
 def compare_output(pdb, tmp_path, ref_path):
    """Compare results of test with reference.
    Args:
        pdb:  PDB filename stem
        tmp_path:  temporary directory
        ref_path:  path with reference results
    Raises:
        ValueError if results disagree.
    """
    ref_data = []
    with open(ref_path, "rt") as ref_file:
        for line in ref_file:
            ref_data.append(float(line))
    test_data = []
    pka_path = Path(tmp_path) / ("%s.pka" % pdb)
    with open(pka_path, "rt") as pka_file:
        at_pka = False
        for line in pka_file:
            if not at_pka:
                if "model-pKa" in line:
                    at_pka = True
            elif line.startswith("---"):
                at_pka = False
            else:
                m = re.search(r'([0-9]+\.[0-9]+)', line)
                value = float(m.group(0))
                test_data.append(value)
    df = pd.DataFrame({"reference": ref_data, "test": test_data})
    df["difference"] = df["reference"] - df["test"]
    max_err = df["difference"].abs().max()
    if max_err > MAX_ERR:
        errstr = "Error in test (%g) exceeds maximum (%g)" % (max_err, MAX_ERR)
        raise ValueError(errstr)
@pytest.mark.parametrize("pdb, options", [
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('1HPX', [], id="1HPX: no options"),
    pytest.param('4DFR', [], id="4DFR: no options"),
    pytest.param('3SGB', [], id="3SGB: no options"),
    pytest.param('3SGB-subset', ["--titrate_only", 
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"], 
        id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")])
 def test_regression(pdb, options, tmp_path):
    """Basic regression test of PROPKA functionality."""
    path_dict = get_test_dirs()
    ref_path = path_dict["results"] / ("%s.dat" % pdb)
    if ref_path.is_file():
        ref_path = ref_path.resolve()
    else:
        _LOGGER.warning("Missing results file for comparison: %s", ref_path)
        ref_path = None
    pdb_path = path_dict["pdbs"] / ("%s.pdb" % pdb)
    if pdb_path.is_file():
        pdb_path = pdb_path.resolve()
    else:
        errstr = "Missing PDB file: %s" % pdb_path
        raise FileNotFoundError(errstr)
    tmp_path = Path(tmp_path).resolve()
    run_propka(options, pdb_path, tmp_path)
    if ref_path is not None:
        compare_output(pdb, tmp_path, ref_path)
--- a/tests/results/1FTJ-Chain-A.dat
+++ b/tests/results/1FTJ-Chain-A.dat
--- a/tests/results/1HPX-warn.dat
+++ b/tests/results/1HPX-warn.dat
--- a/tests/results/1HPX.dat
+++ b/tests/results/1HPX.dat
--- a/tests/results/3SGB-subset.dat
+++ b/tests/results/3SGB-subset.dat
--- a/tests/results/3SGB.dat
+++ b/tests/results/3SGB.dat
--- a/tests/results/4DFR.dat
+++ b/tests/results/4DFR.dat