Merge pull request #148 from speleo3/fix-get_pi
Refactor and fix MolecularContainer.get_pi()
This commit is contained in:
@@ -15,12 +15,6 @@ from propka.lib import make_grid
|
|||||||
_LOGGER = logging.getLogger(__name__)
|
_LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# TODO - these are constants whose origins are a little murky
|
|
||||||
UNK_PI_CUTOFF = 0.01
|
|
||||||
# Maximum number of iterations for finding PI
|
|
||||||
MAX_ITERATION = 4
|
|
||||||
|
|
||||||
|
|
||||||
class MolecularContainer:
|
class MolecularContainer:
|
||||||
"""Container for storing molecular contents of PDB files.
|
"""Container for storing molecular contents of PDB files.
|
||||||
|
|
||||||
@@ -207,37 +201,38 @@ class MolecularContainer:
|
|||||||
charge_profile.append([ph, q_unfolded, q_folded])
|
charge_profile.append([ph, q_unfolded, q_folded])
|
||||||
return charge_profile
|
return charge_profile
|
||||||
|
|
||||||
def get_pi(self, conformation='AVR', grid=[0., 14., 1], iteration=0):
|
def get_pi(self, conformation='AVR', grid=[0., 14., 1], *,
|
||||||
|
precision: float = 1e-4):
|
||||||
"""Get the isoelectric points for folded and unfolded states.
|
"""Get the isoelectric points for folded and unfolded states.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
conformation: conformation to test
|
conformation: conformation to test
|
||||||
grid: grid of pH values [min, max, step]
|
grid: grid of pH values [min, max, step]
|
||||||
iteration: iteration number of process
|
precision: Compute pI up to this precision
|
||||||
Returns:
|
Returns:
|
||||||
1. Folded state PI
|
1. Folded state PI
|
||||||
2. Unfolded state PI
|
2. Unfolded state PI
|
||||||
"""
|
"""
|
||||||
charge_profile = self.get_charge_profile(
|
conf = self.conformations[conformation]
|
||||||
conformation=conformation, grid=grid)
|
|
||||||
pi_folded = pi_unfolded = [None, 1e6, 1e6]
|
WHICH_UNFOLDED = 0
|
||||||
for point in charge_profile:
|
WHICH_FOLDED = 1
|
||||||
pi_folded = min(pi_folded, point, key=lambda v: abs(v[2]))
|
|
||||||
pi_unfolded = min(pi_unfolded, point, key=lambda v: abs(v[1]))
|
def pi(which, pH, min_, max_):
|
||||||
# If results are not good enough, do it again with a higher sampling
|
charge = conf.calculate_charge(
|
||||||
# resolution
|
self.version.parameters, ph=pH)[which]
|
||||||
pi_folded_value = pi_folded[0]
|
if max_ - min_ > precision:
|
||||||
pi_unfolded_value = pi_unfolded[0]
|
if charge > 0.0:
|
||||||
step = grid[2]
|
min_ = pH
|
||||||
# TODO - need to warn if maximum number of iterations is exceeded
|
else:
|
||||||
if ((pi_folded[2] > UNK_PI_CUTOFF
|
max_ = pH
|
||||||
or pi_unfolded[1] > UNK_PI_CUTOFF) and iteration < MAX_ITERATION):
|
next_pH = (min_ + max_) / 2
|
||||||
pi_folded_value, _ = self.get_pi(
|
return pi(which, next_pH, min_, max_)
|
||||||
conformation=conformation,
|
return pH
|
||||||
grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0],
|
|
||||||
iteration=iteration+1)
|
start = (grid[0] + grid[1]) / 2, grid[0], grid[1]
|
||||||
_, pi_unfolded_value = self.get_pi(
|
|
||||||
conformation=conformation,
|
return (
|
||||||
grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0],
|
pi(WHICH_FOLDED, *start),
|
||||||
iteration=iteration+1)
|
pi(WHICH_UNFOLDED, *start),
|
||||||
return pi_folded_value, pi_unfolded_value
|
)
|
||||||
|
|||||||
@@ -1,49 +0,0 @@
|
|||||||
5.07
|
|
||||||
3.11
|
|
||||||
4.62
|
|
||||||
2.55
|
|
||||||
9.28
|
|
||||||
1.78
|
|
||||||
4.91
|
|
||||||
2.13
|
|
||||||
4.78
|
|
||||||
3.93
|
|
||||||
3.65
|
|
||||||
3.89
|
|
||||||
4.73
|
|
||||||
3.36
|
|
||||||
4.07
|
|
||||||
3.70
|
|
||||||
2.08
|
|
||||||
2.11
|
|
||||||
6.98
|
|
||||||
7.11
|
|
||||||
9.41
|
|
||||||
11.68
|
|
||||||
9.82
|
|
||||||
11.61
|
|
||||||
9.67
|
|
||||||
9.54
|
|
||||||
10.43
|
|
||||||
10.32
|
|
||||||
11.41
|
|
||||||
10.54
|
|
||||||
10.42
|
|
||||||
10.92
|
|
||||||
10.55
|
|
||||||
11.01
|
|
||||||
11.43
|
|
||||||
10.47
|
|
||||||
10.41
|
|
||||||
11.07
|
|
||||||
13.96
|
|
||||||
12.41
|
|
||||||
14.39
|
|
||||||
12.35
|
|
||||||
12.76
|
|
||||||
12.42
|
|
||||||
13.73
|
|
||||||
12.28
|
|
||||||
8.96
|
|
||||||
8.96
|
|
||||||
4.60
|
|
||||||
55
tests/results/1HPX.json
Normal file
55
tests/results/1HPX.json
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
{
|
||||||
|
"pI_folded": 9.54,
|
||||||
|
"pI_unfolded": 8.9,
|
||||||
|
"pKa": [
|
||||||
|
5.07,
|
||||||
|
3.11,
|
||||||
|
4.62,
|
||||||
|
2.55,
|
||||||
|
9.28,
|
||||||
|
1.78,
|
||||||
|
4.91,
|
||||||
|
2.13,
|
||||||
|
4.78,
|
||||||
|
3.93,
|
||||||
|
3.65,
|
||||||
|
3.89,
|
||||||
|
4.73,
|
||||||
|
3.36,
|
||||||
|
4.07,
|
||||||
|
3.70,
|
||||||
|
2.08,
|
||||||
|
2.11,
|
||||||
|
6.98,
|
||||||
|
7.11,
|
||||||
|
9.41,
|
||||||
|
11.68,
|
||||||
|
9.82,
|
||||||
|
11.61,
|
||||||
|
9.67,
|
||||||
|
9.54,
|
||||||
|
10.43,
|
||||||
|
10.32,
|
||||||
|
11.41,
|
||||||
|
10.54,
|
||||||
|
10.42,
|
||||||
|
10.92,
|
||||||
|
10.55,
|
||||||
|
11.01,
|
||||||
|
11.43,
|
||||||
|
10.47,
|
||||||
|
10.41,
|
||||||
|
11.07,
|
||||||
|
13.96,
|
||||||
|
12.41,
|
||||||
|
14.39,
|
||||||
|
12.35,
|
||||||
|
12.76,
|
||||||
|
12.42,
|
||||||
|
13.73,
|
||||||
|
12.28,
|
||||||
|
8.96,
|
||||||
|
8.96,
|
||||||
|
4.60
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -2,9 +2,10 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import pytest
|
import pytest
|
||||||
from numpy.testing import assert_almost_equal
|
from pytest import approx
|
||||||
from propka.parameters import Parameters
|
from propka.parameters import Parameters
|
||||||
from propka.molecular_container import MolecularContainer
|
from propka.molecular_container import MolecularContainer
|
||||||
from propka.input import read_parameter_file, read_molecule_file
|
from propka.input import read_parameter_file, read_molecule_file
|
||||||
@@ -18,6 +19,7 @@ _LOGGER = logging.getLogger(__name__)
|
|||||||
# decimal places in pKa output as well as need to make unmodified code work
|
# decimal places in pKa output as well as need to make unmodified code work
|
||||||
# on WSL Ubuntu 18.04
|
# on WSL Ubuntu 18.04
|
||||||
MAX_ERR_DECIMALS = 2
|
MAX_ERR_DECIMALS = 2
|
||||||
|
MAX_ERR_ABS = 10**-MAX_ERR_DECIMALS
|
||||||
|
|
||||||
|
|
||||||
# This directory
|
# This directory
|
||||||
@@ -82,6 +84,34 @@ def run_propka(options, pdb_path, tmp_path):
|
|||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_pka(pka_path: Path) -> dict:
|
||||||
|
"""Parse testable data from a .pka file into a dictionary.
|
||||||
|
"""
|
||||||
|
pka_list = []
|
||||||
|
data = {"pKa": pka_list}
|
||||||
|
|
||||||
|
with open(pka_path, "rt") as pka_file:
|
||||||
|
at_pka = False
|
||||||
|
for line in pka_file:
|
||||||
|
if at_pka:
|
||||||
|
if line.startswith("---"):
|
||||||
|
at_pka = False
|
||||||
|
else:
|
||||||
|
m = re.search(r'\d+\.\d+', line[13:])
|
||||||
|
pka_list.append(float(m.group()))
|
||||||
|
elif "model-pKa" in line:
|
||||||
|
at_pka = True
|
||||||
|
else:
|
||||||
|
m = re.match(
|
||||||
|
r"The pI is *(\d+\.\d+) .folded. and *(\d+\.\d+) .unfolded.",
|
||||||
|
line)
|
||||||
|
if m is not None:
|
||||||
|
data["pI_folded"] = float(m.group(1))
|
||||||
|
data["pI_unfolded"] = float(m.group(2))
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
def compare_output(pdb, tmp_path, ref_path):
|
def compare_output(pdb, tmp_path, ref_path):
|
||||||
"""Compare results of test with reference.
|
"""Compare results of test with reference.
|
||||||
|
|
||||||
@@ -92,31 +122,16 @@ def compare_output(pdb, tmp_path, ref_path):
|
|||||||
Raises:
|
Raises:
|
||||||
ValueError if results disagree.
|
ValueError if results disagree.
|
||||||
"""
|
"""
|
||||||
ref_data = []
|
|
||||||
with open(ref_path, "rt") as ref_file:
|
with open(ref_path, "rt") as ref_file:
|
||||||
for line in ref_file:
|
if ref_path.name.endswith(".json"):
|
||||||
ref_data.append(float(line))
|
ref_data = json.load(ref_file)
|
||||||
|
|
||||||
test_data = []
|
|
||||||
pka_path = Path(tmp_path) / ("{0:s}.pka".format(pdb))
|
|
||||||
with open(pka_path, "rt") as pka_file:
|
|
||||||
at_pka = False
|
|
||||||
for line in pka_file:
|
|
||||||
if not at_pka:
|
|
||||||
if "model-pKa" in line:
|
|
||||||
at_pka = True
|
|
||||||
elif line.startswith("---"):
|
|
||||||
at_pka = False
|
|
||||||
else:
|
else:
|
||||||
match = re.search(r'([0-9]+\.[0-9]+)', line)
|
ref_data = {"pKa": [float(line) for line in ref_file]}
|
||||||
value = float(match.group(0))
|
|
||||||
test_data.append(value)
|
test_data = parse_pka(tmp_path / f"{pdb}.pka")
|
||||||
errstr = (
|
|
||||||
"Error exceeds maximum allowed value ({0:d} decimal places)".format(
|
for key in ref_data:
|
||||||
MAX_ERR_DECIMALS))
|
assert test_data[key] == approx(ref_data[key], abs=MAX_ERR_ABS), key
|
||||||
assert_almost_equal(
|
|
||||||
test_data, ref_data, decimal=MAX_ERR_DECIMALS, err_msg=errstr,
|
|
||||||
verbose=True)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("pdb, options", [
|
@pytest.mark.parametrize("pdb, options", [
|
||||||
@@ -132,9 +147,12 @@ def compare_output(pdb, tmp_path, ref_path):
|
|||||||
def test_regression(pdb, options, tmp_path):
|
def test_regression(pdb, options, tmp_path):
|
||||||
"""Basic regression test of PROPKA functionality."""
|
"""Basic regression test of PROPKA functionality."""
|
||||||
path_dict = get_test_dirs()
|
path_dict = get_test_dirs()
|
||||||
ref_path = path_dict["results"] / ("{0:s}.dat".format(pdb))
|
|
||||||
|
for ext in ["json", "dat"]:
|
||||||
|
ref_path = path_dict["results"] / f"{pdb}.{ext}"
|
||||||
if ref_path.is_file():
|
if ref_path.is_file():
|
||||||
ref_path = ref_path.resolve()
|
ref_path = ref_path.resolve()
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
_LOGGER.warning("Missing results file for comparison: {0:s}".format(
|
_LOGGER.warning("Missing results file for comparison: {0:s}".format(
|
||||||
str(ref_path)))
|
str(ref_path)))
|
||||||
|
|||||||
Reference in New Issue
Block a user