Refactor MolecularContainer.get_pi

Use bisection instead of grid search. This is ~10x faster and also fixes
pI computation for one of my files, which only got single-digit precision
with the old code.
This commit is contained in:
Thomas Holder
2022-08-08 21:21:45 +02:00
parent 6388e0c4ee
commit 80c7bf07cd

View File

@@ -15,12 +15,6 @@ from propka.lib import make_grid
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
# TODO - these are constants whose origins are a little murky
UNK_PI_CUTOFF = 0.01
# Maximum number of iterations for finding PI
MAX_ITERATION = 4
class MolecularContainer: class MolecularContainer:
"""Container for storing molecular contents of PDB files. """Container for storing molecular contents of PDB files.
@@ -207,37 +201,38 @@ class MolecularContainer:
charge_profile.append([ph, q_unfolded, q_folded]) charge_profile.append([ph, q_unfolded, q_folded])
return charge_profile return charge_profile
def get_pi(self, conformation='AVR', grid=[0., 14., 1], iteration=0): def get_pi(self, conformation='AVR', grid=[0., 14., 1], *,
precision: float = 1e-4):
"""Get the isoelectric points for folded and unfolded states. """Get the isoelectric points for folded and unfolded states.
Args: Args:
conformation: conformation to test conformation: conformation to test
grid: grid of pH values [min, max, step] grid: grid of pH values [min, max, step]
iteration: iteration number of process precision: Compute pI up to this precision
Returns: Returns:
1. Folded state PI 1. Folded state PI
2. Unfolded state PI 2. Unfolded state PI
""" """
charge_profile = self.get_charge_profile( conf = self.conformations[conformation]
conformation=conformation, grid=grid)
pi_folded = pi_unfolded = [None, 1e6, 1e6] WHICH_UNFOLDED = 0
for point in charge_profile: WHICH_FOLDED = 1
pi_folded = min(pi_folded, point, key=lambda v: abs(v[2]))
pi_unfolded = min(pi_unfolded, point, key=lambda v: abs(v[1])) def pi(which, pH, min_, max_):
# If results are not good enough, do it again with a higher sampling charge = conf.calculate_charge(
# resolution self.version.parameters, ph=pH)[which]
pi_folded_value = pi_folded[0] if max_ - min_ > precision:
pi_unfolded_value = pi_unfolded[0] if charge > 0.0:
step = grid[2] min_ = pH
# TODO - need to warn if maximum number of iterations is exceeded else:
if ((pi_folded[2] > UNK_PI_CUTOFF max_ = pH
or pi_unfolded[1] > UNK_PI_CUTOFF) and iteration < MAX_ITERATION): next_pH = (min_ + max_) / 2
pi_folded_value, _ = self.get_pi( return pi(which, next_pH, min_, max_)
conformation=conformation, return pH
grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0],
iteration=iteration+1) start = (grid[0] + grid[1]) / 2, grid[0], grid[1]
_, pi_unfolded_value = self.get_pi(
conformation=conformation, return (
grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0], pi(WHICH_FOLDED, *start),
iteration=iteration+1) pi(WHICH_UNFOLDED, *start),
return pi_folded_value, pi_unfolded_value )