Refactor MolecularContainer.get_pi
Use bisection instead of grid search. This is ~10x faster and also fixes the pI computation for one of my files, which only reached single-digit precision with the old code.
This commit is contained in:
@@ -15,12 +15,6 @@ from propka.lib import make_grid
|
|||||||
_LOGGER = logging.getLogger(__name__)
|
_LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# TODO - these are constants whose origins are a little murky
|
|
||||||
UNK_PI_CUTOFF = 0.01
|
|
||||||
# Maximum number of iterations for finding PI
|
|
||||||
MAX_ITERATION = 4
|
|
||||||
|
|
||||||
|
|
||||||
class MolecularContainer:
|
class MolecularContainer:
|
||||||
"""Container for storing molecular contents of PDB files.
|
"""Container for storing molecular contents of PDB files.
|
||||||
|
|
||||||
@@ -207,37 +201,38 @@ class MolecularContainer:
|
|||||||
charge_profile.append([ph, q_unfolded, q_folded])
|
charge_profile.append([ph, q_unfolded, q_folded])
|
||||||
return charge_profile
|
return charge_profile
|
||||||
|
|
||||||
def get_pi(self, conformation='AVR', grid=[0., 14., 1], iteration=0):
|
def get_pi(self, conformation='AVR', grid=[0., 14., 1], *,
|
||||||
|
precision: float = 1e-4):
|
||||||
"""Get the isoelectric points for folded and unfolded states.
|
"""Get the isoelectric points for folded and unfolded states.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
conformation: conformation to test
|
conformation: conformation to test
|
||||||
grid: grid of pH values [min, max, step]
|
grid: grid of pH values [min, max, step]
|
||||||
iteration: iteration number of process
|
precision: Compute pI up to this precision
|
||||||
Returns:
|
Returns:
|
||||||
1. Folded state PI
|
1. Folded state PI
|
||||||
2. Unfolded state PI
|
2. Unfolded state PI
|
||||||
"""
|
"""
|
||||||
charge_profile = self.get_charge_profile(
|
conf = self.conformations[conformation]
|
||||||
conformation=conformation, grid=grid)
|
|
||||||
pi_folded = pi_unfolded = [None, 1e6, 1e6]
|
WHICH_UNFOLDED = 0
|
||||||
for point in charge_profile:
|
WHICH_FOLDED = 1
|
||||||
pi_folded = min(pi_folded, point, key=lambda v: abs(v[2]))
|
|
||||||
pi_unfolded = min(pi_unfolded, point, key=lambda v: abs(v[1]))
|
def pi(which, pH, min_, max_):
|
||||||
# If results are not good enough, do it again with a higher sampling
|
charge = conf.calculate_charge(
|
||||||
# resolution
|
self.version.parameters, ph=pH)[which]
|
||||||
pi_folded_value = pi_folded[0]
|
if max_ - min_ > precision:
|
||||||
pi_unfolded_value = pi_unfolded[0]
|
if charge > 0.0:
|
||||||
step = grid[2]
|
min_ = pH
|
||||||
# TODO - need to warn if maximum number of iterations is exceeded
|
else:
|
||||||
if ((pi_folded[2] > UNK_PI_CUTOFF
|
max_ = pH
|
||||||
or pi_unfolded[1] > UNK_PI_CUTOFF) and iteration < MAX_ITERATION):
|
next_pH = (min_ + max_) / 2
|
||||||
pi_folded_value, _ = self.get_pi(
|
return pi(which, next_pH, min_, max_)
|
||||||
conformation=conformation,
|
return pH
|
||||||
grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0],
|
|
||||||
iteration=iteration+1)
|
start = (grid[0] + grid[1]) / 2, grid[0], grid[1]
|
||||||
_, pi_unfolded_value = self.get_pi(
|
|
||||||
conformation=conformation,
|
return (
|
||||||
grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0],
|
pi(WHICH_FOLDED, *start),
|
||||||
iteration=iteration+1)
|
pi(WHICH_UNFOLDED, *start),
|
||||||
return pi_folded_value, pi_unfolded_value
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user