Configure PRODIGY pipeline for WES execution with S3 and Harbor
This commit is contained in:
49
.github/workflows/ci.yml
vendored
Normal file
49
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
name: ci
|
||||
|
||||
on: push
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- run: pip install '.[dev]'
|
||||
|
||||
- name: check types
|
||||
run: mypy .
|
||||
|
||||
- name: run unittests
|
||||
run: >-
|
||||
pytest
|
||||
-m "not integration"
|
||||
--cov
|
||||
--cov-report xml:coverage.xml
|
||||
--cov-append
|
||||
-vv
|
||||
--hypothesis-show-statistics
|
||||
|
||||
- name: run integration tests
|
||||
run: >-
|
||||
pytest
|
||||
-m integration
|
||||
--cov
|
||||
--cov-report xml:coverage.xml
|
||||
--cov-append
|
||||
-vv
|
||||
--hypothesis-show-statistics
|
||||
|
||||
- name: Run codacy-coverage-reporter
|
||||
uses: codacy/codacy-coverage-reporter-action@v1
|
||||
with:
|
||||
project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
|
||||
coverage-reports: coverage.xml
|
||||
48
.github/workflows/docker-publish.yml
vendored
Normal file
48
.github/workflows/docker-publish.yml
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
#
|
||||
name: Create and publish a Docker image
|
||||
|
||||
on:
|
||||
push:
|
||||
# run only against tags
|
||||
tags:
|
||||
- "*"
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
IMAGE_NAME: ${{ github.repository }}
|
||||
|
||||
# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
|
||||
jobs:
|
||||
build-and-push-image:
|
||||
runs-on: ubuntu-latest
|
||||
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
# Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
|
||||
- name: Log in to the Container registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
34
.github/workflows/publish.yml
vendored
Normal file
34
.github/workflows/publish.yml
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
name: publish to pypi
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
pypi_release:
|
||||
name: builds and publishes to pypi
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/prodigy-prot
|
||||
permissions:
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.13"
|
||||
|
||||
- name: install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install --upgrade build
|
||||
|
||||
- name: build
|
||||
run: |
|
||||
python -m build
|
||||
|
||||
- name: Publish package distributions to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
27
.github/workflows/stale.yml
vendored
Normal file
27
.github/workflows/stale.yml
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
name: "Close stale issues and PRs"
|
||||
on:
|
||||
schedule:
|
||||
- cron: "30 1 * * *"
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
stale:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
issues: write
|
||||
pull-requests: write
|
||||
actions: write
|
||||
steps:
|
||||
- uses: actions/stale@v10
|
||||
with:
|
||||
stale-pr-message: "This PR is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days."
|
||||
stale-issue-message: "This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days."
|
||||
close-pr-message: 'This PR was closed because it has been stalled for 5 days with no activity.'
|
||||
close-issue-message: 'This issue was closed because it has been stalled for 5 days with no activity.'
|
||||
days-before-stale: 30
|
||||
days-before-close: 5
|
||||
exempt-issue-labels: "bug"
|
||||
exempt-pr-labels: "bug"
|
||||
remove-stale-when-updated: true
|
||||
operations-per-run: 100
|
||||
13
.gitignore
vendored
Normal file
13
.gitignore
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
work/
|
||||
.nextflow/
|
||||
.nextflow.log*
|
||||
*.log.*
|
||||
results/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
.docker/
|
||||
.vscode/
|
||||
.idea/
|
||||
*.tmp
|
||||
*.swp
|
||||
tests/test_data/dataset.tgz
|
||||
9
.howfairis.yml
Normal file
9
.howfairis.yml
Normal file
@@ -0,0 +1,9 @@
|
||||
## Uncomment a line if you want to skip a given category of checks
|
||||
|
||||
#skip_repository_checks_reason: <reason for skipping goes here>
|
||||
#skip_license_checks_reason: <reason for skipping goes here>
|
||||
#skip_registry_checks_reason: <reason for skipping goes here>
|
||||
#skip_citation_checks_reason: <reason for skipping goes here>
|
||||
skip_checklist_checks_reason: "I'm using the Codacy dashboard to guide my development"
|
||||
|
||||
ignore_commented_badges: false
|
||||
47
CITATION.cff
Normal file
47
CITATION.cff
Normal file
@@ -0,0 +1,47 @@
|
||||
# This CITATION.cff file was generated with cffinit.
|
||||
# Visit https://bit.ly/cffinit to generate yours today!
|
||||
|
||||
cff-version: 1.2.0
|
||||
title: Prodigy
|
||||
message: >-
|
||||
If you use this software, please cite it using the
|
||||
metadata from this file.
|
||||
type: software
|
||||
authors:
|
||||
- given-names: Anna
|
||||
family-names: Vangone
|
||||
affiliation: Utrecht University
|
||||
- given-names: Alexandre
|
||||
name-particle: MJJ
|
||||
family-names: Bonvin
|
||||
affiliation: Utrecht University
|
||||
- given-names: Joerg
|
||||
family-names: Schaarschmidt
|
||||
affiliation: Utrecht University
|
||||
- given-names: Rodrigo
|
||||
family-names: Vargas Honorato
|
||||
affiliation: Utrecht University
|
||||
- given-names: Brian
|
||||
family-names: Jimenez
|
||||
affiliation: Utrecht University
|
||||
- given-names: Joao
|
||||
family-names: Rodrigues
|
||||
affiliation: Utrecht University
|
||||
identifiers:
|
||||
- type: doi
|
||||
value: 10.1093/bioinformatics/btw514
|
||||
description: DOI of the web service version
|
||||
- type: doi
|
||||
value: 10.7554/eLife.07454
|
||||
- type: doi
|
||||
value: 10.1016/j.jmb.2014.04.017
|
||||
repository-code: 'https://github.com/haddocking/prodigy'
|
||||
url: 'https://wenmr.science.uu.nl/prodigy'
|
||||
abstract: >-
|
||||
A tool to predict binding affinity values for
|
||||
protein-protein complexes from atomic structures.
|
||||
keywords:
|
||||
- binding affinity
|
||||
- computational biology
|
||||
- protein-protein
|
||||
license: Apache-2.0
|
||||
132
CODE_OF_CONDUCT.md
Normal file
132
CODE_OF_CONDUCT.md
Normal file
@@ -0,0 +1,132 @@
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
We as members, contributors, and leaders pledge to make participation in our
|
||||
community a harassment-free experience for everyone, regardless of age, body
|
||||
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||
identity and expression, level of experience, education, socio-economic status,
|
||||
nationality, personal appearance, race, caste, color, religion, or sexual
|
||||
identity and orientation.
|
||||
|
||||
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||
diverse, inclusive, and healthy community.
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
- Demonstrating empathy and kindness toward other people
|
||||
- Being respectful of differing opinions, viewpoints, and experiences
|
||||
- Giving and gracefully accepting constructive feedback
|
||||
- Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience
|
||||
- Focusing on what is best not just for us as individuals, but for the overall
|
||||
community
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
- The use of sexualized language or imagery, and sexual attention or advances of
|
||||
any kind
|
||||
- Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
- Public or private harassment
|
||||
- Publishing others' private information, such as a physical or email address,
|
||||
without their explicit permission
|
||||
- Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Enforcement Responsibilities
|
||||
|
||||
Community leaders are responsible for clarifying and enforcing our standards of
|
||||
acceptable behavior and will take appropriate and fair corrective action in
|
||||
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||
or harmful.
|
||||
|
||||
Community leaders have the right and responsibility to remove, edit, or reject
|
||||
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||
decisions when appropriate.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies within all community spaces, and also applies when
|
||||
an individual is officially representing the community in public spaces.
|
||||
Examples of representing our community include using an official e-mail address,
|
||||
posting via an official social media account, or acting as an appointed
|
||||
representative at an online or offline event.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||
reported to the community leaders responsible for enforcement at
|
||||
`prodigy.bonvinlab@gmail.com`.
|
||||
All complaints will be reviewed and investigated promptly and fairly.
|
||||
|
||||
All community leaders are obligated to respect the privacy and security of the
|
||||
reporter of any incident.
|
||||
|
||||
## Enforcement Guidelines
|
||||
|
||||
Community leaders will follow these Community Impact Guidelines in determining
|
||||
the consequences for any action they deem in violation of this Code of Conduct:
|
||||
|
||||
### 1. Correction
|
||||
|
||||
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||
unprofessional or unwelcome in the community.
|
||||
|
||||
**Consequence**: A private, written warning from community leaders, providing
|
||||
clarity around the nature of the violation and an explanation of why the
|
||||
behavior was inappropriate. A public apology may be requested.
|
||||
|
||||
### 2. Warning
|
||||
|
||||
**Community Impact**: A violation through a single incident or series of
|
||||
actions.
|
||||
|
||||
**Consequence**: A warning with consequences for continued behavior. No
|
||||
interaction with the people involved, including unsolicited interaction with
|
||||
those enforcing the Code of Conduct, for a specified period of time. This
|
||||
includes avoiding interactions in community spaces as well as external channels
|
||||
like social media. Violating these terms may lead to a temporary or permanent
|
||||
ban.
|
||||
|
||||
### 3. Temporary Ban
|
||||
|
||||
**Community Impact**: A serious violation of community standards, including
|
||||
sustained inappropriate behavior.
|
||||
|
||||
**Consequence**: A temporary ban from any sort of interaction or public
|
||||
communication with the community for a specified period of time. No public or
|
||||
private interaction with the people involved, including unsolicited interaction
|
||||
with those enforcing the Code of Conduct, is allowed during this period.
|
||||
Violating these terms may lead to a permanent ban.
|
||||
|
||||
### 4. Permanent Ban
|
||||
|
||||
**Community Impact**: Demonstrating a pattern of violation of community
|
||||
standards, including sustained inappropriate behavior, harassment of an
|
||||
individual, or aggression toward or disparagement of classes of individuals.
|
||||
|
||||
**Consequence**: A permanent ban from any sort of public interaction within the
|
||||
community.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||
version 2.1, available at
|
||||
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
||||
|
||||
Community Impact Guidelines were inspired by
|
||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
||||
|
||||
For answers to common questions about this code of conduct, see the FAQ at
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
||||
[https://www.contributor-covenant.org/translations][translations].
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
17
CONTRIBUTING.md
Normal file
17
CONTRIBUTING.md
Normal file
@@ -0,0 +1,17 @@
|
||||
# Contributing with PRODIGY
|
||||
|
||||
## Reporting issues
|
||||
|
||||
If you find a bug or have a feature request, please report it in the [issue tracker](https://github.com/haddocking/prodigy/issues)
|
||||
|
||||
## Contributing code
|
||||
|
||||
We welcome contributions to PRODIGY. If you would like to contribute, please fork the repository and make a pull request.
|
||||
|
||||
## Development conventions
|
||||
|
||||
Please refer to the [development guidelines](DEVELOPMENT.md) for more details.
|
||||
|
||||
## Contact
|
||||
|
||||
If you have any questions, please contact us at [ask.bioexcel.eu](https://ask.bioexcel.eu)
|
||||
36
DEVELOPMENT.md
Normal file
36
DEVELOPMENT.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# PRODIGY Development
|
||||
|
||||
## Installation
|
||||
|
||||
We use `poetry` to manage the dependencies and the virtual environment, so you need to install it first; check the [official documentation](https://python-poetry.org/docs/#installation) for more details.
|
||||
|
||||
Clone the repository and install the dependencies:
|
||||
|
||||
```text
|
||||
git clone https://github.com/haddocking/prodigy.git && cd prodigy
|
||||
poetry install
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
To run the tests, use the following command:
|
||||
|
||||
```text
|
||||
python -m unittest
|
||||
```
|
||||
|
||||
## Code style
|
||||
|
||||
We use `trunk` as the "all-purpose" linting tool, check its [documentation](https://docs.trunk.io/docs/install).
|
||||
|
||||
To check for code style issues, run:
|
||||
|
||||
```text
|
||||
trunk check
|
||||
```
|
||||
|
||||
To automatically fix the issues, run:
|
||||
|
||||
```text
|
||||
trunk fmt
|
||||
```
|
||||
40
Dockerfile
Normal file
40
Dockerfile
Normal file
@@ -0,0 +1,40 @@
|
||||
FROM python:3.12
|
||||
|
||||
LABEL maintainer="Omic"
|
||||
LABEL description="PRODIGY - PROtein binDIng enerGY prediction"
|
||||
LABEL version="2.4.0"
|
||||
|
||||
# Set environment variables
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Install system dependencies required for freesasa compilation
|
||||
RUN apt-get update -y && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
gcc \
|
||||
g++ \
|
||||
make \
|
||||
procps \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Upgrade pip
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
|
||||
# Install PRODIGY and its dependencies
|
||||
# Dependencies: biopython>=1.80, freesasa>=2.2.1, numpy>=2
|
||||
RUN pip install --no-cache-dir \
|
||||
"biopython>=1.80" \
|
||||
"freesasa>=2.2.1" \
|
||||
"numpy>=2"
|
||||
|
||||
# Install PRODIGY
|
||||
RUN pip install --no-cache-dir prodigy-prot==2.4.0
|
||||
|
||||
# Verify installation
|
||||
RUN prodigy --help
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /data
|
||||
|
||||
CMD ["prodigy", "--help"]
|
||||
190
LICENSE
Normal file
190
LICENSE
Normal file
@@ -0,0 +1,190 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2015 Anna Vangone, Panagiotis Kastritis, Alexandre Bonvin
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
3
MANIFEST.in
Normal file
3
MANIFEST.in
Normal file
@@ -0,0 +1,3 @@
|
||||
include README.md
|
||||
include src/prodigy_prot/data/naccess.config
|
||||
|
||||
341
README.md
Normal file
341
README.md
Normal file
@@ -0,0 +1,341 @@
|
||||
# PRODIGY Nextflow Pipeline
|
||||
|
||||
A Nextflow pipeline for predicting binding affinity of protein-protein complexes using PRODIGY (PROtein binDIng enerGY prediction).
|
||||
|
||||
## Overview
|
||||
|
||||
PRODIGY is a contact-based method for predicting the binding affinity of protein-protein complexes from their 3D structures. This pipeline containerizes PRODIGY using Docker and orchestrates execution through Nextflow, enabling reproducible, scalable analysis of protein-protein interactions.
|
||||
|
||||
### Key Features
|
||||
|
||||
- **Automated binding affinity prediction** from PDB/mmCIF structures
|
||||
- **Batch processing** of multiple protein complexes
|
||||
- **Docker containerization** for reproducibility
|
||||
- **Configurable parameters** for distance cutoffs, temperature, and chain selection
|
||||
- **Optional outputs** including contact lists and PyMOL visualization scripts
|
||||
|
||||
## Scientific Background
|
||||
|
||||
PRODIGY predicts binding affinity by analyzing intermolecular contacts (ICs) at protein-protein interfaces. The method:
|
||||
|
||||
1. Identifies residue-residue contacts within a distance threshold (default: 5.5 Å)
|
||||
2. Classifies contacts by residue type (charged, polar, apolar)
|
||||
3. Analyzes the non-interacting surface (NIS) composition
|
||||
4. Predicts binding free energy (ΔG) and dissociation constant (Kd)
|
||||
|
||||
The 5.5 Å distance cutoff was optimized to capture various non-bonded interactions including salt bridges, hydrogen bonds, and hydrophobic contacts.
|
||||
|
||||
## Requirements
|
||||
|
||||
### Software Dependencies
|
||||
|
||||
- [Nextflow](https://www.nextflow.io/) (≥21.04.0)
|
||||
- [Docker](https://www.docker.com/) (≥20.10) or [Singularity](https://sylabs.io/singularity/) (≥3.0)
|
||||
|
||||
### Hardware Requirements
|
||||
|
||||
- CPU: 1+ cores per process
|
||||
- Memory: 4 GB minimum recommended
|
||||
- Storage: ~2 GB for Docker image
|
||||
|
||||
## Installation
|
||||
|
||||
### 1. Clone or Download the Pipeline
|
||||
|
||||
```bash
|
||||
# Create pipeline directory
|
||||
mkdir -p /path/to/prodigy_pipeline
|
||||
cd /path/to/prodigy_pipeline
|
||||
|
||||
# Copy pipeline files (Dockerfile, main.nf, nextflow.config, params.json)
|
||||
```
|
||||
|
||||
### 2. Build the Docker Image
|
||||
|
||||
```bash
|
||||
docker build -t prodigy:latest .
|
||||
```
|
||||
|
||||
### 3. Verify Installation
|
||||
|
||||
```bash
|
||||
# Test Docker image
|
||||
docker run --rm prodigy:latest prodigy --help
|
||||
|
||||
# Test Nextflow
|
||||
nextflow run main.nf --help
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# Run on a single PDB file
|
||||
nextflow run main.nf --pdb /path/to/complex.pdb --outdir /path/to/output
|
||||
|
||||
# Run on multiple PDB files
|
||||
nextflow run main.nf --pdb '/path/to/structures/*.pdb' --outdir /path/to/output
|
||||
```
|
||||
|
||||
### With Custom Parameters
|
||||
|
||||
```bash
|
||||
nextflow run main.nf \
|
||||
--pdb '/path/to/structures/*.pdb' \
|
||||
--outdir /path/to/output \
|
||||
--distance_cutoff 5.5 \
|
||||
--acc_threshold 0.05 \
|
||||
--temperature 37.0 \
|
||||
--contact_list true \
|
||||
--pymol_selection true
|
||||
```
|
||||
|
||||
### Chain Selection for Complex Interfaces
|
||||
|
||||
For antibody-antigen complexes or multi-chain proteins:
|
||||
|
||||
```bash
|
||||
# Contacts between chains A and B only
|
||||
nextflow run main.nf --pdb complex.pdb --selection 'A B'
|
||||
|
||||
# Heavy (H) and Light (L) chains as one molecule vs Antigen (A)
|
||||
nextflow run main.nf --pdb antibody_antigen.pdb --selection 'H,L A'
|
||||
|
||||
# Three-way interface calculation
|
||||
nextflow run main.nf --pdb complex.pdb --selection 'A B C'
|
||||
```
|
||||
|
||||
### Using Singularity
|
||||
|
||||
```bash
|
||||
nextflow run main.nf -profile singularity --pdb /path/to/complex.pdb
|
||||
```
|
||||
|
||||
## Parameters
|
||||
|
||||
### Required Parameters
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `--pdb` | Path to input PDB/mmCIF file(s). Supports glob patterns. | `/mnt/OmicNAS/private/old/olamide/Prodigy/input/*.pdb` |
|
||||
| `--outdir` | Output directory for results | `/mnt/OmicNAS/private/old/olamide/Prodigy/output` |
|
||||
|
||||
### Analysis Parameters
|
||||
|
||||
| Parameter | Description | Default | Range |
|
||||
|-----------|-------------|---------|-------|
|
||||
| `--distance_cutoff` | Distance threshold (Å) for defining intermolecular contacts | `5.5` | 1.0 - 20.0 |
|
||||
| `--acc_threshold` | Relative accessibility threshold for surface residue identification | `0.05` | 0.0 - 1.0 |
|
||||
| `--temperature` | Temperature (°C) for Kd calculation | `25.0` | -273.15 - 100.0 |
|
||||
| `--selection` | Chain selection for interface calculation | `''` (all chains) | See examples |
|
||||
|
||||
### Output Control Parameters
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `--contact_list` | Generate detailed contact list file | `false` |
|
||||
| `--pymol_selection` | Generate PyMOL visualization script | `false` |
|
||||
| `--quiet` | Output only affinity values (minimal output) | `false` |
|
||||
|
||||
## Output Files
|
||||
|
||||
### Standard Output
|
||||
|
||||
For each input structure `<name>.pdb`, the pipeline generates:
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `<name>_prodigy.txt` | Main results file with binding affinity prediction |
|
||||
|
||||
### Optional Output (when enabled)
|
||||
|
||||
| File | Description | Parameter |
|
||||
|------|-------------|-----------|
|
||||
| `<name>_contacts.txt` | List of all interface contacts | `--contact_list true` |
|
||||
| `<name>_interface.pml` | PyMOL script for interface visualization | `--pymol_selection true` |
|
||||
|
||||
### Example Output
|
||||
|
||||
```
|
||||
[!] Structure contains gaps:
|
||||
E ILE16 < Fragment 0 > E ALA183
|
||||
E TYR184 < Fragment 1 > E GLY187
|
||||
|
||||
[+] Executing 1 task(s) in total
|
||||
##########################################
|
||||
[+] Processing structure 1ppe_model0
|
||||
[+] No. of intermolecular contacts: 86
|
||||
[+] No. of charged-charged contacts: 5.0
|
||||
[+] No. of charged-polar contacts: 10.0
|
||||
[+] No. of charged-apolar contacts: 27.0
|
||||
[+] No. of polar-polar contacts: 0.0
|
||||
[+] No. of apolar-polar contacts: 20.0
|
||||
[+] No. of apolar-apolar contacts: 24.0
|
||||
[+] Percentage of apolar NIS residues: 34.10
|
||||
[+] Percentage of charged NIS residues: 18.50
|
||||
[++] Predicted binding affinity (kcal.mol-1): -14.7
|
||||
[++] Predicted dissociation constant (M) at 25.0˚C: 1.6e-11
|
||||
```
|
||||
|
||||
### Output Interpretation
|
||||
|
||||
| Metric | Description |
|
||||
|--------|-------------|
|
||||
| **Intermolecular contacts** | Total number of residue-residue contacts at interface |
|
||||
| **Contact types** | Breakdown by residue character (charged/polar/apolar) |
|
||||
| **NIS residues** | Composition of non-interacting surface |
|
||||
| **Binding affinity (ΔG)** | Predicted free energy of binding (kcal/mol). More negative = stronger binding |
|
||||
| **Dissociation constant (Kd)** | Predicted Kd at specified temperature. Lower = tighter binding |
|
||||
|
||||
### Binding Affinity Scale
|
||||
|
||||
| ΔG (kcal/mol) | Kd (M) | Binding Strength |
|
||||
|---------------|--------|------------------|
|
||||
| -6 to -8 | 10⁻⁵ to 10⁻⁶ | Moderate |
|
||||
| -8 to -10 | 10⁻⁶ to 10⁻⁷ | Strong |
|
||||
| -10 to -12 | 10⁻⁷ to 10⁻⁹ | Very Strong |
|
||||
| < -12 | < 10⁻⁹ | Extremely Strong |
|
||||
|
||||
## Test Data
|
||||
|
||||
Download example protein complexes from the RCSB PDB:
|
||||
|
||||
```bash
|
||||
# Create input directory
|
||||
mkdir -p /mnt/OmicNAS/private/old/olamide/Prodigy/input
|
||||
|
||||
# Download test structures
|
||||
wget -O /mnt/OmicNAS/private/old/olamide/Prodigy/input/3bzd.pdb https://files.rcsb.org/download/3BZD.pdb
|
||||
wget -O /mnt/OmicNAS/private/old/olamide/Prodigy/input/2oob.pdb https://files.rcsb.org/download/2OOB.pdb
|
||||
wget -O /mnt/OmicNAS/private/old/olamide/Prodigy/input/1ppe.pdb https://files.rcsb.org/download/1PPE.pdb
|
||||
```
|
||||
|
||||
### Expected Results
|
||||
|
||||
| Structure | Description | Expected ΔG (kcal/mol) |
|
||||
|-----------|-------------|------------------------|
|
||||
| 3BZD | Protein-protein complex | -9.4 |
|
||||
| 2OOB | Protein-protein complex | -6.2 |
|
||||
| 1PPE | Trypsin-inhibitor complex | -14.7 |
|
||||
|
||||
## Pipeline Structure
|
||||
|
||||
```
|
||||
prodigy_pipeline/
|
||||
├── Dockerfile # Docker image definition
|
||||
├── main.nf # Nextflow pipeline script
|
||||
├── nextflow.config # Pipeline configuration
|
||||
├── params.json # Parameter documentation
|
||||
└── README.md # This file
|
||||
```
|
||||
|
||||
## Docker Image Details
|
||||
|
||||
The Docker image is based on Python 3.12 and includes:
|
||||
|
||||
- **prodigy-prot** (v2.4.0) - Main PRODIGY package
|
||||
- **biopython** (≥1.80) - PDB structure parsing
|
||||
- **freesasa** (≥2.2.1) - Solvent accessible surface area calculation
|
||||
- **numpy** (≥2) - Numerical computations
|
||||
|
||||
### Building the Image
|
||||
|
||||
```bash
|
||||
docker build -t prodigy:latest .
|
||||
```
|
||||
|
||||
### Running Standalone
|
||||
|
||||
```bash
|
||||
# Run PRODIGY directly
|
||||
docker run --rm -v /path/to/data:/data prodigy:latest prodigy /data/complex.pdb
|
||||
|
||||
# Get help
|
||||
docker run --rm prodigy:latest prodigy --help
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**1. Docker Hub Rate Limit Error**
|
||||
```
|
||||
ERROR: toomanyrequests: You have reached your pull rate limit
|
||||
```
|
||||
Solution: Log in to Docker Hub with `docker login` or wait and retry.
|
||||
|
||||
**2. Structure Contains Gaps Warning**
|
||||
```
|
||||
[!] Structure contains gaps
|
||||
```
|
||||
This is informational, not an error. PRODIGY handles missing residues automatically.
|
||||
|
||||
**3. No Intermolecular Contacts Found**
|
||||
- Verify the structure contains multiple chains
|
||||
- Check chain selection parameters
|
||||
- Ensure chains are in contact (within distance cutoff)
|
||||
|
||||
**4. Permission Denied Errors**
|
||||
```bash
|
||||
# Run with user permissions
|
||||
docker run --rm -u $(id -u):$(id -g) -v /path/to/data:/data prodigy:latest prodigy /data/complex.pdb
|
||||
```
|
||||
|
||||
### Getting Help
|
||||
|
||||
```bash
|
||||
# PRODIGY help
|
||||
docker run --rm prodigy:latest prodigy --help
|
||||
|
||||
# Nextflow pipeline help
|
||||
nextflow run main.nf --help
|
||||
```
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this pipeline, please cite the following publications:
|
||||
|
||||
### PRODIGY Method
|
||||
|
||||
1. **Xue LC, Rodrigues JP, Kastritis PL, Bonvin AM, Vangone A.** (2016)
|
||||
PRODIGY: a web server for predicting the binding affinity of protein-protein complexes.
|
||||
*Bioinformatics*, 32(23):3676-3678.
|
||||
[DOI: 10.1093/bioinformatics/btw514](https://doi.org/10.1093/bioinformatics/btw514)
|
||||
|
||||
2. **Vangone A, Bonvin AM.** (2015)
|
||||
Contacts-based prediction of binding affinity in protein-protein complexes.
|
||||
*eLife*, 4:e07454.
|
||||
[DOI: 10.7554/eLife.07454](https://doi.org/10.7554/eLife.07454)
|
||||
|
||||
3. **Kastritis PL, Rodrigues JP, Folkers GE, Boelens R, Bonvin AM.** (2014)
|
||||
Proteins feel more than they see: Fine-tuning of binding affinity by properties of the non-interacting surface.
|
||||
*Journal of Molecular Biology*, 426(14):2632-2652.
|
||||
[DOI: 10.1016/j.jmb.2014.04.017](https://doi.org/10.1016/j.jmb.2014.04.017)
|
||||
|
||||
### Software Dependencies
|
||||
|
||||
- **Nextflow**: Di Tommaso P, et al. (2017) Nextflow enables reproducible computational workflows. *Nature Biotechnology*, 35:316-319.
|
||||
- **Biopython**: Cock PJ, et al. (2009) Biopython: freely available Python tools for computational molecular biology and bioinformatics. *Bioinformatics*, 25(11):1422-1423.
|
||||
- **FreeSASA**: Mitternacht S. (2016) FreeSASA: An open source C library for solvent accessible surface area calculations. *F1000Research*, 5:189.
|
||||
|
||||
## License
|
||||
|
||||
This pipeline is distributed under the Apache License 2.0, consistent with the PRODIGY software license.
|
||||
|
||||
## Links
|
||||
|
||||
- **PRODIGY Web Server**: [https://wenmr.science.uu.nl/prodigy/](https://wenmr.science.uu.nl/prodigy/)
|
||||
- **PRODIGY GitHub**: [https://github.com/haddocking/prodigy](https://github.com/haddocking/prodigy)
|
||||
- **BonvinLab**: [https://www.bonvinlab.org/](https://www.bonvinlab.org/)
|
||||
- **Nextflow**: [https://www.nextflow.io/](https://www.nextflow.io/)
|
||||
|
||||
## Support
|
||||
|
||||
For questions about:
|
||||
- **PRODIGY method**: Contact the BonvinLab team at [ask.bioexcel.eu](https://ask.bioexcel.eu/)
|
||||
- **This pipeline**: Open an issue in the repository
|
||||
|
||||
---
|
||||
|
||||
*Pipeline version: 2.4.0 | Last updated: January 2026*
|
||||
13
examples/3BZD.ic_model
Normal file
13
examples/3BZD.ic_model
Normal file
@@ -0,0 +1,13 @@
|
||||
[+] Reading structure file: /Users/joao/software/binding_affinity/examples/3BZD.pdb
|
||||
[+] Parsed structure file 3BZD (2 chains, 343 residues)
|
||||
[+] No. of intermolecular contacts: 51
|
||||
[+] No. of charged-charged contacts: 4
|
||||
[+] No. of charged-polar contacts: 7
|
||||
[+] No. of charged-apolar contacts: 6
|
||||
[+] No. of polar-polar contacts: 7
|
||||
[+] No. of apolar-polar contacts: 15
|
||||
[+] No. of apolar-apolar contacts: 12
|
||||
[+] Percentage of apolar NIS residues: 29.48
|
||||
[+] Percentage of charged NIS residues: 29.48
|
||||
[++] Predicted binding affinity (kcal.mol-1): -9.373
|
||||
[++] Predicted dissociation constant (M): 1.333e-07
|
||||
2754
examples/3BZD.pdb
Normal file
2754
examples/3BZD.pdb
Normal file
File diff suppressed because it is too large
Load Diff
4727
examples/3bzd.cif
Normal file
4727
examples/3bzd.cif
Normal file
File diff suppressed because it is too large
Load Diff
74
main.nf
Normal file
74
main.nf
Normal file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env nextflow

nextflow.enable.dsl=2

// -----------------------------------------------------------------------------
// Default parameters (override on the command line with --<name>)
// -----------------------------------------------------------------------------
params.pdb             = 's3://omic/eureka/prodigy/input/*.pdb'  // input structure(s); glob allowed
params.outdir          = 's3://omic/eureka/prodigy/output'       // where results are published
params.distance_cutoff = 5.5    // Angstrom threshold for intermolecular contacts
params.acc_threshold   = 0.05   // relative accessibility threshold for NIS analysis
params.temperature     = 25.0   // Celsius, used for Kd prediction
params.selection       = ''     // chain selection, e.g. 'A B' or 'H,L A' (empty = all chains)
params.contact_list    = false  // also emit a contact list file
params.pymol_selection = false  // also emit a PyMOL visualisation script
params.quiet           = false  // print only affinity values
|
||||
|
||||
// =============================================================================
|
||||
// Process: PRODIGY
|
||||
// Predicts binding affinity using intermolecular contacts
|
||||
// =============================================================================
|
||||
|
||||
// =============================================================================
// Process: PRODIGY
// Predicts binding affinity of a protein-protein complex from intermolecular
// contacts. Publishes the main report plus optional contact list / PyMOL files.
// =============================================================================

process PRODIGY {
    container 'harbor.cluster.omic.ai/omic/prodigy:latest'
    publishDir params.outdir, mode: 'copy'
    stageInMode 'copy'

    input:
    path pdb

    output:
    path "${pdb.baseName}_prodigy.txt", emit: results
    path "${pdb.baseName}_contacts.txt", optional: true, emit: contacts
    path "${pdb.baseName}_interface.pml", optional: true, emit: pymol

    script:
    """
    prodigy \\
        ${pdb} \\
        --distance-cutoff ${params.distance_cutoff} \\
        --acc-threshold ${params.acc_threshold} \\
        --temperature ${params.temperature} \\
        ${params.selection ? '--selection ' + params.selection : ''} \\
        ${params.contact_list ? '--contact_list' : ''} \\
        ${params.pymol_selection ? '--pymol_selection' : ''} \\
        ${params.quiet ? '--quiet' : ''} \\
        2>&1 | tee ${pdb.baseName}_prodigy.txt

    # BUGFIX: the CLI writes the contact list to <stem>.ic (cli.py uses
    # with_suffix(".ic")), not <stem>.contacts — rename the actual file so the
    # declared optional output is produced.
    if [ -f "${pdb.baseName}.ic" ]; then
        mv ${pdb.baseName}.ic ${pdb.baseName}_contacts.txt
    fi

    # Rename the PyMOL script (<stem>.pml) if generated
    if [ -f "${pdb.baseName}.pml" ]; then
        mv ${pdb.baseName}.pml ${pdb.baseName}_interface.pml
    fi
    """
}
|
||||
|
||||
// =============================================================================
|
||||
// Workflow
|
||||
// =============================================================================
|
||||
|
||||
// =============================================================================
// Entry workflow: validate input, build one channel element per structure,
// and run PRODIGY on each.
// =============================================================================

workflow {
    // Fail fast when no input pattern was supplied.
    if (!params.pdb) {
        error "ERROR: Please provide input PDB file(s) using --pdb parameter"
    }

    // One channel element per matched structure file (errors if none match).
    Channel
        .fromPath(params.pdb, checkIfExists: true)
        .set { pdb_ch }

    PRODIGY(pdb_ch)
}
|
||||
71
nextflow.config
Normal file
71
nextflow.config
Normal file
@@ -0,0 +1,71 @@
|
||||
// =============================================================================
// PRODIGY Nextflow Pipeline Configuration
// Protein binding affinity prediction from structural data
// =============================================================================

// Pipeline metadata.
manifest {
    name        = 'PRODIGY-Nextflow'
    author      = 'Olamide'
    homePage    = 'https://trs-gitea.cluster.omic.ai/omic/prodigy'
    description = 'Nextflow pipeline for PRODIGY - Protein binding affinity prediction based on intermolecular contacts'
    mainScript  = 'main.nf'
    version     = '2.4.0'
}

// Global default parameters (same defaults as main.nf; CLI flags win).
params {
    pdb             = 's3://omic/eureka/prodigy/input/*.pdb'
    outdir          = 's3://omic/eureka/prodigy/output'
    distance_cutoff = 5.5
    acc_threshold   = 0.05
    temperature     = 25.0
    selection       = ''
    contact_list    = false
    pymol_selection = false
    quiet           = false
}

// Container engine: run tasks as the invoking user so published files are
// not root-owned.
docker {
    enabled    = true
    runOptions = '-u $(id -u):$(id -g)'
}

// Per-task resource defaults and image.
process {
    cpus      = 1
    memory    = '4 GB'
    container = 'harbor.cluster.omic.ai/omic/prodigy:latest'
}

// Local executor capacity.
executor {
    $local {
        cpus   = 4
        memory = '8 GB'
    }
}

// Execution-environment profiles.
profiles {
    standard {
        docker.enabled = true
    }

    k8s {
        docker.enabled    = true
        process.container = 'harbor.cluster.omic.ai/omic/prodigy:latest'
    }

    k8s_gpu {
        docker.enabled    = true
        process.container = 'harbor.cluster.omic.ai/omic/prodigy:latest'
    }

    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
        docker.enabled         = false
    }
}
|
||||
157
params.json
Normal file
157
params.json
Normal file
@@ -0,0 +1,157 @@
|
||||
{
|
||||
"params": {
|
||||
"pdb": {
|
||||
"type": "file",
|
||||
"description": "Path to input PDB or mmCIF structure file(s) for binding affinity prediction",
|
||||
"default": "s3://omic/eureka/prodigy/input/*.pdb",
|
||||
"required": true,
|
||||
"pipeline_io": "input",
|
||||
"var_name": "params.pdb",
|
||||
"examples": [
|
||||
"s3://omic/eureka/prodigy/input/3bzd.pdb",
|
||||
"s3://omic/eureka/prodigy/input/*.pdb"
|
||||
],
|
||||
"pattern": ".*\\.(pdb|cif)$",
|
||||
"enum": [],
|
||||
"validation": {},
|
||||
"notes": "Input protein-protein complex structure in PDB or mmCIF format. Can be a single file or glob pattern for batch processing."
|
||||
},
|
||||
"outdir": {
|
||||
"type": "folder",
|
||||
"description": "Directory for PRODIGY prediction results",
|
||||
"default": "s3://omic/eureka/prodigy/output",
|
||||
"required": true,
|
||||
"pipeline_io": "output",
|
||||
"var_name": "params.outdir",
|
||||
"examples": [
|
||||
"s3://omic/eureka/prodigy/output",
|
||||
"s3://omic/eureka/prodigy/custom_output"
|
||||
],
|
||||
"pattern": ".*",
|
||||
"enum": [],
|
||||
"validation": {},
|
||||
"notes": "Directory where prediction results will be stored. Created if it does not exist."
|
||||
},
|
||||
"distance_cutoff": {
|
||||
"type": "float",
|
||||
"description": "Distance cutoff (Angstrom) for calculating intermolecular contacts",
|
||||
"default": 5.5,
|
||||
"required": false,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.distance_cutoff",
|
||||
"examples": [
|
||||
5.5,
|
||||
4.0,
|
||||
6.0
|
||||
],
|
||||
"pattern": null,
|
||||
"enum": [],
|
||||
"validation": {
|
||||
"min": 1.0,
|
||||
"max": 20.0
|
||||
},
|
||||
"notes": "Default value of 5.5 Angstrom was optimized in Vangone & Bonvin (2015) eLife. This threshold includes different non-bonded interactions including salt bridges."
|
||||
},
|
||||
"acc_threshold": {
|
||||
"type": "float",
|
||||
"description": "Accessibility threshold for buried surface area (BSA) analysis",
|
||||
"default": 0.05,
|
||||
"required": false,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.acc_threshold",
|
||||
"examples": [
|
||||
0.05,
|
||||
0.1
|
||||
],
|
||||
"pattern": null,
|
||||
"enum": [],
|
||||
"validation": {
|
||||
"min": 0.0,
|
||||
"max": 1.0
|
||||
},
|
||||
"notes": "Relative accessibility threshold used to identify surface residues for non-interacting surface (NIS) calculations."
|
||||
},
|
||||
"temperature": {
|
||||
"type": "float",
|
||||
"description": "Temperature (Celsius) for dissociation constant (Kd) prediction",
|
||||
"default": 25.0,
|
||||
"required": false,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.temperature",
|
||||
"examples": [
|
||||
25.0,
|
||||
37.0,
|
||||
4.0
|
||||
],
|
||||
"pattern": null,
|
||||
"enum": [],
|
||||
"validation": {
|
||||
"min": -273.15,
|
||||
"max": 100.0
|
||||
},
|
||||
"notes": "Temperature used to convert predicted binding free energy (deltaG) to dissociation constant (Kd)."
|
||||
},
|
||||
"selection": {
|
||||
"type": "string",
|
||||
"description": "Chain selection for interface calculation",
|
||||
"default": "",
|
||||
"required": false,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.selection",
|
||||
"examples": [
|
||||
"A B",
|
||||
"A,B C",
|
||||
"H,L A"
|
||||
],
|
||||
"pattern": null,
|
||||
"enum": [],
|
||||
"validation": {},
|
||||
"notes": "Specify chains to consider for binding affinity calculation. Format: 'A B' calculates contacts between chains A and B. 'A,B C' treats chains A and B as one molecule interacting with chain C. Useful for antibody-antigen complexes where heavy and light chains should be grouped."
|
||||
},
|
||||
"contact_list": {
|
||||
"type": "boolean",
|
||||
"description": "Output list of intermolecular contacts",
|
||||
"default": false,
|
||||
"required": false,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.contact_list",
|
||||
"examples": [
|
||||
true,
|
||||
false
|
||||
],
|
||||
"enum": [true, false],
|
||||
"validation": {},
|
||||
"notes": "When enabled, outputs a detailed list of all residue-residue contacts at the interface."
|
||||
},
|
||||
"pymol_selection": {
|
||||
"type": "boolean",
|
||||
"description": "Output PyMOL script to visualize interface",
|
||||
"default": false,
|
||||
"required": false,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.pymol_selection",
|
||||
"examples": [
|
||||
true,
|
||||
false
|
||||
],
|
||||
"enum": [true, false],
|
||||
"validation": {},
|
||||
"notes": "When enabled, generates a PyMOL script (.pml) to highlight interface residues for visualization."
|
||||
},
|
||||
"quiet": {
|
||||
"type": "boolean",
|
||||
"description": "Output only predicted affinity values",
|
||||
"default": false,
|
||||
"required": false,
|
||||
"pipeline_io": "parameter",
|
||||
"var_name": "params.quiet",
|
||||
"examples": [
|
||||
true,
|
||||
false
|
||||
],
|
||||
"enum": [true, false],
|
||||
"validation": {},
|
||||
"notes": "When enabled, outputs only the predicted binding affinity value without detailed analysis. Useful for batch processing and downstream parsing."
|
||||
}
|
||||
}
|
||||
}
|
||||
45
pyproject.toml
Normal file
45
pyproject.toml
Normal file
@@ -0,0 +1,45 @@
|
||||
[project]
name = "prodigy-prot"
license = "Apache-2.0"
version = "2.4.0"
description = "PROtein binDIng enerGY prediction"
authors = [
    { name = "Anna Vangone" },
    { name = "Joao Rodrigues" },
    { name = "Joerg Schaarschmidt" },
]
maintainers = [{ name = "BonvinLab", email = "bonvinlab.support@uu.nl" }]
readme = "README.md"
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Scientific/Engineering :: Chemistry",
    "Topic :: Scientific/Engineering :: Bio-Informatics",
]

dependencies = ["biopython>=1.80", "freesasa>=2.2.1", "numpy>=2"]

[project.optional-dependencies]
dev = ["pytest", "coverage", "hypothesis", "pytest-cov", "mypy"]

[project.scripts]
prodigy = "prodigy_prot.cli:main"

# BUGFIX: the build backend below is hatchling, so a [tool.setuptools] table
# (previously here, with packages = ["src"]) is silently ignored. Point
# hatchling at the src-layout package instead; package data under
# src/prodigy_prot/data is included with the package directory.
[tool.hatch.build.targets.wheel]
packages = ["src/prodigy_prot"]

[tool.pytest.ini_options]
pythonpath = ["src"]
markers = ["integration: marks tests as integration tests"]

[tool.mypy]
disable_error_code = ["import-not-found"]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
|
||||
3
src/prodigy_prot/__init__.py
Normal file
3
src/prodigy_prot/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from pathlib import Path

# Path to the NACCESS atom-type configuration bundled with the package
# (shipped under prodigy_prot/data/).
NACCESS_CONFIG = Path(__file__).parent / "data" / "naccess.config"
|
||||
199
src/prodigy_prot/cli.py
Normal file
199
src/prodigy_prot/cli.py
Normal file
@@ -0,0 +1,199 @@
|
||||
"""
|
||||
Binding affinity predictor based on Intermolecular Contacts (ICs).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
from argparse import RawTextHelpFormatter
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
|
||||
from Bio.PDB.Model import Model
|
||||
|
||||
from prodigy_prot.modules.parsers import parse_structure
|
||||
from prodigy_prot.modules.prodigy import Prodigy
|
||||
|
||||
# Logging goes to stdout with bare messages so status lines and prediction
# output share one stream.
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
log = logging.getLogger("Prodigy")


# Command-line interface. RawTextHelpFormatter preserves the newlines embedded
# in the help strings below.
ap = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
ap.add_argument(
    "input_path",
    help="Path to either: \n- Structure in PDB or mmCIF format\n- Directory containing structure files",
)
ap.add_argument("--distance-cutoff", type=float, default=5.5,
                help="Distance cutoff to calculate ICs")
ap.add_argument("--acc-threshold", type=float, default=0.05,
                help="Accessibility threshold for BSA analysis")
ap.add_argument("--temperature", type=float, default=25.0,
                help="Temperature (C) for Kd prediction")
ap.add_argument("--contact_list", action="store_true",
                help="Output a list of contacts")
ap.add_argument("--pymol_selection", action="store_true",
                help="Output a script to highlight the interface (pymol)")
ap.add_argument("-q", "--quiet", action="store_true",
                help="Outputs only the predicted affinity value")
ap.add_argument("-s", "--showall", action="store_true",
                help="Outputs all original prodigy features but BSA (mutually exclusive with `-q`)")
ap.add_argument("-np", "--number-of-processors", type=int, default=1,
                help="Number of processors to use (default: 1)")

_sel_help = """
By default, all intermolecular contacts are taken into consideration,
a molecule being defined as an isolated group of amino acids sharing
a common chain identifier. In specific cases, for example
antibody-antigen complexes, some chains should be considered as a
single molecule.

Use the --selection option to provide collections of chains that should
be considered for the calculation. Separate by a space the chains that
are to be considered _different_ molecules. Use commas to include multiple
chains as part of a single group:

--selection A B => Contacts calculated (only) between chains A and B.
--selection A,B C => Contacts calculated (only) between \
chains A and C; and B and C.
--selection A B C => Contacts calculated (only) between \
chains A and B; B and C; and A and C.
"""
sel_opt = ap.add_argument_group("Selection Options", description=_sel_help)
sel_opt.add_argument("--selection", nargs="+", metavar=("A B", "A,B C"))
|
||||
|
||||
|
||||
def main():
    """CLI entry point.

    Parses arguments, collects structure files (a single file or every
    .pdb/.cif/.ent in a directory), runs one PRODIGY prediction per model in
    a process pool, and prints the captured outputs in a stable sorted order.
    Exits with status 1 on invalid arguments, missing input, or no structures.
    """
    args = ap.parse_args()

    # --quiet and --showall request contradictory verbosity levels.
    if args.quiet and args.showall:
        log.error("Error: --quiet (-q) and --showall (-s) are mutually exclusive arguments")
        sys.exit(1)
    # Quiet mode suppresses everything below ERROR so only values are printed.
    # (The original called setLevel twice with the same arguments; once is enough.)
    log.setLevel(logging.ERROR if args.quiet else logging.INFO)

    input_path = Path(args.input_path)

    # Resolve the input path to a list of structure files.
    input_list = []
    if input_path.is_file():
        input_list.append(input_path)
    elif input_path.is_dir():
        for input_f in input_path.glob("*"):
            if input_f.suffix in (".pdb", ".cif", ".ent"):
                input_list.append(input_f)
    elif not input_path.exists():
        log.error(f"File {input_path} does not exist")
        sys.exit(1)
    else:
        log.error(f"Input path {input_path} is neither a valid file nor a directory")
        sys.exit(1)

    # One task per model in each structure (multi-model files yield several).
    tasks = []
    for input_f in input_list:
        models, _, _ = parse_structure(str(input_f))
        struct_path = Path(input_f)
        for model in models:
            identifier = f"{struct_path.stem}_model{model.id}"
            tasks.append((model, identifier, args, struct_path))

    total_tasks = len(tasks)
    if total_tasks == 0:
        log.error("No valid structures found")
        sys.exit(1)

    # Never spawn more workers than there are tasks.
    max_workers = min(args.number_of_processors, total_tasks)
    log.info(f"[+] Executing {total_tasks} task(s) in total")
    if max_workers != args.number_of_processors:
        log.info("[+] Adjusting number of processors based on number of tasks")
        log.info(
            f"[+] Using {max_workers} processor(s) instead of {args.number_of_processors}"
        )

    # Run predictions in parallel; each worker returns (identifier, model_id,
    # captured stdout). Failures are logged, not fatal to the other tasks.
    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_model, *task) for task in tasks]
        for future in as_completed(futures):
            try:
                results.append(future.result())
            except Exception as e:
                log.error(f"Error processing model: {e}")

    # Deterministic output order regardless of completion order.
    results.sort(key=lambda x: (x[0], x[1]))
    for identifier, _, output in results:
        print(output, end="")
|
||||
|
||||
|
||||
def process_model(model: Model, identifier: str, args: argparse.Namespace, struct_path):
    """
    Run a PRODIGY prediction for a single model and capture its printed output.

    Returns a ``(identifier, model_id, captured_stdout)`` tuple so the parallel
    driver can sort results deterministically before printing them.
    """
    captured = StringIO()
    saved_stdout = sys.stdout
    # Redirect stdout so everything printed during prediction lands in the
    # buffer; safe because each task runs in its own worker process.
    sys.stdout = captured
    try:
        if not args.quiet:
            print("#" * 42)
            print(f"[+] Processing structure {identifier}")
        prodigy = Prodigy(
            model=model,
            name=identifier,
            selection=args.selection,
            temp=args.temperature,
        )
        prodigy.predict(
            distance_cutoff=args.distance_cutoff, acc_threshold=args.acc_threshold
        )
        prodigy.print_prediction(quiet=args.quiet, showall=args.showall)
    finally:
        # Always restore stdout, even if the prediction raised.
        sys.stdout = saved_stdout

    # NOTE(review): for multi-model inputs every model writes the same
    # <stem>.ic / <stem>.pml path, so later models overwrite earlier ones —
    # confirm whether per-model filenames are intended.
    if args.contact_list:
        prodigy.print_contacts(outfile=str(struct_path.with_suffix(".ic")))

    if args.pymol_selection:
        prodigy.print_pymol_script(outfile=str(struct_path.with_suffix(".pml")))

    return identifier, model.id, captured.getvalue()
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
256
src/prodigy_prot/data/naccess.config
Normal file
256
src/prodigy_prot/data/naccess.config
Normal file
@@ -0,0 +1,256 @@
|
||||
# Contributed by João Rodrigues
|
||||
|
||||
name: NACCESS
|
||||
|
||||
types:
|
||||
C_ALI 1.87 apolar
|
||||
C_CAR 1.76 apolar
|
||||
C_NUC 1.80 apolar
|
||||
N_AMN 1.50 polar
|
||||
N_AMD 1.65 polar
|
||||
N_NUC 1.60 polar
|
||||
O 1.40 polar
|
||||
S 1.85 apolar
|
||||
SE 1.80 apolar
|
||||
P 1.90 apolar
|
||||
|
||||
atoms:
|
||||
ANY C C_CAR
|
||||
ANY O O
|
||||
ANY CA C_ALI
|
||||
ANY N N_AMD
|
||||
ANY CB C_ALI
|
||||
ANY OXT O
|
||||
# nucleic acid
|
||||
ANY P P
|
||||
ANY OP1 O
|
||||
ANY OP2 O
|
||||
ANY OP3 O
|
||||
ANY O5' O
|
||||
ANY O4' O
|
||||
ANY O3' O
|
||||
ANY O2' O
|
||||
ANY C5' C_NUC
|
||||
ANY C4' C_NUC
|
||||
ANY C3' C_NUC
|
||||
ANY C2' C_NUC
|
||||
ANY C1' C_NUC
|
||||
|
||||
ALA CB C_ALI # included so that RSA values will be generated
|
||||
|
||||
ARG CG C_ALI
|
||||
ARG CD C_ALI
|
||||
ARG NE N_AMD
|
||||
ARG CZ C_CAR
|
||||
ARG NH1 N_AMD
|
||||
ARG NH2 N_AMD
|
||||
|
||||
ASN CG C_CAR
|
||||
ASN OD1 O
|
||||
ASN ND2 N_AMD
|
||||
|
||||
ASP CG C_CAR
|
||||
ASP OD1 O
|
||||
ASP OD2 O
|
||||
|
||||
CYS SG S
|
||||
|
||||
GLN CG C_ALI
|
||||
GLN CD C_CAR
|
||||
GLN OE1 O
|
||||
GLN NE2 N_AMD
|
||||
|
||||
GLU CG C_ALI
|
||||
GLU CD C_CAR
|
||||
GLU OE1 O
|
||||
GLU OE2 O
|
||||
|
||||
GLY CA C_ALI # included so that RSA values will be generated
|
||||
|
||||
HIS CG C_CAR
|
||||
HIS ND1 N_AMD
|
||||
HIS CD2 C_CAR
|
||||
HIS NE2 N_AMD
|
||||
HIS CE1 C_CAR
|
||||
|
||||
ILE CG1 C_ALI
|
||||
ILE CG2 C_ALI
|
||||
ILE CD1 C_ALI
|
||||
|
||||
LEU CG C_ALI
|
||||
LEU CD1 C_ALI
|
||||
LEU CD2 C_ALI
|
||||
|
||||
LYS CG C_ALI
|
||||
LYS CD C_ALI
|
||||
LYS CE C_ALI
|
||||
LYS NZ N_AMN
|
||||
|
||||
MET CG C_ALI
|
||||
MET SD S
|
||||
MET CE C_ALI
|
||||
|
||||
PHE CG C_CAR
|
||||
PHE CD1 C_CAR
|
||||
PHE CD2 C_CAR
|
||||
PHE CE1 C_CAR
|
||||
PHE CE2 C_CAR
|
||||
PHE CZ C_CAR
|
||||
|
||||
PRO CG C_ALI
|
||||
PRO CD C_ALI
|
||||
|
||||
SEC SE SE
|
||||
|
||||
SER OG O
|
||||
|
||||
THR OG1 O
|
||||
THR CG2 C_ALI
|
||||
|
||||
TRP CG C_CAR
|
||||
TRP CD1 C_CAR
|
||||
TRP CD2 C_CAR
|
||||
TRP NE1 N_AMD
|
||||
TRP CE2 C_CAR
|
||||
TRP CE3 C_CAR
|
||||
TRP CZ2 C_CAR
|
||||
TRP CZ3 C_CAR
|
||||
TRP CH2 C_CAR
|
||||
|
||||
TYR CG C_CAR
|
||||
TYR CD1 C_CAR
|
||||
TYR CD2 C_CAR
|
||||
TYR CE1 C_CAR
|
||||
TYR CE2 C_CAR
|
||||
TYR CZ C_CAR
|
||||
TYR OH O
|
||||
|
||||
VAL CG1 C_ALI
|
||||
VAL CG2 C_ALI
|
||||
|
||||
A N9 N_NUC
|
||||
A C8 C_NUC
|
||||
A N7 N_NUC
|
||||
A C5 C_NUC
|
||||
A C6 C_NUC
|
||||
A N6 N_NUC
|
||||
A N1 N_NUC
|
||||
A C2 C_NUC
|
||||
A N3 N_NUC
|
||||
A C4 C_NUC
|
||||
|
||||
C N1 N_NUC
|
||||
C C2 C_NUC
|
||||
C O2 O
|
||||
C N3 N_NUC
|
||||
C C4 C_NUC
|
||||
C N4 N_NUC
|
||||
C C5 C_NUC
|
||||
C C6 C_NUC
|
||||
|
||||
G N9 N_NUC
|
||||
G C8 C_NUC
|
||||
G N7 N_NUC
|
||||
G C5 C_NUC
|
||||
G C6 C_NUC
|
||||
G O6 O
|
||||
G N1 N_NUC
|
||||
G C2 C_NUC
|
||||
G N2 N_NUC
|
||||
G N3 N_NUC
|
||||
G C4 C_NUC
|
||||
|
||||
I N9 N_NUC
|
||||
I C8 C_NUC
|
||||
I N7 N_NUC
|
||||
I C5 C_NUC
|
||||
I C6 C_NUC
|
||||
I O6 O
|
||||
I N1 N_NUC
|
||||
I C2 C_NUC
|
||||
I N3 N_NUC
|
||||
I C4 C_NUC
|
||||
|
||||
T N1 N_NUC
|
||||
T C2 C_NUC
|
||||
T O2 O
|
||||
T N3 N_NUC
|
||||
T C4 C_NUC
|
||||
T O4 O
|
||||
T C5 C_NUC
|
||||
T C7 C_NUC
|
||||
T C6 C_NUC
|
||||
|
||||
U N1 N_NUC
|
||||
U C2 C_NUC
|
||||
U O2 O
|
||||
U N3 N_NUC
|
||||
U C4 C_NUC
|
||||
U O4 O
|
||||
U C5 C_NUC
|
||||
U C6 C_NUC
|
||||
|
||||
DA N9 N_NUC
|
||||
DA C8 C_NUC
|
||||
DA N7 N_NUC
|
||||
DA C5 C_NUC
|
||||
DA C6 C_NUC
|
||||
DA N6 N_NUC
|
||||
DA N1 N_NUC
|
||||
DA C2 C_NUC
|
||||
DA N3 N_NUC
|
||||
DA C4 C_NUC
|
||||
|
||||
DC N1 N_NUC
|
||||
DC C2 C_NUC
|
||||
DC O2 O
|
||||
DC N3 N_NUC
|
||||
DC C4 C_NUC
|
||||
DC N4 N_NUC
|
||||
DC C5 C_NUC
|
||||
DC C6 C_NUC
|
||||
|
||||
DG N9 N_NUC
|
||||
DG C8 C_NUC
|
||||
DG N7 N_NUC
|
||||
DG C5 C_NUC
|
||||
DG C6 C_NUC
|
||||
DG O6 O
|
||||
DG N1 N_NUC
|
||||
DG C2 C_NUC
|
||||
DG N2 N_NUC
|
||||
DG N3 N_NUC
|
||||
DG C4 C_NUC
|
||||
|
||||
DI N9 N_NUC
|
||||
DI C8 C_NUC
|
||||
DI N7 N_NUC
|
||||
DI C5 C_NUC
|
||||
DI C6 C_NUC
|
||||
DI O6 O
|
||||
DI N1 N_NUC
|
||||
DI C2 C_NUC
|
||||
DI N3 N_NUC
|
||||
DI C4 C_NUC
|
||||
|
||||
DT N1 N_NUC
|
||||
DT C2 C_NUC
|
||||
DT O2 O
|
||||
DT N3 N_NUC
|
||||
DT C4 C_NUC
|
||||
DT O4 O
|
||||
DT C5 C_NUC
|
||||
DT C7 C_NUC
|
||||
DT C6 C_NUC
|
||||
|
||||
DU N1 N_NUC
|
||||
DU C2 C_NUC
|
||||
DU O2 O
|
||||
DU N3 N_NUC
|
||||
DU C4 C_NUC
|
||||
DU O4 O
|
||||
DU C5 C_NUC
|
||||
DU C6 C_NUC
|
||||
|
||||
|
||||
|
||||
0
src/prodigy_prot/modules/__init__.py
Normal file
0
src/prodigy_prot/modules/__init__.py
Normal file
148
src/prodigy_prot/modules/aa_properties.py
Normal file
148
src/prodigy_prot/modules/aa_properties.py
Normal file
@@ -0,0 +1,148 @@
|
||||
"""
|
||||
Generic properties of amino acids required for the binding affinity
|
||||
prediction methods.
|
||||
"""
|
||||
|
||||
# Residue chemical character used when classifying intermolecular contacts:
# A = apolar, C = charged, P = polar (see analyse_contacts in modules/prodigy.py).
aa_character_ic: dict[str, str] = {
    "ALA": "A",
    "CYS": "A",  # NOTE(review): apolar here but polar in aa_character_protorp below — confirm intentional
    "GLU": "C",
    "ASP": "C",
    "GLY": "A",
    "PHE": "A",
    "ILE": "A",
    "HIS": "C",
    "LYS": "C",
    "MET": "A",
    "LEU": "A",
    "ASN": "P",
    "GLN": "P",
    "PRO": "A",
    "SER": "P",
    "ARG": "C",
    "THR": "P",
    "TRP": "A",
    "VAL": "A",
    "TYR": "A",
}
|
||||
|
||||
# Residue chemical character following the PROTORP convention
# (A = apolar, C = charged, P = polar); used by analyse_nis to classify
# non-interacting surface residues. Differs from aa_character_ic for
# CYS, HIS, TRP and TYR.
aa_character_protorp: dict[str, str] = {
    "ALA": "A",
    "CYS": "P",
    "GLU": "C",
    "ASP": "C",
    "GLY": "A",
    "PHE": "A",
    "ILE": "A",
    "HIS": "P",
    "LYS": "C",
    "MET": "A",
    "LEU": "A",
    "ASN": "P",
    "GLN": "P",
    "PRO": "A",
    "SER": "P",
    "ARG": "C",
    "THR": "P",
    "TRP": "P",
    "VAL": "A",
    "TYR": "P",
}
|
||||
|
||||
# Taken from pre-original prodigy code
# B for hydrophoBic
# L for hydrophiLic (earlier comment said "Y", but the values below use "L")
aa_character_hydro: dict[str, str] = {
    "ALA": "B",  #+
    "CYS": "B",  #+
    "GLU": "L",  #+
    "ASP": "L",  #+
    "GLY": "L",  # Glycine was B in my initial classification
    "PHE": "B",  #+
    "ILE": "B",  #+
    "HIS": "L",  #+
    "LYS": "L",  #+
    "MET": "B",  #+
    "LEU": "B",  #+
    "ASN": "L",  #+
    "GLN": "L",  #+
    "PRO": "L",  # Proline was B my initial classification
    "SER": "L",  #+
    "ARG": "L",  #+
    "THR": "L",  #+
    "TRP": "L",  #+
    "VAL": "B",  #+
    "TYR": "L",  #+
}
|
||||
|
||||
# Scaling factors for relative ASA
# Calculated using extended ALA-X-ALA peptides
# Taken from NACCESS
# Reference accessible surface areas (presumably in Å^2) per residue type;
# dividing an observed ASA by these yields a relative ASA (rASA).
# Keys: "total" = whole residue; "bb"/"sc" presumably backbone/side chain —
# TODO confirm; only "total" is used by modules/freesasa_tools.py.
rel_asa: dict[str, dict[str, float]] = {
    "total": {
        "ALA": 107.95,
        "CYS": 134.28,
        "ASP": 140.39,
        "GLU": 172.25,
        "PHE": 199.48,
        "GLY": 80.10,
        "HIS": 182.88,
        "ILE": 175.12,
        "LYS": 200.81,
        "LEU": 178.63,
        "MET": 194.15,
        "ASN": 143.94,
        "PRO": 136.13,
        "GLN": 178.50,
        "ARG": 238.76,
        "SER": 116.50,
        "THR": 139.27,
        "VAL": 151.44,
        "TRP": 249.36,
        "TYR": 212.76,
    },
    "bb": {
        "ALA": 38.54,
        "CYS": 37.53,
        "ASP": 37.70,
        "GLU": 37.51,
        "PHE": 35.37,
        "GLY": 47.77,
        "HIS": 35.80,
        "ILE": 37.16,
        "LYS": 37.51,
        "LEU": 37.51,
        "MET": 37.51,
        "ASN": 37.70,
        "PRO": 16.23,
        "GLN": 37.51,
        "ARG": 37.51,
        "SER": 38.40,
        "THR": 37.57,
        "VAL": 37.16,
        "TRP": 38.10,
        "TYR": 35.38,
    },
    "sc": {
        "ALA": 69.41,
        "CYS": 96.75,
        "ASP": 102.69,
        "GLU": 134.74,
        "PHE": 164.11,
        "GLY": 32.33,
        "HIS": 147.08,
        "ILE": 137.96,
        "LYS": 163.30,
        "LEU": 141.12,
        "MET": 156.64,
        "ASN": 106.24,
        "PRO": 119.90,
        "GLN": 140.99,
        "ARG": 201.25,
        "SER": 78.11,
        "THR": 101.70,
        "VAL": 114.28,
        "TRP": 211.26,
        "TYR": 177.38,
    },
}
|
||||
71
src/prodigy_prot/modules/freesasa_tools.py
Normal file
71
src/prodigy_prot/modules/freesasa_tools.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
Functions to execute freesasa and parse its output.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
import freesasa
|
||||
from Bio.PDB.Model import Model
|
||||
from Bio.PDB.Structure import Structure
|
||||
from freesasa import Classifier, calc, structureFromBioPDB
|
||||
|
||||
from prodigy_prot import NACCESS_CONFIG
|
||||
from prodigy_prot.modules.aa_properties import rel_asa
|
||||
|
||||
freesasa.setVerbosity(freesasa.nowarnings)
|
||||
|
||||
|
||||
def execute_freesasa_api(model: Model) -> tuple[dict, dict]:
    """
    Calls freesasa using its Python API and returns
    per-residue accessibilities.

    Returns
    -------
    asa_data : dict mapping (chain, resname, resid, atom_name) -> absolute ASA.
    rsa_data : dict mapping (chain, resname, resid) -> relative ASA, i.e. the
        summed atomic ASA divided by the residue's reference area from
        ``rel_asa["total"]``.

    Raises
    ------
    Exception on freesasa classification failures (non-canonical atom names
    or non-default residues surface as AssertionError inside freesasa).
    """

    asa_data = {}
    # NOTE(review): annotation says the second key element is int, but the
    # keys built below are (chain, resname, resid) and freesasa's
    # residueNumber() presumably returns a string — confirm and fix.
    rsa_data: dict[tuple[str, int, str], float] = {}
    # Per-residue reference areas used to normalise absolute ASA.
    _rsa: dict = rel_asa["total"]

    # Classifier configured with NACCESS-style radii shipped with the package.
    classifier = Classifier(str(NACCESS_CONFIG))

    # NOTE: `structureFromBioPDB` requires a Structure object
    # so here build one from a model
    s = Structure(model.id)
    s.add(model)

    try:
        struct = structureFromBioPDB(
            s,
            classifier,
        )
        result = calc(struct)
    except AssertionError as e:
        # freesasa signals unknown atoms/residues via AssertionError;
        # re-raise with a user-facing explanation.
        error_message = "" + os.linesep
        error_message += "[!] Error when running freesasa:" + os.linesep
        error_message += f"[!] {e}" + os.linesep
        error_message += (
            "[!] Make sure the atom names in your PDB file match"
            " the canonical naming and belong "
            "to default residues" + os.linesep
        )
        print(error_message)
        raise Exception(error_message)

    # iterate over all atoms to get SASA and residue name
    for idx in range(struct.nAtoms()):
        atname = struct.atomName(idx)
        resname = struct.residueName(idx)
        resid = struct.residueNumber(idx)
        chain = struct.chainLabel(idx)
        at_uid = (chain, resname, resid, atname)
        res_uid = (chain, resname, resid)

        asa = result.atomArea(idx)
        asa_data[at_uid] = asa
        # add asa to residue (accumulate atomic areas per residue)
        rsa_data[res_uid] = rsa_data.get(res_uid, 0) + asa

    # convert total asa to relative asa
    # (dict.update accepts an iterable of (key, value) pairs; raises KeyError
    # if a residue name has no entry in rel_asa["total"])
    rsa_data.update(
        (res_uid, asa / _rsa[res_uid[1]]) for res_uid, asa in rsa_data.items()
    )
    return asa_data, rsa_data
|
||||
41
src/prodigy_prot/modules/models.py
Normal file
41
src/prodigy_prot/modules/models.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""
|
||||
Models to predict binding affinity based on molecular properties.
|
||||
"""
|
||||
|
||||
|
||||
def IC_NIS(
    ic_cc: float,
    ic_ca: float,
    ic_pp: float,
    ic_pa: float,
    p_nis_a: float,
    p_nis_c: float,
) -> float:
    """
    Calculates the predicted binding affinity value based on the IC-NIS model.

    A linear model over interface contact counts (charged-charged,
    charged-apolar, polar-polar, polar-apolar) and the percentages of
    apolar/charged non-interacting surface residues, plus an intercept.
    """
    # Accumulate term by term, left to right, so float rounding matches the
    # original single-expression formulation exactly.
    dg = -0.09459 * ic_cc
    dg += -0.10007 * ic_ca
    dg += 0.19577 * ic_pp
    dg += -0.22671 * ic_pa
    dg += 0.18681 * p_nis_a
    dg += 0.13810 * p_nis_c
    dg += -15.9433
    return dg
|
||||
|
||||
|
||||
def NIS(p_nis_c: float, p_nis_p: float, n_int_atoms: float) -> float:
    """
    Calculates the predicted binding affinity value based on the NIS model.

    A linear model over the polar and charged non-interacting surface
    percentages and the number of interface atoms, plus an intercept.
    """
    # Term-by-term accumulation in the original left-to-right order to keep
    # floating-point results bit-identical.
    dg = 0.0856851248873 * p_nis_p
    dg += -0.0685254498746 * p_nis_c
    dg += 0.0261591389985 * n_int_atoms
    dg += 3.0124939659498
    return dg
|
||||
187
src/prodigy_prot/modules/parsers.py
Normal file
187
src/prodigy_prot/modules/parsers.py
Normal file
@@ -0,0 +1,187 @@
|
||||
"""
|
||||
Functions to read PDB/mmCIF files
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
import typing
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
|
||||
from Bio.PDB.Atom import DisorderedAtom
|
||||
from Bio.PDB.Chain import Chain
|
||||
from Bio.PDB.MMCIFParser import MMCIFParser
|
||||
from Bio.PDB.Model import Model
|
||||
from Bio.PDB.PDBExceptions import PDBConstructionWarning
|
||||
from Bio.PDB.PDBParser import PDBParser
|
||||
from Bio.PDB.Polypeptide import PPBuilder, is_aa
|
||||
from Bio.PDB.Structure import Structure
|
||||
|
||||
warnings.filterwarnings("ignore", category=PDBConstructionWarning)
|
||||
log = logging.getLogger("Prodigy")
|
||||
|
||||
|
||||
def get_parser(input_f: Path) -> Union[PDBParser, MMCIFParser]:
    """Pick a Biopython parser from the file extension: mmCIF for '.cif', PDB otherwise."""
    if input_f.suffix == ".cif":
        return MMCIFParser()
    return PDBParser()
|
||||
|
||||
|
||||
def ignore(r):
    """True when the residue's hetfield starts with 'W' (water) or 'H' (HETATM)."""
    return r.id[0][0] in ("W", "H")
|
||||
|
||||
|
||||
def validate_structure(
    input_strcture_obj: Structure,  # (sic) misspelled, kept for interface compatibility
    selection: Optional[list[str]] = None,
    clean: bool = True,
) -> list[Model]:
    """
    Sanitise a parsed structure in place and return its models.

    For every model: keeps only selected chains (each selection entry is a
    comma-separated chain group), collapses double occupancies to the selected
    altloc, drops residues with insertion codes, optionally removes
    waters/HETATMs and hydrogens, and warns when chain breaks are detected.

    Raises ValueError for selected chains missing from the structure or for
    non-standard amino acids (when `clean` is True).
    """

    result: list[Model] = []
    for model in [m for m in input_strcture_obj.child_list]:

        # process selected chains
        chains: list[Chain] = list(model.get_chains())
        chain_ids = set([c.id for c in chains])

        if selection:
            sel_chains = []
            # Match selected chain with structure
            for sel in selection:
                for c_str in sel.split(","):
                    sel_chains.append(c_str)
                    if c_str not in chain_ids:
                        raise ValueError(
                            f"Selected chain not present in provided structure: {c_str}"
                        )

            # Remove unselected chains
            def _ignore_helper(x) -> bool:
                return x.id not in sel_chains

            for c in chains:
                if _ignore_helper(c):
                    if c.parent is not None:
                        c.parent.detach_child(c.id)

        # Double occupancy check: replace each disordered atom with its
        # selected altloc, normalised to a blank altloc id.
        for atom in list(model.get_atoms()):
            if atom.is_disordered():
                atom = typing.cast(DisorderedAtom, atom)
                residue = atom.parent
                assert residue is not None
                sel_at = atom.selected_child
                assert sel_at is not None
                sel_at.altloc = " "
                sel_at.disordered_flag = 0
                residue.detach_child(atom.id)
                residue.add(sel_at)

        # Insertion code check: drop residues carrying an insertion code.
        # NOTE(review): detaching while iterating get_residues() may skip the
        # residue following each removal — confirm against Biopython's
        # generator semantics.
        for c in chains:
            for residue in c.get_residues():
                if residue.get_id()[2] != " ":
                    c.detach_child(residue.id)

        if clean:
            # Remove HETATMs and solvent
            res_list = list(model.get_residues())

            for res in res_list:
                if ignore(res):
                    chain = res.parent
                    assert chain is not None
                    chain.detach_child(res.id)
                elif not is_aa(res, standard=True):
                    raise ValueError(
                        "Unsupported non-standard amino acid found: {0}".format(
                            res.resname
                        )
                    )

            # Remove Hydrogens
            atom_list = list(model.get_atoms())

            def _ignore(x):
                return x.element == "H"

            for atom in atom_list:
                if _ignore(atom):
                    residue = atom.parent
                    assert residue is not None
                    residue.detach_child(atom.name)

        # Detect gaps and compare with no. of chains
        # NOTE(review): chain_ids was computed before chain removal, so with a
        # chain selection this comparison can misfire — verify intended.
        pep_builder = PPBuilder()
        peptides = pep_builder.build_peptides(model)
        n_peptides = len(peptides)

        if n_peptides != len(chain_ids):
            message = "[!] Structure contains gaps:\n"
            for i_pp, pp in enumerate(peptides):
                message += (
                    "\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > "
                    "{2.parent.id} {2.resname}{2.id[1]}\n".format(i_pp, pp[0], pp[-1])
                )
            log.warning(message)

        result.append(model)

    return result
|
||||
|
||||
|
||||
def parse_structure(path: str) -> tuple[list[Model], int, int]:
    """
    Parse and validate a structure file.

    Returns a tuple of (validated models, number of chains, number of
    residues). All models are required to share the same chain ids and
    residue ids, otherwise ValueError is raised. Exits the process on an
    unsupported extension or a parser failure.
    """

    struct_path = Path(path)

    supported_extensions = [".pdb", ".cif", ".ent"]
    if struct_path.suffix not in supported_extensions:
        # Message now lists every supported extension (was missing '.ent').
        log.error(
            f"[!] Structure format '{struct_path.suffix}' is "
            "not supported. Use '.pdb', '.ent' or '.cif'."
        )
        sys.exit(1)

    parser = get_parser(struct_path)
    try:
        original_structure = parser.get_structure(struct_path.stem, struct_path)
    except Exception as e:
        log.exception(e)
        sys.exit(1)

    assert isinstance(original_structure, Structure)

    models: list[Model] = validate_structure(original_structure)
    # Guard against an empty structure; previously chain_sets[0] below would
    # raise an opaque IndexError.
    if not models:
        raise ValueError(f"No models found in structure: {path}")

    ## Make sure all models have the same chains
    chain_sets = [set(chain.id for chain in model.get_chains()) for model in models]
    if not all(chain_set == chain_sets[0] for chain_set in chain_sets):
        raise ValueError(
            "Not all models have the same chains. Found chain sets: "
            + ", ".join(str(s) for s in chain_sets)
        )

    ## ... and the same residues
    res_sets = [set(res.id for res in model.get_residues()) for model in models]
    if not all(res_set == res_sets[0] for res_set in res_sets):
        raise ValueError(
            "Not all models have the same residues. Found residue sets: "
            + ", ".join(str(s) for s in res_sets)
        )

    return (models, len(chain_sets[0]), len(res_sets[0]))
|
||||
301
src/prodigy_prot/modules/prodigy.py
Normal file
301
src/prodigy_prot/modules/prodigy.py
Normal file
@@ -0,0 +1,301 @@
|
||||
import sys
|
||||
from io import TextIOWrapper
|
||||
from typing import Optional, TextIO, Union
|
||||
|
||||
from Bio.PDB.Model import Model
|
||||
from Bio.PDB.NeighborSearch import NeighborSearch
|
||||
#from Bio.PDB.Structure import Structure
|
||||
|
||||
from prodigy_prot.modules import aa_properties
|
||||
from prodigy_prot.modules.freesasa_tools import execute_freesasa_api
|
||||
from prodigy_prot.modules.models import IC_NIS
|
||||
from prodigy_prot.modules.utils import dg_to_kd
|
||||
|
||||
|
||||
def calculate_ic(
    model: Model, d_cutoff: float = 5.5, selection: Optional[dict[str, int]] = None
) -> list:
    """
    Calculates intermolecular contacts in a parsed struct object.

    Returns the sorted list of residue pairs within `d_cutoff` Å that belong
    to different chains (or, when `selection` maps chain id -> group index,
    to different selection groups). Raises ValueError when none are found.
    """
    neighbor_search = NeighborSearch(list(model.get_atoms()))
    residue_pairs = neighbor_search.search_all(radius=d_cutoff, level="R")

    assert residue_pairs is not None

    if selection:
        groups = selection

        def _group(residue):
            return groups.get(residue.parent.id)

        # Keep pairs where both chains are selected and sit in distinct groups.
        ic_list = [
            pair
            for pair in residue_pairs
            if _group(pair[0]) is not None
            and _group(pair[1]) is not None
            and _group(pair[0]) != _group(pair[1])
        ]
    else:
        # No selection: any inter-chain pair counts.
        ic_list = [
            pair for pair in residue_pairs if pair[0].parent.id != pair[1].parent.id
        ]

    if not ic_list:
        raise ValueError("No contacts found for selection")

    ic_list.sort()
    return ic_list
|
||||
|
||||
|
||||
def analyse_contacts(contact_list: list) -> dict[str, float]:
    """
    Enumerates and classifies contacts based on the chemical characteristics
    of the participating amino acids.

    Each residue pair is classified twice: once with the A/C/P
    (apolar/charged/polar) alphabet and once with the B/L
    (hydrophoBic/hydrophiLic) alphabet. Pairs containing a residue missing
    from an alphabet are skipped for that alphabet.
    """

    bins = {
        "AA": 0.0,
        "PP": 0.0,
        "CC": 0.0,
        "AP": 0.0,
        "CP": 0.0,
        "AC": 0.0,
        "LL": 0.0,
        "BL": 0.0,
        "BB": 0.0
    }

    # Single pass per alphabet instead of two duplicated loops.
    for character_map in (
        aa_properties.aa_character_ic,
        aa_properties.aa_character_hydro,
    ):
        for res_i, res_j in contact_list:
            char_i = character_map.get(res_i.resname)
            char_j = character_map.get(res_j.resname)
            if char_i is not None and char_j is not None:
                # Sort so e.g. ("P", "A") and ("A", "P") both land in "AP".
                bins["".join(sorted((char_i, char_j)))] += 1

    return bins
|
||||
|
||||
|
||||
def analyse_nis(sasa_dict: dict, acc_threshold: float = 0.05) -> list[float]:
    """
    Returns the percentages of apolar, polar, and charged
    residues at the interface, according to an accessibility
    criterion.

    Parameters
    ----------
    sasa_dict : mapping of (chain, resname, resid) -> relative ASA.
    acc_threshold : minimum relative ASA for a residue to count as
        solvent accessible.

    Returns
    -------
    [apolar%, charged%, polar%] over all residues at or above the threshold.

    Raises
    ------
    ValueError if no residue reaches the threshold (previously this
    surfaced as an opaque ZeroDivisionError).
    KeyError if a residue name is missing from the PROTORP table.
    """

    _data = aa_properties.aa_character_protorp

    # Slot per chemical character: apolar, charged, polar.
    _index = {"A": 0, "C": 1, "P": 2}

    count = [0, 0, 0]
    for res_uid, rsa in sasa_dict.items():
        _, resname, _ = res_uid
        if rsa >= acc_threshold:
            count[_index[_data[resname]]] += 1

    # Hoisted out of the comprehension so the sum is computed once.
    total = sum(count)
    if total == 0:
        raise ValueError("No accessible residues above threshold")

    return [100.0 * x / total for x in count]
|
||||
|
||||
|
||||
class Prodigy:
    """
    Driver object for a PRODIGY binding-affinity prediction on one model.

    Holds the model, the chain-group selection and the temperature, and after
    `predict()` exposes the contact network, contact-class counts, NIS
    percentages, predicted ΔG (`ba_val`, kcal/mol) and Kd (`kd_val`, M).
    """

    # init parameters
    def __init__(
        self,
        model: Model,
        name: str = "",
        selection: Optional[list[str]] = None,
        temp: float = 25.0,  # temperature in °C, used for the Kd conversion
    ):
        self.temp = float(temp)
        # Default selection: every chain forms its own group.
        if selection is None:
            self.selection = [chain.id for chain in model.get_chains()]
        else:
            self.selection = selection
        self.model = model
        self.name = name
        # Residue-pair contact list, filled by predict().
        self.ic_network: list = []
        # Contact counts per chemical-character class (see analyse_contacts).
        self.bins: dict[str, float] = {
            "CC": 0.0,
            "CP": 0.0,
            "AC": 0.0,
            "PP": 0.0,
            "AP": 0.0,
            "AA": 0.0,
            "LL": 0.0,
            "BL": 0.0,
            "BB": 0.0
        }

        # NIS percentages (apolar/charged/polar) and prediction outputs.
        self.nis_a = 0.0
        self.nis_c = 0.0
        self.nis_p = 0.0
        self.ba_val = 0.0
        self.kd_val = 0.0

    def predict(
        self,
        temp: Optional[float] = None,
        distance_cutoff: float = 5.5,
        acc_threshold: float = 0.05,
    ):
        """
        Run the full pipeline: contacts -> contact classes -> SASA/NIS ->
        IC-NIS affinity and Kd. Results are stored on the instance.

        Raises ValueError when selection groups overlap or no contacts exist.
        """
        if temp is not None:
            self.temp = temp
        # Make selection dict from user option or PDB chains:
        # chain id -> group index; chains in the same group never count
        # as intermolecular partners.
        selection_dict: dict[str, int] = {}
        for igroup, group in enumerate(self.selection):
            chains = group.split(",")
            for chain in chains:
                if chain in selection_dict:
                    errmsg = "Selections must be disjoint sets: " f"{chain} is repeated"
                    raise ValueError(errmsg)
                selection_dict[chain] = igroup

        # Contacts
        self.ic_network = calculate_ic(
            self.model, d_cutoff=distance_cutoff, selection=selection_dict
        )

        self.bins = analyse_contacts(self.ic_network)
        # SASA
        _, cmplx_sasa = execute_freesasa_api(self.model)
        self.nis_a, self.nis_c, self.nis_p = analyse_nis(cmplx_sasa, acc_threshold=acc_threshold)

        # Affinity Calculation
        self.ba_val = IC_NIS(
            self.bins["CC"],
            self.bins["AC"],
            self.bins["PP"],
            self.bins["AP"],
            self.nis_a,
            self.nis_c,
        )
        self.kd_val = dg_to_kd(self.ba_val, self.temp)

    def as_dict(self) -> dict:
        """Return inputs and prediction results (including contact bins) as a flat dict."""
        return_dict = {
            "model": self.model.id,
            "selection": self.selection,
            "temp": self.temp,
            "ICs": len(self.ic_network),
            "nis_a": self.nis_a,
            "nis_c": self.nis_c,
            "nis_p": self.nis_p,
            "ba_val": self.ba_val,
            "kd_val": self.kd_val,
        }
        return_dict.update(self.bins)
        return return_dict

    def print_prediction(self, outfile: str = "", quiet: bool = False, showall: bool = False) -> None:
        """
        Write the prediction report to `outfile` (or stdout).

        `quiet` prints only "name<TAB>ΔG"; `showall` adds the hydrophobicity
        bins and the polar NIS percentage.
        """
        handle: Union[TextIOWrapper, TextIO]
        if outfile:
            handle = open(outfile, "w")
        else:
            handle = sys.stdout

        if quiet:
            handle.write("{0}\t{1:8.3f}\n".format(self.name, self.ba_val))
        else:
            # Collect output lines in order
            lines = []
            lines.append(f"[+] No. of intermolecular contacts: {len(self.ic_network)}\n")
            lines.append(f"[+] No. of Charged-Charged contacts: {self.bins['CC']}\n")
            lines.append(f"[+] No. of Charged-Polar contacts: {self.bins['CP']}\n")
            lines.append(f"[+] No. of Charged-Apolar contacts: {self.bins['AC']}\n")
            lines.append(f"[+] No. of Polar-Polar contacts: {self.bins['PP']}\n")
            lines.append(f"[+] No. of Apolar-Polar contacts: {self.bins['AP']}\n")
            lines.append(f"[+] No. of Apolar-Apolar contacts: {self.bins['AA']}\n")

            if showall:
                lines.append(f"[+] No. of hydrophiLic-hydrophiLic contacts: {self.bins['LL']}\n")
                lines.append(f"[+] No. of hydrophoBic-hydrophiLic contacts: {self.bins['BL']}\n")
                lines.append(f"[+] No. of hydrophoBic-hydrophoBic contacts: {self.bins['BB']}\n")
                lines.append(f"[+] Percentage of Polar NIS residues: {self.nis_p:3.2f}\n")

            lines.append(f"[+] Percentage of Apolar NIS residues: {self.nis_a:3.2f}\n")
            lines.append(f"[+] Percentage of Charged NIS residues: {self.nis_c:3.2f}\n")
            lines.append(f"[++] predicted binding affinity (kcal.mol-1): {self.ba_val:8.1f}\n")
            lines.append(f"[++] predicted dissociation constant (M) at {self.temp:.1f}˚C: {self.kd_val:8.1e}\n")

            handle.writelines(lines)

        if handle is not sys.stdout:
            handle.close()

    def print_contacts(self, outfile: str = "") -> None:
        """
        Write one line per contact pair, with the residue from the first
        selection group always in the left column.
        """
        handle: Union[TextIOWrapper, TextIO]
        if outfile:
            handle = open(outfile, "w")
        else:
            handle = sys.stdout

        for res1, res2 in self.ic_network:
            _fmt_str = (
                "{0.resname:>5s} {0.id[1]:5} {0.parent.id:>3s} {1.resname:>5s}"
                " {1.id[1]:5} {1.parent.id:>3s}\n"
            )
            # NOTE(review): `in` here is a substring test against the group
            # string (e.g. "A,B"); fine for 1-char chain ids, but multi-char
            # ids could match spuriously — confirm.
            if res1.parent.id not in self.selection[0]:
                res1, res2 = res2, res1
            handle.write(_fmt_str.format(res1, res2))

        if handle is not sys.stdout:
            handle.close()

    def print_pymol_script(self, outfile: str = "") -> None:
        """
        Write a PyMOL .pml script colouring the two partners and their
        interface residues.

        NOTE(review): indexes selection_strings[1] and iterates exactly two
        interfaces, so this assumes exactly two selection groups — an
        IndexError will occur otherwise.
        """
        # Writing output PYMOL: pml script
        # initialize array with chains and save chain selection string
        selection_strings = []
        chains: dict[str, set] = {}
        for s in self.selection:
            selection_strings.append(s.replace(",", "+"))
            for c in s.split(","):
                chains[c] = set()

        # loop over pairs and add interface residues to respective chains
        for pair in self.ic_network:
            for r in pair:
                chains[r.parent.id].add(str(r.id[1]))

        # set output stream
        handle = open(outfile, "w") if outfile else sys.stdout

        # write default setup strings
        handle.writelines(
            [
                "color silver\n",
                "as cartoon\n",
                "bg_color white\n",
                "center\n",
                "color lightblue, chain {}\n".format(selection_strings[0]),
                "color lightpink, chain {}\n".format(selection_strings[1]),
            ]
        )

        # loop over interfaces construct selection strings
        # and write interface related commands
        for color, iface in [("blue", 1), ("hotpink", 2)]:
            p_sel_string = " or ".join(
                [
                    "chain {} and resi {}".format(c, "+".join(chains[c]))
                    for c in selection_strings[iface - 1].split("+")
                ]
            )
            handle.write("select iface{}, {}\n".format(iface, p_sel_string))
            handle.write("color {}, iface{}\n".format(color, iface))
            handle.write("show sticks, iface{}\n".format(iface))

        # close file handle if applicable
        if handle is not sys.stdout:
            handle.close()
|
||||
25
src/prodigy_prot/modules/utils.py
Normal file
25
src/prodigy_prot/modules/utils.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""
|
||||
Assorted utility functions.
|
||||
"""
|
||||
|
||||
import math
|
||||
import os
|
||||
|
||||
|
||||
def check_path(path: str) -> str:
    """
    Checks if a file is readable.

    Returns the absolute path; raises IOError when the path is not an
    existing regular file.
    """
    resolved = os.path.abspath(path)
    if not os.path.isfile(resolved):
        raise IOError("Could not read file: {0}".format(path))
    return resolved
|
||||
|
||||
|
||||
def dg_to_kd(dg: float, temperature: float = 25.0) -> float:
    """Conversion of ΔG (kcal/mol) into the dissociation constant Kd, via Kd = exp(ΔG / RT)."""
    # R in kcal·mol⁻¹·K⁻¹ times the absolute temperature.
    kelvin = temperature + 273.15
    rt_product = 0.0019858775 * kelvin
    return math.exp(dg / rt_product)
|
||||
3
tests/__init__.py
Normal file
3
tests/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from pathlib import Path

# Directory holding the fixture structures shared by the test suite.
TEST_DATA = Path(__file__).parent / "test_data"
|
||||
3020
tests/test_data/2oob.cif
Normal file
3020
tests/test_data/2oob.cif
Normal file
File diff suppressed because it is too large
Load Diff
1449
tests/test_data/2oob.pdb
Normal file
1449
tests/test_data/2oob.pdb
Normal file
File diff suppressed because it is too large
Load Diff
1460
tests/test_data/dataset.json
Normal file
1460
tests/test_data/dataset.json
Normal file
File diff suppressed because it is too large
Load Diff
78
tests/test_parsers.py
Normal file
78
tests/test_parsers.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from Bio.PDB.MMCIFParser import MMCIFParser
|
||||
from Bio.PDB.PDBParser import PDBParser
|
||||
from Bio.PDB.Structure import Structure
|
||||
|
||||
from prodigy_prot.modules.parsers import get_parser, parse_structure, validate_structure
|
||||
|
||||
from . import TEST_DATA
|
||||
|
||||
|
||||
@pytest.fixture
def input_structure_cif() -> Path:
    """Path to the 2oob test structure in mmCIF format."""
    # `return` (not `yield`) for consistency with input_structure_pdb;
    # there is no teardown to run.
    return Path(TEST_DATA, "2oob.cif")
|
||||
|
||||
|
||||
@pytest.fixture
def input_structure_pdb() -> Path:
    """Path to the 2oob test structure in PDB format."""
    return Path(TEST_DATA, "2oob.pdb")
|
||||
|
||||
|
||||
def test_get_parser_pdb(input_structure_pdb):
    """A '.pdb' path must select the PDBParser."""
    assert isinstance(get_parser(input_structure_pdb), PDBParser)
|
||||
|
||||
|
||||
def test_get_parser_cif(input_structure_cif):
    """A '.cif' path must select the MMCIFParser."""
    assert isinstance(get_parser(input_structure_cif), MMCIFParser)
|
||||
|
||||
|
||||
def test_validate_structure_pdb(input_structure_pdb):
    """validate_structure returns the model list of a clean PDB structure."""
    parsed = PDBParser().get_structure("test_structure", input_structure_pdb)
    assert isinstance(parsed, Structure)

    assert validate_structure(parsed) == parsed.child_list
|
||||
|
||||
|
||||
def test_validate_structure_cif(input_structure_cif):
    """validate_structure returns the model list of a clean mmCIF structure."""
    parsed = MMCIFParser().get_structure("test_structure", input_structure_cif)
    assert isinstance(parsed, Structure)

    assert validate_structure(parsed) == parsed.child_list
|
||||
|
||||
|
||||
def test_parse_structure_pdb(input_structure_pdb):
|
||||
|
||||
parser = PDBParser()
|
||||
structure = parser.get_structure(input_structure_pdb.stem, input_structure_pdb)
|
||||
assert isinstance(structure, Structure)
|
||||
|
||||
result, num_chains, num_res = parse_structure(input_structure_pdb)
|
||||
|
||||
assert result == structure.child_list
|
||||
assert num_chains == 2
|
||||
assert num_res == 116
|
||||
|
||||
|
||||
def test_parse_structure_cif(input_structure_cif):
|
||||
|
||||
parser = MMCIFParser()
|
||||
structure = parser.get_structure(input_structure_cif.stem, input_structure_cif)
|
||||
assert isinstance(structure, Structure)
|
||||
|
||||
result, num_chains, num_res = parse_structure(input_structure_cif)
|
||||
|
||||
assert result == structure.child_list
|
||||
assert num_chains == 2
|
||||
assert num_res == 116
|
||||
239
tests/test_prodigy.py
Normal file
239
tests/test_prodigy.py
Normal file
@@ -0,0 +1,239 @@
|
||||
import json
|
||||
import tarfile
|
||||
import tempfile
|
||||
from io import BufferedReader, TextIOWrapper
|
||||
from os.path import basename, splitext
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from Bio.PDB.Model import Model
|
||||
from Bio.PDB.PDBParser import PDBParser
|
||||
from Bio.PDB.Residue import Residue
|
||||
from Bio.PDB.Structure import Structure
|
||||
|
||||
from prodigy_prot.modules.parsers import validate_structure
|
||||
from prodigy_prot.modules.prodigy import (
|
||||
Prodigy,
|
||||
analyse_contacts,
|
||||
analyse_nis,
|
||||
calculate_ic,
|
||||
)
|
||||
|
||||
from . import TEST_DATA
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def input_model():
|
||||
input_f = Path(TEST_DATA, "2oob.pdb")
|
||||
parser = PDBParser()
|
||||
structure = parser.get_structure(input_f.stem, input_f)
|
||||
assert isinstance(structure, Structure)
|
||||
return structure.child_list[0]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def compressed_dataset_f():
|
||||
return Path(TEST_DATA, "dataset.tgz")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def expected_dataset_json():
|
||||
return Path(TEST_DATA, "dataset.json")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def prodigy_class(input_model):
|
||||
yield Prodigy(input_model)
|
||||
|
||||
|
||||
def test_calculate_ic(input_model):
|
||||
|
||||
result = calculate_ic(model=input_model, d_cutoff=5.5)
|
||||
|
||||
assert len(result) == 78
|
||||
|
||||
first_hit: tuple[Residue, Residue] = result[0]
|
||||
|
||||
assert first_hit[0].get_resname() == "ASN"
|
||||
assert first_hit[1].get_resname() == "LYS"
|
||||
|
||||
|
||||
def test_calculate_ic_with_selection(input_model):
|
||||
|
||||
result = calculate_ic(model=input_model, d_cutoff=5.5, selection={"A": 0, "B": 1})
|
||||
|
||||
assert len(result) == 78
|
||||
|
||||
first_hit: tuple[Residue, Residue] = result[0]
|
||||
|
||||
assert first_hit[0].get_resname() == "ASN"
|
||||
assert first_hit[1].get_resname() == "LYS"
|
||||
|
||||
|
||||
def test_analyse_contacts(input_model):
|
||||
|
||||
res_a = input_model["A"][(" ", 931, " ")]
|
||||
res_b = input_model["B"][(" ", 6, " ")]
|
||||
contact = (res_a, res_b)
|
||||
|
||||
test_input = [contact]
|
||||
|
||||
result = analyse_contacts(test_input)
|
||||
|
||||
expected_output = {
|
||||
"AA": 0.0,
|
||||
"PP": 0.0,
|
||||
"CC": 0.0,
|
||||
"AP": 0.0,
|
||||
"CP": 1.0,
|
||||
"AC": 0.0,
|
||||
"LL": 1.0,
|
||||
"BL": 0.0,
|
||||
"BB": 0.0
|
||||
}
|
||||
assert result == expected_output
|
||||
|
||||
|
||||
def test_analyse_nis():
|
||||
|
||||
test_input = {("B", "ARG", "72"): 0.9}
|
||||
apolar, polar, charged = analyse_nis(test_input)
|
||||
|
||||
assert apolar == 0.0
|
||||
assert polar == 100.0
|
||||
assert charged == 0.0
|
||||
|
||||
|
||||
def test_prodigy_predict(prodigy_class):
|
||||
|
||||
prodigy_class.predict()
|
||||
|
||||
assert prodigy_class.nis_a == pytest.approx(35.5, abs=1.0)
|
||||
assert prodigy_class.nis_c == pytest.approx(38.0, abs=1.0)
|
||||
assert prodigy_class.ba_val == pytest.approx(-6.2, abs=1.0)
|
||||
|
||||
# This is the actual prediction
|
||||
assert prodigy_class.kd_val == pytest.approx(2.7e-5, abs=1e-6)
|
||||
|
||||
|
||||
def test_prodigy_as_dict(prodigy_class):
|
||||
|
||||
result = prodigy_class.as_dict()
|
||||
|
||||
assert isinstance(result, dict)
|
||||
# 14 'original' + 3 hydro + 1 %NIS
|
||||
assert len(result) == 18
|
||||
|
||||
|
||||
def test_prodigy_print_prediction(prodigy_class):
|
||||
|
||||
outfile = tempfile.NamedTemporaryFile(delete=False)
|
||||
assert Path(outfile.name).stat().st_size == 0
|
||||
|
||||
prodigy_class.print_prediction(outfile.name)
|
||||
assert Path(outfile.name).stat().st_size != 0
|
||||
|
||||
Path(outfile.name).unlink()
|
||||
|
||||
|
||||
def test_prodigy_print_prediction_quiet(prodigy_class):
|
||||
|
||||
outfile = tempfile.NamedTemporaryFile(delete=False)
|
||||
assert Path(outfile.name).stat().st_size == 0
|
||||
|
||||
prodigy_class.print_prediction(outfile.name, True)
|
||||
assert Path(outfile.name).stat().st_size != 0
|
||||
|
||||
Path(outfile.name).unlink()
|
||||
|
||||
|
||||
def test_prodigy_print_contacts(input_model, prodigy_class):
|
||||
|
||||
res_a = input_model["A"][(" ", 931, " ")]
|
||||
res_b = input_model["B"][(" ", 6, " ")]
|
||||
prodigy_class.ic_network = [(res_a, res_b)]
|
||||
|
||||
outfile = tempfile.NamedTemporaryFile(delete=False)
|
||||
assert Path(outfile.name).stat().st_size == 0
|
||||
|
||||
prodigy_class.print_contacts(outfile.name)
|
||||
assert Path(outfile.name).stat().st_size != 0
|
||||
|
||||
Path(outfile.name).unlink()
|
||||
|
||||
|
||||
def test_print_pymol_script(input_model, prodigy_class):
|
||||
res_a = input_model["A"][(" ", 931, " ")]
|
||||
res_b = input_model["B"][(" ", 6, " ")]
|
||||
prodigy_class.ic_network = [(res_a, res_b)]
|
||||
|
||||
outfile = tempfile.NamedTemporaryFile(delete=False)
|
||||
assert Path(outfile.name).stat().st_size == 0
|
||||
|
||||
prodigy_class.print_pymol_script(outfile.name)
|
||||
assert Path(outfile.name).stat().st_size != 0
|
||||
|
||||
Path(outfile.name).unlink()
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_dataset_prediction(compressed_dataset_f, expected_dataset_json):
|
||||
"""
|
||||
Test method to compare prediction for 80 dataset cases with
|
||||
expected values.
|
||||
"""
|
||||
# load expected data from json
|
||||
with open(expected_dataset_json) as fh:
|
||||
expected_data = json.load(fh)
|
||||
|
||||
# load dataset PDBs
|
||||
dataset = tarfile.open(compressed_dataset_f)
|
||||
parser = PDBParser(QUIET=True)
|
||||
|
||||
keys_equal = ["AA", "PP", "CC", "AP", "CP", "AC"]
|
||||
diffs = {"ba_val": [], "nis_a": [], "nis_c": []}
|
||||
|
||||
# run prodigy for each dataset in the PDB
|
||||
for entry in dataset:
|
||||
s_name, s_ext = splitext(basename(entry.name))
|
||||
|
||||
# skip system files in archive
|
||||
if not s_name.isalnum() or s_ext != ".pdb":
|
||||
continue
|
||||
|
||||
handle = dataset.extractfile(entry)
|
||||
|
||||
# Wrap filehandle to ensure string file handle in Python 3
|
||||
handle = TextIOWrapper(BufferedReader(handle)) # type: ignore
|
||||
|
||||
parsed_structure = parser.get_structure(s_name, handle)
|
||||
assert isinstance(parsed_structure, Structure)
|
||||
|
||||
models = validate_structure(parsed_structure, selection=["A", "B"])
|
||||
|
||||
# Test for structure object
|
||||
# Check if it's a list and all elements are Model objects
|
||||
assert isinstance(models, list) and all(
|
||||
isinstance(item, Model) for item in models
|
||||
)
|
||||
# assert isinstance(s, list[Model])
|
||||
|
||||
# run prediction and retrieve result dict
|
||||
for m in models:
|
||||
prod = Prodigy(m, selection=["A", "B"])
|
||||
prod.predict()
|
||||
results = prod.as_dict()
|
||||
|
||||
# check for equality of prdicted interface residues
|
||||
for k in keys_equal:
|
||||
observed_value = results[k]
|
||||
expected_value = expected_data[s_name][k]
|
||||
assert observed_value == pytest.approx(expected_value)
|
||||
|
||||
# check that NIS and binding afinity values are within 2% of
|
||||
# expected values and add diffs for summary
|
||||
for k in diffs.keys():
|
||||
delta = abs(results[k] / expected_data[s_name][k] - 1)
|
||||
# assume a difference of less then 2%
|
||||
assert delta == pytest.approx(0, abs=0.02)
|
||||
diffs[k].append(delta)
|
||||
21
tests/test_utils.py
Normal file
21
tests/test_utils.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import math
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from prodigy_prot.modules.utils import check_path, dg_to_kd
|
||||
|
||||
|
||||
def test_check_path():
|
||||
|
||||
temp_f = tempfile.NamedTemporaryFile(delete=False)
|
||||
|
||||
result = check_path(temp_f.name)
|
||||
|
||||
assert result == temp_f.name
|
||||
|
||||
Path(temp_f.name).unlink()
|
||||
|
||||
|
||||
def test_dg_to_kd():
|
||||
|
||||
assert math.isclose(dg_to_kd(0.0), 1.0, rel_tol=1e-9)
|
||||
Reference in New Issue
Block a user