Initial commit: FlowDock pipeline configured for WES execution
Some checks failed
Code Quality Main / code-quality (push) Has been cancelled
Release Drafter / update_release_draft (push) Has been cancelled
Tests / run_tests_ubuntu (ubuntu-latest, 3.10) (push) Has been cancelled
Tests / run_tests_ubuntu (ubuntu-latest, 3.8) (push) Has been cancelled
Tests / run_tests_ubuntu (ubuntu-latest, 3.9) (push) Has been cancelled
Tests / run_tests_macos (macos-latest, 3.10) (push) Has been cancelled
Tests / run_tests_macos (macos-latest, 3.8) (push) Has been cancelled
Tests / run_tests_macos (macos-latest, 3.9) (push) Has been cancelled
Tests / run_tests_windows (windows-latest, 3.10) (push) Has been cancelled
Tests / run_tests_windows (windows-latest, 3.8) (push) Has been cancelled
Tests / run_tests_windows (windows-latest, 3.9) (push) Has been cancelled
Tests / code-coverage (push) Has been cancelled
Some checks failed
Code Quality Main / code-quality (push) Has been cancelled
Release Drafter / update_release_draft (push) Has been cancelled
Tests / run_tests_ubuntu (ubuntu-latest, 3.10) (push) Has been cancelled
Tests / run_tests_ubuntu (ubuntu-latest, 3.8) (push) Has been cancelled
Tests / run_tests_ubuntu (ubuntu-latest, 3.9) (push) Has been cancelled
Tests / run_tests_macos (macos-latest, 3.10) (push) Has been cancelled
Tests / run_tests_macos (macos-latest, 3.8) (push) Has been cancelled
Tests / run_tests_macos (macos-latest, 3.9) (push) Has been cancelled
Tests / run_tests_windows (windows-latest, 3.10) (push) Has been cancelled
Tests / run_tests_windows (windows-latest, 3.8) (push) Has been cancelled
Tests / run_tests_windows (windows-latest, 3.9) (push) Has been cancelled
Tests / code-coverage (push) Has been cancelled
This commit is contained in:
6
.env.example
Normal file
6
.env.example
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
# example of file for storing private and user specific environment variables, like keys or system paths
|
||||||
|
# rename it to ".env" (excluded from version control by default)
|
||||||
|
# .env is loaded by train.py automatically
|
||||||
|
# hydra allows you to reference variables in .yaml configs with special syntax: ${oc.env:MY_VAR}
|
||||||
|
|
||||||
|
PLINDER_MOUNT="$(pwd)/data/PLINDER"
|
||||||
22
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
22
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
## What does this PR do?
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Please include a summary of the change and which issue is fixed.
|
||||||
|
Please also include relevant motivation and context.
|
||||||
|
List any dependencies that are required for this change.
|
||||||
|
List all the breaking changes introduced by this pull request.
|
||||||
|
-->
|
||||||
|
|
||||||
|
Fixes #\<issue_number>
|
||||||
|
|
||||||
|
## Before submitting
|
||||||
|
|
||||||
|
- [ ] Did you make sure **title is self-explanatory** and **the description concisely explains the PR**?
|
||||||
|
- [ ] Did you make sure your **PR does only one thing**, instead of bundling different changes together?
|
||||||
|
- [ ] Did you list all the **breaking changes** introduced by this pull request?
|
||||||
|
- [ ] Did you **test your PR locally** with `pytest` command?
|
||||||
|
- [ ] Did you **run pre-commit hooks** with `pre-commit run -a` command?
|
||||||
|
|
||||||
|
## Did you have fun?
|
||||||
|
|
||||||
|
Make sure you had fun coding 🙃
|
||||||
15
.github/codecov.yml
vendored
Normal file
15
.github/codecov.yml
vendored
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
coverage:
|
||||||
|
status:
|
||||||
|
# measures overall project coverage
|
||||||
|
project:
|
||||||
|
default:
|
||||||
|
threshold: 100% # how much decrease in coverage is needed to not consider success
|
||||||
|
|
||||||
|
# measures PR or single commit coverage
|
||||||
|
patch:
|
||||||
|
default:
|
||||||
|
threshold: 100% # how much decrease in coverage is needed to not consider success
|
||||||
|
|
||||||
|
|
||||||
|
# project: off
|
||||||
|
# patch: off
|
||||||
16
.github/dependabot.yml
vendored
Normal file
16
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# To get started with Dependabot version updates, you'll need to specify which
|
||||||
|
# package ecosystems to update and where the package manifests are located.
|
||||||
|
# Please see the documentation for all configuration options:
|
||||||
|
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||||
|
|
||||||
|
version: 2
|
||||||
|
updates:
|
||||||
|
- package-ecosystem: "pip" # See documentation for possible values
|
||||||
|
directory: "/" # Location of package manifests
|
||||||
|
schedule:
|
||||||
|
interval: "daily"
|
||||||
|
ignore:
|
||||||
|
- dependency-name: "pytorch-lightning"
|
||||||
|
update-types: ["version-update:semver-patch"]
|
||||||
|
- dependency-name: "torchmetrics"
|
||||||
|
update-types: ["version-update:semver-patch"]
|
||||||
44
.github/release-drafter.yml
vendored
Normal file
44
.github/release-drafter.yml
vendored
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
name-template: "v$RESOLVED_VERSION"
|
||||||
|
tag-template: "v$RESOLVED_VERSION"
|
||||||
|
|
||||||
|
categories:
|
||||||
|
- title: "🚀 Features"
|
||||||
|
labels:
|
||||||
|
- "feature"
|
||||||
|
- "enhancement"
|
||||||
|
- title: "🐛 Bug Fixes"
|
||||||
|
labels:
|
||||||
|
- "fix"
|
||||||
|
- "bugfix"
|
||||||
|
- "bug"
|
||||||
|
- title: "🧹 Maintenance"
|
||||||
|
labels:
|
||||||
|
- "maintenance"
|
||||||
|
- "dependencies"
|
||||||
|
- "refactoring"
|
||||||
|
- "cosmetic"
|
||||||
|
- "chore"
|
||||||
|
- title: "📝️ Documentation"
|
||||||
|
labels:
|
||||||
|
- "documentation"
|
||||||
|
- "docs"
|
||||||
|
|
||||||
|
change-template: "- $TITLE @$AUTHOR (#$NUMBER)"
|
||||||
|
change-title-escapes: '\<*_&' # You can add # and @ to disable mentions
|
||||||
|
|
||||||
|
version-resolver:
|
||||||
|
major:
|
||||||
|
labels:
|
||||||
|
- "major"
|
||||||
|
minor:
|
||||||
|
labels:
|
||||||
|
- "minor"
|
||||||
|
patch:
|
||||||
|
labels:
|
||||||
|
- "patch"
|
||||||
|
default: patch
|
||||||
|
|
||||||
|
template: |
|
||||||
|
## Changes
|
||||||
|
|
||||||
|
$CHANGES
|
||||||
22
.github/workflows/code-quality-main.yaml
vendored
Normal file
22
.github/workflows/code-quality-main.yaml
vendored
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Same as `code-quality-pr.yaml` but triggered on commit to main branch
|
||||||
|
# and runs on all files (instead of only the changed ones)
|
||||||
|
|
||||||
|
name: Code Quality Main
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
code-quality:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
|
||||||
|
- name: Run pre-commits
|
||||||
|
uses: pre-commit/action@v2.0.3
|
||||||
36
.github/workflows/code-quality-pr.yaml
vendored
Normal file
36
.github/workflows/code-quality-pr.yaml
vendored
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# This workflow finds which files were changed, prints them,
|
||||||
|
# and runs `pre-commit` on those files.
|
||||||
|
|
||||||
|
# Inspired by the sktime library:
|
||||||
|
# https://github.com/alan-turing-institute/sktime/blob/main/.github/workflows/test.yml
|
||||||
|
|
||||||
|
name: Code Quality PR
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
branches: [main, "release/*", "dev"]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
code-quality:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
|
||||||
|
- name: Find modified files
|
||||||
|
id: file_changes
|
||||||
|
uses: trilom/file-changes-action@v1.2.4
|
||||||
|
with:
|
||||||
|
output: " "
|
||||||
|
|
||||||
|
- name: List modified files
|
||||||
|
run: echo '${{ steps.file_changes.outputs.files}}'
|
||||||
|
|
||||||
|
- name: Run pre-commits
|
||||||
|
uses: pre-commit/action@v2.0.3
|
||||||
|
with:
|
||||||
|
extra_args: --files ${{ steps.file_changes.outputs.files}}
|
||||||
27
.github/workflows/release-drafter.yml
vendored
Normal file
27
.github/workflows/release-drafter.yml
vendored
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
name: Release Drafter
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
# branches to consider in the event; optional, defaults to all
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
update_release_draft:
|
||||||
|
permissions:
|
||||||
|
# write permission is required to create a github release
|
||||||
|
contents: write
|
||||||
|
# write permission is required for autolabeler
|
||||||
|
# otherwise, read permission is required at least
|
||||||
|
pull-requests: write
|
||||||
|
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
# Drafts your next Release notes as Pull Requests are merged into "master"
|
||||||
|
- uses: release-drafter/release-drafter@v5
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
139
.github/workflows/test.yml
vendored
Normal file
139
.github/workflows/test.yml
vendored
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
name: Tests
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
pull_request:
|
||||||
|
branches: [main, "release/*", "dev"]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run_tests_ubuntu:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: ["ubuntu-latest"]
|
||||||
|
python-version: ["3.8", "3.9", "3.10"]
|
||||||
|
|
||||||
|
timeout-minutes: 20
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v3
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
conda env create -f environment.yml
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install pytest
|
||||||
|
pip install sh
|
||||||
|
|
||||||
|
- name: List dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip list
|
||||||
|
|
||||||
|
- name: Run pytest
|
||||||
|
run: |
|
||||||
|
pytest -v
|
||||||
|
|
||||||
|
run_tests_macos:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: ["macos-latest"]
|
||||||
|
python-version: ["3.8", "3.9", "3.10"]
|
||||||
|
|
||||||
|
timeout-minutes: 20
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v3
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
conda env create -f environment.yml
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install pytest
|
||||||
|
pip install sh
|
||||||
|
|
||||||
|
- name: List dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip list
|
||||||
|
|
||||||
|
- name: Run pytest
|
||||||
|
run: |
|
||||||
|
pytest -v
|
||||||
|
|
||||||
|
run_tests_windows:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: ["windows-latest"]
|
||||||
|
python-version: ["3.8", "3.9", "3.10"]
|
||||||
|
|
||||||
|
timeout-minutes: 20
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v3
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
conda env create -f environment.yml
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install pytest
|
||||||
|
|
||||||
|
- name: List dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip list
|
||||||
|
|
||||||
|
- name: Run pytest
|
||||||
|
run: |
|
||||||
|
pytest -v
|
||||||
|
|
||||||
|
# upload code coverage report
|
||||||
|
code-coverage:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Set up Python 3.10
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: "3.10"
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
conda env create -f environment.yml
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install pytest
|
||||||
|
pip install pytest-cov[toml]
|
||||||
|
pip install sh
|
||||||
|
|
||||||
|
- name: Run tests and collect coverage
|
||||||
|
run: pytest --cov flowdock # NEEDS TO BE UPDATED WHEN CHANGING THE NAME OF "src" FOLDER
|
||||||
|
|
||||||
|
- name: Upload coverage to Codecov
|
||||||
|
uses: codecov/codecov-action@v3
|
||||||
11
.gitignore
vendored
Normal file
11
.gitignore
vendored
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
work/
|
||||||
|
.nextflow/
|
||||||
|
.nextflow.log*
|
||||||
|
*.log.*
|
||||||
|
results/
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.tmp
|
||||||
|
*.swp
|
||||||
150
.pre-commit-config.yaml
Normal file
150
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
default_language_version:
|
||||||
|
python: python3
|
||||||
|
|
||||||
|
exclude: "^forks/"
|
||||||
|
|
||||||
|
repos:
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v5.0.0
|
||||||
|
hooks:
|
||||||
|
# list of supported hooks: https://pre-commit.com/hooks.html
|
||||||
|
- id: trailing-whitespace
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
- id: check-docstring-first
|
||||||
|
- id: check-yaml
|
||||||
|
- id: debug-statements
|
||||||
|
- id: detect-private-key
|
||||||
|
- id: check-executables-have-shebangs
|
||||||
|
- id: check-toml
|
||||||
|
- id: check-case-conflict
|
||||||
|
- id: check-added-large-files
|
||||||
|
args: ["--maxkb=20000"]
|
||||||
|
|
||||||
|
# python code formatting
|
||||||
|
- repo: https://github.com/psf/black
|
||||||
|
rev: 25.1.0
|
||||||
|
hooks:
|
||||||
|
- id: black
|
||||||
|
args: [--line-length, "99"]
|
||||||
|
|
||||||
|
# python import sorting
|
||||||
|
- repo: https://github.com/PyCQA/isort
|
||||||
|
rev: 6.0.1
|
||||||
|
hooks:
|
||||||
|
- id: isort
|
||||||
|
args: ["--profile", "black", "--filter-files"]
|
||||||
|
|
||||||
|
# python upgrading syntax to newer version
|
||||||
|
- repo: https://github.com/asottile/pyupgrade
|
||||||
|
rev: v3.19.1
|
||||||
|
hooks:
|
||||||
|
- id: pyupgrade
|
||||||
|
args: [--py38-plus]
|
||||||
|
|
||||||
|
# python docstring formatting
|
||||||
|
- repo: https://github.com/myint/docformatter
|
||||||
|
rev: eb1df347edd128b30cd3368dddc3aa65edcfac38 # Don't autoupdate until https://github.com/PyCQA/docformatter/issues/293 is fixed
|
||||||
|
hooks:
|
||||||
|
- id: docformatter
|
||||||
|
args:
|
||||||
|
[
|
||||||
|
--in-place,
|
||||||
|
--wrap-summaries=99,
|
||||||
|
--wrap-descriptions=99,
|
||||||
|
--style=sphinx,
|
||||||
|
--black,
|
||||||
|
]
|
||||||
|
|
||||||
|
# python docstring coverage checking
|
||||||
|
- repo: https://github.com/econchick/interrogate
|
||||||
|
rev: 1.7.0 # or master if you're bold
|
||||||
|
hooks:
|
||||||
|
- id: interrogate
|
||||||
|
args:
|
||||||
|
[
|
||||||
|
--verbose,
|
||||||
|
--fail-under=80,
|
||||||
|
--ignore-init-module,
|
||||||
|
--ignore-init-method,
|
||||||
|
--ignore-module,
|
||||||
|
--ignore-nested-functions,
|
||||||
|
-vv,
|
||||||
|
]
|
||||||
|
|
||||||
|
# python check (PEP8), programming errors and code complexity
|
||||||
|
- repo: https://github.com/PyCQA/flake8
|
||||||
|
rev: 7.1.2
|
||||||
|
hooks:
|
||||||
|
- id: flake8
|
||||||
|
args:
|
||||||
|
[
|
||||||
|
"--extend-ignore",
|
||||||
|
"E203,E402,E501,F401,F841,RST2,RST301",
|
||||||
|
"--exclude",
|
||||||
|
"logs/*,data/*",
|
||||||
|
]
|
||||||
|
additional_dependencies: [flake8-rst-docstrings==0.3.0]
|
||||||
|
|
||||||
|
# python security linter
|
||||||
|
- repo: https://github.com/PyCQA/bandit
|
||||||
|
rev: "1.8.3"
|
||||||
|
hooks:
|
||||||
|
- id: bandit
|
||||||
|
args: ["-s", "B101"]
|
||||||
|
|
||||||
|
# yaml formatting
|
||||||
|
- repo: https://github.com/pre-commit/mirrors-prettier
|
||||||
|
rev: v4.0.0-alpha.8
|
||||||
|
hooks:
|
||||||
|
- id: prettier
|
||||||
|
types: [yaml]
|
||||||
|
exclude: "environment.yaml"
|
||||||
|
|
||||||
|
# shell scripts linter
|
||||||
|
- repo: https://github.com/shellcheck-py/shellcheck-py
|
||||||
|
rev: v0.10.0.1
|
||||||
|
hooks:
|
||||||
|
- id: shellcheck
|
||||||
|
|
||||||
|
# md formatting
|
||||||
|
- repo: https://github.com/executablebooks/mdformat
|
||||||
|
rev: 0.7.22
|
||||||
|
hooks:
|
||||||
|
- id: mdformat
|
||||||
|
args: ["--number"]
|
||||||
|
additional_dependencies:
|
||||||
|
- mdformat-gfm
|
||||||
|
- mdformat-tables
|
||||||
|
- mdformat_frontmatter
|
||||||
|
# - mdformat-toc
|
||||||
|
# - mdformat-black
|
||||||
|
|
||||||
|
# word spelling linter
|
||||||
|
- repo: https://github.com/codespell-project/codespell
|
||||||
|
rev: v2.4.1
|
||||||
|
hooks:
|
||||||
|
- id: codespell
|
||||||
|
args:
|
||||||
|
- --skip=logs/**,data/**,*.ipynb,flowdock/data/components/constants.py,flowdock/data/components/process_mols.py,flowdock/data/components/residue_constants.py,flowdock/data/components/uff_parameters.csv,flowdock/data/components/chemical/*,flowdock/utils/data_utils.py
|
||||||
|
# - --ignore-words-list=abc,def
|
||||||
|
|
||||||
|
# jupyter notebook cell output clearing
|
||||||
|
- repo: https://github.com/kynan/nbstripout
|
||||||
|
rev: 0.8.1
|
||||||
|
hooks:
|
||||||
|
- id: nbstripout
|
||||||
|
|
||||||
|
# jupyter notebook linting
|
||||||
|
- repo: https://github.com/nbQA-dev/nbQA
|
||||||
|
rev: 1.9.1
|
||||||
|
hooks:
|
||||||
|
- id: nbqa-black
|
||||||
|
args: ["--line-length=99"]
|
||||||
|
- id: nbqa-isort
|
||||||
|
args: ["--profile=black"]
|
||||||
|
- id: nbqa-flake8
|
||||||
|
args:
|
||||||
|
[
|
||||||
|
"--extend-ignore=E203,E402,E501,F401,F841",
|
||||||
|
"--exclude=logs/*,data/*",
|
||||||
|
]
|
||||||
2
.project-root
Normal file
2
.project-root
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# this file is required for inferring the project root directory
|
||||||
|
# do not delete
|
||||||
49
Dockerfile
Normal file
49
Dockerfile
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
FROM pytorch/pytorch:2.2.1-cuda11.8-cudnn8-runtime
|
||||||
|
|
||||||
|
LABEL authors="BioinfoMachineLearning"
|
||||||
|
|
||||||
|
# Install system requirements
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --reinstall ca-certificates && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
git \
|
||||||
|
wget \
|
||||||
|
libxml2 \
|
||||||
|
libgl-dev \
|
||||||
|
libgl1 \
|
||||||
|
gcc \
|
||||||
|
g++ \
|
||||||
|
procps && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
RUN mkdir -p /software/flowdock
|
||||||
|
WORKDIR /software/flowdock
|
||||||
|
|
||||||
|
# Clone FlowDock repository
|
||||||
|
RUN git clone https://github.com/BioinfoMachineLearning/FlowDock /software/flowdock
|
||||||
|
|
||||||
|
# Create conda environment
|
||||||
|
RUN conda env create -f /software/flowdock/environments/flowdock_environment.yaml
|
||||||
|
|
||||||
|
# Install local package and ProDy
|
||||||
|
RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
|
||||||
|
conda activate FlowDock && \
|
||||||
|
pip install --no-cache-dir -e /software/flowdock && \
|
||||||
|
pip install --no-cache-dir --no-dependencies prody==2.4.1"
|
||||||
|
|
||||||
|
# Create checkpoints directory
|
||||||
|
RUN mkdir -p /software/flowdock/checkpoints
|
||||||
|
|
||||||
|
# Download pretrained weights
|
||||||
|
RUN wget -q https://zenodo.org/records/15066450/files/flowdock_checkpoints.tar.gz && \
|
||||||
|
tar -xzf flowdock_checkpoints.tar.gz && \
|
||||||
|
rm flowdock_checkpoints.tar.gz
|
||||||
|
|
||||||
|
# Activate conda environment by default
|
||||||
|
RUN echo "source /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
|
||||||
|
echo "conda activate FlowDock" >> ~/.bashrc
|
||||||
|
|
||||||
|
# Default shell
|
||||||
|
SHELL ["/bin/bash", "-l", "-c"]
|
||||||
|
CMD ["/bin/bash"]
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2024 BioinfoMachineLearning
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
30
Makefile
Normal file
30
Makefile
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
|
||||||
|
help: ## Show help
|
||||||
|
@grep -E '^[.a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
||||||
|
|
||||||
|
clean: ## Clean autogenerated files
|
||||||
|
rm -rf dist
|
||||||
|
find . -type f -name "*.DS_Store" -ls -delete
|
||||||
|
find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
|
||||||
|
find . | grep -E ".pytest_cache" | xargs rm -rf
|
||||||
|
find . | grep -E ".ipynb_checkpoints" | xargs rm -rf
|
||||||
|
rm -f .coverage
|
||||||
|
|
||||||
|
clean-logs: ## Clean logs
|
||||||
|
rm -rf logs/**
|
||||||
|
|
||||||
|
format: ## Run pre-commit hooks
|
||||||
|
pre-commit run -a
|
||||||
|
|
||||||
|
sync: ## Merge changes from main branch to your current branch
|
||||||
|
git pull
|
||||||
|
git pull origin main
|
||||||
|
|
||||||
|
test: ## Run not slow tests
|
||||||
|
pytest -k "not slow"
|
||||||
|
|
||||||
|
test-full: ## Run all tests
|
||||||
|
pytest
|
||||||
|
|
||||||
|
train: ## Train the model
|
||||||
|
python flowdock/train.py
|
||||||
471
README.md
Normal file
471
README.md
Normal file
@@ -0,0 +1,471 @@
|
|||||||
|
<div align="center">
|
||||||
|
|
||||||
|
# FlowDock
|
||||||
|
|
||||||
|
<a href="https://pytorch.org/get-started/locally/"><img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-ee4c2c?logo=pytorch&logoColor=white"></a>
|
||||||
|
<a href="https://pytorchlightning.ai/"><img alt="Lightning" src="https://img.shields.io/badge/-Lightning-792ee5?logo=pytorchlightning&logoColor=white"></a>
|
||||||
|
<a href="https://hydra.cc/"><img alt="Config: Hydra" src="https://img.shields.io/badge/Config-Hydra-89b8cd"></a>
|
||||||
|
|
||||||
|
<!-- <a href="https://github.com/ashleve/lightning-hydra-template"><img alt="Template" src="https://img.shields.io/badge/-Lightning--Hydra--Template-017F2F?style=flat&logo=github&labelColor=gray"></a><br> -->
|
||||||
|
|
||||||
|
[](https://arxiv.org/abs/2412.10966)
|
||||||
|
[](https://academic.oup.com/bioinformatics/article/41/Supplement_1/i198/8199366)
|
||||||
|
[](https://doi.org/10.5281/zenodo.15066450)
|
||||||
|
|
||||||
|
<img src="./img/FlowDock.png" width="600">
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
This is the official codebase of the paper
|
||||||
|
|
||||||
|
**FlowDock: Geometric Flow Matching for Generative Protein-Ligand Docking and Affinity Prediction**
|
||||||
|
|
||||||
|
\[[arXiv](https://arxiv.org/abs/2412.10966)\] \[[ISMB](https://academic.oup.com/bioinformatics/article/41/Supplement_1/i198/8199366)\] \[[Neurosnap](https://neurosnap.ai/service/FlowDock)\] \[[Tamarind Bio](https://app.tamarind.bio/tools/flowdock)\]
|
||||||
|
|
||||||
|
<div align="center">
|
||||||
|
|
||||||
|

|
||||||
|

|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
## Contents
|
||||||
|
|
||||||
|
- [FlowDock](#flowdock)
|
||||||
|
- [Description](#description)
|
||||||
|
- [Contents](#contents)
|
||||||
|
- [Installation](#installation)
|
||||||
|
- [How to prepare data for `FlowDock`](#how-to-prepare-data-for-flowdock)
|
||||||
|
- [Generating ESM2 embeddings for each protein (optional, cached input data available on SharePoint)](#generating-esm2-embeddings-for-each-protein-optional-cached-input-data-available-on-sharepoint)
|
||||||
|
- [Predicting apo protein structures using ESMFold (optional, cached data available on Zenodo)](#predicting-apo-protein-structures-using-esmfold-optional-cached-data-available-on-zenodo)
|
||||||
|
- [How to train `FlowDock`](#how-to-train-flowdock)
|
||||||
|
- [How to evaluate `FlowDock`](#how-to-evaluate-flowdock)
|
||||||
|
- [How to create comparative plots of benchmarking results](#how-to-create-comparative-plots-of-benchmarking-results)
|
||||||
|
- [How to predict new protein-ligand complex structures and their affinities using `FlowDock`](#how-to-predict-new-protein-ligand-complex-structures-and-their-affinities-using-flowdock)
|
||||||
|
- [For developers](#for-developers)
|
||||||
|
- [Docker](#docker)
|
||||||
|
- [Acknowledgements](#acknowledgements)
|
||||||
|
- [License](#license)
|
||||||
|
- [Citing this work](#citing-this-work)
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
<details>
|
||||||
|
|
||||||
|
Install Mamba
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
|
||||||
|
bash Miniforge3-$(uname)-$(uname -m).sh # accept all terms and install to the default location
|
||||||
|
rm Miniforge3-$(uname)-$(uname -m).sh # (optionally) remove installer after using it
|
||||||
|
source ~/.bashrc # alternatively, one can restart their shell session to achieve the same result
|
||||||
|
```
|
||||||
|
|
||||||
|
Install dependencies
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# clone project
|
||||||
|
git clone https://github.com/BioinfoMachineLearning/FlowDock
|
||||||
|
cd FlowDock
|
||||||
|
|
||||||
|
# create conda environment
|
||||||
|
mamba env create -f environments/flowdock_environment.yaml
|
||||||
|
conda activate FlowDock # NOTE: one still needs to use `conda` to (de)activate environments
|
||||||
|
pip3 install -e . # install local project as package
|
||||||
|
pip3 install prody==2.4.1 --no-dependencies # install ProDy without NumPy dependency
|
||||||
|
```
|
||||||
|
|
||||||
|
Download checkpoints
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# pretrained NeuralPLexer weights
|
||||||
|
cd checkpoints/
|
||||||
|
wget https://zenodo.org/records/10373581/files/neuralplexermodels_downstream_datasets_predictions.zip
|
||||||
|
unzip neuralplexermodels_downstream_datasets_predictions.zip
|
||||||
|
rm neuralplexermodels_downstream_datasets_predictions.zip
|
||||||
|
cd ../
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# pretrained FlowDock weights
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_checkpoints.tar.gz
|
||||||
|
tar -xzf flowdock_checkpoints.tar.gz
|
||||||
|
rm flowdock_checkpoints.tar.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
Download preprocessed datasets
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# cached input data for training/validation/testing
|
||||||
|
wget "https://mailmissouri-my.sharepoint.com/:u:/g/personal/acmwhb_umsystem_edu/ER1hctIBhDVFjM7YepOI6WcBXNBm4_e6EBjFEHAM1A3y5g?download=1"
|
||||||
|
tar -xzf flowdock_data_cache.tar.gz
|
||||||
|
rm flowdock_data_cache.tar.gz
|
||||||
|
|
||||||
|
# cached data for PDBBind, Binding MOAD, DockGen, and the PDB-based van der Mers (vdM) dataset
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_pdbbind_data.tar.gz
|
||||||
|
tar -xzf flowdock_pdbbind_data.tar.gz
|
||||||
|
rm flowdock_pdbbind_data.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_moad_data.tar.gz
|
||||||
|
tar -xzf flowdock_moad_data.tar.gz
|
||||||
|
rm flowdock_moad_data.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_dockgen_data.tar.gz
|
||||||
|
tar -xzf flowdock_dockgen_data.tar.gz
|
||||||
|
rm flowdock_dockgen_data.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_pdbsidechain_data.tar.gz
|
||||||
|
tar -xzf flowdock_pdbsidechain_data.tar.gz
|
||||||
|
rm flowdock_pdbsidechain_data.tar.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## How to prepare data for `FlowDock`
|
||||||
|
|
||||||
|
<details>
|
||||||
|
|
||||||
|
**NOTE:** The following steps (besides downloading PDBBind and Binding MOAD's PDB files) are only necessary if one wants to fully process each of the following datasets manually.
|
||||||
|
Otherwise, preprocessed versions of each dataset can be found on [Zenodo](https://zenodo.org/records/15066450).
|
||||||
|
|
||||||
|
Download data
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# fetch preprocessed PDBBind and Binding MOAD (as well as the optional DockGen and vdM datasets)
|
||||||
|
cd data/
|
||||||
|
|
||||||
|
wget "https://mailmissouri-my.sharepoint.com/:u:/g/personal/acmwhb_umsystem_edu/EXesf4oh6ztOusGqFcDyqP0Bvk-LdJ1DagEl8GNK-HxDtg?download=1"
|
||||||
|
wget https://zenodo.org/records/10656052/files/BindingMOAD_2020_processed.tar
|
||||||
|
wget https://zenodo.org/records/10656052/files/DockGen.tar
|
||||||
|
wget https://files.ipd.uw.edu/pub/training_sets/pdb_2021aug02.tar.gz
|
||||||
|
|
||||||
|
mv EXesf4oh6ztOusGqFcDyqP0Bvk-LdJ1DagEl8GNK-HxDtg?download=1 PDBBind.tar.gz
|
||||||
|
|
||||||
|
tar -xzf PDBBind.tar.gz
|
||||||
|
tar -xf BindingMOAD_2020_processed.tar
|
||||||
|
tar -xf DockGen.tar
|
||||||
|
tar -xzf pdb_2021aug02.tar.gz
|
||||||
|
|
||||||
|
rm PDBBind.tar.gz BindingMOAD_2020_processed.tar DockGen.tar pdb_2021aug02.tar.gz
|
||||||
|
|
||||||
|
mkdir pdbbind/ moad/ pdbsidechain/
|
||||||
|
mv PDBBind_processed/ pdbbind/
|
||||||
|
mv BindingMOAD_2020_processed/ moad/
|
||||||
|
mv pdb_2021aug02/ pdbsidechain/
|
||||||
|
|
||||||
|
cd ../
|
||||||
|
```
|
||||||
|
|
||||||
|
Lastly, to finetune `FlowDock` using the `PLINDER` dataset, one must first prepare this data for training
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# fetch PLINDER data (NOTE: requires ~1 hour to download and ~750G of storage)
|
||||||
|
export PLINDER_MOUNT="$(pwd)/data/PLINDER"
|
||||||
|
mkdir -p "$PLINDER_MOUNT" # create the directory if it doesn't exist
|
||||||
|
|
||||||
|
plinder_download -y
|
||||||
|
```
|
||||||
|
|
||||||
|
### Generating ESM2 embeddings for each protein (optional, cached input data available on SharePoint)
|
||||||
|
|
||||||
|
To generate the ESM2 embeddings for the protein inputs,
|
||||||
|
first create all the corresponding FASTA files for each protein sequence
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/data/components/esm_embedding_preparation.py --dataset pdbbind --data_dir data/pdbbind/PDBBind_processed/ --out_file data/pdbbind/pdbbind_sequences.fasta
|
||||||
|
python flowdock/data/components/esm_embedding_preparation.py --dataset moad --data_dir data/moad/BindingMOAD_2020_processed/pdb_protein/ --out_file data/moad/moad_sequences.fasta
|
||||||
|
python flowdock/data/components/esm_embedding_preparation.py --dataset dockgen --data_dir data/DockGen/processed_files/ --out_file data/DockGen/dockgen_sequences.fasta
|
||||||
|
python flowdock/data/components/esm_embedding_preparation.py --dataset pdbsidechain --data_dir data/pdbsidechain/pdb_2021aug02/pdb/ --out_file data/pdbsidechain/pdbsidechain_sequences.fasta
|
||||||
|
```
|
||||||
|
|
||||||
|
Then, generate all ESM2 embeddings in batch using the ESM repository's helper script
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/data/components/esm_embedding_extraction.py esm2_t33_650M_UR50D data/pdbbind/pdbbind_sequences.fasta data/pdbbind/embeddings_output --repr_layers 33 --include per_tok --truncation_seq_length 4096 --cuda_device_index 0
|
||||||
|
python flowdock/data/components/esm_embedding_extraction.py esm2_t33_650M_UR50D data/moad/moad_sequences.fasta data/moad/embeddings_output --repr_layers 33 --include per_tok --truncation_seq_length 4096 --cuda_device_index 0
|
||||||
|
python flowdock/data/components/esm_embedding_extraction.py esm2_t33_650M_UR50D data/DockGen/dockgen_sequences.fasta data/DockGen/embeddings_output --repr_layers 33 --include per_tok --truncation_seq_length 4096 --cuda_device_index 0
|
||||||
|
python flowdock/data/components/esm_embedding_extraction.py esm2_t33_650M_UR50D data/pdbsidechain/pdbsidechain_sequences.fasta data/pdbsidechain/embeddings_output --repr_layers 33 --include per_tok --truncation_seq_length 4096 --cuda_device_index 0
|
||||||
|
```
|
||||||
|
|
||||||
|
### Predicting apo protein structures using ESMFold (optional, cached data available on Zenodo)
|
||||||
|
|
||||||
|
To generate the apo version of each protein structure,
|
||||||
|
first create ESMFold-ready versions of the combined FASTA files
|
||||||
|
prepared above by the script `esm_embedding_preparation.py`
|
||||||
|
for the PDBBind, Binding MOAD, DockGen, and PDBSidechain datasets, respectively
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/data/components/esmfold_sequence_preparation.py dataset=pdbbind
|
||||||
|
python flowdock/data/components/esmfold_sequence_preparation.py dataset=moad
|
||||||
|
python flowdock/data/components/esmfold_sequence_preparation.py dataset=dockgen
|
||||||
|
python flowdock/data/components/esmfold_sequence_preparation.py dataset=pdbsidechain
|
||||||
|
```
|
||||||
|
|
||||||
|
Then, predict each apo protein structure using ESMFold's batch
|
||||||
|
inference script
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Note: Having a CUDA-enabled device available when running this script is highly recommended
|
||||||
|
python flowdock/data/components/esmfold_batch_structure_prediction.py -i data/pdbbind/pdbbind_esmfold_sequences.fasta -o data/pdbbind/pdbbind_esmfold_structures --cuda-device-index 0 --skip-existing
|
||||||
|
python flowdock/data/components/esmfold_batch_structure_prediction.py -i data/moad/moad_esmfold_sequences.fasta -o data/moad/moad_esmfold_structures --cuda-device-index 0 --skip-existing
|
||||||
|
python flowdock/data/components/esmfold_batch_structure_prediction.py -i data/DockGen/dockgen_esmfold_sequences.fasta -o data/DockGen/dockgen_esmfold_structures --cuda-device-index 0 --skip-existing
|
||||||
|
python flowdock/data/components/esmfold_batch_structure_prediction.py -i data/pdbsidechain/pdbsidechain_esmfold_sequences.fasta -o data/pdbsidechain/pdbsidechain_esmfold_structures --cuda-device-index 0 --skip-existing
|
||||||
|
```
|
||||||
|
|
||||||
|
Align each apo protein structure to its corresponding
|
||||||
|
holo protein structure counterpart in PDBBind, Binding MOAD, DockGen, and PDBSidechain,
|
||||||
|
taking ligand conformations into account during each alignment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/data/components/esmfold_apo_to_holo_alignment.py dataset=pdbbind num_workers=1
|
||||||
|
python flowdock/data/components/esmfold_apo_to_holo_alignment.py dataset=moad num_workers=1
|
||||||
|
python flowdock/data/components/esmfold_apo_to_holo_alignment.py dataset=dockgen num_workers=1
|
||||||
|
python flowdock/data/components/esmfold_apo_to_holo_alignment.py dataset=pdbsidechain num_workers=1
|
||||||
|
```
|
||||||
|
|
||||||
|
Lastly, assess the apo-to-holo alignments in terms of statistics and structural metrics
|
||||||
|
to enable runtime-dynamic dataset filtering using such information
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/data/components/esmfold_apo_to_holo_assessment.py dataset=pdbbind usalign_exec_path=$MY_USALIGN_EXEC_PATH
|
||||||
|
python flowdock/data/components/esmfold_apo_to_holo_assessment.py dataset=moad usalign_exec_path=$MY_USALIGN_EXEC_PATH
|
||||||
|
python flowdock/data/components/esmfold_apo_to_holo_assessment.py dataset=dockgen usalign_exec_path=$MY_USALIGN_EXEC_PATH
|
||||||
|
python flowdock/data/components/esmfold_apo_to_holo_assessment.py dataset=pdbsidechain usalign_exec_path=$MY_USALIGN_EXEC_PATH
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## How to train `FlowDock`
|
||||||
|
|
||||||
|
<details>
|
||||||
|
|
||||||
|
Train model with default configuration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# train on CPU
|
||||||
|
python flowdock/train.py trainer=cpu
|
||||||
|
|
||||||
|
# train on GPU
|
||||||
|
python flowdock/train.py trainer=gpu
|
||||||
|
```
|
||||||
|
|
||||||
|
Train model with chosen experiment configuration from [configs/experiment/](configs/experiment/)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/train.py experiment=experiment_name.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
For example, reproduce `FlowDock`'s default model training run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/train.py experiment=flowdock_fm
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** You can override any parameter from command line like this
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/train.py experiment=flowdock_fm trainer.max_epochs=20 data.batch_size=8
|
||||||
|
```
|
||||||
|
|
||||||
|
For example, override parameters to finetune `FlowDock`'s pretrained weights using a new dataset such as [PLINDER](https://www.plinder.sh/)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/train.py experiment=flowdock_fm data=plinder ckpt_path=checkpoints/esmfold_prior_paper_weights.ckpt
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## How to evaluate `FlowDock`
|
||||||
|
|
||||||
|
<details>
|
||||||
|
|
||||||
|
To reproduce `FlowDock`'s evaluation results for structure prediction, please refer to its documentation in version `0.6.0-FlowDock` of the [PoseBench](https://github.com/BioinfoMachineLearning/PoseBench/tree/0.6.0-FlowDock?tab=readme-ov-file#how-to-run-inference-with-flowdock) GitHub repository.
|
||||||
|
|
||||||
|
To reproduce `FlowDock`'s evaluation results for binding affinity prediction using the PDBBind dataset
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/eval.py data.test_datasets=[pdbbind] ckpt_path=checkpoints/esmfold_prior_paper_weights-EMA.ckpt trainer=gpu
|
||||||
|
... # re-run two more times to gather triplicate results
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## How to create comparative plots of benchmarking results
|
||||||
|
|
||||||
|
<details>
|
||||||
|
|
||||||
|
Download baseline method predictions and results
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# cached predictions and evaluation metrics for reproducing structure prediction paper results
|
||||||
|
wget https://zenodo.org/records/15066450/files/alphafold3_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf alphafold3_baseline_method_predictions.tar.gz
|
||||||
|
rm alphafold3_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/chai_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf chai_baseline_method_predictions.tar.gz
|
||||||
|
rm chai_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/diffdock_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf diffdock_baseline_method_predictions.tar.gz
|
||||||
|
rm diffdock_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/dynamicbind_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf dynamicbind_baseline_method_predictions.tar.gz
|
||||||
|
rm dynamicbind_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf flowdock_baseline_method_predictions.tar.gz
|
||||||
|
rm flowdock_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_aft_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf flowdock_aft_baseline_method_predictions.tar.gz
|
||||||
|
rm flowdock_aft_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_pft_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf flowdock_pft_baseline_method_predictions.tar.gz
|
||||||
|
rm flowdock_pft_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_esmfold_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf flowdock_esmfold_baseline_method_predictions.tar.gz
|
||||||
|
rm flowdock_esmfold_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_chai_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf flowdock_chai_baseline_method_predictions.tar.gz
|
||||||
|
rm flowdock_chai_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/flowdock_hp_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf flowdock_hp_baseline_method_predictions.tar.gz
|
||||||
|
rm flowdock_hp_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/neuralplexer_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf neuralplexer_baseline_method_predictions.tar.gz
|
||||||
|
rm neuralplexer_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/vina_p2rank_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf vina_p2rank_baseline_method_predictions.tar.gz
|
||||||
|
rm vina_p2rank_baseline_method_predictions.tar.gz
|
||||||
|
|
||||||
|
wget https://zenodo.org/records/15066450/files/rfaa_baseline_method_predictions.tar.gz
|
||||||
|
tar -xzf rfaa_baseline_method_predictions.tar.gz
|
||||||
|
rm rfaa_baseline_method_predictions.tar.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
Reproduce paper result figures
|
||||||
|
|
||||||
|
```bash
|
||||||
|
jupyter notebook notebooks/casp16_binding_affinity_prediction_results_plotting.ipynb
|
||||||
|
jupyter notebook notebooks/casp16_flowdock_vs_multicom_ligand_structure_prediction_results_plotting.ipynb
|
||||||
|
jupyter notebook notebooks/dockgen_structure_prediction_results_plotting.ipynb
|
||||||
|
jupyter notebook notebooks/posebusters_benchmark_structure_prediction_chemical_similarity_analysis.ipynb
|
||||||
|
jupyter notebook notebooks/posebusters_benchmark_structure_prediction_results_plotting.ipynb
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## How to predict new protein-ligand complex structures and their affinities using `FlowDock`
|
||||||
|
|
||||||
|
<details>
|
||||||
|
|
||||||
|
For example, generate new protein-ligand complexes for a pair of protein sequence and ligand SMILES strings such as those of the PDBBind 2020 test target `6i67`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/sample.py ckpt_path=checkpoints/esmfold_prior_paper_weights-EMA.ckpt model.cfg.prior_type=esmfold sampling_task=batched_structure_sampling input_receptor='YNKIVHLLVAEPEKIYAMPDPTVPDSDIKALTTLCDLADRELVVIIGWAKHIPGFSTLSLADQMSLLQSAWMEILILGVVYRSLFEDELVYADDYIMDEDQSKLAGLLDLNNAILQLVKKYKSMKLEKEEFVTLKAIALANSDSMHIEDVEAVQKLQDVLHEALQDYEAGQHMEDPRRAGKMLMTLPLLRQTSTKAVQHFYNKLEGKVPMHKLFLEMLEAKV' input_ligand='"c1cc2c(cc1O)CCCC2"' input_template=data/pdbbind/pdbbind_holo_aligned_esmfold_structures/6i67_holo_aligned_esmfold_protein.pdb sample_id='6i67' out_path='./6i67_sampled_structures/' n_samples=5 chunk_size=5 num_steps=40 sampler=VDODE sampler_eta=1.0 start_time='1.0' use_template=true separate_pdb=true visualize_sample_trajectories=true auxiliary_estimation_only=false esmfold_chunk_size=null trainer=gpu
|
||||||
|
```
|
||||||
|
|
||||||
|
Or, for example, generate new protein-ligand complexes for pairs of protein sequences and (multi-)ligand SMILES strings (delimited via `|`) such as those of the CASP15 target `T1152`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/sample.py ckpt_path=checkpoints/esmfold_prior_paper_weights-EMA.ckpt model.cfg.prior_type=esmfold sampling_task=batched_structure_sampling input_receptor='MYTVKPGDTMWKIAVKYQIGISEIIAANPQIKNPNLIYPGQKINIP|MYTVKPGDTMWKIAVKYQIGISEIIAANPQIKNPNLIYPGQKINIP|MYTVKPGDTMWKIAVKYQIGISEIIAANPQIKNPNLIYPGQKINIPN' input_ligand='"CC(=O)NC1C(O)OC(CO)C(OC2OC(CO)C(OC3OC(CO)C(O)C(O)C3NC(C)=O)C(O)C2NC(C)=O)C1O"' input_template=data/test_cases/predicted_structures/T1152.pdb sample_id='T1152' out_path='./T1152_sampled_structures/' n_samples=5 chunk_size=5 num_steps=40 sampler=VDODE sampler_eta=1.0 start_time='1.0' use_template=true separate_pdb=true visualize_sample_trajectories=true auxiliary_estimation_only=false esmfold_chunk_size=null trainer=gpu
|
||||||
|
```
|
||||||
|
|
||||||
|
If you do not already have a template protein structure available for your target of interest, set `input_template=null` to instead have the sampling script predict the ESMFold structure of your provided `input_receptor` sequence before running the sampling pipeline. For more information regarding the input arguments available for sampling, please refer to the config at `configs/sample.yaml`.
|
||||||
|
|
||||||
|
**NOTE:** To optimize prediction runtimes, a `csv_path` can be specified instead of the `input_receptor`, `input_ligand`, and `input_template` CLI arguments to perform *batched* prediction for a collection of protein-ligand sequence pairs, each represented as a CSV row containing column values for `id`, `input_receptor`, `input_ligand`, and `input_template`. Additionally, disabling `visualize_sample_trajectories` may reduce storage requirements when predicting a large batch of inputs.
|
||||||
|
|
||||||
|
For instance, one can perform batched prediction as follows:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python flowdock/sample.py ckpt_path=checkpoints/esmfold_prior_paper_weights-EMA.ckpt model.cfg.prior_type=esmfold sampling_task=batched_structure_sampling csv_path='./data/test_cases/prediction_inputs/flowdock_batched_inputs.csv' out_path='./T1152_batch_sampled_structures/' n_samples=5 chunk_size=5 num_steps=40 sampler=VDODE sampler_eta=1.0 start_time='1.0' use_template=true separate_pdb=true visualize_sample_trajectories=false auxiliary_estimation_only=false esmfold_chunk_size=null trainer=gpu
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## For developers
|
||||||
|
|
||||||
|
<details>
|
||||||
|
|
||||||
|
Set up `pre-commit` (one time only) for automatic code linting and formatting upon each `git commit`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pre-commit install
|
||||||
|
```
|
||||||
|
|
||||||
|
Manually reformat all files in the project, as desired
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pre-commit run -a
|
||||||
|
```
|
||||||
|
|
||||||
|
Update dependencies in a `*_environment.yml` file
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mamba env export > env.yaml # e.g., run this after installing new dependencies locally
|
||||||
|
diff environments/flowdock_environment.yaml env.yaml # note the differences and copy accepted changes back into e.g., `environments/flowdock_environment.yaml`
|
||||||
|
rm env.yaml # clean up temporary environment file
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## Docker
|
||||||
|
|
||||||
|
<details>
|
||||||
|
|
||||||
|
Given that this tool has a number of dependencies, it may be easier to run it in a Docker container.
|
||||||
|
|
||||||
|
Pull from [Docker Hub](https://hub.docker.com/repository/docker/cford38/flowdock): `docker pull cford38/flowdock:latest`
|
||||||
|
|
||||||
|
Alternatively, build the Docker image locally:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build --platform linux/amd64 -t flowdock .
|
||||||
|
```
|
||||||
|
|
||||||
|
Then, run the Docker container (and mount your local `checkpoints/` directory)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --gpus all -v ./checkpoints:/software/flowdock/checkpoints --rm --name flowdock -it flowdock /bin/bash
|
||||||
|
|
||||||
|
# docker run --gpus all -v ./checkpoints:/software/flowdock/checkpoints --rm --name flowdock -it cford38/flowdock:latest /bin/bash
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## Acknowledgements
|
||||||
|
|
||||||
|
`FlowDock` builds upon the source code and data from the following projects:
|
||||||
|
|
||||||
|
- [DiffDock](https://github.com/gcorso/DiffDock)
|
||||||
|
- [lightning-hydra-template](https://github.com/ashleve/lightning-hydra-template)
|
||||||
|
- [NeuralPLexer](https://github.com/zrqiao/NeuralPLexer)
|
||||||
|
|
||||||
|
We thank all their contributors and maintainers!
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is covered under the **MIT License**.
|
||||||
|
|
||||||
|
## Citing this work
|
||||||
|
|
||||||
|
If you use the code or data associated with this package or otherwise find this work useful, please cite:
|
||||||
|
|
||||||
|
```bibtex
|
||||||
|
@inproceedings{morehead2025flowdock,
|
||||||
|
title={FlowDock: Geometric Flow Matching for Generative Protein-Ligand Docking and Affinity Prediction},
|
||||||
|
author={Alex Morehead and Jianlin Cheng},
|
||||||
|
booktitle={Intelligent Systems for Molecular Biology (ISMB)},
|
||||||
|
year=2025,
|
||||||
|
}
|
||||||
|
```
|
||||||
6
citation.bib
Normal file
6
citation.bib
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
@inproceedings{morehead2025flowdock,
|
||||||
|
title={FlowDock: Geometric Flow Matching for Generative Protein-Ligand Docking and Affinity Prediction},
|
||||||
|
author={Alex Morehead and Jianlin Cheng},
|
||||||
|
booktitle={Intelligent Systems for Molecular Biology (ISMB)},
|
||||||
|
year=2025,
|
||||||
|
}
|
||||||
1
configs/__init__.py
Normal file
1
configs/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# this file is needed here to include configs when building project as a package
|
||||||
21
configs/callbacks/default.yaml
Normal file
21
configs/callbacks/default.yaml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
defaults:
|
||||||
|
- ema
|
||||||
|
- last_model_checkpoint
|
||||||
|
- learning_rate_monitor
|
||||||
|
- model_checkpoint
|
||||||
|
- model_summary
|
||||||
|
- rich_progress_bar
|
||||||
|
- _self_
|
||||||
|
|
||||||
|
last_model_checkpoint:
|
||||||
|
dirpath: ${paths.output_dir}/checkpoints
|
||||||
|
filename: "last"
|
||||||
|
monitor: null
|
||||||
|
verbose: True
|
||||||
|
auto_insert_metric_name: False
|
||||||
|
every_n_epochs: 1
|
||||||
|
save_on_train_epoch_end: True
|
||||||
|
enable_version_counter: False
|
||||||
|
|
||||||
|
model_summary:
|
||||||
|
max_depth: -1
|
||||||
15
configs/callbacks/early_stopping.yaml
Normal file
15
configs/callbacks/early_stopping.yaml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html
|
||||||
|
|
||||||
|
early_stopping:
|
||||||
|
_target_: lightning.pytorch.callbacks.EarlyStopping
|
||||||
|
monitor: ??? # quantity to be monitored, must be specified !!!
|
||||||
|
min_delta: 0. # minimum change in the monitored quantity to qualify as an improvement
|
||||||
|
patience: 3 # number of checks with no improvement after which training will be stopped
|
||||||
|
verbose: False # verbosity mode
|
||||||
|
mode: "min" # "max" means higher metric value is better; can also be "min"
|
||||||
|
strict: True # whether to crash the training if monitor is not found in the validation metrics
|
||||||
|
check_finite: True # when set True, stops training when the monitor becomes NaN or infinite
|
||||||
|
stopping_threshold: null # stop training immediately once the monitored quantity reaches this threshold
|
||||||
|
divergence_threshold: null # stop training as soon as the monitored quantity becomes worse than this threshold
|
||||||
|
check_on_train_epoch_end: null # whether to run early stopping at the end of the training epoch
|
||||||
|
# log_rank_zero_only: False # this keyword argument isn't available in stable version
|
||||||
10
configs/callbacks/ema.yaml
Normal file
10
configs/callbacks/ema.yaml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/common/callbacks/ema.py
|
||||||
|
|
||||||
|
# Maintains an exponential moving average (EMA) of model weights.
|
||||||
|
# Look at the above link for more detailed information regarding the original implementation.
|
||||||
|
ema:
|
||||||
|
_target_: flowdock.models.components.callbacks.ema.EMA
|
||||||
|
decay: 0.999
|
||||||
|
validate_original_weights: false
|
||||||
|
every_n_steps: 4
|
||||||
|
cpu_offload: false
|
||||||
21
configs/callbacks/last_model_checkpoint.yaml
Normal file
21
configs/callbacks/last_model_checkpoint.yaml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html
|
||||||
|
|
||||||
|
last_model_checkpoint:
|
||||||
|
# NOTE: this is a direct copy of `model_checkpoint`,
|
||||||
|
# which is necessary to work around the
|
||||||
|
# key-duplication limitations of YAML config files
|
||||||
|
_target_: flowdock.models.components.callbacks.ema.EMAModelCheckpoint
|
||||||
|
dirpath: null # directory to save the model file
|
||||||
|
filename: null # checkpoint filename
|
||||||
|
monitor: null # name of the logged metric which determines when model is improving
|
||||||
|
verbose: False # verbosity mode
|
||||||
|
save_last: null # additionally always save an exact copy of the last checkpoint to a file last.ckpt
|
||||||
|
save_top_k: 1 # save k best models (determined by above metric)
|
||||||
|
mode: "min" # "max" means higher metric value is better; can also be "min"
|
||||||
|
auto_insert_metric_name: True # when True, the checkpoints filenames will contain the metric name
|
||||||
|
save_weights_only: False # if True, then only the model’s weights will be saved
|
||||||
|
every_n_train_steps: null # number of training steps between checkpoints
|
||||||
|
train_time_interval: null # checkpoints are monitored at the specified time interval
|
||||||
|
every_n_epochs: null # number of epochs between checkpoints
|
||||||
|
save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or the end of validation
|
||||||
|
enable_version_counter: True # enables versioning for checkpoint names
|
||||||
7
configs/callbacks/learning_rate_monitor.yaml
Normal file
7
configs/callbacks/learning_rate_monitor.yaml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html
|
||||||
|
|
||||||
|
learning_rate_monitor:
|
||||||
|
_target_: lightning.pytorch.callbacks.LearningRateMonitor
|
||||||
|
logging_interval: null # set to `epoch` or `step` to log learning rate of all optimizers at the same interval, or set to `null` to log at individual interval according to the interval key of each scheduler
|
||||||
|
log_momentum: false # whether to also log the momentum values of the optimizer, if the optimizer has the `momentum` or `betas` attribute
|
||||||
|
log_weight_decay: false # whether to also log the weight decay values of the optimizer, if the optimizer has the `weight_decay` attribute
|
||||||
18
configs/callbacks/model_checkpoint.yaml
Normal file
18
configs/callbacks/model_checkpoint.yaml
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html
|
||||||
|
|
||||||
|
model_checkpoint:
|
||||||
|
_target_: flowdock.models.components.callbacks.ema.EMAModelCheckpoint
|
||||||
|
dirpath: null # directory to save the model file
|
||||||
|
filename: "best" # checkpoint filename
|
||||||
|
monitor: val_sampling/ligand_hit_score_2A_epoch # name of the logged metric which determines when model is improving
|
||||||
|
verbose: True # verbosity mode
|
||||||
|
save_last: False # additionally always save an exact copy of the last checkpoint to a file last.ckpt
|
||||||
|
save_top_k: 1 # save k best models (determined by above metric)
|
||||||
|
mode: "max" # "max" means higher metric value is better; can also be "min"
|
||||||
|
auto_insert_metric_name: True # when True, the checkpoints filenames will contain the metric name
|
||||||
|
save_weights_only: False # if True, then only the model’s weights will be saved
|
||||||
|
every_n_train_steps: null # number of training steps between checkpoints
|
||||||
|
train_time_interval: null # checkpoints are monitored at the specified time interval
|
||||||
|
every_n_epochs: null # number of epochs between checkpoints
|
||||||
|
save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or the end of validation
|
||||||
|
enable_version_counter: False # enables versioning for checkpoint names
|
||||||
5
configs/callbacks/model_summary.yaml
Normal file
5
configs/callbacks/model_summary.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html
|
||||||
|
|
||||||
|
model_summary:
|
||||||
|
_target_: lightning.pytorch.callbacks.RichModelSummary
|
||||||
|
max_depth: 1 # the maximum depth of layer nesting that the summary will include
|
||||||
0
configs/callbacks/none.yaml
Normal file
0
configs/callbacks/none.yaml
Normal file
4
configs/callbacks/rich_progress_bar.yaml
Normal file
4
configs/callbacks/rich_progress_bar.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
# https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichProgressBar.html
|
||||||
|
|
||||||
|
rich_progress_bar:
|
||||||
|
_target_: lightning.pytorch.callbacks.RichProgressBar
|
||||||
35
configs/debug/default.yaml
Normal file
35
configs/debug/default.yaml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# @package _global_
|
||||||
|
|
||||||
|
# default debugging setup, runs 1 full epoch
|
||||||
|
# other debugging configs can inherit from this one
|
||||||
|
|
||||||
|
# overwrite task name so debugging logs are stored in separate folder
|
||||||
|
task_name: "debug"
|
||||||
|
|
||||||
|
# disable callbacks and loggers during debugging
|
||||||
|
callbacks: null
|
||||||
|
logger: null
|
||||||
|
|
||||||
|
extras:
|
||||||
|
ignore_warnings: False
|
||||||
|
enforce_tags: False
|
||||||
|
|
||||||
|
# sets level of all command line loggers to 'DEBUG'
|
||||||
|
# https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
|
||||||
|
hydra:
|
||||||
|
job_logging:
|
||||||
|
root:
|
||||||
|
level: DEBUG
|
||||||
|
|
||||||
|
# use this to also set hydra loggers to 'DEBUG'
|
||||||
|
# verbose: True
|
||||||
|
|
||||||
|
trainer:
|
||||||
|
max_epochs: 1
|
||||||
|
accelerator: cpu # debuggers don't like gpus
|
||||||
|
devices: 1 # debuggers don't like multiprocessing
|
||||||
|
detect_anomaly: true # raise exception if NaN or +/-inf is detected in any tensor
|
||||||
|
|
||||||
|
data:
|
||||||
|
num_workers: 0 # debuggers don't like multiprocessing
|
||||||
|
pin_memory: False # disable gpu memory pin
|
||||||
9
configs/debug/fdr.yaml
Normal file
9
configs/debug/fdr.yaml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# @package _global_
|
||||||
|
|
||||||
|
# runs 1 train, 1 validation and 1 test step
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
- default
|
||||||
|
|
||||||
|
trainer:
|
||||||
|
fast_dev_run: true
|
||||||
12
configs/debug/limit.yaml
Normal file
12
configs/debug/limit.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# @package _global_
|
||||||
|
|
||||||
|
# uses only 1% of the training data and 5% of validation/test data
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
- default
|
||||||
|
|
||||||
|
trainer:
|
||||||
|
max_epochs: 3
|
||||||
|
limit_train_batches: 0.01
|
||||||
|
limit_val_batches: 0.05
|
||||||
|
limit_test_batches: 0.05
|
||||||
13
configs/debug/overfit.yaml
Normal file
13
configs/debug/overfit.yaml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# @package _global_
|
||||||
|
|
||||||
|
# overfits to 3 batches
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
- default
|
||||||
|
|
||||||
|
trainer:
|
||||||
|
max_epochs: 20
|
||||||
|
overfit_batches: 3
|
||||||
|
|
||||||
|
# model ckpt and early stopping need to be disabled during overfitting
|
||||||
|
callbacks: null
|
||||||
12
configs/debug/profiler.yaml
Normal file
12
configs/debug/profiler.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# @package _global_
|
||||||
|
|
||||||
|
# runs with execution time profiling
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
- default
|
||||||
|
|
||||||
|
trainer:
|
||||||
|
max_epochs: 1
|
||||||
|
profiler: "simple"
|
||||||
|
# profiler: "advanced"
|
||||||
|
# profiler: "pytorch"
|
||||||
2
configs/environment/default.yaml
Normal file
2
configs/environment/default.yaml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
1
configs/environment/lightning.yaml
Normal file
1
configs/environment/lightning.yaml
Normal file
@@ -0,0 +1 @@
|
|||||||
|
_target_: lightning.fabric.plugins.environments.LightningEnvironment
|
||||||
3
configs/environment/slurm.yaml
Normal file
3
configs/environment/slurm.yaml
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
_target_: lightning.fabric.plugins.environments.SLURMEnvironment
|
||||||
|
auto_requeue: true
|
||||||
|
requeue_signal: null
|
||||||
49
configs/eval.yaml
Normal file
49
configs/eval.yaml
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
# @package _global_
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
- data: combined # choose datamodule with `test_dataloader()` for evaluation
|
||||||
|
- model: flowdock_fm
|
||||||
|
- logger: null
|
||||||
|
- strategy: default
|
||||||
|
- trainer: default
|
||||||
|
- paths: default
|
||||||
|
- extras: default
|
||||||
|
- hydra: default
|
||||||
|
- environment: default
|
||||||
|
- _self_
|
||||||
|
|
||||||
|
task_name: "eval"
|
||||||
|
|
||||||
|
tags: ["eval", "combined", "flowdock_fm"]
|
||||||
|
|
||||||
|
# passing checkpoint path is necessary for evaluation
|
||||||
|
ckpt_path: ???
|
||||||
|
|
||||||
|
# seed for random number generators in pytorch, numpy and python.random
|
||||||
|
seed: null
|
||||||
|
|
||||||
|
# model arguments
|
||||||
|
model:
|
||||||
|
cfg:
|
||||||
|
mol_encoder:
|
||||||
|
from_pretrained: false
|
||||||
|
protein_encoder:
|
||||||
|
from_pretrained: false
|
||||||
|
relational_reasoning:
|
||||||
|
from_pretrained: false
|
||||||
|
contact_predictor:
|
||||||
|
from_pretrained: false
|
||||||
|
score_head:
|
||||||
|
from_pretrained: false
|
||||||
|
confidence:
|
||||||
|
from_pretrained: false
|
||||||
|
affinity:
|
||||||
|
from_pretrained: false
|
||||||
|
task:
|
||||||
|
freeze_mol_encoder: true
|
||||||
|
freeze_protein_encoder: false
|
||||||
|
freeze_relational_reasoning: false
|
||||||
|
freeze_contact_predictor: false
|
||||||
|
freeze_score_head: false
|
||||||
|
freeze_confidence: true
|
||||||
|
freeze_affinity: false
|
||||||
35
configs/experiment/flowdock_fm.yaml
Normal file
35
configs/experiment/flowdock_fm.yaml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# @package _global_
|
||||||
|
|
||||||
|
# to execute this experiment run:
|
||||||
|
# python train.py experiment=flowdock_fm
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
- override /data: combined
|
||||||
|
- override /model: flowdock_fm
|
||||||
|
- override /callbacks: default
|
||||||
|
- override /trainer: default
|
||||||
|
|
||||||
|
# all parameters below will be merged with parameters from default configurations set above
|
||||||
|
# this allows you to overwrite only specified parameters
|
||||||
|
|
||||||
|
tags: ["flowdock_fm", "combined_dataset"]
|
||||||
|
|
||||||
|
seed: 496
|
||||||
|
|
||||||
|
trainer:
|
||||||
|
max_epochs: 300
|
||||||
|
check_val_every_n_epoch: 5 # NOTE: we increase this since validation steps involve full model sampling and evaluation
|
||||||
|
reload_dataloaders_every_n_epochs: 1
|
||||||
|
|
||||||
|
model:
|
||||||
|
optimizer:
|
||||||
|
lr: 2e-4
|
||||||
|
compile: false
|
||||||
|
|
||||||
|
data:
|
||||||
|
batch_size: 16
|
||||||
|
|
||||||
|
logger:
|
||||||
|
wandb:
|
||||||
|
tags: ${tags}
|
||||||
|
group: "FlowDock-FM"
|
||||||
8
configs/extras/default.yaml
Normal file
8
configs/extras/default.yaml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# disable python warnings if they annoy you
|
||||||
|
ignore_warnings: False
|
||||||
|
|
||||||
|
# ask user for tags if none are provided in the config
|
||||||
|
enforce_tags: True
|
||||||
|
|
||||||
|
# pretty print config tree at the start of the run using Rich library
|
||||||
|
print_config: True
|
||||||
50
configs/hparams_search/combined_optuna.yaml
Normal file
50
configs/hparams_search/combined_optuna.yaml
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
# @package _global_
|
||||||
|
|
||||||
|
# example hyperparameter optimization of some experiment with Optuna:
|
||||||
|
# python train.py -m hparams_search=mnist_optuna experiment=example
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
- override /hydra/sweeper: optuna
|
||||||
|
|
||||||
|
# choose metric which will be optimized by Optuna
|
||||||
|
# make sure this is the correct name of some metric logged in lightning module!
|
||||||
|
optimized_metric: "val/loss"
|
||||||
|
|
||||||
|
# here we define Optuna hyperparameter search
|
||||||
|
# it optimizes for value returned from function with @hydra.main decorator
|
||||||
|
# docs: https://hydra.cc/docs/next/plugins/optuna_sweeper
|
||||||
|
hydra:
|
||||||
|
mode: "MULTIRUN" # set hydra to multirun by default if this config is attached
|
||||||
|
|
||||||
|
sweeper:
|
||||||
|
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
|
||||||
|
|
||||||
|
# storage URL to persist optimization results
|
||||||
|
# for example, you can use SQLite if you set 'sqlite:///example.db'
|
||||||
|
storage: null
|
||||||
|
|
||||||
|
# name of the study to persist optimization results
|
||||||
|
study_name: null
|
||||||
|
|
||||||
|
# number of parallel workers
|
||||||
|
n_jobs: 1
|
||||||
|
|
||||||
|
# 'minimize' or 'maximize' the objective
|
||||||
|
direction: minimize
|
||||||
|
|
||||||
|
# total number of runs that will be executed
|
||||||
|
n_trials: 20
|
||||||
|
|
||||||
|
# choose Optuna hyperparameter sampler
|
||||||
|
# you can choose bayesian sampler (tpe), random search (without optimization), grid sampler, and others
|
||||||
|
# docs: https://optuna.readthedocs.io/en/stable/reference/samplers.html
|
||||||
|
sampler:
|
||||||
|
_target_: optuna.samplers.TPESampler
|
||||||
|
seed: 1234
|
||||||
|
n_startup_trials: 10 # number of random sampling runs before optimization starts
|
||||||
|
|
||||||
|
# define hyperparameter search space
|
||||||
|
params:
|
||||||
|
model.optimizer.lr: interval(0.0001, 0.1)
|
||||||
|
data.batch_size: choice(2, 4, 8, 16)
|
||||||
|
model.net.hidden_dim: choice(64, 128, 256)
|
||||||
19
configs/hydra/default.yaml
Normal file
19
configs/hydra/default.yaml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# https://hydra.cc/docs/configure_hydra/intro/
|
||||||
|
|
||||||
|
# enable color logging
|
||||||
|
defaults:
|
||||||
|
- override hydra_logging: colorlog
|
||||||
|
- override job_logging: colorlog
|
||||||
|
|
||||||
|
# output directory, generated dynamically on each run
|
||||||
|
run:
|
||||||
|
dir: ${paths.log_dir}/${task_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
|
||||||
|
sweep:
|
||||||
|
dir: ${paths.log_dir}/${task_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
|
||||||
|
subdir: ${hydra.job.num}
|
||||||
|
|
||||||
|
job_logging:
|
||||||
|
handlers:
|
||||||
|
file:
|
||||||
|
# Incorporates fix from https://github.com/facebookresearch/hydra/pull/2242
|
||||||
|
filename: ${hydra.runtime.output_dir}/${task_name}.log
|
||||||
0
configs/local/.gitkeep
Normal file
0
configs/local/.gitkeep
Normal file
28
configs/logger/aim.yaml
Normal file
28
configs/logger/aim.yaml
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# https://aimstack.io/
|
||||||
|
|
||||||
|
# example usage in lightning module:
|
||||||
|
# https://github.com/aimhubio/aim/blob/main/examples/pytorch_lightning_track.py
|
||||||
|
|
||||||
|
# open the Aim UI with the following command (run in the folder containing the `.aim` folder):
|
||||||
|
# `aim up`
|
||||||
|
|
||||||
|
aim:
|
||||||
|
_target_: aim.pytorch_lightning.AimLogger
|
||||||
|
repo: ${paths.root_dir} # .aim folder will be created here
|
||||||
|
# repo: "aim://ip_address:port" # can instead provide IP address pointing to Aim remote tracking server which manages the repo, see https://aimstack.readthedocs.io/en/latest/using/remote_tracking.html#
|
||||||
|
|
||||||
|
# aim allows to group runs under experiment name
|
||||||
|
experiment: null # any string, set to "default" if not specified
|
||||||
|
|
||||||
|
train_metric_prefix: "train/"
|
||||||
|
val_metric_prefix: "val/"
|
||||||
|
test_metric_prefix: "test/"
|
||||||
|
|
||||||
|
# sets the tracking interval in seconds for system usage metrics (CPU, GPU, memory, etc.)
|
||||||
|
system_tracking_interval: 10 # set to null to disable system metrics tracking
|
||||||
|
|
||||||
|
# enable/disable logging of system params such as installed packages, git info, env vars, etc.
|
||||||
|
log_system_params: true
|
||||||
|
|
||||||
|
# enable/disable tracking console logs (default value is true)
|
||||||
|
capture_terminal_logs: false # set to false to avoid infinite console log loop issue https://github.com/aimhubio/aim/issues/2550
|
||||||
12
configs/logger/comet.yaml
Normal file
12
configs/logger/comet.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# https://www.comet.ml
|
||||||
|
|
||||||
|
comet:
|
||||||
|
_target_: lightning.pytorch.loggers.comet.CometLogger
|
||||||
|
api_key: ${oc.env:COMET_API_TOKEN} # api key is loaded from environment variable
|
||||||
|
save_dir: "${paths.output_dir}"
|
||||||
|
project_name: "FlowDock_FM"
|
||||||
|
rest_api_key: null
|
||||||
|
# experiment_name: ""
|
||||||
|
experiment_key: null # set to resume experiment
|
||||||
|
offline: False
|
||||||
|
prefix: ""
|
||||||
7
configs/logger/csv.yaml
Normal file
7
configs/logger/csv.yaml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# csv logger built in lightning
|
||||||
|
|
||||||
|
csv:
|
||||||
|
_target_: lightning.pytorch.loggers.csv_logs.CSVLogger
|
||||||
|
save_dir: "${paths.output_dir}"
|
||||||
|
name: "csv/"
|
||||||
|
prefix: ""
|
||||||
9
configs/logger/many_loggers.yaml
Normal file
9
configs/logger/many_loggers.yaml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# train with many loggers at once
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
# - comet
|
||||||
|
- csv
|
||||||
|
# - mlflow
|
||||||
|
# - neptune
|
||||||
|
- tensorboard
|
||||||
|
- wandb
|
||||||
12
configs/logger/mlflow.yaml
Normal file
12
configs/logger/mlflow.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# https://mlflow.org
|
||||||
|
|
||||||
|
mlflow:
|
||||||
|
_target_: lightning.pytorch.loggers.mlflow.MLFlowLogger
|
||||||
|
# experiment_name: ""
|
||||||
|
# run_name: ""
|
||||||
|
tracking_uri: ${paths.log_dir}/mlflow/mlruns # run `mlflow ui` command inside the `logs/mlflow/` dir to open the UI
|
||||||
|
tags: null
|
||||||
|
# save_dir: "./mlruns"
|
||||||
|
prefix: ""
|
||||||
|
artifact_location: null
|
||||||
|
# run_id: ""
|
||||||
9
configs/logger/neptune.yaml
Normal file
9
configs/logger/neptune.yaml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# https://neptune.ai
|
||||||
|
|
||||||
|
neptune:
|
||||||
|
_target_: lightning.pytorch.loggers.neptune.NeptuneLogger
|
||||||
|
api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable
|
||||||
|
project: username/FlowDock_FM
|
||||||
|
# name: ""
|
||||||
|
log_model_checkpoints: True
|
||||||
|
prefix: ""
|
||||||
10
configs/logger/tensorboard.yaml
Normal file
10
configs/logger/tensorboard.yaml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# https://www.tensorflow.org/tensorboard/
|
||||||
|
|
||||||
|
tensorboard:
|
||||||
|
_target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
|
||||||
|
save_dir: "${paths.output_dir}/tensorboard/"
|
||||||
|
name: null
|
||||||
|
log_graph: False
|
||||||
|
default_hp_metric: True
|
||||||
|
prefix: ""
|
||||||
|
# version: ""
|
||||||
16
configs/logger/wandb.yaml
Normal file
16
configs/logger/wandb.yaml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# https://wandb.ai
|
||||||
|
|
||||||
|
wandb:
|
||||||
|
_target_: lightning.pytorch.loggers.wandb.WandbLogger
|
||||||
|
# name: "" # name of the run (normally generated by wandb)
|
||||||
|
save_dir: "${paths.output_dir}"
|
||||||
|
offline: False
|
||||||
|
id: null # pass correct id to resume experiment!
|
||||||
|
anonymous: null # enable anonymous logging
|
||||||
|
project: "FlowDock_FM"
|
||||||
|
log_model: False # upload lightning ckpts
|
||||||
|
prefix: "" # a string to put at the beginning of metric keys
|
||||||
|
entity: "bml-lab" # set to name of your wandb team
|
||||||
|
group: ""
|
||||||
|
tags: []
|
||||||
|
job_type: ""
|
||||||
148
configs/model/flowdock_fm.yaml
Normal file
148
configs/model/flowdock_fm.yaml
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
_target_: flowdock.models.flowdock_fm_module.FlowDockFMLitModule
|
||||||
|
|
||||||
|
net:
|
||||||
|
_target_: flowdock.models.components.flowdock.FlowDock
|
||||||
|
_partial_: true
|
||||||
|
|
||||||
|
optimizer:
|
||||||
|
_target_: torch.optim.Adam
|
||||||
|
_partial_: true
|
||||||
|
lr: 2e-4
|
||||||
|
weight_decay: 0.0
|
||||||
|
|
||||||
|
scheduler:
|
||||||
|
_target_: torch.optim.lr_scheduler.CosineAnnealingWarmRestarts
|
||||||
|
_partial_: true
|
||||||
|
T_0: ${int_divide:${trainer.max_epochs},15}
|
||||||
|
T_mult: 2
|
||||||
|
eta_min: 1e-8
|
||||||
|
verbose: true
|
||||||
|
|
||||||
|
# compile model for faster training with pytorch 2.0
|
||||||
|
compile: false
|
||||||
|
|
||||||
|
# model arguments
|
||||||
|
cfg:
|
||||||
|
mol_encoder:
|
||||||
|
node_channels: 512
|
||||||
|
pair_channels: 64
|
||||||
|
n_atom_encodings: 23
|
||||||
|
n_bond_encodings: 4
|
||||||
|
n_atom_pos_encodings: 6
|
||||||
|
n_stereo_encodings: 14
|
||||||
|
n_attention_heads: 8
|
||||||
|
attention_head_dim: 8
|
||||||
|
hidden_dim: 2048
|
||||||
|
max_path_integral_length: 6
|
||||||
|
n_transformer_stacks: 8
|
||||||
|
n_heads: 8
|
||||||
|
n_patches: ${data.n_lig_patches}
|
||||||
|
checkpoint_file: ${oc.env:PROJECT_ROOT}/checkpoints/neuralplexermodels_downstream_datasets_predictions/models/complex_structure_prediction.ckpt
|
||||||
|
megamolbart: null
|
||||||
|
from_pretrained: true
|
||||||
|
|
||||||
|
protein_encoder:
|
||||||
|
use_esm_embedding: true
|
||||||
|
esm_version: esm2_t33_650M_UR50D
|
||||||
|
esm_repr_layer: 33
|
||||||
|
residue_dim: 512
|
||||||
|
plm_embed_dim: 1280
|
||||||
|
n_aa_types: 21
|
||||||
|
atom_padding_dim: 37
|
||||||
|
n_atom_types: 4 # [C, N, O, S]
|
||||||
|
n_patches: ${data.n_protein_patches}
|
||||||
|
n_attention_heads: 8
|
||||||
|
scalar_dim: 16
|
||||||
|
point_dim: 4
|
||||||
|
pair_dim: 64
|
||||||
|
n_heads: 8
|
||||||
|
head_dim: 8
|
||||||
|
max_residue_degree: 32
|
||||||
|
n_encoder_stacks: 2
|
||||||
|
from_pretrained: true
|
||||||
|
|
||||||
|
relational_reasoning:
|
||||||
|
from_pretrained: true
|
||||||
|
|
||||||
|
contact_predictor:
|
||||||
|
n_stacks: 4
|
||||||
|
dropout: 0.01
|
||||||
|
from_pretrained: true
|
||||||
|
|
||||||
|
score_head:
|
||||||
|
fiber_dim: 64
|
||||||
|
hidden_dim: 512
|
||||||
|
n_stacks: 4
|
||||||
|
max_atom_degree: 8
|
||||||
|
from_pretrained: true
|
||||||
|
|
||||||
|
confidence:
|
||||||
|
enabled: true # whether the confidence prediction head is to be used e.g., during inference
|
||||||
|
fiber_dim: ${..score_head.fiber_dim}
|
||||||
|
hidden_dim: ${..score_head.hidden_dim}
|
||||||
|
n_stacks: ${..score_head.n_stacks}
|
||||||
|
from_pretrained: true
|
||||||
|
|
||||||
|
affinity:
|
||||||
|
enabled: true # whether the affinity prediction head is to be used e.g., during inference
|
||||||
|
fiber_dim: ${..score_head.fiber_dim}
|
||||||
|
hidden_dim: ${..score_head.hidden_dim}
|
||||||
|
n_stacks: ${..score_head.n_stacks}
|
||||||
|
ligand_pooling: sum # NOTE: must be a value in (`sum`, `mean`)
|
||||||
|
dropout: 0.01
|
||||||
|
from_pretrained: false
|
||||||
|
|
||||||
|
latent_model: default
|
||||||
|
prior_type: esmfold # NOTE: must be a value in (`gaussian`, `harmonic`, `esmfold`)
|
||||||
|
|
||||||
|
task:
|
||||||
|
pretrained: null
|
||||||
|
ligands: true
|
||||||
|
epoch_frac: ${data.epoch_frac}
|
||||||
|
label_name: null
|
||||||
|
sequence_crop_size: 1600
|
||||||
|
edge_crop_size: ${data.edge_crop_size} # NOTE: for dynamic batching via `max_n_edges`
|
||||||
|
max_masking_rate: 0.0
|
||||||
|
n_modes: 8
|
||||||
|
dropout: 0.01
|
||||||
|
# pretraining: true
|
||||||
|
freeze_mol_encoder: true
|
||||||
|
freeze_protein_encoder: false
|
||||||
|
freeze_relational_reasoning: false
|
||||||
|
freeze_contact_predictor: true
|
||||||
|
freeze_score_head: false
|
||||||
|
freeze_confidence: true
|
||||||
|
freeze_affinity: false
|
||||||
|
use_template: true
|
||||||
|
use_plddt: false
|
||||||
|
block_contact_decoding_scheme: "beam"
|
||||||
|
frozen_ligand_backbone: false
|
||||||
|
frozen_protein_backbone: false
|
||||||
|
single_protein_batch: true
|
||||||
|
contact_loss_weight: 0.2
|
||||||
|
global_score_loss_weight: 0.2
|
||||||
|
ligand_score_loss_weight: 0.1
|
||||||
|
clash_loss_weight: 10.0
|
||||||
|
local_distgeom_loss_weight: 10.0
|
||||||
|
drmsd_loss_weight: 2.0
|
||||||
|
distogram_loss_weight: 0.05
|
||||||
|
plddt_loss_weight: 1.0
|
||||||
|
affinity_loss_weight: 0.1
|
||||||
|
aux_batch_freq: 10 # NOTE: e.g., `10` means that auxiliary estimation losses will be calculated every 10th batch
|
||||||
|
global_max_sigma: 5.0
|
||||||
|
internal_max_sigma: 2.0
|
||||||
|
detect_covalent: true
|
||||||
|
# runtime configs
|
||||||
|
float32_matmul_precision: highest
|
||||||
|
# sampling configs
|
||||||
|
constrained_inpainting: false
|
||||||
|
visualize_generated_samples: true
|
||||||
|
# testing configs
|
||||||
|
loss_mode: auxiliary_estimation # NOTE: must be one of (`structure_prediction`, `auxiliary_estimation`, `auxiliary_estimation_without_structure_prediction`)
|
||||||
|
num_steps: 20
|
||||||
|
sampler: VDODE # NOTE: must be one of (`ODE`, `VDODE`)
|
||||||
|
sampler_eta: 1.0 # NOTE: this corresponds to the variance diminishing factor for the `VDODE` sampler, which offers a trade-off between exploration (1.0) and exploitation (> 1.0)
|
||||||
|
start_time: 1.0
|
||||||
|
eval_structure_prediction: false # whether to evaluate structure prediction performance (`true`) or instead only binding affinity performance (`false`) when running a test epoch
|
||||||
|
# overfitting configs
|
||||||
|
overfitting_example_name: ${data.overfitting_example_name}
|
||||||
21
configs/paths/default.yaml
Normal file
21
configs/paths/default.yaml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# path to root directory
|
||||||
|
# this requires PROJECT_ROOT environment variable to exist
|
||||||
|
# you can replace it with "." if you want the root to be the current working directory
|
||||||
|
root_dir: ${oc.env:PROJECT_ROOT}
|
||||||
|
|
||||||
|
# path to data directory
|
||||||
|
data_dir: ${paths.root_dir}/data/
|
||||||
|
|
||||||
|
# path to logging directory
|
||||||
|
log_dir: ${paths.root_dir}/logs/
|
||||||
|
|
||||||
|
# path to output directory, created dynamically by hydra
|
||||||
|
# path generation pattern is specified in `configs/hydra/default.yaml`
|
||||||
|
# use it to store all files generated during the run, like ckpts and metrics
|
||||||
|
output_dir: ${hydra:runtime.output_dir}
|
||||||
|
|
||||||
|
# path to working directory
|
||||||
|
work_dir: ${hydra:runtime.cwd}
|
||||||
|
|
||||||
|
# path to the directory containing the model checkpoints
|
||||||
|
ckpt_dir: ${paths.root_dir}/checkpoints/
|
||||||
78
configs/sample.yaml
Normal file
78
configs/sample.yaml
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
# @package _global_
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
- data: combined # NOTE: this will not be referenced during sampling
|
||||||
|
- model: flowdock_fm
|
||||||
|
- logger: null
|
||||||
|
- strategy: default
|
||||||
|
- trainer: default
|
||||||
|
- paths: default
|
||||||
|
- extras: default
|
||||||
|
- hydra: default
|
||||||
|
- environment: default
|
||||||
|
- _self_
|
||||||
|
|
||||||
|
task_name: "sample"
|
||||||
|
|
||||||
|
tags: ["sample", "combined", "flowdock_fm"]
|
||||||
|
|
||||||
|
# passing checkpoint path is necessary for sampling
|
||||||
|
ckpt_path: ???
|
||||||
|
|
||||||
|
# seed for random number generators in pytorch, numpy and python.random
|
||||||
|
seed: null
|
||||||
|
|
||||||
|
# sampling arguments
|
||||||
|
sampling_task: batched_structure_sampling # NOTE: must be one of (`batched_structure_sampling`)
|
||||||
|
sample_id: null # optional identifier for the sampling run
|
||||||
|
input_receptor: null # NOTE: must be either a protein sequence string (with chains separated by `|`) or a path to a PDB file (from which protein chain sequences will be parsed)
|
||||||
|
input_ligand: null # NOTE: must be either a ligand SMILES string (with chains/fragments separated by `|`) or a path to a ligand SDF file (from which ligand SMILES will be parsed)
|
||||||
|
input_template: null # path to a protein PDB file to use as a starting protein template for sampling (with an ESMFold prior model)
|
||||||
|
out_path: ??? # path to which to save the output PDB and SDF files
|
||||||
|
n_samples: 5 # number of structures to sample
|
||||||
|
chunk_size: 5 # number of structures to concurrently sample within each batch segment - NOTE: `n_samples` should be evenly divisible by `chunk_size` to produce the expected number of outputs
|
||||||
|
num_steps: 40 # number of sampling steps to perform
|
||||||
|
latent_model: null # if provided, the type of latent model to use
|
||||||
|
sampler: VDODE # sampling algorithm to use - NOTE: must be one of (`ODE`, `VDODE`)
|
||||||
|
sampler_eta: 1.0 # the variance diminishing factor for the `VDODE` sampler - NOTE: offers a trade-off between exploration (1.0) and exploitation (> 1.0)
|
||||||
|
start_time: "1.0" # time at which to start sampling
|
||||||
|
max_chain_encoding_k: -1 # maximum number of chains to encode in the chain encoding
|
||||||
|
exact_prior: false # whether to use the "ground-truth" binding site for sampling, if available
|
||||||
|
prior_type: esmfold # the type of prior to use for sampling - NOTE: must be one of (`gaussian`, `harmonic`, `esmfold`)
|
||||||
|
discard_ligand: false # whether to discard a given input ligand during sampling
|
||||||
|
discard_sdf_coords: true # whether to discard the input ligand's 3D structure during sampling, if available
|
||||||
|
detect_covalent: false # whether to detect covalent bonds between the input receptor and ligand
|
||||||
|
use_template: true # whether to use the input protein template for sampling if one is provided
|
||||||
|
separate_pdb: true # whether to save separate PDB files for each sampled structure instead of simply a single PDB file
|
||||||
|
rank_outputs_by_confidence: true # whether to rank the sampled structures by estimated confidence
|
||||||
|
plddt_ranking_type: ligand # the type of plDDT ranking to apply to generated samples - NOTE: must be one of (`protein`, `ligand`, `protein_ligand`)
|
||||||
|
visualize_sample_trajectories: false # whether to visualize the generated samples' trajectories
|
||||||
|
auxiliary_estimation_only: false # whether to only estimate auxiliary outputs (e.g., confidence, affinity) for the input (generated) samples (potentially derived from external sources)
|
||||||
|
csv_path: null # if provided, the CSV file (with columns `id`, `input_receptor`, `input_ligand`, and `input_template`) from which to parse input receptors and ligands for sampling, overriding the `input_receptor` and `input_ligand` arguments in the process and ignoring the `input_template` for now
|
||||||
|
esmfold_chunk_size: null # chunks axial attention computation to reduce memory usage from O(L^2) to O(L); equivalent to running a for loop over chunks of of each dimension; lower values will result in lower memory usage at the cost of speed; recommended values: 128, 64, 32
|
||||||
|
|
||||||
|
# model arguments
|
||||||
|
model:
|
||||||
|
cfg:
|
||||||
|
mol_encoder:
|
||||||
|
from_pretrained: false
|
||||||
|
protein_encoder:
|
||||||
|
from_pretrained: false
|
||||||
|
relational_reasoning:
|
||||||
|
from_pretrained: false
|
||||||
|
contact_predictor:
|
||||||
|
from_pretrained: false
|
||||||
|
score_head:
|
||||||
|
from_pretrained: false
|
||||||
|
confidence:
|
||||||
|
from_pretrained: false
|
||||||
|
affinity:
|
||||||
|
from_pretrained: false
|
||||||
|
task:
|
||||||
|
freeze_mol_encoder: true
|
||||||
|
freeze_protein_encoder: false
|
||||||
|
freeze_relational_reasoning: false
|
||||||
|
freeze_contact_predictor: false
|
||||||
|
freeze_score_head: false
|
||||||
|
freeze_confidence: true
|
||||||
|
freeze_affinity: false
|
||||||
4
configs/strategy/ddp.yaml
Normal file
4
configs/strategy/ddp.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
_target_: lightning.pytorch.strategies.DDPStrategy
|
||||||
|
static_graph: false
|
||||||
|
gradient_as_bucket_view: false
|
||||||
|
find_unused_parameters: true
|
||||||
5
configs/strategy/ddp_spawn.yaml
Normal file
5
configs/strategy/ddp_spawn.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
_target_: lightning.pytorch.strategies.DDPStrategy
|
||||||
|
static_graph: false
|
||||||
|
gradient_as_bucket_view: false
|
||||||
|
find_unused_parameters: true
|
||||||
|
start_method: spawn
|
||||||
5
configs/strategy/deepspeed.yaml
Normal file
5
configs/strategy/deepspeed.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
_target_: lightning.pytorch.strategies.DeepSpeedStrategy
|
||||||
|
stage: 2
|
||||||
|
offload_optimizer: false
|
||||||
|
allgather_bucket_size: 200_000_000
|
||||||
|
reduce_bucket_size: 200_000_000
|
||||||
2
configs/strategy/default.yaml
Normal file
2
configs/strategy/default.yaml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
12
configs/strategy/fsdp.yaml
Normal file
12
configs/strategy/fsdp.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
_target_: lightning.pytorch.strategies.FSDPStrategy
|
||||||
|
sharding_strategy: ${resolve_variable:torch.distributed.fsdp.ShardingStrategy.FULL_SHARD}
|
||||||
|
cpu_offload: null
|
||||||
|
activation_checkpointing: null
|
||||||
|
mixed_precision:
|
||||||
|
_target_: torch.distributed.fsdp.MixedPrecision
|
||||||
|
param_dtype: null
|
||||||
|
reduce_dtype: null
|
||||||
|
buffer_dtype: null
|
||||||
|
keep_low_precision_grads: false
|
||||||
|
cast_forward_inputs: false
|
||||||
|
cast_root_forward_inputs: true
|
||||||
4
configs/strategy/optimized_ddp.yaml
Normal file
4
configs/strategy/optimized_ddp.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
_target_: lightning.pytorch.strategies.DDPStrategy
|
||||||
|
static_graph: true
|
||||||
|
gradient_as_bucket_view: true
|
||||||
|
find_unused_parameters: false
|
||||||
51
configs/train.yaml
Normal file
51
configs/train.yaml
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# @package _global_
|
||||||
|
|
||||||
|
# specify here default configuration
|
||||||
|
# order of defaults determines the order in which configs override each other
|
||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
- data: combined
|
||||||
|
- model: flowdock_fm
|
||||||
|
- callbacks: default
|
||||||
|
- logger: null # set logger here or use command line (e.g. `python train.py logger=tensorboard`)
|
||||||
|
- strategy: default
|
||||||
|
- trainer: default
|
||||||
|
- paths: default
|
||||||
|
- extras: default
|
||||||
|
- hydra: default
|
||||||
|
- environment: default
|
||||||
|
|
||||||
|
# experiment configs allow for version control of specific hyperparameters
|
||||||
|
# e.g. best hyperparameters for given model and datamodule
|
||||||
|
- experiment: null
|
||||||
|
|
||||||
|
# config for hyperparameter optimization
|
||||||
|
- hparams_search: null
|
||||||
|
|
||||||
|
# optional local config for machine/user specific settings
|
||||||
|
# it's optional since it doesn't need to exist and is excluded from version control
|
||||||
|
- optional local: default
|
||||||
|
|
||||||
|
# debugging config (enable through command line, e.g. `python train.py debug=default)
|
||||||
|
- debug: null
|
||||||
|
|
||||||
|
# task name, determines output directory path
|
||||||
|
task_name: "train"
|
||||||
|
|
||||||
|
# tags to help you identify your experiments
|
||||||
|
# you can overwrite this in experiment configs
|
||||||
|
# overwrite from command line with `python train.py tags="[first_tag, second_tag]"`
|
||||||
|
tags: ["train", "combined", "flowdock_fm"]
|
||||||
|
|
||||||
|
# set False to skip model training
|
||||||
|
train: True
|
||||||
|
|
||||||
|
# evaluate on test set, using best model weights achieved during training
|
||||||
|
# lightning chooses best weights based on the metric specified in checkpoint callback
|
||||||
|
test: False
|
||||||
|
|
||||||
|
# simply provide checkpoint path to resume training
|
||||||
|
ckpt_path: null
|
||||||
|
|
||||||
|
# seed for random number generators in pytorch, numpy and python.random
|
||||||
|
seed: null
|
||||||
5
configs/trainer/cpu.yaml
Normal file
5
configs/trainer/cpu.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
defaults:
|
||||||
|
- default
|
||||||
|
|
||||||
|
accelerator: cpu
|
||||||
|
devices: 1
|
||||||
9
configs/trainer/ddp.yaml
Normal file
9
configs/trainer/ddp.yaml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
defaults:
|
||||||
|
- default
|
||||||
|
|
||||||
|
strategy: ddp
|
||||||
|
|
||||||
|
accelerator: gpu
|
||||||
|
devices: 4
|
||||||
|
num_nodes: 1
|
||||||
|
sync_batchnorm: True
|
||||||
7
configs/trainer/ddp_sim.yaml
Normal file
7
configs/trainer/ddp_sim.yaml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
defaults:
|
||||||
|
- default
|
||||||
|
|
||||||
|
# simulate DDP on CPU, useful for debugging
|
||||||
|
accelerator: cpu
|
||||||
|
devices: 2
|
||||||
|
strategy: ddp_spawn
|
||||||
9
configs/trainer/ddp_spawn.yaml
Normal file
9
configs/trainer/ddp_spawn.yaml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
defaults:
|
||||||
|
- default
|
||||||
|
|
||||||
|
strategy: ddp_spawn
|
||||||
|
|
||||||
|
accelerator: gpu
|
||||||
|
devices: 4
|
||||||
|
num_nodes: 1
|
||||||
|
sync_batchnorm: True
|
||||||
29
configs/trainer/default.yaml
Normal file
29
configs/trainer/default.yaml
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
_target_: lightning.pytorch.trainer.Trainer
|
||||||
|
|
||||||
|
default_root_dir: ${paths.output_dir}
|
||||||
|
|
||||||
|
min_epochs: 1 # prevents early stopping
|
||||||
|
max_epochs: 10
|
||||||
|
|
||||||
|
accelerator: cpu
|
||||||
|
devices: 1
|
||||||
|
|
||||||
|
# mixed precision for extra speed-up
|
||||||
|
# precision: 16
|
||||||
|
|
||||||
|
# perform a validation loop every N training epochs
|
||||||
|
check_val_every_n_epoch: 1
|
||||||
|
|
||||||
|
# set True to to ensure deterministic results
|
||||||
|
# makes training slower but gives more reproducibility than just setting seeds
|
||||||
|
deterministic: False
|
||||||
|
|
||||||
|
# determine the frequency of how often to reload the dataloaders
|
||||||
|
reload_dataloaders_every_n_epochs: 1
|
||||||
|
|
||||||
|
# if `gradient_clip_val` is not `null`, gradients will be norm-clipped during training
|
||||||
|
gradient_clip_algorithm: norm
|
||||||
|
gradient_clip_val: 1.0
|
||||||
|
|
||||||
|
# if `num_sanity_val_steps` is > 0, then specifically that many validation steps will be run during the first call to `trainer.fit`
|
||||||
|
num_sanity_val_steps: 0
|
||||||
5
configs/trainer/gpu.yaml
Normal file
5
configs/trainer/gpu.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
defaults:
|
||||||
|
- default
|
||||||
|
|
||||||
|
accelerator: gpu
|
||||||
|
devices: 1
|
||||||
5
configs/trainer/mps.yaml
Normal file
5
configs/trainer/mps.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
defaults:
|
||||||
|
- default
|
||||||
|
|
||||||
|
accelerator: mps
|
||||||
|
devices: 1
|
||||||
540
environments/flowdock_environment.yaml
Normal file
540
environments/flowdock_environment.yaml
Normal file
@@ -0,0 +1,540 @@
|
|||||||
|
name: FlowDock
|
||||||
|
channels:
|
||||||
|
- pytorch
|
||||||
|
- pyg
|
||||||
|
- senyan.dev
|
||||||
|
- nvidia
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- _libgcc_mutex=0.1=conda_forge
|
||||||
|
- _openmp_mutex=4.5=3_kmp_llvm
|
||||||
|
- aiohappyeyeballs=2.6.1=pyhd8ed1ab_0
|
||||||
|
- aiohttp=3.11.13=py310h89163eb_0
|
||||||
|
- aiosignal=1.3.2=pyhd8ed1ab_0
|
||||||
|
- alsa-lib=1.2.13=hb9d3cd8_0
|
||||||
|
- ambertools=24.8=cuda_None_nompi_py310h834fefc_101
|
||||||
|
- annotated-types=0.7.0=pyhd8ed1ab_1
|
||||||
|
- anyio=4.8.0=pyhd8ed1ab_0
|
||||||
|
- aom=3.9.1=hac33072_0
|
||||||
|
- argon2-cffi=23.1.0=pyhd8ed1ab_1
|
||||||
|
- argon2-cffi-bindings=21.2.0=py310ha75aee5_5
|
||||||
|
- arpack=3.9.1=nompi_hf03ea27_102
|
||||||
|
- arrow=1.3.0=pyhd8ed1ab_1
|
||||||
|
- asttokens=3.0.0=pyhd8ed1ab_1
|
||||||
|
- async-lru=2.0.4=pyhd8ed1ab_1
|
||||||
|
- async-timeout=5.0.1=pyhd8ed1ab_1
|
||||||
|
- attr=2.5.1=h166bdaf_1
|
||||||
|
- attrs=25.3.0=pyh71513ae_0
|
||||||
|
- babel=2.17.0=pyhd8ed1ab_0
|
||||||
|
- beautifulsoup4=4.13.3=pyha770c72_0
|
||||||
|
- blas=2.116=mkl
|
||||||
|
- blas-devel=3.9.0=16_linux64_mkl
|
||||||
|
- bleach=6.2.0=pyh29332c3_4
|
||||||
|
- bleach-with-css=6.2.0=h82add2a_4
|
||||||
|
- blosc=1.21.6=he440d0b_1
|
||||||
|
- brotli=1.1.0=hb9d3cd8_2
|
||||||
|
- brotli-bin=1.1.0=hb9d3cd8_2
|
||||||
|
- brotli-python=1.1.0=py310hf71b8c6_2
|
||||||
|
- bson=0.5.10=pyhd8ed1ab_0
|
||||||
|
- bzip2=1.0.8=h4bc722e_7
|
||||||
|
- c-ares=1.34.4=hb9d3cd8_0
|
||||||
|
- c-blosc2=2.15.2=h3122c55_1
|
||||||
|
- ca-certificates=2025.1.31=hbcca054_0
|
||||||
|
- cached-property=1.5.2=hd8ed1ab_1
|
||||||
|
- cached_property=1.5.2=pyha770c72_1
|
||||||
|
- cachetools=5.5.2=pyhd8ed1ab_0
|
||||||
|
- cairo=1.18.4=h3394656_0
|
||||||
|
- certifi=2025.1.31=pyhd8ed1ab_0
|
||||||
|
- cffi=1.17.1=py310h8deb56e_0
|
||||||
|
- chardet=5.2.0=pyhd8ed1ab_3
|
||||||
|
- charset-normalizer=3.4.1=pyhd8ed1ab_0
|
||||||
|
- colorama=0.4.6=pyhd8ed1ab_1
|
||||||
|
- comm=0.2.2=pyhd8ed1ab_1
|
||||||
|
- contourpy=1.3.1=py310h3788b33_0
|
||||||
|
- cpython=3.10.16=py310hd8ed1ab_1
|
||||||
|
- cuda-cudart=11.8.89=0
|
||||||
|
- cuda-cupti=11.8.87=0
|
||||||
|
- cuda-libraries=11.8.0=0
|
||||||
|
- cuda-nvrtc=11.8.89=0
|
||||||
|
- cuda-nvtx=11.8.86=0
|
||||||
|
- cuda-runtime=11.8.0=0
|
||||||
|
- cuda-version=11.8=h70ddcb2_3
|
||||||
|
- cudatoolkit=11.8.0=h4ba93d1_13
|
||||||
|
- cudatoolkit-dev=11.8.0=h1fa729e_6
|
||||||
|
- cycler=0.12.1=pyhd8ed1ab_1
|
||||||
|
- cyrus-sasl=2.1.27=h54b06d7_7
|
||||||
|
- dav1d=1.2.1=hd590300_0
|
||||||
|
- dbus=1.13.6=h5008d03_3
|
||||||
|
- debugpy=1.8.13=py310hf71b8c6_0
|
||||||
|
- decorator=5.2.1=pyhd8ed1ab_0
|
||||||
|
- defusedxml=0.7.1=pyhd8ed1ab_0
|
||||||
|
- deprecated=1.2.18=pyhd8ed1ab_0
|
||||||
|
- exceptiongroup=1.2.2=pyhd8ed1ab_1
|
||||||
|
- expat=2.6.4=h5888daf_0
|
||||||
|
- ffmpeg=7.1.1=gpl_h24e5c1d_701
|
||||||
|
- fftw=3.3.10=nompi_hf1063bd_110
|
||||||
|
- filelock=3.18.0=pyhd8ed1ab_0
|
||||||
|
- flexcache=0.3=pyhd8ed1ab_1
|
||||||
|
- flexparser=0.4=pyhd8ed1ab_1
|
||||||
|
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
|
||||||
|
- font-ttf-inconsolata=3.000=h77eed37_0
|
||||||
|
- font-ttf-source-code-pro=2.038=h77eed37_0
|
||||||
|
- font-ttf-ubuntu=0.83=h77eed37_3
|
||||||
|
- fontconfig=2.15.0=h7e30c49_1
|
||||||
|
- fonts-conda-ecosystem=1=0
|
||||||
|
- fonts-conda-forge=1=0
|
||||||
|
- fonttools=4.56.0=py310h89163eb_0
|
||||||
|
- fqdn=1.5.1=pyhd8ed1ab_1
|
||||||
|
- freetype=2.13.3=h48d6fc4_0
|
||||||
|
- freetype-py=2.3.0=pyhd8ed1ab_0
|
||||||
|
- fribidi=1.0.10=h36c2ea0_0
|
||||||
|
- frozenlist=1.5.0=py310h89163eb_1
|
||||||
|
- fsspec=2025.3.0=pyhd8ed1ab_0
|
||||||
|
- gdk-pixbuf=2.42.12=hb9ae30d_0
|
||||||
|
- gettext=0.23.1=h5888daf_0
|
||||||
|
- gettext-tools=0.23.1=h5888daf_0
|
||||||
|
- gmp=6.3.0=hac33072_2
|
||||||
|
- gmpy2=2.1.5=py310he8512ff_3
|
||||||
|
- graphite2=1.3.13=h59595ed_1003
|
||||||
|
- greenlet=3.1.1=py310hf71b8c6_1
|
||||||
|
- h11=0.14.0=pyhd8ed1ab_1
|
||||||
|
- h2=4.2.0=pyhd8ed1ab_0
|
||||||
|
- harfbuzz=10.4.0=h76408a6_0
|
||||||
|
- hdf4=4.2.15=h2a13503_7
|
||||||
|
- hdf5=1.14.4=nompi_h2d575fe_105
|
||||||
|
- hpack=4.1.0=pyhd8ed1ab_0
|
||||||
|
- httpcore=1.0.7=pyh29332c3_1
|
||||||
|
- httpx=0.28.1=pyhd8ed1ab_0
|
||||||
|
- hyperframe=6.1.0=pyhd8ed1ab_0
|
||||||
|
- icu=75.1=he02047a_0
|
||||||
|
- idna=3.10=pyhd8ed1ab_1
|
||||||
|
- importlib-metadata=8.6.1=pyha770c72_0
|
||||||
|
- importlib_resources=6.5.2=pyhd8ed1ab_0
|
||||||
|
- ipykernel=6.29.5=pyh3099207_0
|
||||||
|
- ipython=8.34.0=pyh907856f_0
|
||||||
|
- isoduration=20.11.0=pyhd8ed1ab_1
|
||||||
|
- jack=1.9.22=h7c63dc7_2
|
||||||
|
- jedi=0.19.2=pyhd8ed1ab_1
|
||||||
|
- jinja2=3.1.6=pyhd8ed1ab_0
|
||||||
|
- joblib=1.4.2=pyhd8ed1ab_1
|
||||||
|
- json5=0.10.0=pyhd8ed1ab_1
|
||||||
|
- jsonpointer=3.0.0=py310hff52083_1
|
||||||
|
- jsonschema=4.23.0=pyhd8ed1ab_1
|
||||||
|
- jsonschema-specifications=2024.10.1=pyhd8ed1ab_1
|
||||||
|
- jsonschema-with-format-nongpl=4.23.0=hd8ed1ab_1
|
||||||
|
- jupyter-lsp=2.2.5=pyhd8ed1ab_1
|
||||||
|
- jupyter_client=8.6.3=pyhd8ed1ab_1
|
||||||
|
- jupyter_core=5.7.2=pyh31011fe_1
|
||||||
|
- jupyter_events=0.12.0=pyh29332c3_0
|
||||||
|
- jupyter_server=2.15.0=pyhd8ed1ab_0
|
||||||
|
- jupyter_server_terminals=0.5.3=pyhd8ed1ab_1
|
||||||
|
- jupyterlab=4.3.6=pyhd8ed1ab_0
|
||||||
|
- jupyterlab_pygments=0.3.0=pyhd8ed1ab_2
|
||||||
|
- jupyterlab_server=2.27.3=pyhd8ed1ab_1
|
||||||
|
- jupyterlab_widgets=3.0.13=pyhd8ed1ab_1
|
||||||
|
- kernel-headers_linux-64=3.10.0=he073ed8_18
|
||||||
|
- keyutils=1.6.1=h166bdaf_0
|
||||||
|
- kiwisolver=1.4.7=py310h3788b33_0
|
||||||
|
- krb5=1.21.3=h659f571_0
|
||||||
|
- lame=3.100=h166bdaf_1003
|
||||||
|
- lcms2=2.17=h717163a_0
|
||||||
|
- ld_impl_linux-64=2.43=h712a8e2_4
|
||||||
|
- lerc=4.0.0=h27087fc_0
|
||||||
|
- level-zero=1.21.2=h84d6215_0
|
||||||
|
- libabseil=20250127.0=cxx17_hbbce691_0
|
||||||
|
- libaec=1.1.3=h59595ed_0
|
||||||
|
- libasprintf=0.23.1=h8e693c7_0
|
||||||
|
- libasprintf-devel=0.23.1=h8e693c7_0
|
||||||
|
- libass=0.17.3=hba53ac1_1
|
||||||
|
- libblas=3.9.0=16_linux64_mkl
|
||||||
|
- libboost=1.86.0=h6c02f8c_3
|
||||||
|
- libboost-python=1.86.0=py310ha2bacc8_3
|
||||||
|
- libbrotlicommon=1.1.0=hb9d3cd8_2
|
||||||
|
- libbrotlidec=1.1.0=hb9d3cd8_2
|
||||||
|
- libbrotlienc=1.1.0=hb9d3cd8_2
|
||||||
|
- libcap=2.75=h39aace5_0
|
||||||
|
- libcblas=3.9.0=16_linux64_mkl
|
||||||
|
- libcublas=11.11.3.6=0
|
||||||
|
- libcufft=10.9.0.58=0
|
||||||
|
- libcufile=1.9.1.3=0
|
||||||
|
- libcurand=10.3.5.147=0
|
||||||
|
- libcurl=8.12.1=h332b0f4_0
|
||||||
|
- libcusolver=11.4.1.48=0
|
||||||
|
- libcusparse=11.7.5.86=0
|
||||||
|
- libdb=6.2.32=h9c3ff4c_0
|
||||||
|
- libdeflate=1.23=h4ddbbb0_0
|
||||||
|
- libdrm=2.4.124=hb9d3cd8_0
|
||||||
|
- libedit=3.1.20250104=pl5321h7949ede_0
|
||||||
|
- libegl=1.7.0=ha4b6fd6_2
|
||||||
|
- libev=4.33=hd590300_2
|
||||||
|
- libexpat=2.6.4=h5888daf_0
|
||||||
|
- libffi=3.4.6=h2dba641_0
|
||||||
|
- libflac=1.4.3=h59595ed_0
|
||||||
|
- libgcc=14.2.0=h767d61c_2
|
||||||
|
- libgcc-ng=14.2.0=h69a702a_2
|
||||||
|
- libgcrypt-lib=1.11.0=hb9d3cd8_2
|
||||||
|
- libgettextpo=0.23.1=h5888daf_0
|
||||||
|
- libgettextpo-devel=0.23.1=h5888daf_0
|
||||||
|
- libgfortran=14.2.0=h69a702a_2
|
||||||
|
- libgfortran-ng=14.2.0=h69a702a_2
|
||||||
|
- libgfortran5=14.2.0=hf1ad2bd_2
|
||||||
|
- libgl=1.7.0=ha4b6fd6_2
|
||||||
|
- libglib=2.82.2=h2ff4ddf_1
|
||||||
|
- libglvnd=1.7.0=ha4b6fd6_2
|
||||||
|
- libglx=1.7.0=ha4b6fd6_2
|
||||||
|
- libgomp=14.2.0=h767d61c_2
|
||||||
|
- libgpg-error=1.51=hbd13f7d_1
|
||||||
|
- libhwloc=2.11.2=default_h0d58e46_1001
|
||||||
|
- libiconv=1.18=h4ce23a2_1
|
||||||
|
- libjpeg-turbo=3.0.0=hd590300_1
|
||||||
|
- liblapack=3.9.0=16_linux64_mkl
|
||||||
|
- liblapacke=3.9.0=16_linux64_mkl
|
||||||
|
- liblzma=5.6.4=hb9d3cd8_0
|
||||||
|
- libnetcdf=4.9.2=nompi_h5ddbaa4_116
|
||||||
|
- libnghttp2=1.64.0=h161d5f1_0
|
||||||
|
- libnpp=11.8.0.86=0
|
||||||
|
- libnsl=2.0.1=hd590300_0
|
||||||
|
- libntlm=1.8=hb9d3cd8_0
|
||||||
|
- libnvjpeg=11.9.0.86=0
|
||||||
|
- libogg=1.3.5=h4ab18f5_0
|
||||||
|
- libopenvino=2025.0.0=hdc3f47d_3
|
||||||
|
- libopenvino-auto-batch-plugin=2025.0.0=h4d9b6c2_3
|
||||||
|
- libopenvino-auto-plugin=2025.0.0=h4d9b6c2_3
|
||||||
|
- libopenvino-hetero-plugin=2025.0.0=h981d57b_3
|
||||||
|
- libopenvino-intel-cpu-plugin=2025.0.0=hdc3f47d_3
|
||||||
|
- libopenvino-intel-gpu-plugin=2025.0.0=hdc3f47d_3
|
||||||
|
- libopenvino-intel-npu-plugin=2025.0.0=hdc3f47d_3
|
||||||
|
- libopenvino-ir-frontend=2025.0.0=h981d57b_3
|
||||||
|
- libopenvino-onnx-frontend=2025.0.0=h0e684df_3
|
||||||
|
- libopenvino-paddle-frontend=2025.0.0=h0e684df_3
|
||||||
|
- libopenvino-pytorch-frontend=2025.0.0=h5888daf_3
|
||||||
|
- libopenvino-tensorflow-frontend=2025.0.0=h684f15b_3
|
||||||
|
- libopenvino-tensorflow-lite-frontend=2025.0.0=h5888daf_3
|
||||||
|
- libopus=1.3.1=h7f98852_1
|
||||||
|
- libpciaccess=0.18=hd590300_0
|
||||||
|
- libpng=1.6.47=h943b412_0
|
||||||
|
- libpq=17.4=h27ae623_0
|
||||||
|
- libprotobuf=5.29.3=h501fc15_0
|
||||||
|
- librdkit=2024.09.6=h84b0b3c_0
|
||||||
|
- librsvg=2.58.4=h49af25d_2
|
||||||
|
- libsndfile=1.2.2=hc60ed4a_1
|
||||||
|
- libsodium=1.0.20=h4ab18f5_0
|
||||||
|
- libsqlite=3.49.1=hee588c1_1
|
||||||
|
- libssh2=1.11.1=hf672d98_0
|
||||||
|
- libstdcxx=14.2.0=h8f9b012_2
|
||||||
|
- libstdcxx-ng=14.2.0=h4852527_2
|
||||||
|
- libsystemd0=257.4=h4e0b6ca_1
|
||||||
|
- libtiff=4.7.0=hd9ff511_3
|
||||||
|
- libudev1=257.4=hbe16f8c_1
|
||||||
|
- libunwind=1.6.2=h9c3ff4c_0
|
||||||
|
- liburing=2.9=h84d6215_0
|
||||||
|
- libusb=1.0.27=hb9d3cd8_101
|
||||||
|
- libuuid=2.38.1=h0b41bf4_0
|
||||||
|
- libva=2.22.0=h4f16b4b_2
|
||||||
|
- libvorbis=1.3.7=h9c3ff4c_0
|
||||||
|
- libvpx=1.14.1=hac33072_0
|
||||||
|
- libwebp-base=1.5.0=h851e524_0
|
||||||
|
- libxcb=1.17.0=h8a09558_0
|
||||||
|
- libxcrypt=4.4.36=hd590300_1
|
||||||
|
- libxkbcommon=1.8.1=hc4a0caf_0
|
||||||
|
- libxml2=2.13.6=h8d12d68_0
|
||||||
|
- libxslt=1.1.39=h76b75d6_0
|
||||||
|
- libzip=1.11.2=h6991a6a_0
|
||||||
|
- libzlib=1.3.1=hb9d3cd8_2
|
||||||
|
- llvm-openmp=15.0.7=h0cdce71_0
|
||||||
|
- lxml=5.3.1=py310h6ee67d5_0
|
||||||
|
- lz4-c=1.10.0=h5888daf_1
|
||||||
|
- markupsafe=3.0.2=py310h89163eb_1
|
||||||
|
- matplotlib-base=3.10.1=py310h68603db_0
|
||||||
|
- matplotlib-inline=0.1.7=pyhd8ed1ab_1
|
||||||
|
- mda-xdrlib=0.2.0=pyhd8ed1ab_1
|
||||||
|
- mdtraj=1.10.3=py310h4cdbd58_0
|
||||||
|
- mendeleev=0.20.1=pymin39_ha308f57_3
|
||||||
|
- mistune=3.1.2=pyhd8ed1ab_0
|
||||||
|
- mkl=2022.1.0=h84fe81f_915
|
||||||
|
- mkl-devel=2022.1.0=ha770c72_916
|
||||||
|
- mkl-include=2022.1.0=h84fe81f_915
|
||||||
|
- mpc=1.3.1=h24ddda3_1
|
||||||
|
- mpfr=4.2.1=h90cbb55_3
|
||||||
|
- mpg123=1.32.9=hc50e24c_0
|
||||||
|
- mpmath=1.3.0=pyhd8ed1ab_1
|
||||||
|
- multidict=6.1.0=py310h89163eb_2
|
||||||
|
- munkres=1.1.4=pyh9f0ad1d_0
|
||||||
|
- nbclient=0.10.2=pyhd8ed1ab_0
|
||||||
|
- nbconvert-core=7.16.6=pyh29332c3_0
|
||||||
|
- nbformat=5.10.4=pyhd8ed1ab_1
|
||||||
|
- ncurses=6.5=h2d0b736_3
|
||||||
|
- nest-asyncio=1.6.0=pyhd8ed1ab_1
|
||||||
|
- netcdf-fortran=4.6.1=nompi_ha5d1325_108
|
||||||
|
- networkx=3.4.2=pyh267e887_2
|
||||||
|
- notebook=7.3.3=pyhd8ed1ab_0
|
||||||
|
- notebook-shim=0.2.4=pyhd8ed1ab_1
|
||||||
|
- numexpr=2.7.3=py310hb5077e9_1
|
||||||
|
- ocl-icd=2.3.2=hb9d3cd8_2
|
||||||
|
- ocl-icd-system=1.0.0=1
|
||||||
|
- opencl-headers=2024.10.24=h5888daf_0
|
||||||
|
- openff-amber-ff-ports=0.0.4=pyhca7485f_0
|
||||||
|
- openff-forcefields=2024.09.0=pyhff2d567_0
|
||||||
|
- openff-interchange=0.4.2=pyhd8ed1ab_2
|
||||||
|
- openff-interchange-base=0.4.2=pyhd8ed1ab_2
|
||||||
|
- openff-toolkit=0.16.8=pyhd8ed1ab_2
|
||||||
|
- openff-toolkit-base=0.16.8=pyhd8ed1ab_2
|
||||||
|
- openff-units=0.3.0=pyhd8ed1ab_1
|
||||||
|
- openff-utilities=0.1.15=pyhd8ed1ab_0
|
||||||
|
- openh264=2.6.0=hc22cd8d_0
|
||||||
|
- openjpeg=2.5.3=h5fbd93e_0
|
||||||
|
- openldap=2.6.9=he970967_0
|
||||||
|
- openmm=8.2.0=py310h30bdd6a_2
|
||||||
|
- openmmforcefields=0.14.2=pyhd8ed1ab_0
|
||||||
|
- openssl=3.4.1=h7b32b05_0
|
||||||
|
- overrides=7.7.0=pyhd8ed1ab_1
|
||||||
|
- packaging=24.2=pyhd8ed1ab_2
|
||||||
|
- panedr=0.8.0=pyhd8ed1ab_1
|
||||||
|
- pango=1.56.2=h861ebed_0
|
||||||
|
- parmed=4.3.0=py310h78e4988_1
|
||||||
|
- parso=0.8.4=pyhd8ed1ab_1
|
||||||
|
- pcre2=10.44=hba22ea6_2
|
||||||
|
- pdbfixer=1.11=pyhd8ed1ab_0
|
||||||
|
- perl=5.32.1=7_hd590300_perl5
|
||||||
|
- pexpect=4.9.0=pyhd8ed1ab_1
|
||||||
|
- pickleshare=0.7.5=pyhd8ed1ab_1004
|
||||||
|
- pillow=11.1.0=py310h7e6dc6c_0
|
||||||
|
- pint=0.24.4=pyhd8ed1ab_1
|
||||||
|
- pip=25.0.1=pyh8b19718_0
|
||||||
|
- pixman=0.44.2=h29eaf8c_0
|
||||||
|
- pkgutil-resolve-name=1.3.10=pyhd8ed1ab_2
|
||||||
|
- platformdirs=4.3.6=pyhd8ed1ab_1
|
||||||
|
- prometheus_client=0.21.1=pyhd8ed1ab_0
|
||||||
|
- prompt-toolkit=3.0.50=pyha770c72_0
|
||||||
|
- psutil=7.0.0=py310ha75aee5_0
|
||||||
|
- pthread-stubs=0.4=hb9d3cd8_1002
|
||||||
|
- ptyprocess=0.7.0=pyhd8ed1ab_1
|
||||||
|
- pugixml=1.15=h3f63f65_0
|
||||||
|
- pulseaudio-client=17.0=hb77b528_0
|
||||||
|
- pure_eval=0.2.3=pyhd8ed1ab_1
|
||||||
|
- py-cpuinfo=9.0.0=pyhd8ed1ab_1
|
||||||
|
- pycairo=1.27.0=py310h25ff670_0
|
||||||
|
- pycparser=2.22=pyh29332c3_1
|
||||||
|
- pydantic=2.10.6=pyh3cfb1c2_0
|
||||||
|
- pydantic-core=2.27.2=py310h505e2c1_0
|
||||||
|
- pyedr=0.8.0=pyhd8ed1ab_1
|
||||||
|
- pyfiglet=0.8.post1=py_0
|
||||||
|
- pyg=2.5.2=py310_torch_2.2.0_cu118
|
||||||
|
- pygments=2.19.1=pyhd8ed1ab_0
|
||||||
|
- pyparsing=3.2.1=pyhd8ed1ab_0
|
||||||
|
- pysocks=1.7.1=pyha55dd90_7
|
||||||
|
- pytables=3.10.1=py310h431dcdc_4
|
||||||
|
- python=3.10.16=he725a3c_1_cpython
|
||||||
|
- python-constraint=1.4.0=pyhff2d567_1
|
||||||
|
- python-dateutil=2.9.0.post0=pyhff2d567_1
|
||||||
|
- python-fastjsonschema=2.21.1=pyhd8ed1ab_0
|
||||||
|
- python-tzdata=2025.1=pyhd8ed1ab_0
|
||||||
|
- python_abi=3.10=5_cp310
|
||||||
|
- pytorch=2.2.1=py3.10_cuda11.8_cudnn8.7.0_0
|
||||||
|
- pytorch-cuda=11.8=h7e8668a_6
|
||||||
|
- pytorch-mutex=1.0=cuda
|
||||||
|
- pytorch-scatter=2.1.2=py310_torch_2.2.0_cu118
|
||||||
|
- pytz=2025.1=pyhd8ed1ab_0
|
||||||
|
- pyyaml=6.0.2=py310h89163eb_2
|
||||||
|
- pyzmq=26.3.0=py310h71f11fc_0
|
||||||
|
- qhull=2020.2=h434a139_5
|
||||||
|
- rdkit=2024.09.6=py310hcd13295_0
|
||||||
|
- readline=8.2=h8c095d6_2
|
||||||
|
- referencing=0.36.2=pyh29332c3_0
|
||||||
|
- reportlab=4.3.1=py310ha75aee5_0
|
||||||
|
- requests=2.32.3=pyhd8ed1ab_1
|
||||||
|
- rfc3339-validator=0.1.4=pyhd8ed1ab_1
|
||||||
|
- rfc3986-validator=0.1.1=pyh9f0ad1d_0
|
||||||
|
- rlpycairo=0.2.0=pyhd8ed1ab_0
|
||||||
|
- rpds-py=0.23.1=py310hc1293b2_0
|
||||||
|
- scikit-learn=1.6.1=py310h27f47ee_0
|
||||||
|
- scipy=1.15.2=py310h1d65ade_0
|
||||||
|
- sdl2=2.32.50=h9b8e6db_1
|
||||||
|
- sdl3=3.2.8=h3083f51_0
|
||||||
|
- send2trash=1.8.3=pyh0d859eb_1
|
||||||
|
- setuptools=75.8.2=pyhff2d567_0
|
||||||
|
- six=1.17.0=pyhd8ed1ab_0
|
||||||
|
- smirnoff99frosst=1.1.0=pyh44b312d_0
|
||||||
|
- snappy=1.2.1=h8bd8927_1
|
||||||
|
- sniffio=1.3.1=pyhd8ed1ab_1
|
||||||
|
- sqlalchemy=2.0.39=py310ha75aee5_1
|
||||||
|
- stack_data=0.6.3=pyhd8ed1ab_1
|
||||||
|
- svt-av1=3.0.1=h5888daf_0
|
||||||
|
- sympy=1.13.3=pyh2585a3b_105
|
||||||
|
- sysroot_linux-64=2.17=h0157908_18
|
||||||
|
- tbb=2021.13.0=hceb3a55_1
|
||||||
|
- terminado=0.18.1=pyh0d859eb_0
|
||||||
|
- threadpoolctl=3.6.0=pyhecae5ae_0
|
||||||
|
- tinycss2=1.4.0=pyhd8ed1ab_0
|
||||||
|
- tinydb=4.8.2=pyhd8ed1ab_1
|
||||||
|
- tk=8.6.13=noxft_h4845f30_101
|
||||||
|
- tomli=2.2.1=pyhd8ed1ab_1
|
||||||
|
- torchaudio=2.2.1=py310_cu118
|
||||||
|
- torchtriton=2.2.0=py310
|
||||||
|
- torchvision=0.17.1=py310_cu118
|
||||||
|
- tornado=6.4.2=py310ha75aee5_0
|
||||||
|
- tqdm=4.67.1=pyhd8ed1ab_1
|
||||||
|
- traitlets=5.14.3=pyhd8ed1ab_1
|
||||||
|
- types-python-dateutil=2.9.0.20241206=pyhd8ed1ab_0
|
||||||
|
- typing-extensions=4.12.2=hd8ed1ab_1
|
||||||
|
- typing_extensions=4.12.2=pyha770c72_1
|
||||||
|
- typing_utils=0.1.0=pyhd8ed1ab_1
|
||||||
|
- tzdata=2025a=h78e105d_0
|
||||||
|
- unicodedata2=16.0.0=py310ha75aee5_0
|
||||||
|
- uri-template=1.3.0=pyhd8ed1ab_1
|
||||||
|
- urllib3=2.3.0=pyhd8ed1ab_0
|
||||||
|
- validators=0.34.0=pyhd8ed1ab_1
|
||||||
|
- wayland=1.23.1=h3e06ad9_0
|
||||||
|
- wayland-protocols=1.41=hd8ed1ab_0
|
||||||
|
- wcwidth=0.2.13=pyhd8ed1ab_1
|
||||||
|
- webcolors=24.11.1=pyhd8ed1ab_0
|
||||||
|
- webencodings=0.5.1=pyhd8ed1ab_3
|
||||||
|
- websocket-client=1.8.0=pyhd8ed1ab_1
|
||||||
|
- wheel=0.45.1=pyhd8ed1ab_1
|
||||||
|
- wrapt=1.17.2=py310ha75aee5_0
|
||||||
|
- x264=1!164.3095=h166bdaf_2
|
||||||
|
- x265=3.5=h924138e_3
|
||||||
|
- xkeyboard-config=2.43=hb9d3cd8_0
|
||||||
|
- xmltodict=0.14.2=pyhd8ed1ab_1
|
||||||
|
- xorg-libice=1.1.2=hb9d3cd8_0
|
||||||
|
- xorg-libsm=1.2.6=he73a12e_0
|
||||||
|
- xorg-libx11=1.8.12=h4f16b4b_0
|
||||||
|
- xorg-libxau=1.0.12=hb9d3cd8_0
|
||||||
|
- xorg-libxcursor=1.2.3=hb9d3cd8_0
|
||||||
|
- xorg-libxdmcp=1.1.5=hb9d3cd8_0
|
||||||
|
- xorg-libxext=1.3.6=hb9d3cd8_0
|
||||||
|
- xorg-libxfixes=6.0.1=hb9d3cd8_0
|
||||||
|
- xorg-libxrender=0.9.12=hb9d3cd8_0
|
||||||
|
- xorg-libxscrnsaver=1.2.4=hb9d3cd8_0
|
||||||
|
- xorg-libxt=1.3.1=hb9d3cd8_0
|
||||||
|
- yaml=0.2.5=h7f98852_2
|
||||||
|
- yarl=1.18.3=py310h89163eb_1
|
||||||
|
- zeromq=4.3.5=h3b0a872_7
|
||||||
|
- zipp=3.21.0=pyhd8ed1ab_1
|
||||||
|
- zlib=1.3.1=hb9d3cd8_2
|
||||||
|
- zlib-ng=2.2.4=h7955e40_0
|
||||||
|
- zstandard=0.23.0=py310ha75aee5_1
|
||||||
|
- zstd=1.5.7=hb8e6e7a_1
|
||||||
|
- pip:
|
||||||
|
- absl-py==2.1.0
|
||||||
|
- alembic==1.15.1
|
||||||
|
- amberutils==21.0
|
||||||
|
- antlr4-python3-runtime==4.9.3
|
||||||
|
- autopage==0.5.2
|
||||||
|
- beartype==0.20.0
|
||||||
|
- biopandas==0.5.1
|
||||||
|
- biopython==1.79
|
||||||
|
- biotite==1.1.0
|
||||||
|
- biotraj==1.2.2
|
||||||
|
- cfgv==3.4.0
|
||||||
|
- cftime==1.6.4.post1
|
||||||
|
- click==8.1.8
|
||||||
|
- cliff==4.9.1
|
||||||
|
- cloudpathlib==0.21.0
|
||||||
|
- cmaes==0.11.1
|
||||||
|
- cmd2==2.5.11
|
||||||
|
- colorlog==6.9.0
|
||||||
|
- distlib==0.3.9
|
||||||
|
- git+https://github.com/NVIDIA/dllogger.git@0540a43971f4a8a16693a9de9de73c1072020769
|
||||||
|
- dm-tree==0.1.9
|
||||||
|
- docker-pycreds==0.4.0
|
||||||
|
- duckdb==1.2.1
|
||||||
|
- edgembar==3.0
|
||||||
|
- einops==0.8.1
|
||||||
|
- eval-type-backport==0.2.2
|
||||||
|
- executing==2.2.0
|
||||||
|
- fair-esm==2.0.0
|
||||||
|
- fairscale==0.4.13
|
||||||
|
- fastcore==1.7.29
|
||||||
|
- future==1.0.0
|
||||||
|
- fvcore==0.1.5.post20221221
|
||||||
|
- gcsfs==2025.3.0
|
||||||
|
- gemmi==0.7.0
|
||||||
|
- gitdb==4.0.12
|
||||||
|
- gitpython==3.1.44
|
||||||
|
- google-api-core==2.24.2
|
||||||
|
- google-auth==2.38.0
|
||||||
|
- google-auth-oauthlib==1.2.1
|
||||||
|
- google-cloud-core==2.4.3
|
||||||
|
- google-cloud-storage==3.1.0
|
||||||
|
- google-crc32c==1.6.0
|
||||||
|
- google-resumable-media==2.7.2
|
||||||
|
- googleapis-common-protos==1.69.1
|
||||||
|
- hydra-colorlog==1.2.0
|
||||||
|
- hydra-core==1.3.2
|
||||||
|
- hydra-optuna-sweeper==1.2.0
|
||||||
|
- identify==2.6.9
|
||||||
|
- iniconfig==2.0.0
|
||||||
|
- iopath==0.1.10
|
||||||
|
- ipython-genutils==0.2.0
|
||||||
|
- ipywidgets==7.8.5
|
||||||
|
- jupyterlab-widgets==1.1.11
|
||||||
|
- lightning==2.5.0.post0
|
||||||
|
- lightning-utilities==0.14.1
|
||||||
|
- looseversion==1.1.2
|
||||||
|
- lovely-numpy==0.2.13
|
||||||
|
- lovely-tensors==0.1.18
|
||||||
|
- mako==1.3.9
|
||||||
|
- markdown-it-py==3.0.0
|
||||||
|
- mdurl==0.1.2
|
||||||
|
- ml-collections==1.0.0
|
||||||
|
- mmcif==0.91.0
|
||||||
|
- mmpbsa-py==16.0
|
||||||
|
- mmtf-python==1.1.3
|
||||||
|
- mols2grid==2.0.0
|
||||||
|
- msgpack==1.1.0
|
||||||
|
- msgpack-numpy==0.4.8
|
||||||
|
- narwhals==1.30.0
|
||||||
|
- netcdf4==1.7.2
|
||||||
|
- nodeenv==1.9.1
|
||||||
|
- numpy==1.26.4
|
||||||
|
- oauthlib==3.2.2
|
||||||
|
- omegaconf==2.3.0
|
||||||
|
- git+https://github.com/amorehead/openfold.git@fe1275099639bf7e617e09ef24d6af778647dd64
|
||||||
|
- optuna==2.10.1
|
||||||
|
- packmol-memgen==2025.1.29
|
||||||
|
- pandas==2.2.3
|
||||||
|
- pandocfilters==1.5.1
|
||||||
|
- pbr==6.1.1
|
||||||
|
- pdb4amber==22.0
|
||||||
|
- plinder==0.2.24
|
||||||
|
- plotly==6.0.0
|
||||||
|
- pluggy==1.5.0
|
||||||
|
- portalocker==3.1.1
|
||||||
|
- posebusters==0.2.13
|
||||||
|
- git+https://git@github.com/zrqiao/power_spherical.git@290b1630c5f84e3bb0d61711046edcf6e47200d4
|
||||||
|
- pre-commit==4.1.0
|
||||||
|
- prettytable==3.15.1
|
||||||
|
# - prody==2.4.1 # NOTE: we must `pip` install Prody to skip its NumPy dependency
|
||||||
|
- propcache==0.3.0
|
||||||
|
- proto-plus==1.26.1
|
||||||
|
- protobuf==5.29.3
|
||||||
|
- pyarrow==19.0.1
|
||||||
|
- pyasn1==0.6.1
|
||||||
|
- pyasn1-modules==0.4.1
|
||||||
|
- pymsmt==22.0
|
||||||
|
- pyperclip==1.9.0
|
||||||
|
- pytest==8.3.5
|
||||||
|
- python-dotenv==1.0.1
|
||||||
|
- python-json-logger==3.3.0
|
||||||
|
- pytorch-lightning==2.5.0.post0
|
||||||
|
- git+https://github.com/facebookresearch/pytorch3d.git@3da7703c5ac10039645966deddffe8db52eab8c5
|
||||||
|
- pytraj==2.0.6
|
||||||
|
- requests-oauthlib==2.0.0
|
||||||
|
- rich==13.9.4
|
||||||
|
- rootutils==1.0.7
|
||||||
|
- rsa==4.9
|
||||||
|
- sander==22.0
|
||||||
|
- seaborn==0.13.2
|
||||||
|
- sentry-sdk==2.22.0
|
||||||
|
- setproctitle==1.3.5
|
||||||
|
- smmap==5.0.2
|
||||||
|
- soupsieve==2.6
|
||||||
|
- stevedore==5.4.1
|
||||||
|
- tabulate==0.9.0
|
||||||
|
- termcolor==2.5.0
|
||||||
|
- torchmetrics==1.6.3
|
||||||
|
- virtualenv==20.29.3
|
||||||
|
- wandb==0.19.8
|
||||||
|
- widgetsnbextension==3.6.10
|
||||||
|
- yacs==0.1.8
|
||||||
58
environments/flowdock_environment_docker.yaml
Normal file
58
environments/flowdock_environment_docker.yaml
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
name: flowdock
|
||||||
|
channels:
|
||||||
|
- pyg
|
||||||
|
- pytorch
|
||||||
|
- nvidia
|
||||||
|
- defaults
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- mendeleev=0.20.1=pymin39_ha308f57_3
|
||||||
|
- networkx=3.4.2=pyh267e887_2
|
||||||
|
- python=3.10.16=he725a3c_1_cpython
|
||||||
|
- pytorch=2.2.1=py3.10_cuda11.8_cudnn8.7.0_0
|
||||||
|
- pytorch-cuda=11.8=h7e8668a_6
|
||||||
|
- pytorch-mutex=1.0=cuda
|
||||||
|
- pytorch-scatter=2.1.2=py310_torch_2.2.0_cu118
|
||||||
|
- rdkit=2024.09.6=py310hcd13295_0
|
||||||
|
- scikit-learn=1.6.1=py310h27f47ee_0
|
||||||
|
- scipy=1.15.2=py310h1d65ade_0
|
||||||
|
- torchaudio=2.2.1=py310_cu118
|
||||||
|
- torchtriton=2.2.0=py310
|
||||||
|
- torchvision=0.17.1=py310_cu118
|
||||||
|
- tqdm=4.67.1=pyhd8ed1ab_1
|
||||||
|
- pip:
|
||||||
|
- beartype==0.20.0
|
||||||
|
- biopandas==0.5.1
|
||||||
|
- biopython==1.79
|
||||||
|
- biotite==1.1.0
|
||||||
|
- git+https://github.com/NVIDIA/dllogger.git@0540a43971f4a8a16693a9de9de73c1072020769
|
||||||
|
- dm-tree==0.1.9
|
||||||
|
- einops==0.8.1
|
||||||
|
- fair-esm==2.0.0
|
||||||
|
- fairscale==0.4.13
|
||||||
|
- gemmi==0.7.0
|
||||||
|
- hydra-colorlog==1.2.0
|
||||||
|
- hydra-core==1.3.2
|
||||||
|
- hydra-optuna-sweeper==1.2.0
|
||||||
|
- lightning==2.5.0.post0
|
||||||
|
- lightning-utilities==0.14.1
|
||||||
|
- lovely-numpy==0.2.13
|
||||||
|
- lovely-tensors==0.1.18
|
||||||
|
- ml-collections==1.0.0
|
||||||
|
- msgpack==1.1.0
|
||||||
|
- msgpack-numpy==0.4.8
|
||||||
|
- numpy==1.26.4
|
||||||
|
- omegaconf==2.3.0
|
||||||
|
- git+https://github.com/amorehead/openfold.git@fe1275099639bf7e617e09ef24d6af778647dd64
|
||||||
|
- pandas==2.2.3
|
||||||
|
- plinder==0.2.24
|
||||||
|
- plotly==6.0.0
|
||||||
|
- posebusters==0.2.13
|
||||||
|
# - prody==2.4.1 # NOTE: we must `pip` install Prody to skip its NumPy dependency
|
||||||
|
- pytorch-lightning==2.5.0.post0
|
||||||
|
- git+https://github.com/facebookresearch/pytorch3d.git@3da7703c5ac10039645966deddffe8db52eab8c5
|
||||||
|
- rich==13.9.4
|
||||||
|
- rootutils==1.0.7
|
||||||
|
- seaborn==0.13.2
|
||||||
|
- torchmetrics==1.6.3
|
||||||
|
- wandb==0.19.8
|
||||||
120
flowdock/__init__.py
Normal file
120
flowdock/__init__.py
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
import importlib
|
||||||
|
import os
|
||||||
|
|
||||||
|
from beartype.typing import Any
|
||||||
|
from omegaconf import OmegaConf
|
||||||
|
|
||||||
|
# Mapping from lowercase method identifiers (as used in configs and filenames)
# to their canonical display titles (also used as forks' directory names below).
METHOD_TITLE_MAPPING = {
    "diffdock": "DiffDock",
    "flowdock": "FlowDock",
    "neuralplexer": "NeuralPLexer",
}

# NOTE(review): presumably methods whose fork directories follow a standardized
# layout — used only as an allow-list in the path resolvers below; confirm.
STANDARDIZED_DIR_METHODS = ["diffdock"]
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_omegaconf_variable(variable_path: str) -> Any:
    """Resolve a dotted OmegaConf variable path to the Python object it names.

    :param variable_path: Dotted path such as ``"pkg.module.ATTRIBUTE"``.
    :return: The attribute found at the end of the path.
    """
    # Separate the trailing attribute name from the (dotted) module prefix.
    module_path, _, attribute_name = variable_path.rpartition(".")

    try:
        # Common case: the prefix is itself an importable module.
        target_module = importlib.import_module(module_path)
        return getattr(target_module, attribute_name)
    except Exception:
        # Fallback: the last component of the prefix is an attribute
        # (e.g. a class or object) living on its parent module.
        parent_path, _, inner_name = module_path.rpartition(".")
        parent_module = importlib.import_module(parent_path)
        return getattr(getattr(parent_module, inner_name), attribute_name)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_dataset_path_dirname(dataset: str) -> str:
    """Resolve the dataset path directory name based on the dataset's name.

    :param dataset: Name of the dataset.
    :return: Directory name for the dataset path.
    """
    # Only ``dockgen`` has a directory name differing from its dataset name.
    special_dirnames = {"dockgen": "DockGen"}
    return special_dirnames.get(dataset, dataset)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_method_input_csv_path(method: str, dataset: str) -> str:
    """Resolve the input CSV path for a given method.

    :param method: The method name.
    :param dataset: The dataset name.
    :return: The input CSV path for the given method.
    :raises ValueError: If ``method`` is not a recognized method.
    """
    # Guard clause: reject unknown methods up front.
    supported = method in STANDARDIZED_DIR_METHODS or method in ("flowdock", "neuralplexer")
    if not supported:
        raise ValueError(f"Invalid method: {method}")

    # Fork directories are named after the method's display title when one exists.
    method_dirname = METHOD_TITLE_MAPPING.get(method, method)
    return os.path.join(
        "forks",
        method_dirname,
        "inference",
        f"{method}_{dataset}_inputs.csv",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_method_title(method: str) -> str:
    """Resolve the method title for a given method.

    :param method: The method name.
    :return: The method title for the given method.
    """
    # Fall back to the raw method name when no title has been registered.
    try:
        return METHOD_TITLE_MAPPING[method]
    except KeyError:
        return method
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_method_output_dir(
    method: str,
    dataset: str,
    repeat_index: int,
) -> str:
    """Resolve the output directory for a given method.

    :param method: The method name.
    :param dataset: The dataset name.
    :param repeat_index: The repeat index for the method.
    :return: The output directory for the given method.
    :raises ValueError: If ``method`` is not a recognized method.
    """
    # Guard clause: reject unknown methods up front.
    supported = method in STANDARDIZED_DIR_METHODS or method in ("flowdock", "neuralplexer")
    if not supported:
        raise ValueError(f"Invalid method: {method}")

    # These two methods pluralize "output" in their directory names.
    plural_suffix = "s" if method in ("flowdock", "neuralplexer") else ""
    return os.path.join(
        "forks",
        METHOD_TITLE_MAPPING.get(method, method),
        "inference",
        f"{method}_{dataset}_output{plural_suffix}_{repeat_index}",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def register_custom_omegaconf_resolvers():
    """Register this project's custom OmegaConf resolvers.

    Makes ``${resolve_variable:...}``, ``${resolve_dataset_path_dirname:...}``,
    ``${resolve_method_input_csv_path:...,...}``, ``${resolve_method_title:...}``,
    ``${resolve_method_output_dir:...,...,...}``, and ``${int_divide:...,...}``
    available for interpolation in Hydra/OmegaConf configs.
    """
    # Pass the named functions directly instead of wrapping each in a redundant
    # single-expression lambda; resolver call signatures are unchanged.
    OmegaConf.register_new_resolver("resolve_variable", resolve_omegaconf_variable)
    OmegaConf.register_new_resolver(
        "resolve_dataset_path_dirname", resolve_dataset_path_dirname
    )
    OmegaConf.register_new_resolver(
        "resolve_method_input_csv_path", resolve_method_input_csv_path
    )
    OmegaConf.register_new_resolver("resolve_method_title", resolve_method_title)
    OmegaConf.register_new_resolver("resolve_method_output_dir", resolve_method_output_dir)
    # Integer division for config arithmetic, e.g. ``${int_divide:${total},${workers}}``.
    OmegaConf.register_new_resolver(
        "int_divide", lambda dividend, divisor: int(dividend) // int(divisor)
    )
|
||||||
165
flowdock/eval.py
Normal file
165
flowdock/eval.py
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import hydra
|
||||||
|
import lightning as L
|
||||||
|
import lovely_tensors as lt
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Any, Dict, List, Tuple
|
||||||
|
from lightning import LightningDataModule, LightningModule, Trainer
|
||||||
|
from lightning.fabric.plugins.environments.cluster_environment import ClusterEnvironment
|
||||||
|
from lightning.pytorch.loggers import Logger
|
||||||
|
from lightning.pytorch.strategies.strategy import Strategy
|
||||||
|
from omegaconf import DictConfig, open_dict
|
||||||
|
|
||||||
|
lt.monkey_patch()
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
# ------------------------------------------------------------------------------------ #
|
||||||
|
# the setup_root above is equivalent to:
|
||||||
|
# - adding project root dir to PYTHONPATH
|
||||||
|
# (so you don't need to force user to install project as a package)
|
||||||
|
# (necessary before importing any local modules e.g. `from flowdock import utils`)
|
||||||
|
# - setting up PROJECT_ROOT environment variable
|
||||||
|
# (which is used as a base for paths in "configs/paths/default.yaml")
|
||||||
|
# (this way all filepaths are the same no matter where you run the code)
|
||||||
|
# - loading environment variables from ".env" in root dir
|
||||||
|
#
|
||||||
|
# you can remove it if you:
|
||||||
|
# 1. either install project as a package or move entry files to project root dir
|
||||||
|
# 2. set `root_dir` to "." in "configs/paths/default.yaml"
|
||||||
|
#
|
||||||
|
# more info: https://github.com/ashleve/rootutils
|
||||||
|
# ------------------------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
from flowdock import register_custom_omegaconf_resolvers, resolve_omegaconf_variable
|
||||||
|
from flowdock.utils import (
|
||||||
|
RankedLogger,
|
||||||
|
extras,
|
||||||
|
instantiate_loggers,
|
||||||
|
log_hyperparameters,
|
||||||
|
task_wrapper,
|
||||||
|
)
|
||||||
|
|
||||||
|
log = RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
@task_wrapper
def evaluate(cfg: DictConfig) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Evaluates given checkpoint on a datamodule testset.

    This method is wrapped in optional @task_wrapper decorator, that controls the behavior during
    failure. Useful for multiruns, saving info about the crash, etc.

    :param cfg: DictConfig configuration composed by Hydra. Expected to carry
        `ckpt_path`, `data`, `model`, `trainer`, `environment`, and `strategy`
        sections (see `configs/eval.yaml`) — TODO confirm against the configs.
    :return: Tuple[dict, dict] with metrics and dict with all instantiated objects.
    """
    # Fail fast on a missing/invalid checkpoint before any expensive setup.
    assert cfg.ckpt_path, "Please provide a checkpoint path to evaluate!"
    assert os.path.exists(cfg.ckpt_path), f"Checkpoint path {cfg.ckpt_path} does not exist!"

    # set seed for random number generators in pytorch, numpy and python.random
    if cfg.get("seed"):
        L.seed_everything(cfg.seed, workers=True)

    log.info(
        f"Setting `float32_matmul_precision` to {cfg.model.cfg.task.float32_matmul_precision}."
    )
    torch.set_float32_matmul_precision(precision=cfg.model.cfg.task.float32_matmul_precision)

    log.info(f"Instantiating datamodule <{cfg.data._target_}>")
    # `stage="test"` restricts datamodule setup to the test split.
    datamodule: LightningDataModule = hydra.utils.instantiate(cfg.data, stage="test")

    # Establish model input arguments.
    # `open_dict` temporarily lifts OmegaConf struct mode so the resolved
    # `start_time` can be written back into the (otherwise read-only) config.
    with open_dict(cfg):
        if cfg.model.cfg.task.start_time == "auto":
            cfg.model.cfg.task.start_time = 1.0
        else:
            cfg.model.cfg.task.start_time = float(cfg.model.cfg.task.start_time)

    log.info(f"Instantiating model <{cfg.model._target_}>")
    model: LightningModule = hydra.utils.instantiate(cfg.model)

    log.info("Instantiating loggers...")
    logger: List[Logger] = instantiate_loggers(cfg.get("logger"))

    # Optional cluster-environment plugin (e.g. for SLURM/MPI launches);
    # a config without `_target_` leaves `plugins` as None.
    plugins = None
    if "_target_" in cfg.environment:
        log.info(f"Instantiating environment <{cfg.environment._target_}>")
        plugins: ClusterEnvironment = hydra.utils.instantiate(cfg.environment)

    # A strategy config with `_target_` overrides whatever string strategy
    # the trainer config may declare.
    strategy = getattr(cfg.trainer, "strategy", None)
    if "_target_" in cfg.strategy:
        log.info(f"Instantiating strategy <{cfg.strategy._target_}>")
        strategy: Strategy = hydra.utils.instantiate(cfg.strategy)
        # For strategies exposing a `mixed_precision` sub-config (e.g. FSDP),
        # resolve the dtype strings from the config into real torch dtypes.
        if (
            "mixed_precision" in strategy.__dict__
            and getattr(strategy, "mixed_precision", None) is not None
        ):
            strategy.mixed_precision.param_dtype = (
                resolve_omegaconf_variable(cfg.strategy.mixed_precision.param_dtype)
                if getattr(cfg.strategy.mixed_precision, "param_dtype", None) is not None
                else None
            )
            strategy.mixed_precision.reduce_dtype = (
                resolve_omegaconf_variable(cfg.strategy.mixed_precision.reduce_dtype)
                if getattr(cfg.strategy.mixed_precision, "reduce_dtype", None) is not None
                else None
            )
            strategy.mixed_precision.buffer_dtype = (
                resolve_omegaconf_variable(cfg.strategy.mixed_precision.buffer_dtype)
                if getattr(cfg.strategy.mixed_precision, "buffer_dtype", None) is not None
                else None
            )

    log.info(f"Instantiating trainer <{cfg.trainer._target_}>")
    # Only pass `strategy=` when one was resolved; passing `strategy=None`
    # explicitly is avoided so the Trainer keeps its own default.
    trainer: Trainer = (
        hydra.utils.instantiate(
            cfg.trainer,
            logger=logger,
            plugins=plugins,
            strategy=strategy,
        )
        if strategy is not None
        else hydra.utils.instantiate(
            cfg.trainer,
            logger=logger,
            plugins=plugins,
        )
    )

    object_dict = {
        "cfg": cfg,
        "datamodule": datamodule,
        "model": model,
        "logger": logger,
        "trainer": trainer,
    }

    if logger:
        log.info("Logging hyperparameters!")
        log_hyperparameters(object_dict)

    log.info("Starting testing!")
    trainer.test(model=model, datamodule=datamodule, ckpt_path=cfg.ckpt_path)

    # Metrics accumulated by the Trainer's callbacks during `test`.
    metric_dict = trainer.callback_metrics

    return metric_dict, object_dict
|
||||||
|
|
||||||
|
|
||||||
|
@hydra.main(version_base="1.3", config_path="../configs", config_name="eval.yaml")
def main(cfg: DictConfig) -> None:
    """Hydra entry point: evaluate a checkpoint using the composed config.

    :param cfg: DictConfig configuration composed by Hydra.
    """
    # Run optional pre-flight utilities first (tag prompting, config-tree
    # printing, warning filters, etc.), then hand off to the evaluation task.
    extras(cfg)
    evaluate(cfg)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Register project-specific OmegaConf resolvers before Hydra composes the
    # config, so interpolations that rely on them can be resolved.
    register_custom_omegaconf_resolvers()
    main()
|
||||||
0
flowdock/models/__init__.py
Normal file
0
flowdock/models/__init__.py
Normal file
0
flowdock/models/components/__init__.py
Normal file
0
flowdock/models/components/__init__.py
Normal file
452
flowdock/models/components/callbacks/ema.py
Normal file
452
flowdock/models/components/callbacks/ema.py
Normal file
@@ -0,0 +1,452 @@
|
|||||||
|
# Adapted from https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/common/callbacks/ema.py
|
||||||
|
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
import contextlib
|
||||||
|
import copy
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Iterable, Optional, Union
|
||||||
|
|
||||||
|
import lightning.pytorch as pl
|
||||||
|
import torch
|
||||||
|
from lightning.pytorch import Callback
|
||||||
|
from lightning.pytorch.callbacks import ModelCheckpoint
|
||||||
|
from lightning.pytorch.utilities.exceptions import MisconfigurationException
|
||||||
|
from lightning.pytorch.utilities.rank_zero import rank_zero_info
|
||||||
|
|
||||||
|
|
||||||
|
class EMA(Callback):
    """Implements Exponential Moving Averaging (EMA).

    When training a model, this callback will maintain moving averages of the trained parameters.
    When evaluating, we use the moving averages copy of the trained parameters.
    When saving, we save an additional set of parameters with the prefix `ema`.

    Args:
        decay: The exponential decay used when calculating the moving average. Has to be between 0-1.
        validate_original_weights: Validate the original weights, as opposed to the EMA weights.
        every_n_steps: Apply EMA every N steps.
        cpu_offload: Offload weights to CPU.
    """

    def __init__(
        self,
        decay: float,
        validate_original_weights: bool = False,
        every_n_steps: int = 1,
        cpu_offload: bool = False,
    ):
        """Validate and store the EMA configuration.

        :raises MisconfigurationException: If ``decay`` is outside [0, 1].
        """
        if not (0 <= decay <= 1):
            raise MisconfigurationException("EMA decay value must be between 0 and 1")
        self.decay = decay
        self.validate_original_weights = validate_original_weights
        self.every_n_steps = every_n_steps
        self.cpu_offload = cpu_offload

    def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        """Add the EMA optimizer to the trainer."""
        # EMA shadow weights live on the module's device unless CPU offload
        # was requested.
        device = pl_module.device if not self.cpu_offload else torch.device("cpu")
        # Wrap each optimizer in an EMAOptimizer; already-wrapped optimizers
        # are skipped so repeated fit calls do not double-wrap.
        trainer.optimizers = [
            EMAOptimizer(
                optim,
                device=device,
                decay=self.decay,
                every_n_steps=self.every_n_steps,
                current_step=trainer.global_step,
            )
            for optim in trainer.optimizers
            if not isinstance(optim, EMAOptimizer)
        ]

    def on_validation_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        """Swap the model weights with the EMA weights."""
        if self._should_validate_ema_weights(trainer):
            self.swap_model_weights(trainer)

    def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        """Swap the model weights back to the original weights."""
        if self._should_validate_ema_weights(trainer):
            self.swap_model_weights(trainer)

    def on_test_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        """Swap the model weights with the EMA weights."""
        if self._should_validate_ema_weights(trainer):
            self.swap_model_weights(trainer)

    def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        """Swap the model weights back to the original weights."""
        if self._should_validate_ema_weights(trainer):
            self.swap_model_weights(trainer)

    def _should_validate_ema_weights(self, trainer: "pl.Trainer") -> bool:
        """Check if the EMA weights should be validated."""
        return not self.validate_original_weights and self._ema_initialized(trainer)

    def _ema_initialized(self, trainer: "pl.Trainer") -> bool:
        """Check if the EMA weights have been initialized."""
        return any(isinstance(optimizer, EMAOptimizer) for optimizer in trainer.optimizers)

    def swap_model_weights(self, trainer: "pl.Trainer", saving_ema_model: bool = False):
        """Swaps the model weights with the EMA weights.

        :param trainer: The Lightning trainer whose optimizers hold the EMA state.
        :param saving_ema_model: Whether the swap is done for saving an EMA checkpoint.
        """
        for optimizer in trainer.optimizers:
            assert isinstance(optimizer, EMAOptimizer)
            optimizer.switch_main_parameter_weights(saving_ema_model)

    @contextlib.contextmanager
    def save_ema_model(self, trainer: "pl.Trainer"):
        """Saves an EMA copy of the model + EMA optimizer states for resume."""
        # Swap EMA weights in for the duration of the save, and always swap
        # back even if saving raises.
        self.swap_model_weights(trainer, saving_ema_model=True)
        try:
            yield
        finally:
            self.swap_model_weights(trainer, saving_ema_model=False)

    @contextlib.contextmanager
    def save_original_optimizer_state(self, trainer: "pl.Trainer"):
        """Save the original optimizer state."""
        # While this context is active, EMAOptimizer.state_dict() returns the
        # wrapped optimizer's plain state instead of the EMA-augmented one.
        for optimizer in trainer.optimizers:
            assert isinstance(optimizer, EMAOptimizer)
            optimizer.save_original_optimizer_state = True
        try:
            yield
        finally:
            for optimizer in trainer.optimizers:
                optimizer.save_original_optimizer_state = False

    def on_load_checkpoint(
        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", checkpoint: Dict[str, Any]
    ) -> None:
        """Load the EMA state from the checkpoint if it exists.

        :raises MisconfigurationException: If a companion ``-EMA`` checkpoint is
            expected but cannot be found next to the loaded checkpoint.
        """
        checkpoint_callback = trainer.checkpoint_callback

        # use the connector as NeMo calls the connector directly in the exp_manager when restoring.
        # NOTE(review): `connector` is assigned but never used below — likely a
        # leftover from the upstream NeMo implementation; confirm before removing.
        connector = trainer._checkpoint_connector
        # Replace connector._ckpt_path with below to avoid calling into lightning's protected API
        ckpt_path = trainer.ckpt_path

        if (
            ckpt_path
            and checkpoint_callback is not None
            and "EMA" in type(checkpoint_callback).__name__
        ):
            ext = checkpoint_callback.FILE_EXTENSION
            if ckpt_path.endswith(f"-EMA{ext}"):
                # Resuming directly from an EMA checkpoint: treat the loaded
                # EMA weights as the main weights and start a fresh EMA copy.
                rank_zero_info(
                    "loading EMA based weights. "
                    "The callback will treat the loaded EMA weights as the main weights"
                    " and create a new EMA copy when training."
                )
                return
            # Otherwise look for the companion "-EMA" file saved alongside the
            # regular checkpoint and restore optimizer states from it.
            ema_path = ckpt_path.replace(ext, f"-EMA{ext}")
            if os.path.exists(ema_path):
                ema_state_dict = torch.load(ema_path, map_location=torch.device("cpu"))

                checkpoint["optimizer_states"] = ema_state_dict["optimizer_states"]
                del ema_state_dict
                rank_zero_info("EMA state has been restored.")
            else:
                raise MisconfigurationException(
                    "Unable to find the associated EMA weights when re-loading, "
                    f"training will start with new EMA weights. Expected them to be at: {ema_path}",
                )
|
||||||
|
|
||||||
|
|
||||||
|
@torch.no_grad()
def ema_update(ema_model_tuple, current_model_tuple, decay):
    """In-place EMA step: ``ema <- decay * ema + (1 - decay) * current``.

    :param ema_model_tuple: Tuple of EMA shadow tensors (updated in place).
    :param current_model_tuple: Tuple of current model tensors (read only).
    :param decay: EMA decay factor in [0, 1].
    """
    # Scale each running average, then accumulate the matching current tensor.
    for shadow, live in zip(ema_model_tuple, current_model_tuple):
        shadow.mul_(decay).add_(live, alpha=(1.0 - decay))
|
||||||
|
|
||||||
|
|
||||||
|
def run_ema_update_cpu(ema_model_tuple, current_model_tuple, decay, pre_sync_stream=None):
    """Apply an EMA update on the CPU, draining pending GPU work first.

    :param ema_model_tuple: Tuple of EMA shadow tensors (updated in place).
    :param current_model_tuple: Tuple of current model tensors.
    :param decay: EMA decay factor in [0, 1].
    :param pre_sync_stream: Optional CUDA stream to synchronize before the
        update, ensuring the device-to-host copies have completed.
    """
    # Block until the producing stream finishes so the CPU sees final values.
    if pre_sync_stream is not None:
        pre_sync_stream.synchronize()
    ema_update(ema_model_tuple, current_model_tuple, decay)
|
||||||
|
|
||||||
|
|
||||||
|
class EMAOptimizer(torch.optim.Optimizer):
    r"""EMAOptimizer is a wrapper for torch.optim.Optimizer that computes Exponential Moving Average
    of parameters registered in the optimizer.

    EMA parameters are automatically updated after every step of the optimizer
    with the following formula:

        ema_weight = decay * ema_weight + (1 - decay) * training_weight

    To access EMA parameters, use ``swap_ema_weights()`` context manager to
    perform a temporary in-place swap of regular parameters with EMA
    parameters.

    Notes:
        - EMAOptimizer is not compatible with APEX AMP O2.

    Args:
        optimizer (torch.optim.Optimizer): optimizer to wrap
        device (torch.device): device for EMA parameters
        decay (float): decay factor

    Returns:
        returns an instance of torch.optim.Optimizer that computes EMA of
        parameters

    Example:
        model = Model().to(device)
        opt = torch.optim.Adam(model.parameters())

        opt = EMAOptimizer(opt, device, 0.9999)

        for epoch in range(epochs):
            training_loop(model, opt)

            regular_eval_accuracy = evaluate(model)

            with opt.swap_ema_weights():
                ema_eval_accuracy = evaluate(model)
    """

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        device: torch.device,
        decay: float = 0.9999,
        every_n_steps: int = 1,
        current_step: int = 0,
    ):
        # NOTE: torch.optim.Optimizer.__init__ is deliberately not called;
        # this wrapper delegates everything to the wrapped optimizer instead.
        self.optimizer = optimizer
        self.decay = decay
        self.device = device
        self.current_step = current_step
        self.every_n_steps = every_n_steps
        # When True, state_dict() returns the wrapped optimizer's plain state.
        self.save_original_optimizer_state = False

        # Lazy-initialized pieces: the CUDA stream is created on the first
        # step, and EMA shadow tensors are (re)built when params change.
        self.first_iteration = True
        self.rebuild_ema_params = True
        self.stream = None
        self.thread = None

        self.ema_params = ()
        self.in_saving_ema_model_context = False

    def all_parameters(self) -> Iterable[torch.Tensor]:
        """Return an iterator over all parameters in the optimizer."""
        return (param for group in self.param_groups for param in group["params"])

    def step(self, closure=None, grad_scaler=None, **kwargs):
        """Perform a single optimization step.

        Delegates the actual parameter update to the wrapped optimizer, then
        applies the EMA update every ``every_n_steps`` steps.
        """
        # Wait for any in-flight asynchronous EMA update before stepping.
        self.join()

        if self.first_iteration:
            # Only allocate a CUDA stream when parameters actually live on GPU.
            if any(p.is_cuda for p in self.all_parameters()):
                self.stream = torch.cuda.Stream()

            self.first_iteration = False

        if self.rebuild_ema_params:
            opt_params = list(self.all_parameters())

            # Append shadow copies only for newly added parameters, keeping
            # existing EMA state intact.
            self.ema_params += tuple(
                copy.deepcopy(param.data.detach()).to(self.device)
                for param in opt_params[len(self.ema_params) :]
            )
            self.rebuild_ema_params = False

        # Pass the grad scaler through only when the wrapped optimizer
        # advertises fused AMP-scaling support.
        if (
            getattr(self.optimizer, "_step_supports_amp_scaling", False)
            and grad_scaler is not None
        ):
            loss = self.optimizer.step(closure=closure, grad_scaler=grad_scaler)
        else:
            loss = self.optimizer.step(closure)

        if self._should_update_at_step():
            self.update()
        self.current_step += 1
        return loss

    def _should_update_at_step(self) -> bool:
        """Check if the EMA parameters should be updated at the current step."""
        return self.current_step % self.every_n_steps == 0

    @torch.no_grad()
    def update(self):
        """Update the EMA parameters.

        On CUDA the update runs on a side stream; on CPU it runs in a
        background thread after the device-to-host copies complete.
        """
        if self.stream is not None:
            # Ensure the optimizer step's writes are visible to the side stream.
            self.stream.wait_stream(torch.cuda.current_stream())

        with torch.cuda.stream(self.stream):
            # Asynchronously stage the current weights on the EMA device.
            current_model_state = tuple(
                param.data.to(self.device, non_blocking=True) for param in self.all_parameters()
            )

            if self.device.type == "cuda":
                ema_update(self.ema_params, current_model_state, self.decay)

        if self.device.type == "cpu":
            # Offload the EMA math to a thread; it synchronizes on the copy
            # stream before touching the staged tensors.
            self.thread = threading.Thread(
                target=run_ema_update_cpu,
                args=(
                    self.ema_params,
                    current_model_state,
                    self.decay,
                    self.stream,
                ),
            )
            self.thread.start()

    def swap_tensors(self, tensor1, tensor2):
        """Swaps the tensors in-place."""
        tmp = torch.empty_like(tensor1)
        tmp.copy_(tensor1)
        tensor1.copy_(tensor2)
        tensor2.copy_(tmp)

    def switch_main_parameter_weights(self, saving_ema_model: bool = False):
        """Switches the main parameter weights with the EMA weights.

        :param saving_ema_model: Marks that the swap is for saving an EMA
            checkpoint, which changes what ``state_dict`` reports as EMA params.
        """
        self.join()
        self.in_saving_ema_model_context = saving_ema_model
        for param, ema_param in zip(self.all_parameters(), self.ema_params):
            self.swap_tensors(param.data, ema_param)

    @contextlib.contextmanager
    def swap_ema_weights(self, enabled: bool = True):
        r"""A context manager to in-place swap regular parameters with EMA parameters. It swaps back
        to the original regular parameters on context manager exit.

        Args:
            enabled (bool): whether the swap should be performed
        """

        if enabled:
            self.switch_main_parameter_weights()
        try:
            yield
        finally:
            if enabled:
                self.switch_main_parameter_weights()

    def __getattr__(self, name):
        """Forward all other attribute calls to the optimizer."""
        # Called only when normal attribute lookup fails, so the wrapper's own
        # attributes set in __init__ take precedence.
        return getattr(self.optimizer, name)

    def join(self):
        """Wait for the update to complete."""
        if self.stream is not None:
            self.stream.synchronize()

        if self.thread is not None:
            self.thread.join()

    def state_dict(self):
        """Return the state dict for the optimizer."""
        self.join()

        if self.save_original_optimizer_state:
            return self.optimizer.state_dict()

        # if we are in the context of saving an EMA model, the EMA weights are in the modules' actual weights
        ema_params = (
            self.ema_params
            if not self.in_saving_ema_model_context
            else list(self.all_parameters())
        )
        state_dict = {
            "opt": self.optimizer.state_dict(),
            "ema": ema_params,
            "current_step": self.current_step,
            "decay": self.decay,
            "every_n_steps": self.every_n_steps,
        }
        return state_dict

    def load_state_dict(self, state_dict):
        """Load the state dict for the optimizer."""
        self.join()

        self.optimizer.load_state_dict(state_dict["opt"])
        # Deep-copy so the restored EMA tensors do not alias the checkpoint's.
        self.ema_params = tuple(
            param.to(self.device) for param in copy.deepcopy(state_dict["ema"])
        )
        self.current_step = state_dict["current_step"]
        self.decay = state_dict["decay"]
        self.every_n_steps = state_dict["every_n_steps"]
        self.rebuild_ema_params = False

    def add_param_group(self, param_group):
        """Add a param group to the optimizer."""
        self.optimizer.add_param_group(param_group)
        # Defer shadow-tensor creation for the new group to the next step().
        self.rebuild_ema_params = True
|
||||||
|
|
||||||
|
|
||||||
|
class EMAModelCheckpoint(ModelCheckpoint):
    """EMA version of ModelCheckpoint that saves EMA checkpoints as well.

    For every regular checkpoint written, a companion file with a ``-EMA``
    suffix (before the extension) is saved holding the EMA weights.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _ema_callback(self, trainer: "pl.Trainer") -> Optional[EMA]:
        """Returns the EMA callback if it exists."""
        # If multiple EMA callbacks were registered, the last one wins.
        ema_callback = None
        for callback in trainer.callbacks:
            if isinstance(callback, EMA):
                ema_callback = callback
        return ema_callback

    def _save_checkpoint(self, trainer: "pl.Trainer", filepath: str) -> None:
        """Saves the checkpoint file and the EMA checkpoint file if it exists."""
        ema_callback = self._ema_callback(trainer)
        if ema_callback is not None:
            # Save the regular checkpoint with the original (non-EMA)
            # optimizer state.
            with ema_callback.save_original_optimizer_state(trainer):
                super()._save_checkpoint(trainer, filepath)

            # save EMA copy of the model as well.
            with ema_callback.save_ema_model(trainer):
                filepath = self._ema_format_filepath(filepath)
                if self.verbose:
                    rank_zero_info(f"Saving EMA weights to separate checkpoint {filepath}")
                super()._save_checkpoint(trainer, filepath)
        else:
            super()._save_checkpoint(trainer, filepath)

    def _remove_checkpoint(self, trainer: "pl.Trainer", filepath: str) -> None:
        """Removes the checkpoint file and the EMA checkpoint file if it exists."""
        super()._remove_checkpoint(trainer, filepath)
        ema_callback = self._ema_callback(trainer)
        if ema_callback is not None:
            # remove EMA copy of the state dict as well.
            filepath = self._ema_format_filepath(filepath)
            super()._remove_checkpoint(trainer, filepath)

    def _ema_format_filepath(self, filepath: str) -> str:
        """Appends '-EMA' to the filepath."""
        return filepath.replace(self.FILE_EXTENSION, f"-EMA{self.FILE_EXTENSION}")

    def _has_ema_ckpts(self, checkpoints: Iterable[Path]) -> bool:
        """Checks if any of the checkpoints are EMA checkpoints."""
        return any(self._is_ema_filepath(checkpoint_path) for checkpoint_path in checkpoints)

    def _is_ema_filepath(self, filepath: Union[Path, str]) -> bool:
        """Checks if the filepath is an EMA checkpoint."""
        return str(filepath).endswith(f"-EMA{self.FILE_EXTENSION}")

    @property
    def _saved_checkpoint_paths(self) -> Iterable[Path]:
        """Returns all the saved checkpoint paths in the directory."""
        return Path(self.dirpath).rglob("*.ckpt")
|
||||||
857
flowdock/models/components/cpm.py
Normal file
857
flowdock/models/components/cpm.py
Normal file
@@ -0,0 +1,857 @@
|
|||||||
|
# Adapted from: https://github.com/zrqiao/NeuralPLexer
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Any, Dict, Optional, Tuple
|
||||||
|
from omegaconf import DictConfig
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.models.components.embedding import (
|
||||||
|
GaussianFourierEncoding1D,
|
||||||
|
RelativeGeometryEncoding,
|
||||||
|
)
|
||||||
|
from flowdock.models.components.hetero_graph import make_multi_relation_graph_batcher
|
||||||
|
from flowdock.models.components.modules import (
|
||||||
|
BiDirectionalTriangleAttention,
|
||||||
|
TransformerLayer,
|
||||||
|
)
|
||||||
|
from flowdock.utils import RankedLogger
|
||||||
|
from flowdock.utils.frame_utils import cartesian_to_internal, get_frame_matrix
|
||||||
|
from flowdock.utils.model_utils import GELUMLP
|
||||||
|
|
||||||
|
log = RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
STATE_DICT = Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
class ProtFormer(torch.nn.Module):
|
||||||
|
"""Protein relational reasoning with downsampled edges."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
dim: int,
|
||||||
|
pair_dim: int,
|
||||||
|
n_blocks: int = 4,
|
||||||
|
n_heads: int = 8,
|
||||||
|
dropout: float = 0.0,
|
||||||
|
):
|
||||||
|
"""Initialize the ProtFormer model."""
|
||||||
|
super().__init__()
|
||||||
|
self.dim = dim
|
||||||
|
self.pair_dim = pair_dim
|
||||||
|
self.n_heads = n_heads
|
||||||
|
self.n_blocks = n_blocks
|
||||||
|
self.time_encoding = GaussianFourierEncoding1D(16)
|
||||||
|
self.res_in_mlp = GELUMLP(dim + 32, dim, dropout=dropout)
|
||||||
|
self.chain_pos_encoding = GaussianFourierEncoding1D(self.pair_dim // 4)
|
||||||
|
|
||||||
|
self.rel_geom_enc = RelativeGeometryEncoding(15, self.pair_dim)
|
||||||
|
self.template_nenc = GELUMLP(64 + 37 * 3, self.dim, n_hidden_feats=128)
|
||||||
|
self.template_eenc = RelativeGeometryEncoding(15, self.pair_dim)
|
||||||
|
self.template_binding_site_enc = torch.nn.Linear(1, 64, bias=False)
|
||||||
|
self.pp_edge_embed = GELUMLP(
|
||||||
|
pair_dim + self.pair_dim // 4 * 2 + dim * 2,
|
||||||
|
self.pair_dim,
|
||||||
|
n_hidden_feats=dim,
|
||||||
|
dropout=dropout,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.graph_stacks = torch.nn.ModuleList(
|
||||||
|
[
|
||||||
|
TransformerLayer(
|
||||||
|
dim,
|
||||||
|
n_heads,
|
||||||
|
head_dim=pair_dim // n_heads,
|
||||||
|
edge_channels=pair_dim,
|
||||||
|
edge_update=True,
|
||||||
|
dropout=dropout,
|
||||||
|
)
|
||||||
|
for _ in range(self.n_blocks)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
self.ABab_mha = TransformerLayer(
|
||||||
|
pair_dim,
|
||||||
|
n_heads,
|
||||||
|
bidirectional=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.triangle_stacks = torch.nn.ModuleList(
|
||||||
|
[
|
||||||
|
BiDirectionalTriangleAttention(pair_dim, pair_dim // n_heads, n_heads)
|
||||||
|
for _ in range(self.n_blocks)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
self.graph_relations = [
|
||||||
|
(
|
||||||
|
"residue_to_residue",
|
||||||
|
"gather_idx_ab_a",
|
||||||
|
"gather_idx_ab_b",
|
||||||
|
"prot_res",
|
||||||
|
"prot_res",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"sampled_residue_to_sampled_residue",
|
||||||
|
"gather_idx_AB_a",
|
||||||
|
"gather_idx_AB_b",
|
||||||
|
"prot_res",
|
||||||
|
"prot_res",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
def compute_chain_pe(
|
||||||
|
self,
|
||||||
|
residue_index,
|
||||||
|
res_chain_index,
|
||||||
|
src_rid,
|
||||||
|
dst_rid,
|
||||||
|
):
|
||||||
|
"""Compute chain positional encoding for a pair of residues."""
|
||||||
|
chain_disp = residue_index[src_rid] - residue_index[dst_rid]
|
||||||
|
chain_rope = self.chain_pos_encoding(chain_disp.div(8).unsqueeze(-1)).div(
|
||||||
|
chain_disp.div(8).abs().add(1).unsqueeze(-1)
|
||||||
|
)
|
||||||
|
# Mask cross-chain entries
|
||||||
|
chain_mask = res_chain_index[src_rid] == res_chain_index[dst_rid]
|
||||||
|
chain_rope = chain_rope * chain_mask[..., None]
|
||||||
|
return chain_rope
|
||||||
|
|
||||||
|
def compute_chain_pair_pe(
|
||||||
|
self,
|
||||||
|
residue_index,
|
||||||
|
res_chain_index,
|
||||||
|
AB_broadcasted_rid,
|
||||||
|
ab_rid,
|
||||||
|
AB_broadcasted_cid,
|
||||||
|
ab_cid,
|
||||||
|
):
|
||||||
|
"""Compute chain positional encoding for a pair of residues."""
|
||||||
|
chain_disp_row = residue_index[AB_broadcasted_rid] - residue_index[ab_rid]
|
||||||
|
chain_disp_col = residue_index[AB_broadcasted_cid] - residue_index[ab_cid]
|
||||||
|
chain_disp = torch.stack([chain_disp_row, chain_disp_col], dim=-1)
|
||||||
|
chain_rope = self.chain_pos_encoding(chain_disp.div(8).unsqueeze(-1)).div(
|
||||||
|
chain_disp.div(8).abs().add(1).unsqueeze(-1)
|
||||||
|
)
|
||||||
|
# Mask cross-chain entries
|
||||||
|
chain_mask_row = res_chain_index[AB_broadcasted_rid] == res_chain_index[ab_rid]
|
||||||
|
chain_mask_col = res_chain_index[AB_broadcasted_cid] == res_chain_index[ab_cid]
|
||||||
|
chain_mask = torch.stack([chain_mask_row, chain_mask_col], dim=-1)
|
||||||
|
chain_rope = (chain_rope * chain_mask[..., None]).flatten(-2, -1)
|
||||||
|
return chain_rope
|
||||||
|
|
||||||
|
def eval_protein_template_encodings(self, batch, edge_idx, use_plddt=False):
|
||||||
|
"""Evaluate template encodings for protein residues."""
|
||||||
|
with torch.no_grad():
|
||||||
|
template_bb_coords = batch["features"]["apo_res_atom_positions"][:, :3]
|
||||||
|
template_bb_frames = get_frame_matrix(
|
||||||
|
template_bb_coords[:, 0, :],
|
||||||
|
template_bb_coords[:, 1, :],
|
||||||
|
template_bb_coords[:, 2, :],
|
||||||
|
)
|
||||||
|
# Add template local representations & lddt
|
||||||
|
template_local_coords = cartesian_to_internal(
|
||||||
|
batch["features"]["apo_res_atom_positions"],
|
||||||
|
template_bb_frames.unsqueeze(1),
|
||||||
|
)
|
||||||
|
template_local_coords[~batch["features"]["apo_res_atom_mask"].bool()] = 0
|
||||||
|
# if use_plddt:
|
||||||
|
# template_plddt_enc = F.one_hot(
|
||||||
|
# torch.bucketize(
|
||||||
|
# batch["features"]["apo_pLDDT"],
|
||||||
|
# torch.linspace(0, 1, 65, device=template_bb_coords.device)[:-1],
|
||||||
|
# right=True,
|
||||||
|
# )
|
||||||
|
# - 1,
|
||||||
|
# num_classes=64,
|
||||||
|
# )
|
||||||
|
# else:
|
||||||
|
# template_plddt_enc = torch.zeros(
|
||||||
|
# template_local_coords.shape[0], 64, device=template_bb_coords.device
|
||||||
|
# )
|
||||||
|
if self.training:
|
||||||
|
use_sidechain_coords = random.randint(0, 1) # nosec
|
||||||
|
template_local_coords = template_local_coords * use_sidechain_coords
|
||||||
|
# use_plddt_input = random.randint(0, 1)
|
||||||
|
# template_plddt_enc = template_plddt_enc * use_plddt_input
|
||||||
|
if "binding_site_mask" in batch["features"].keys():
|
||||||
|
# Externally-specified binding residue list
|
||||||
|
binding_site_enc = self.template_binding_site_enc(
|
||||||
|
batch["features"]["binding_site_mask"][:, None].float()
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
binding_site_enc = torch.zeros(
|
||||||
|
template_local_coords.shape[0], 64, device=template_bb_coords.device
|
||||||
|
)
|
||||||
|
template_nfeat = self.template_nenc(
|
||||||
|
torch.cat([template_local_coords.flatten(-2, -1), binding_site_enc], dim=-1)
|
||||||
|
)
|
||||||
|
template_efeat = self.template_eenc(template_bb_frames, edge_idx)
|
||||||
|
template_alignment_mask = batch["features"]["apo_res_alignment_mask"].float()
|
||||||
|
if self.training:
|
||||||
|
# template_alignment_mask = template_alignment_mask * use_template
|
||||||
|
nomasking_rate = random.randint(9, 10) / 10 # nosec
|
||||||
|
template_alignment_mask = template_alignment_mask * (
|
||||||
|
torch.rand_like(template_alignment_mask) < nomasking_rate
|
||||||
|
)
|
||||||
|
template_nfeat = template_nfeat * template_alignment_mask.unsqueeze(-1)
|
||||||
|
template_efeat = (
|
||||||
|
template_efeat
|
||||||
|
* template_alignment_mask[edge_idx[0]].unsqueeze(-1)
|
||||||
|
* template_alignment_mask[edge_idx[1]].unsqueeze(-1)
|
||||||
|
)
|
||||||
|
return template_nfeat, template_efeat
|
||||||
|
|
||||||
|
def forward(self, batch, **kwargs):
|
||||||
|
"""Forward pass of the ProtFormer model."""
|
||||||
|
return self.forward_prot_sample(batch, **kwargs)
|
||||||
|
|
||||||
|
    def forward_prot_sample(
        self,
        batch,
        embed_coords=True,
        in_attr_suffix="",
        out_attr_suffix="",
        use_template=False,
        use_plddt=False,
        **kwargs,
    ):
        """Forward pass of the ProtFormer model for a single protein sample.

        :param batch: Batch dict with `features`, `indexer`, and `metadata` sub-dicts.
        :param embed_coords: If False, zero out all coordinate-derived encodings
            (time encoding and relative geometry encoding), making the pass
            effectively sequence-only.
        :param in_attr_suffix: Unused here; kept for interface symmetry with
            `BindingFormer.forward`.
        :param out_attr_suffix: Suffix appended to the output feature keys.
        :param use_template: Whether to add apo-template encodings when the
            template features are present in the batch.
        :param use_plddt: Forwarded to `eval_protein_template_encodings`.
        :param kwargs: Ignored extra keyword arguments.
        :return: The same `batch` dict, updated in place with residue, pair, and
            patch-grid representations plus the patch-level indexers.
        """
        features = batch["features"]
        indexer = batch["indexer"]
        metadata = batch["metadata"]
        device = features["res_type"].device

        # Diffusion/flow timestep conditioning; zeroed when coordinates are not embedded.
        time_encoding = self.time_encoding(features["timestep_encoding_prot"])
        if not embed_coords:
            time_encoding = torch.zeros_like(time_encoding)

        # Residual update of the incoming residue embeddings with the time signal.
        residue_rep = (
            self.res_in_mlp(
                torch.cat(
                    [
                        features["res_embedding_in"],
                        time_encoding,
                    ],
                    dim=1,
                )
            )
            + features["res_embedding_in"]
        )
        batch_size = metadata["num_structid"]

        # Prepare indexers
        # Use max to ensure segmentation faults are 100% invoked
        # in case there are any bad indices
        # NOTE(review): the result is deliberately discarded — this is an eager
        # sanity check on the metadata, not a computation.
        max(metadata["num_a_per_sample"])
        n_protein_patches = batch["metadata"]["n_prot_patches_per_sample"]

        # Patch (pid) indices are shared between the row (a) and column (b) roles.
        indexer["gather_idx_pid_b"] = indexer["gather_idx_pid_a"]
        # Evaluate gather_idx_AB_a and gather_idx_AB_b
        # Assign a to rows and b to columns
        # Simple broadcasting for single-structure batches
        indexer["gather_idx_AB_a"] = (
            indexer["gather_idx_pid_a"]
            .view(batch_size, n_protein_patches)[:, :, None]
            .expand(-1, -1, n_protein_patches)
            .contiguous()
            .flatten()
        )
        indexer["gather_idx_AB_b"] = (
            indexer["gather_idx_pid_b"]
            .view(batch_size, n_protein_patches)[:, None, :]
            .expand(-1, n_protein_patches, -1)
            .contiguous()
            .flatten()
        )

        # Handle all batch offsets here
        graph_batcher = make_multi_relation_graph_batcher(self.graph_relations, indexer, metadata)
        merged_edge_idx = graph_batcher.collate_idx_list(indexer)

        # Backbone frames from the first three atom positions per residue
        # (presumably N, CA, C — confirm against the featurizer).
        input_protein_coords_padded = features["input_protein_coords"]
        backbone_frames = get_frame_matrix(
            input_protein_coords_padded[:, 0, :],
            input_protein_coords_padded[:, 1, :],
            input_protein_coords_padded[:, 2, :],
        )
        batch["features"]["backbone_frames"] = backbone_frames
        # Adding geometrical info to pair representations

        # Relative positional encoding within/between chains, per merged edge.
        chain_pe = self.compute_chain_pe(
            features["residue_index"],
            features["res_chain_id"],
            merged_edge_idx[0],
            merged_edge_idx[1],
        )
        geometry_pe = self.rel_geom_enc(backbone_frames, merged_edge_idx)
        if not embed_coords:
            geometry_pe = torch.zeros_like(geometry_pe)
        # Initial edge representations: geometry + chain PE + both endpoint residues.
        merged_edge_reps = self.pp_edge_embed(
            torch.cat(
                [
                    geometry_pe,
                    chain_pe,
                    residue_rep[merged_edge_idx[0]],
                    residue_rep[merged_edge_idx[1]],
                ],
                dim=-1,
            )
        )
        # Optionally inject apo-template node/edge encodings (additive residuals).
        if use_template and "apo_res_atom_positions" in features.keys():
            (
                template_res_encodings,
                template_geom_encodings,
            ) = self.eval_protein_template_encodings(batch, merged_edge_idx, use_plddt=use_plddt)
            residue_rep = residue_rep + template_res_encodings
            merged_edge_reps = merged_edge_reps + template_geom_encodings
        edge_reps = graph_batcher.offload_edge_attr(merged_edge_reps)

        node_reps = {"prot_res": residue_rep}

        gather_idx_res_protpatch = indexer["gather_idx_a_pid"]
        # Pointer: AB->AB, ab->AB
        # Maps each residue-residue (ab) edge to its flat patch-grid (AB) cell.
        gather_idx_ab_AB = (
            indexer["gather_idx_ab_structid"] * n_protein_patches**2
            + (gather_idx_res_protpatch % n_protein_patches)[indexer["gather_idx_ab_a"]]
            * n_protein_patches
            + (gather_idx_res_protpatch % n_protein_patches)[indexer["gather_idx_ab_b"]]
        )

        # Intertwine graph iterations and triangle iterations
        for block_id in range(self.n_blocks):
            # Communicate between atomistic and patch resolutions
            # Up-sampling for interface edge embeddings
            rec_pair_rep = edge_reps["residue_to_residue"]
            AB_grid_attr_flat = edge_reps["sampled_residue_to_sampled_residue"]
            # Upper-left block: intra-window visual-attention
            # Cross-attention between random and grid edges
            rec_pair_rep, AB_grid_attr_flat = self.ABab_mha(
                rec_pair_rep,
                AB_grid_attr_flat,
                (
                    torch.arange(metadata["num_ab"], device=device),
                    gather_idx_ab_AB,
                ),
            )
            AB_grid_attr = AB_grid_attr_flat.view(
                batch_size,
                n_protein_patches,
                n_protein_patches,
                self.pair_dim,
            )

            # Inter-patch triangle attentions, refining intermolecular edges
            _, AB_grid_attr = self.triangle_stacks[block_id](
                AB_grid_attr,
                AB_grid_attr,
                AB_grid_attr.unsqueeze(-4),
            )

            # Transfer grid-formatted representations to edges
            edge_reps["residue_to_residue"] = rec_pair_rep
            edge_reps["sampled_residue_to_sampled_residue"] = AB_grid_attr.flatten(0, 2)
            merged_node_reps = graph_batcher.collate_node_attr(node_reps)
            merged_edge_reps = graph_batcher.collate_edge_attr(edge_reps)

            # Graph transformer iteration
            _, merged_node_reps, merged_edge_reps = self.graph_stacks[block_id](
                merged_node_reps,
                merged_node_reps,
                merged_edge_idx,
                merged_edge_reps,
            )
            node_reps = graph_batcher.offload_node_attr(merged_node_reps)
            edge_reps = graph_batcher.offload_edge_attr(merged_edge_reps)

        # Publish outputs under suffixed keys; downstream stacks (e.g.
        # BindingFormer) read them back via `in_attr_suffix`.
        batch["features"][f"rec_res_attr{out_attr_suffix}"] = node_reps["prot_res"]
        batch["features"][f"res_res_pair_attr{out_attr_suffix}"] = edge_reps["residue_to_residue"]
        batch["features"][f"res_res_grid_attr_flat{out_attr_suffix}"] = edge_reps[
            "sampled_residue_to_sampled_residue"
        ]
        batch["indexer"]["gather_idx_AB_a"] = indexer["gather_idx_AB_a"]
        batch["indexer"]["gather_idx_AB_b"] = indexer["gather_idx_AB_b"]
        batch["indexer"]["gather_idx_ab_AB"] = gather_idx_ab_AB
        return batch
|
||||||
|
|
||||||
|
|
||||||
|
class BindingFormer(ProtFormer):
    """Edge inference on protein-ligand graphs.

    Extends `ProtFormer` with ligand "triplet" (lig_trp) nodes and the
    protein-ligand relations required to refine residue-ligand pair
    representations via interleaved triangle- and graph-attention blocks.
    """

    def __init__(
        self,
        dim: int,
        pair_dim: int,
        n_blocks: int = 4,
        n_heads: int = 8,
        n_ligand_patches: int = 16,
        dropout: float = 0.0,
    ):
        """Initialize the BindingFormer model.

        :param dim: Node (residue / ligand triplet) feature dimension.
        :param pair_dim: Pair/edge feature dimension.
        :param n_blocks: Number of interleaved triangle + graph attention blocks.
        :param n_heads: Number of attention heads.
        :param n_ligand_patches: Number of sampled ligand patches per sample.
        :param dropout: Dropout rate for the protein-ligand edge MLP.
        """
        super().__init__(
            dim,
            pair_dim,
            n_blocks,
            n_heads,
            dropout,
        )
        self.dim = dim
        self.n_heads = n_heads
        self.n_blocks = n_blocks
        self.n_ligand_patches = n_ligand_patches
        # Embeds concatenated (residue, ligand-triplet) node pairs into pair space.
        self.pl_edge_embed = GELUMLP(dim * 2, self.pair_dim, n_hidden_feats=dim, dropout=dropout)
        # Bidirectional cross-attention between patch-level (AJ) and
        # residue-level (aJ) protein-ligand grids.
        self.AaJ_mha = TransformerLayer(pair_dim, n_heads, bidirectional=True)

        # Relation table consumed by the multi-relation graph batcher:
        # (relation name, source index key, destination index key,
        #  source node type, destination node type).
        self.graph_relations = [
            (
                "residue_to_residue",
                "gather_idx_ab_a",
                "gather_idx_ab_b",
                "prot_res",
                "prot_res",
            ),
            (
                "sampled_residue_to_sampled_residue",
                "gather_idx_AB_a",
                "gather_idx_AB_b",
                "prot_res",
                "prot_res",
            ),
            (
                "sampled_residue_to_sampled_lig_triplet",
                "gather_idx_AJ_a",
                "gather_idx_AJ_J",
                "prot_res",
                "lig_trp",
            ),
            (
                "sampled_lig_triplet_to_sampled_residue",
                "gather_idx_AJ_J",
                "gather_idx_AJ_a",
                "lig_trp",
                "prot_res",
            ),
            (
                "residue_to_sampled_lig_triplet",
                "gather_idx_aJ_a",
                "gather_idx_aJ_J",
                "prot_res",
                "lig_trp",
            ),
            (
                "sampled_lig_triplet_to_residue",
                "gather_idx_aJ_J",
                "gather_idx_aJ_a",
                "lig_trp",
                "prot_res",
            ),
            (
                "sampled_lig_triplet_to_sampled_lig_triplet",
                "gather_idx_IJ_I",
                "gather_idx_IJ_J",
                "lig_trp",
                "lig_trp",
            ),
        ]

    def forward(
        self,
        batch,
        observed_block_contacts=None,
        in_attr_suffix="",
        out_attr_suffix="",
    ):
        """Forward pass of the BindingFormer model.

        :param batch: Batch dict with `features`, `indexer`, `metadata`, and `misc`;
            the input representations are read from keys suffixed with `in_attr_suffix`
            (typically written by `ProtFormer.forward_prot_sample`).
        :param observed_block_contacts: Optional tensor added to the zero-initialized
            patch-level protein-ligand (AJ) grid as generative contact feedback.
        :param in_attr_suffix: Suffix of the input feature keys.
        :param out_attr_suffix: Suffix appended to the output feature keys.
        :return: The same `batch` dict, updated in place with refined node, pair,
            and grid representations.
        """
        features = batch["features"]
        indexer = batch["indexer"]
        metadata = batch["metadata"]
        device = features["res_type"].device
        # Synchronize with a language model
        residue_rep = features[f"rec_res_attr{in_attr_suffix}"]
        rec_pair_rep = features[f"res_res_pair_attr{in_attr_suffix}"]
        # Inherit the last-layer pair representations from protein encoder
        AB_grid_attr_flat = features[f"res_res_grid_attr_flat{in_attr_suffix}"]

        # Prepare indexers
        batch_size = metadata["num_structid"]
        n_a_per_sample = max(metadata["num_a_per_sample"])
        n_protein_patches = batch["metadata"]["n_prot_patches_per_sample"]

        if not batch["misc"]["protein_only"]:
            n_ligand_patches = max(metadata["num_I_per_sample"])
            # NOTE(review): result discarded — eager sanity check on the metadata only.
            max(metadata["num_molid_per_sample"])
            lig_frame_rep = features[f"lig_trp_attr{in_attr_suffix}"]
            UI_grid_attr = features["lig_af_grid_attr_projected"]
            # Symmetrize the ligand patch-patch grid.
            IJ_grid_attr = (UI_grid_attr + UI_grid_attr.transpose(1, 2)) / 2

            # Dense residue x ligand-patch edge features from endpoint node reps.
            aJ_grid_attr = self.pl_edge_embed(
                torch.cat(
                    [
                        residue_rep.view(batch_size, n_a_per_sample, self.dim)[:, :, None].expand(
                            -1, -1, n_ligand_patches, -1
                        ),
                        lig_frame_rep.view(batch_size, n_ligand_patches, self.dim)[
                            :, None, :
                        ].expand(-1, n_a_per_sample, -1, -1),
                    ],
                    dim=-1,
                )
            )
            AJ_grid_attr = IJ_grid_attr.new_zeros(
                batch_size, n_protein_patches, n_ligand_patches, self.pair_dim
            )
            gather_idx_I_I = torch.arange(
                batch_size * n_ligand_patches, device=AJ_grid_attr.device
            )
            gather_idx_a_a = torch.arange(batch_size * n_a_per_sample, device=AJ_grid_attr.device)
            # Note: off-diagonal (AJ) blocks are zero-initialized in the prior stack
            indexer["gather_idx_IJ_I"] = (
                gather_idx_I_I.view(batch_size, n_ligand_patches)[:, :, None]
                .expand(-1, -1, n_ligand_patches)
                .contiguous()
                .flatten()
            )
            indexer["gather_idx_IJ_J"] = (
                gather_idx_I_I.view(batch_size, n_ligand_patches)[:, None, :]
                .expand(-1, n_ligand_patches, -1)
                .contiguous()
                .flatten()
            )
            indexer["gather_idx_AJ_a"] = (
                indexer["gather_idx_pid_a"]
                .view(batch_size, n_protein_patches)[:, :, None]
                .expand(-1, -1, n_ligand_patches)
                .contiguous()
                .flatten()
            )
            indexer["gather_idx_AJ_J"] = (
                gather_idx_I_I.view(batch_size, n_ligand_patches)[:, None, :]
                .expand(-1, n_protein_patches, -1)
                .contiguous()
                .flatten()
            )
            indexer["gather_idx_aJ_a"] = (
                gather_idx_a_a.view(batch_size, n_a_per_sample)[:, :, None]
                .expand(-1, -1, n_ligand_patches)
                .contiguous()
                .flatten()
            )
            indexer["gather_idx_aJ_J"] = (
                gather_idx_I_I.view(batch_size, n_ligand_patches)[:, None, :]
                .expand(-1, n_a_per_sample, -1)
                .contiguous()
                .flatten()
            )
            batch["indexer"] = indexer

            if observed_block_contacts is not None:
                # Generative feedback from block one-hot sampling
                # AJ_grid_attr = (
                #     AJ_grid_attr
                #     + observed_block_contacts.transpose(1, 2)
                #     .contiguous()
                #     .flatten(0, 1)[indexer["gather_idx_I_molid"]]
                #     .view(batch_size, n_ligand_patches, n_protein_patches, -1)
                #     .transpose(1, 2)
                #     .contiguous()
                # )
                AJ_grid_attr = AJ_grid_attr + observed_block_contacts

            graph_batcher = make_multi_relation_graph_batcher(
                self.graph_relations, indexer, metadata
            )
            merged_edge_idx = graph_batcher.collate_idx_list(indexer)
            node_reps = {
                "prot_res": residue_rep,
                "lig_trp": lig_frame_rep,
            }
            # Forward and reverse protein-ligand relations share the same
            # (symmetric) initial grid attributes.
            edge_reps = {
                "residue_to_residue": rec_pair_rep,
                "sampled_residue_to_sampled_residue": AB_grid_attr_flat,
                "sampled_lig_triplet_to_sampled_residue": AJ_grid_attr.flatten(0, 2),
                "sampled_residue_to_sampled_lig_triplet": AJ_grid_attr.flatten(0, 2),
                "sampled_lig_triplet_to_residue": aJ_grid_attr.flatten(0, 2),
                "residue_to_sampled_lig_triplet": aJ_grid_attr.flatten(0, 2),
                "sampled_lig_triplet_to_sampled_lig_triplet": IJ_grid_attr.flatten(0, 2),
            }
            edge_reps = graph_batcher.zero_pad_edge_attr(edge_reps, self.dim, device)
        else:
            # Protein-only fallback: restrict to the first two (protein-protein)
            # relations declared in `self.graph_relations`.
            graph_batcher = make_multi_relation_graph_batcher(
                self.graph_relations[:2], indexer, metadata
            )
            merged_edge_idx = graph_batcher.collate_idx_list(indexer)

            node_reps = {
                "prot_res": residue_rep,
            }
            edge_reps = {
                "residue_to_residue": rec_pair_rep,
                "sampled_residue_to_sampled_residue": AB_grid_attr_flat,
            }
            edge_reps = graph_batcher.zero_pad_edge_attr(edge_reps, self.dim, device)

        # Intertwine graph iterations and triangle iterations
        gather_idx_res_protpatch = indexer["gather_idx_a_pid"]
        for block_id in range(self.n_blocks):
            # Communicate between atomistic and patch resolutions
            # Up-sampling for interface edge embeddings
            rec_pair_rep = edge_reps["residue_to_residue"]
            AB_grid_attr_flat = edge_reps["sampled_residue_to_sampled_residue"]
            AB_grid_attr = AB_grid_attr_flat.view(
                batch_size,
                n_protein_patches,
                n_protein_patches,
                self.pair_dim,
            )

            if not batch["misc"]["protein_only"]:
                # Symmetrize off-diagonal blocks
                AJ_grid_attr_flat_ = (
                    edge_reps["sampled_residue_to_sampled_lig_triplet"]
                    + edge_reps["sampled_lig_triplet_to_sampled_residue"]
                ) / 2
                AJ_grid_attr = AJ_grid_attr_flat_.contiguous().view(
                    batch_size, n_protein_patches, n_ligand_patches, -1
                )
                aJ_grid_attr_flat_ = (
                    edge_reps["residue_to_sampled_lig_triplet"]
                    + edge_reps["sampled_lig_triplet_to_residue"]
                ) / 2
                aJ_grid_attr = aJ_grid_attr_flat_.contiguous().view(
                    batch_size, n_a_per_sample, n_ligand_patches, -1
                )
                IJ_grid_attr = (
                    edge_reps["sampled_lig_triplet_to_sampled_lig_triplet"]
                    .contiguous()
                    .view(batch_size, n_ligand_patches, n_ligand_patches, -1)
                )
                # Cross-attention between patch-level (AJ) and residue-level (aJ)
                # protein-ligand grids, keyed by residue -> patch membership.
                AJ_grid_attr_temp_, aJ_grid_attr_temp_ = self.AaJ_mha(
                    AJ_grid_attr.flatten(0, 1),
                    aJ_grid_attr.flatten(0, 1),
                    (
                        gather_idx_res_protpatch,
                        torch.arange(gather_idx_res_protpatch.shape[0], device=device),
                    ),
                )
                AJ_grid_attr = AJ_grid_attr_temp_.contiguous().view(
                    batch_size, n_protein_patches, n_ligand_patches, -1
                )
                aJ_grid_attr = aJ_grid_attr_temp_.contiguous().view(
                    batch_size, n_a_per_sample, n_ligand_patches, -1
                )
                # Assemble the joint (protein + ligand) patch grid:
                # [[AB, AJ], [AJ^T, IJ]].
                merged_grid_rep = torch.cat(
                    [
                        torch.cat([AB_grid_attr, AJ_grid_attr], dim=2),
                        torch.cat([AJ_grid_attr.transpose(1, 2), IJ_grid_attr], dim=2),
                    ],
                    dim=1,
                )
            else:
                merged_grid_rep = AB_grid_attr

            # Inter-patch triangle attentions
            _, merged_grid_rep = self.triangle_stacks[block_id](
                merged_grid_rep,
                merged_grid_rep,
                merged_grid_rep.unsqueeze(-4),
            )

            # Dis-assemble the grid representation
            AB_grid_attr = merged_grid_rep[:, :n_protein_patches, :n_protein_patches]
            # Transfer grid-formatted representations to edges
            edge_reps["residue_to_residue"] = rec_pair_rep
            edge_reps["sampled_residue_to_sampled_residue"] = AB_grid_attr.flatten(0, 2)

            if not batch["misc"]["protein_only"]:
                AJ_grid_attr = merged_grid_rep[
                    :, :n_protein_patches, n_protein_patches:
                ].contiguous()
                IJ_grid_attr = merged_grid_rep[
                    :, n_protein_patches:, n_protein_patches:
                ].contiguous()

                edge_reps["sampled_residue_to_sampled_lig_triplet"] = AJ_grid_attr.flatten(0, 2)
                edge_reps["sampled_lig_triplet_to_sampled_residue"] = AJ_grid_attr.flatten(0, 2)
                edge_reps["residue_to_sampled_lig_triplet"] = aJ_grid_attr.flatten(0, 2)
                edge_reps["sampled_lig_triplet_to_residue"] = aJ_grid_attr.flatten(0, 2)
                edge_reps["sampled_lig_triplet_to_sampled_lig_triplet"] = IJ_grid_attr.flatten(
                    0, 2
                )
            merged_node_reps = graph_batcher.collate_node_attr(node_reps)
            merged_edge_reps = graph_batcher.collate_edge_attr(edge_reps)

            # Graph transformer iteration
            _, merged_node_reps, merged_edge_reps = self.graph_stacks[block_id](
                merged_node_reps,
                merged_node_reps,
                merged_edge_idx,
                merged_edge_reps,
            )
            node_reps = graph_batcher.offload_node_attr(merged_node_reps)
            edge_reps = graph_batcher.offload_edge_attr(merged_edge_reps)

        # Publish outputs under suffixed keys for downstream consumers.
        batch["features"][f"rec_res_attr{out_attr_suffix}"] = node_reps["prot_res"]
        batch["features"][f"res_res_pair_attr{out_attr_suffix}"] = edge_reps["residue_to_residue"]
        batch["features"][f"res_res_grid_attr_flat{out_attr_suffix}"] = edge_reps[
            "sampled_residue_to_sampled_residue"
        ]
        if not batch["misc"]["protein_only"]:
            batch["features"][f"lig_trp_attr{out_attr_suffix}"] = node_reps["lig_trp"]
            batch["features"][f"res_trp_grid_attr_flat{out_attr_suffix}"] = edge_reps[
                "sampled_residue_to_sampled_lig_triplet"
            ]
            batch["features"][f"res_trp_pair_attr_flat{out_attr_suffix}"] = edge_reps[
                "residue_to_sampled_lig_triplet"
            ]
            batch["features"][f"trp_trp_grid_attr_flat{out_attr_suffix}"] = edge_reps[
                "sampled_lig_triplet_to_sampled_lig_triplet"
            ]
            batch["metadata"]["n_lig_patches_per_sample"] = n_ligand_patches
        return batch
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_protein_encoder(
    protein_model_cfg: DictConfig,
    task_cfg: DictConfig,
    state_dict: Optional[STATE_DICT] = None,
) -> Tuple[torch.nn.Module, torch.nn.Module]:
    """Instantiates a ProtFormer model for protein encoding.

    :param protein_model_cfg: Protein model configuration.
    :param task_cfg: Task configuration.
    :param state_dict: Optional (potentially-pretrained) state dictionary.
    :return: Protein encoder model and residue input projector.
    """

    def _submodule_state_dict(prefix: str) -> STATE_DICT:
        """Collect `state_dict` entries under `prefix` with the prefix stripped."""
        return {
            ".".join(k.split(".")[1:]): v
            for k, v in state_dict.items()
            if k.startswith(prefix)
        }

    node_dim = protein_model_cfg.residue_dim
    model = ProtFormer(
        node_dim,
        protein_model_cfg.pair_dim,
        n_heads=protein_model_cfg.n_heads,
        n_blocks=protein_model_cfg.n_encoder_stacks,
        dropout=task_cfg.dropout,
    )
    if protein_model_cfg.use_esm_embedding:
        # Project protein sequence language model embeddings to the residue dimension.
        res_in_projector = torch.nn.Linear(protein_model_cfg.plm_embed_dim, node_dim, bias=False)
    else:
        # Project one-hot amino acid types to the residue dimension.
        res_in_projector = torch.nn.Linear(
            protein_model_cfg.n_aa_types,
            node_dim,
            bias=False,
        )
    if protein_model_cfg.from_pretrained and state_dict is not None:
        try:
            # NOTE: we must avoid enforcing strict key matching
            # due to the (new) weights `template_binding_site_enc.weight`
            model.load_state_dict(
                _submodule_state_dict("protein_encoder"),
                strict=False,
            )
            log.info("Successfully loaded pretrained protein encoder weights.")
        except Exception as e:
            log.warning(f"Skipping loading of pretrained protein encoder weights due to: {e}.")

        try:
            # The pretrained projector is named differently depending on the
            # embedding type used at pretraining time.
            res_in_projector.load_state_dict(
                _submodule_state_dict(
                    "plm_adapter"
                    if protein_model_cfg.use_esm_embedding
                    else "res_in_projector"
                )
            )
            log.info("Successfully loaded pretrained protein input projector weights.")
        except Exception as e:
            log.warning(
                f"Skipping loading of pretrained protein input projector weights due to: {e}."
            )
    return model, res_in_projector
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_pl_contact_stack(
    protein_model_cfg: DictConfig,
    ligand_model_cfg: DictConfig,
    contact_cfg: DictConfig,
    task_cfg: DictConfig,
    state_dict: Optional[STATE_DICT] = None,
) -> Tuple[torch.nn.Module, torch.nn.Module, torch.nn.Module, torch.nn.Module]:
    """Instantiates a BindingFormer model for protein-ligand contact prediction.

    :param protein_model_cfg: Protein model configuration.
    :param ligand_model_cfg: Ligand model configuration.
    :param contact_cfg: Contact prediction configuration.
    :param task_cfg: Task configuration.
    :param state_dict: Optional (potentially-pretrained) state dictionary.
    :return: Protein-ligand contact prediction model, contact code embedding, distance bins, and
        distogram head.
    """

    def _submodule_state_dict(prefix: str) -> STATE_DICT:
        """Collect `state_dict` entries under `prefix` with the prefix stripped."""
        return {
            ".".join(k.split(".")[1:]): v
            for k, v in state_dict.items()
            if k.startswith(prefix)
        }

    # BUGFIX: an explicit `dropout: 0.0` in `contact_cfg` was previously
    # treated as unset (falsy) and silently replaced by the task-level default;
    # test for presence instead of truthiness.
    contact_dropout = contact_cfg.get("dropout")
    pl_contact_stack = BindingFormer(
        protein_model_cfg.residue_dim,
        protein_model_cfg.pair_dim,
        n_heads=protein_model_cfg.n_heads,
        n_blocks=contact_cfg.n_stacks,
        n_ligand_patches=ligand_model_cfg.n_patches,
        dropout=contact_dropout if contact_dropout is not None else task_cfg.dropout,
    )
    # Two-code (contact / no-contact) embedding in pair-feature space.
    contact_code_embed = torch.nn.Embedding(2, protein_model_cfg.pair_dim)
    # Distogram heads
    # 32 fixed (non-trainable) distance bin centers spanning 2-22.
    dist_bins = torch.nn.Parameter(torch.linspace(2, 22, 32), requires_grad=False)
    dgram_head = GELUMLP(
        protein_model_cfg.pair_dim,
        32,
        n_hidden_feats=protein_model_cfg.pair_dim,
        zero_init=True,
    )
    if contact_cfg.from_pretrained and state_dict is not None:
        try:
            # NOTE: we must avoid enforcing strict key matching
            # due to the (new) weights `template_binding_site_enc.weight`
            pl_contact_stack.load_state_dict(
                _submodule_state_dict("pl_contact_stack"),
                strict=False,
            )
            log.info("Successfully loaded pretrained protein-ligand contact prediction weights.")
        except Exception as e:
            log.warning(
                f"Skipping loading of pretrained protein-ligand contact prediction weights due to: {e}."
            )

        try:
            contact_code_embed.load_state_dict(_submodule_state_dict("contact_code_embed"))
            log.info("Successfully loaded pretrained contact code embedding weights.")
        except Exception as e:
            log.warning(
                f"Skipping loading of pretrained contact code embedding weights due to: {e}."
            )

        try:
            dgram_head.load_state_dict(_submodule_state_dict("dgram_head"))
            log.info("Successfully loaded pretrained distogram head weights.")
        except Exception as e:
            log.warning(f"Skipping loading of pretrained distogram head weights due to: {e}.")
    return pl_contact_stack, contact_code_embed, dist_bins, dgram_head
|
||||||
105
flowdock/models/components/embedding.py
Normal file
105
flowdock/models/components/embedding.py
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
# Adapted from: https://github.com/zrqiao/NeuralPLexer
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Tuple
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.utils.frame_utils import RigidTransform
|
||||||
|
|
||||||
|
|
||||||
|
class GaussianFourierEncoding1D(torch.nn.Module):
    """Random Fourier feature encoding for scalar (1D) inputs.

    Frequencies are drawn once from a zero-mean Gaussian scaled by pi and kept
    as trainable parameters; the forward pass returns the concatenated sine
    and cosine responses along the last dimension.
    """

    def __init__(
        self,
        n_basis: int,
        eps: float = 1e-2,
    ):
        """Initialize Gaussian Fourier Encoding.

        :param n_basis: Number of random Fourier frequencies.
        :param eps: Numerical-stability constant (stored; not used by `forward`).
        """
        super().__init__()
        self.eps = eps
        # One Gaussian-sampled frequency per basis function, scaled by pi.
        self.fourier_freqs = torch.nn.Parameter(torch.randn(n_basis) * math.pi)

    def forward(
        self,
        x: torch.Tensor,
    ):
        """Return `[sin(f * x), cos(f * x)]` concatenated on the last axis.

        :param x: Input tensor; broadcast against the frequency vector
            (a trailing dimension of size 1 yields `n_basis` phases).
        :return: Tensor whose last dimension is twice that of `f * x`.
        """
        phases = self.fourier_freqs.mul(x)
        return torch.cat([phases.sin(), phases.cos()], dim=-1)
|
||||||
|
|
||||||
|
|
||||||
|
class GaussianRBFEncoding1D(torch.nn.Module):
    """Gaussian radial-basis-function encoding for scalar (1D) inputs.

    Places `n_basis` fixed centers evenly on `[0, x_max]` and encodes each
    input value by its Gaussian response to every center.
    """

    def __init__(
        self,
        n_basis: int,
        x_max: float,
        sigma: float = 1.0,
    ):
        """Initialize Gaussian RBF Encoding.

        :param n_basis: Number of RBF centers.
        :param x_max: Upper end of the (inclusive) center range.
        :param sigma: Width of each Gaussian basis function.
        """
        super().__init__()
        self.sigma = sigma
        # Centers are registered as a non-trainable parameter so they follow
        # the module across devices.
        centers = torch.linspace(0, x_max, n_basis)
        self.rbf_centers = torch.nn.Parameter(centers, requires_grad=False)

    def forward(
        self,
        x: torch.Tensor,
    ):
        """Return `exp(-((x - c) / sigma)^2)` for every center `c`.

        :param x: Input tensor; a trailing axis of size `n_basis` is appended.
        :return: RBF responses of shape `x.shape + (n_basis,)`.
        """
        scaled_dev = (x.unsqueeze(-1) - self.rbf_centers) / self.sigma
        return torch.exp(-scaled_dev.square())
|
||||||
|
|
||||||
|
|
||||||
|
class RelativeGeometryEncoding(torch.nn.Module):
    """Compute radial basis functions and inter-residue/pseudo-residue orientations."""

    def __init__(self, n_basis: int, out_dim: int, d_max: float = 20.0):
        """Initialize RelativeGeometryEncoding.

        :param n_basis: Number of distance RBF basis functions.
        :param out_dim: Output feature dimension of the linear projection.
        :param d_max: Largest distance covered by the RBF centers.
        """
        super().__init__()
        self.rbf_encoding = GaussianRBFEncoding1D(n_basis, d_max)
        # n_basis distance features + 2 x 3 direction components + 9 rotation entries.
        self.rel_geom_projector = torch.nn.Linear(n_basis + 15, out_dim, bias=False)

    def forward(self, frames: RigidTransform, merged_edge_idx: Tuple[torch.Tensor, torch.Tensor]):
        """Project pairwise geometric features for every edge in `merged_edge_idx`.

        :param frames: Rigid frames exposing translations `t` and rotations `R`.
        :param merged_edge_idx: (source, destination) index tensors of equal length.
        :return: Edge features of shape `(num_edges, out_dim)`.
        """
        src, dst = merged_edge_idx
        frame_t, frame_R = frames.t, frames.R
        displacement = frame_t[dst] - frame_t[src]
        pair_dists = displacement.norm(dim=-1)
        # Soft normalization sqrt(d^2 + 1) avoids division blow-up near d = 0.
        soft_norm = pair_dists.square().add(1).sqrt().unsqueeze(-1)
        # Displacement expressed in each endpoint's local frame.
        pair_directions_l = (
            torch.matmul(displacement.unsqueeze(-2), frame_R[src]).squeeze(-2) / soft_norm
        )
        pair_directions_r = (
            torch.matmul((-displacement).unsqueeze(-2), frame_R[dst]).squeeze(-2) / soft_norm
        )
        # Relative rotation between the two frames.
        pair_orientations = torch.matmul(
            frame_R.transpose(-2, -1).contiguous()[src],
            frame_R[dst],
        )
        return self.rel_geom_projector(
            torch.cat(
                [
                    self.rbf_encoding(pair_dists),
                    pair_directions_l,
                    pair_directions_r,
                    pair_orientations.flatten(-2, -1),
                ],
                dim=-1,
            )
        )
|
||||||
884
flowdock/models/components/esdm.py
Normal file
884
flowdock/models/components/esdm.py
Normal file
@@ -0,0 +1,884 @@
|
|||||||
|
# Adapted from: https://github.com/zrqiao/NeuralPLexer
|
||||||
|
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Any, Dict, Optional, Tuple
|
||||||
|
from omegaconf import DictConfig
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.models.components.embedding import (
|
||||||
|
GaussianFourierEncoding1D,
|
||||||
|
RelativeGeometryEncoding,
|
||||||
|
)
|
||||||
|
from flowdock.models.components.hetero_graph import make_multi_relation_graph_batcher
|
||||||
|
from flowdock.models.components.modules import PointSetAttention
|
||||||
|
from flowdock.utils import RankedLogger
|
||||||
|
from flowdock.utils.frame_utils import RigidTransform, get_frame_matrix
|
||||||
|
from flowdock.utils.model_utils import GELUMLP, AveragePooling, SumPooling, segment_mean
|
||||||
|
|
||||||
|
STATE_DICT = Dict[str, Any]
|
||||||
|
|
||||||
|
log = RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
class LocalUpdateUsingReferenceRotations(torch.nn.Module):
    """Update local geometric (scalar + vector) node representations using reference rotations.

    Node features are laid out as ``[..., 1 + 3, fiber_dim]``: one scalar channel
    followed by three vector-component channels. Vector channels are rotated into
    each node's reference frame, mixed by an MLP together with rotation-invariant
    scalars, and the updated vector channels are rotated back to the global frame.
    """

    def __init__(
        self,
        fiber_dim: int,
        extra_feat_dim: int = 0,
        eps: float = 1e-4,
        dropout: float = 0.0,
        hidden_dim: Optional[int] = None,
        zero_init: bool = False,
    ):
        """Initialize the LocalUpdateUsingReferenceRotations module.

        :param fiber_dim: Width of each scalar/vector fiber channel.
        :param extra_feat_dim: Width of optional extra invariant features appended to the MLP input.
        :param eps: Stabilizer added under the square root when computing vector norms.
        :param dropout: Dropout probability inside the MLP.
        :param hidden_dim: Optional hidden width of the MLP.
        :param zero_init: Whether to zero-initialize the MLP output layer.
        """
        super().__init__()
        # MLP input: scalars + 3 locally-rotated vector components + vector norms (+ extras).
        self.dim = fiber_dim * 5 + extra_feat_dim
        self.fiber_dim = fiber_dim
        self.mlp = GELUMLP(
            self.dim,
            fiber_dim * 4,
            dropout=dropout,
            zero_init=zero_init,
            n_hidden_feats=hidden_dim,
        )
        self.eps = eps

    def forward(
        self,
        x: torch.Tensor,
        rotation_mats: torch.Tensor,
        extra_feats=None,
    ):
        """Forward pass of the LocalUpdateUsingReferenceRotations module.

        :param x: Node representations of shape ``[N, 4, fiber_dim]`` (scalar + 3 vector rows).
        :param rotation_mats: Per-node reference rotation matrices applied to the vector channels.
        :param extra_feats: Optional extra invariant features concatenated to the MLP input.
        :return: Updated representations of shape ``[N, 4, fiber_dim]``.
        """
        scalars = x[:, 0]
        vectors = x[:, 1:]
        # Rotate vector channels into the local reference frame.
        local_vecs = torch.matmul(vectors.transpose(-2, -1), rotation_mats)
        # Vector norms are evaluated without applying the rigid transform
        # (they are rotation-invariant by construction).
        vec_norms = vectors.square().sum(dim=-2).add(self.eps).sqrt()
        invariant_inputs = [scalars, local_vecs.flatten(-2, -1), vec_norms]
        if extra_feats is not None:
            invariant_inputs.append(extra_feats)
        mixed = self.mlp(torch.cat(invariant_inputs, dim=-1))
        mixed = mixed.view(-1, 4, self.fiber_dim)
        # Rotate the updated vector channels back to the global frame.
        global_vecs = torch.matmul(rotation_mats, mixed[:, 1:])
        return torch.cat([mixed[:, :1], global_vecs], dim=-2)
|
||||||
|
|
||||||
|
|
||||||
|
class LocalUpdateUsingChannelWiseGating(torch.nn.Module):
    """Update local geometric (scalar + vector) node representations using channel-wise gating.

    Rotation-invariant inputs (scalars and vector norms) drive an MLP whose output
    provides new scalar channels plus a sigmoid gate that modulates a linear map
    of the vector channels, keeping the update equivariant.
    """

    def __init__(
        self,
        fiber_dim: int,
        eps: float = 1e-4,
        dropout: float = 0.0,
        hidden_dim: Optional[int] = None,
        zero_init: bool = False,
    ):
        """Initialize the LocalUpdateUsingChannelWiseGating module.

        :param fiber_dim: Width of each scalar/vector fiber channel.
        :param eps: Stabilizer added under the square root when computing vector norms.
        :param dropout: Dropout probability inside the MLP.
        :param hidden_dim: Optional hidden width of the MLP.
        :param zero_init: Whether to zero-initialize the MLP output and the vector linear map.
        """
        super().__init__()
        # MLP input/output: scalar channels + per-channel gate logits.
        self.dim = fiber_dim * 2
        self.fiber_dim = fiber_dim
        self.mlp = GELUMLP(
            self.dim,
            self.dim,
            dropout=dropout,
            n_hidden_feats=hidden_dim,
            zero_init=zero_init,
        )
        self.gate = torch.nn.Sigmoid()
        self.lin_out = torch.nn.Linear(fiber_dim, fiber_dim, bias=False)
        if zero_init:
            self.lin_out.weight.data.fill_(0.0)
        self.eps = eps

    def forward(
        self,
        x: torch.Tensor,
    ):
        """Forward pass of the LocalUpdateUsingChannelWiseGating module.

        :param x: Node representations of shape ``[N, 4, fiber_dim]`` (scalar + 3 vector rows).
        :return: Updated representations of shape ``[N, 4, fiber_dim]``.
        """
        scalars = x[:, 0]
        vectors = x[:, 1:]
        vec_norms = vectors.square().sum(dim=-2).add(self.eps).sqrt()
        mixed = self.mlp(torch.cat([scalars, vec_norms], dim=-1))
        # Gated nonlinear operation on the l=1 (vector) representations.
        new_scalars, gate_logits = torch.split(mixed, self.fiber_dim, dim=-1)
        channel_gate = self.gate(gate_logits).unsqueeze(-2)
        gated_vecs = self.lin_out(vectors) * channel_gate
        return torch.cat([new_scalars.unsqueeze(-2), gated_vecs], dim=-2)
|
||||||
|
|
||||||
|
|
||||||
|
class EquivariantTransformerBlock(torch.nn.Module):
    """Equivariant Transformer Block module.

    One residual block combining point-set attention over a graph with a local
    per-node update of the scalar/vector fiber representations.
    """

    def __init__(
        self,
        fiber_dim: int,
        heads: int = 8,
        point_dim: int = 4,
        eps: float = 1e-4,
        edge_dim: Optional[int] = None,
        target_frames: bool = False,
        edge_update: bool = False,
        dropout: float = 0.0,
    ):
        """Initialize the EquivariantTransformerBlock module.

        :param fiber_dim: Width of each scalar/vector fiber channel.
        :param heads: Number of attention heads.
        :param point_dim: Number of attention points per head.
        :param eps: Stabilizer forwarded to the local update module.
        :param edge_dim: Optional edge feature dimension for the attention.
        :param target_frames: If True, reference rotations are supplied to the local update.
        :param edge_update: If True, the attention also returns an edge-feature update.
        :param dropout: Dropout probability inside the local update MLP.
        """
        super().__init__()
        # NOTE: submodule creation order (attention first, then local update)
        # is kept so parameter initialization matches the original block.
        self.attn_conv = PointSetAttention(
            fiber_dim,
            heads=heads,
            point_dim=point_dim,
            edge_dim=edge_dim,
            edge_update=edge_update,
        )
        self.fiber_dim = fiber_dim
        self.target_frames = target_frames
        self.eps = eps
        self.edge_update = edge_update
        # With reference frames available use the frame-aware update;
        # otherwise fall back to channel-wise gating.
        local_update_cls = (
            LocalUpdateUsingReferenceRotations
            if target_frames
            else LocalUpdateUsingChannelWiseGating
        )
        self.local_update = local_update_cls(fiber_dim, eps=eps, dropout=dropout, zero_init=True)

    def forward(
        self,
        x: torch.Tensor,
        edge_index: torch.LongTensor,
        t: torch.Tensor,
        R: torch.Tensor = None,
        x_edge: torch.Tensor = None,
    ):
        """Forward pass of the EquivariantTransformerBlock module.

        :param x: Node representations of shape ``[N, 4, fiber_dim]``.
        :param edge_index: Edge indices of the (merged) graph.
        :param t: Node translations (point coordinates) used by the attention.
        :param R: Optional per-node reference rotations (required when ``target_frames``).
        :param x_edge: Optional edge representations (updated when ``edge_update``).
        :return: Tuple of updated node representations and (possibly updated) edge representations.
        """
        attn_result = self.attn_conv(x, x, edge_index, t, t, x_edge=x_edge)
        if self.edge_update:
            node_delta, edge_delta = attn_result
            x_edge = x_edge + edge_delta
        else:
            node_delta = attn_result
        x = x + node_delta
        # Residual local update; the frame-aware variant also consumes R.
        local_args = (x, R) if self.target_frames else (x,)
        x = self.local_update(*local_args) + x
        return x, x_edge
|
||||||
|
|
||||||
|
|
||||||
|
class EquivariantStructureDenoisingModule(torch.nn.Module):
    """Equivariant Structure Denoising Module.

    Iteratively refines protein-atom (and, when present, ligand-atom) coordinates
    with a stack of equivariant transformer blocks over a merged multi-relation
    heterogeneous graph whose node types are protein residues (``prot_res``),
    protein atoms (``prot_atm``), ligand atoms (``lig_atm``), and ligand frame
    triplets (``lig_trp``).
    """

    def __init__(
        self,
        fiber_dim: int,
        input_dim: int,
        input_pair_dim: int,
        hidden_dim: int = 1024,
        n_stacks: int = 4,
        n_heads: int = 8,
        dropout: float = 0.0,
    ):
        """Initialize the EquivariantStructureDenoisingModule module.

        :param fiber_dim: Width of each scalar/vector fiber channel.
        :param input_dim: Dimension of incoming node (residue/triplet) features.
        :param input_pair_dim: Dimension of incoming pair (edge) features.
        :param n_stacks: Number of transformer blocks (coordinate-update iterations).
        :param n_heads: Number of attention heads per block.
        :param dropout: Dropout probability.
        """
        super().__init__()
        self.input_dim = input_dim
        self.input_pair_dim = input_pair_dim
        self.fiber_dim = fiber_dim
        # atom37 representation width used to pad/one-hot protein atom types.
        self.protatm_padding_dim = 37
        self.n_blocks = n_stacks
        self.input_node_projector = torch.nn.Linear(input_dim, fiber_dim, bias=False)
        self.input_node_vec_projector = torch.nn.Linear(input_dim, fiber_dim * 3, bias=False)
        self.input_pair_projector = torch.nn.Linear(input_pair_dim, fiber_dim, bias=False)
        # Inherit the residue representations; +32 for the timestep encoding
        # (GaussianFourierEncoding1D(16) emits 32 features -- TODO confirm).
        self.atm_embed = GELUMLP(input_dim + 32, fiber_dim)
        self.ipa_modules = torch.nn.ModuleList(
            [
                EquivariantTransformerBlock(
                    fiber_dim,
                    heads=n_heads,
                    point_dim=fiber_dim // (n_heads * 2),
                    edge_dim=fiber_dim,
                    target_frames=True,
                    edge_update=True,
                    dropout=dropout,
                )
                for _ in range(n_stacks)
            ]
        )
        # Per-block adapters re-injecting the original residue/triplet inputs.
        self.res_adapters = torch.nn.ModuleList(
            [
                LocalUpdateUsingReferenceRotations(
                    fiber_dim,
                    extra_feat_dim=input_dim,
                    hidden_dim=hidden_dim,
                    dropout=dropout,
                    zero_init=True,
                )
                for _ in range(n_stacks)
            ]
        )
        self.protatm_type_encoding = GELUMLP(self.protatm_padding_dim + input_dim, input_pair_dim)
        self.time_encoding = GaussianFourierEncoding1D(16)
        self.rel_geom_enc = RelativeGeometryEncoding(15, fiber_dim)
        self.rel_geom_embed = GELUMLP(fiber_dim, fiber_dim, n_hidden_feats=fiber_dim)
        # Per-block coordinate-update heads: [displacement, scale].
        # NOTE(review): drift projections use default init; only the scale gates
        # are zero-initialized (sigmoid(0) = 0.5 at the start of training).
        self.out_drift_res = torch.nn.ModuleList(
            [torch.nn.Linear(fiber_dim, 1, bias=False) for _ in range(n_stacks)]
        )
        self.out_scale_res = torch.nn.ModuleList(
            [GELUMLP(fiber_dim, 1, zero_init=True) for _ in range(n_stacks)]
        )
        self.out_drift_atm = torch.nn.ModuleList(
            [torch.nn.Linear(fiber_dim, 1, bias=False) for _ in range(n_stacks)]
        )
        self.out_scale_atm = torch.nn.ModuleList(
            [GELUMLP(fiber_dim, 1, zero_init=True) for _ in range(n_stacks)]
        )

        # Pre-tabulated edge relations:
        # (edge_type, src_index_key, dst_index_key, src_node_type, dst_node_type).
        self.graph_relations = [
            (
                "residue_to_residue",
                "gather_idx_ab_a",
                "gather_idx_ab_b",
                "prot_res",
                "prot_res",
            ),
            (
                "sampled_residue_to_sampled_residue",
                "gather_idx_AB_a",
                "gather_idx_AB_b",
                "prot_res",
                "prot_res",
            ),
            (
                "prot_atm_to_prot_atm_graph",
                "protatm_protatm_idx_src",
                "protatm_protatm_idx_dst",
                "prot_atm",
                "prot_atm",
            ),
            (
                "prot_atm_to_prot_atm_knn",
                "knn_idx_protatm_protatm_src",
                "knn_idx_protatm_protatm_dst",
                "prot_atm",
                "prot_atm",
            ),
            (
                "prot_atm_to_residue",
                "protatm_res_idx_protatm",
                "protatm_res_idx_res",
                "prot_atm",
                "prot_res",
            ),
            (
                "residue_to_prot_atm",
                "protatm_res_idx_res",
                "protatm_res_idx_protatm",
                "prot_res",
                "prot_atm",
            ),
            (
                "sampled_lig_triplet_to_lig_atm",
                "gather_idx_UI_I",
                "gather_idx_UI_u",
                "lig_trp",
                "lig_atm",
            ),
            (
                "lig_atm_to_sampled_lig_triplet",
                "gather_idx_UI_u",
                "gather_idx_UI_I",
                "lig_atm",
                "lig_trp",
            ),
            (
                "lig_atm_to_lig_atm_graph",
                "gather_idx_uv_u",
                "gather_idx_uv_v",
                "lig_atm",
                "lig_atm",
            ),
            (
                "sampled_residue_to_sampled_lig_triplet",
                "gather_idx_AJ_a",
                "gather_idx_AJ_J",
                "prot_res",
                "lig_trp",
            ),
            (
                "sampled_lig_triplet_to_sampled_residue",
                "gather_idx_AJ_J",
                "gather_idx_AJ_a",
                "lig_trp",
                "prot_res",
            ),
            (
                "residue_to_sampled_lig_triplet",
                "gather_idx_aJ_a",
                "gather_idx_aJ_J",
                "prot_res",
                "lig_trp",
            ),
            (
                "sampled_lig_triplet_to_residue",
                "gather_idx_aJ_J",
                "gather_idx_aJ_a",
                "lig_trp",
                "prot_res",
            ),
            (
                "sampled_lig_triplet_to_sampled_lig_triplet",
                "gather_idx_IJ_I",
                "gather_idx_IJ_J",
                "lig_trp",
                "lig_trp",
            ),
            (
                "prot_atm_to_lig_atm_knn",
                "knn_idx_protatm_ligatm_src",
                "knn_idx_protatm_ligatm_dst",
                "prot_atm",
                "lig_atm",
            ),
            (
                "lig_atm_to_prot_atm_knn",
                "knn_idx_ligatm_protatm_src",
                "knn_idx_ligatm_protatm_dst",
                "lig_atm",
                "prot_atm",
            ),
            (
                "lig_atm_to_lig_atm_knn",
                "knn_idx_ligatm_ligatm_src",
                "knn_idx_ligatm_ligatm_dst",
                "lig_atm",
                "lig_atm",
            ),
        ]
        # The first six relations involve protein node types only.
        self.graph_relations_no_ligand = self.graph_relations[:6]

    def init_scalar_vec_rep(self, x, x_v=None, frame=None):
        """Initialize a joint scalar+vector representation of shape ``[..., 4, fiber_dim]``.

        :param x: Scalar features ``[..., fiber_dim]``.
        :param x_v: Optional flattened vector features ``[..., 3 * fiber_dim]``, expressed
            in local frames (required when ``frame`` is given).
        :param frame: Optional rigid frames whose rotations map ``x_v`` to global coordinates.
        :return: Stacked representation with 1 scalar row and 3 vector rows.
        """
        if frame is None:
            # Zero-initialize the vector channels
            vec_shape = (*x.shape[:-1], 3, x.shape[-1])
            res = torch.cat([x.unsqueeze(-2), torch.zeros(vec_shape, device=x.device)], dim=-2)
        else:
            x_v = x_v.view(*x.shape[:-1], 3, x.shape[-1])
            x_v_glob = torch.matmul(frame.R, x_v)
            res = torch.cat([x.unsqueeze(-2), x_v_glob], dim=-2)
        return res

    def _collate_node_frames(
        self,
        graph_batcher,
        backbone_frames,
        prot_atm_coords_flat,
        protein_only,
        lig_atm_coords=None,
        ligand_trp_frames=None,
    ):
        """Collate per-node-type rigid frames into merged translation/rotation tensors.

        Atom node types get "dummy" frames anchored at the atom coordinates
        (constructed with ``R=None``); residue and triplet node types use their
        computed frames.

        :return: Tuple ``(merged_t, merged_R)`` in the batcher's merged node order.
        """
        dummy_prot_atm_frames = RigidTransform(prot_atm_coords_flat, R=None)
        node_t = {
            "prot_res": backbone_frames.t,
            "prot_atm": dummy_prot_atm_frames.t,
        }
        node_R = {
            "prot_res": backbone_frames.R,
            "prot_atm": dummy_prot_atm_frames.R,
        }
        if not protein_only:
            dummy_lig_atm_frames = RigidTransform(lig_atm_coords, R=None)
            node_t["lig_atm"] = dummy_lig_atm_frames.t
            node_t["lig_trp"] = ligand_trp_frames.t
            node_R["lig_atm"] = dummy_lig_atm_frames.R
            node_R["lig_trp"] = ligand_trp_frames.R
        return (
            graph_batcher.collate_node_attr(node_t),
            graph_batcher.collate_node_attr(node_R),
        )

    def _frame_drift(self, block_id: int, rep: torch.Tensor) -> torch.Tensor:
        """Residue/triplet-level drift: gated vector projection, scaled by a factor of 10."""
        magnitude_gate = torch.sigmoid(self.out_scale_res[block_id](rep[:, 0]))
        return self.out_drift_res[block_id](rep[:, 1:]).squeeze(-1) * magnitude_gate * 10

    def _atom_drift(self, block_id: int, rep: torch.Tensor) -> torch.Tensor:
        """Atom-level drift: gated vector projection (unscaled)."""
        magnitude_gate = torch.sigmoid(self.out_scale_atm[block_id](rep[:, 0]))
        return self.out_drift_atm[block_id](rep[:, 1:]).squeeze(-1) * magnitude_gate

    def forward(
        self,
        batch,
        frozen_lig=False,
        frozen_prot=False,
        **kwargs,
    ):
        """Forward pass of the EquivariantStructureDenoisingModule module.

        :param batch: Batch dictionary with "features", "indexer", "metadata", and "misc".
        :param frozen_lig: If True, ligand coordinates are left unchanged.
        :param frozen_prot: If True, protein coordinates are left unchanged.
        :return: Dictionary of final embeddings and denoised coordinates; ligand entries
            are ``None`` when ``batch["misc"]["protein_only"]`` is set.
        """
        features = batch["features"]
        indexer = batch["indexer"]
        metadata = batch["metadata"]
        protein_only = batch["misc"]["protein_only"]

        prot_res_rep_in = features["rec_res_attr_decin"]
        timestep_prot = features["timestep_encoding_prot"]
        device = features["res_type"].device

        # Protein all-atom representation initialization
        protatm_padding_mask = features["res_atom_mask"]
        protatm_atom37_onehot = torch.nn.functional.one_hot(
            features["protatm_to_atom37_index"], num_classes=self.protatm_padding_dim
        )
        # Pair encoding of each protein atom with its parent residue.
        protatm_res_pair_encoding = self.protatm_type_encoding(
            torch.cat(
                [
                    prot_res_rep_in[indexer["protatm_res_idx_res"]],
                    protatm_atom37_onehot,
                ],
                dim=-1,
            )
        )
        # Gathered AA features from individual graphs, conditioned on the timestep.
        prot_atm_rep_in = features["prot_atom_attr_projected"]
        prot_atm_rep_int = self.atm_embed(
            torch.cat(
                [
                    prot_atm_rep_in,
                    self.time_encoding(timestep_prot)[indexer["protatm_res_idx_res"]],
                ],
                dim=-1,
            )
        )

        prot_atm_coords_padded = features["input_protein_coords"]
        prot_atm_coords_flat = prot_atm_coords_padded[protatm_padding_mask]

        # Embed the rigid body node representations; backbone frames are built
        # from the first three atom37 slots (presumably N, CA, C -- TODO confirm).
        backbone_frames = get_frame_matrix(
            prot_atm_coords_padded[:, 0],
            prot_atm_coords_padded[:, 1],
            prot_atm_coords_padded[:, 2],
        )
        prot_res_rep = self.init_scalar_vec_rep(
            self.input_node_projector(prot_res_rep_in),
            x_v=self.input_node_vec_projector(prot_res_rep_in),
            frame=backbone_frames,
        )
        prot_atm_rep = self.init_scalar_vec_rep(prot_atm_rep_int)
        node_reps = {
            "prot_res": prot_res_rep,
            "prot_atm": prot_atm_rep,
        }
        # Embed pair representations
        edge_reps = {
            "residue_to_residue": features["res_res_pair_attr_decin"],
            "prot_atm_to_prot_atm_graph": features["prot_atom_pair_attr_projected"],
            "prot_atm_to_prot_atm_knn": features["knn_feat_protatm_protatm"],
            "prot_atm_to_residue": protatm_res_pair_encoding,
            "residue_to_prot_atm": protatm_res_pair_encoding,
            "sampled_residue_to_sampled_residue": features["res_res_grid_attr_flat_decin"],
        }

        lig_atm_coords = None
        ligand_trp_frames = None
        lig_frame_atm_idx = None
        lig_frame_rep_in = None
        if not protein_only:
            timestep_lig = features["timestep_encoding_lig"]
            lig_atm_rep_in = features["lig_atom_attr_projected"]
            lig_frame_rep_in = features["lig_trp_attr_decin"]
            # Ligand atom embedding conditioned on the ligand timestep.
            lig_atm_rep_int = self.atm_embed(
                torch.cat(
                    [lig_atm_rep_in, self.time_encoding(timestep_lig)],
                    dim=-1,
                )
            )
            lig_atm_rep = self.init_scalar_vec_rep(lig_atm_rep_int)

            # Initialize coordinate features (cloned: updated in-place per block).
            lig_atm_coords = features["input_ligand_coords"].clone()

            # Atom-index triplets defining each sampled ligand frame.
            lig_frame_atm_idx = (
                indexer["gather_idx_ijk_i"][indexer["gather_idx_I_ijk"]],
                indexer["gather_idx_ijk_j"][indexer["gather_idx_I_ijk"]],
                indexer["gather_idx_ijk_k"][indexer["gather_idx_I_ijk"]],
            )
            ligand_trp_frames = get_frame_matrix(
                lig_atm_coords[lig_frame_atm_idx[0]],
                lig_atm_coords[lig_frame_atm_idx[1]],
                lig_atm_coords[lig_frame_atm_idx[2]],
            )
            lig_frame_rep = self.init_scalar_vec_rep(
                self.input_node_projector(lig_frame_rep_in),
                x_v=self.input_node_vec_projector(lig_frame_rep_in),
                frame=ligand_trp_frames,
            )
            node_reps.update(
                {
                    "lig_atm": lig_atm_rep,
                    "lig_trp": lig_frame_rep,
                }
            )
            edge_reps.update(
                {
                    "lig_atm_to_lig_atm_graph": features["lig_atom_pair_attr_projected"],
                    "sampled_lig_triplet_to_lig_atm": features["lig_af_pair_attr_projected"],
                    "lig_atm_to_sampled_lig_triplet": features["lig_af_pair_attr_projected"],
                    "sampled_residue_to_sampled_lig_triplet": features[
                        "res_trp_grid_attr_flat_decin"
                    ],
                    "sampled_lig_triplet_to_sampled_residue": features[
                        "res_trp_grid_attr_flat_decin"
                    ],
                    "residue_to_sampled_lig_triplet": features["res_trp_pair_attr_flat_decin"],
                    "sampled_lig_triplet_to_residue": features["res_trp_pair_attr_flat_decin"],
                    "sampled_lig_triplet_to_sampled_lig_triplet": features[
                        "trp_trp_grid_attr_flat_decin"
                    ],
                    "prot_atm_to_lig_atm_knn": features["knn_feat_protatm_ligatm"],
                    "lig_atm_to_prot_atm_knn": features["knn_feat_ligatm_protatm"],
                    "lig_atm_to_lig_atm_knn": features["knn_feat_ligatm_ligatm"],
                }
            )

        # Message passing over the merged heterogeneous graph
        protatm_res_idx_res = indexer["protatm_res_idx_res"]
        graph_relations = self.graph_relations_no_ligand if protein_only else self.graph_relations
        graph_batcher = make_multi_relation_graph_batcher(graph_relations, indexer, metadata)
        merged_edge_idx = graph_batcher.collate_idx_list(indexer)
        merged_node_reps = graph_batcher.collate_node_attr(node_reps)
        merged_edge_reps = graph_batcher.collate_edge_attr(
            graph_batcher.zero_pad_edge_attr(edge_reps, self.input_pair_dim, device)
        )
        merged_edge_reps = self.input_pair_projector(merged_edge_reps)
        assert merged_edge_idx[0].shape[0] == merged_edge_reps.shape[0]
        assert merged_edge_idx[1].shape[0] == merged_edge_reps.shape[0]

        # Inject relative-geometry features of the initial frames into the edges.
        merged_node_t, merged_node_R = self._collate_node_frames(
            graph_batcher,
            backbone_frames,
            prot_atm_coords_flat,
            protein_only,
            lig_atm_coords,
            ligand_trp_frames,
        )
        merged_node_frames = RigidTransform(merged_node_t, merged_node_R)
        merged_edge_reps = merged_edge_reps + (
            self.rel_geom_embed(
                self.rel_geom_enc(merged_node_frames, merged_edge_idx) + merged_edge_reps
            )
        )

        # No need to reassign embeddings but need to update point coordinates & frames
        offloaded_node_reps = None
        for block_id in range(self.n_blocks):
            merged_node_t, merged_node_R = self._collate_node_frames(
                graph_batcher,
                backbone_frames,
                prot_atm_coords_flat,
                protein_only,
                lig_atm_coords,
                ligand_trp_frames,
            )
            # PredictDrift iteration
            merged_node_reps, merged_edge_reps = self.ipa_modules[block_id](
                merged_node_reps,
                merged_edge_idx,
                t=merged_node_t,
                R=merged_node_R,
                x_edge=merged_edge_reps,
            )
            offloaded_node_reps = graph_batcher.offload_node_attr(merged_node_reps)
            # Residual adapters re-injecting the original input features.
            if "lig_trp" in offloaded_node_reps:
                lig_frame_rep = offloaded_node_reps["lig_trp"]
                offloaded_node_reps["lig_trp"] = lig_frame_rep + self.res_adapters[block_id](
                    lig_frame_rep, ligand_trp_frames.R, extra_feats=lig_frame_rep_in
                )
            prot_res_rep = offloaded_node_reps["prot_res"]
            offloaded_node_reps["prot_res"] = prot_res_rep + self.res_adapters[block_id](
                prot_res_rep, backbone_frames.R, extra_feats=prot_res_rep_in
            )
            merged_node_reps = graph_batcher.collate_node_attr(offloaded_node_reps)

            # Displacement vectors in the global coordinate system
            if not protein_only:
                drift_trp = self._frame_drift(block_id, offloaded_node_reps["lig_trp"])
                # Per-molecule mean of triplet drifts, broadcast back to ligand atoms.
                drift_trp_gathered = segment_mean(
                    drift_trp,
                    indexer["gather_idx_I_molid"],
                    metadata["num_molid"],
                )[indexer["gather_idx_i_molid"]]
                drift_atm = self._atom_drift(block_id, offloaded_node_reps["lig_atm"])
                if not frozen_lig:
                    lig_atm_coords = lig_atm_coords + drift_atm + drift_trp_gathered
                # Ligand frames are refreshed even when coordinates are frozen.
                ligand_trp_frames = get_frame_matrix(
                    lig_atm_coords[lig_frame_atm_idx[0]],
                    lig_atm_coords[lig_frame_atm_idx[1]],
                    lig_atm_coords[lig_frame_atm_idx[2]],
                )

            drift_bb = self._frame_drift(block_id, offloaded_node_reps["prot_res"])
            drift_bb_gathered = drift_bb[protatm_res_idx_res]
            drift_prot_atm_int = self._atom_drift(block_id, offloaded_node_reps["prot_atm"])
            if not frozen_prot:
                prot_atm_coords_flat = (
                    prot_atm_coords_flat + drift_prot_atm_int + drift_bb_gathered
                )

            # Re-pad coordinates and refresh backbone frames for the next block.
            prot_atm_coords_padded = torch.zeros_like(features["input_protein_coords"])
            prot_atm_coords_padded[protatm_padding_mask] = prot_atm_coords_flat
            backbone_frames = get_frame_matrix(
                prot_atm_coords_padded[:, 0],
                prot_atm_coords_padded[:, 1],
                prot_atm_coords_padded[:, 2],
            )

        ret = {
            "final_embedding_prot_atom": offloaded_node_reps["prot_atm"],
            "final_embedding_prot_res": offloaded_node_reps["prot_res"],
            "final_coords_prot_atom": prot_atm_coords_flat,
            "final_coords_prot_atom_padded": prot_atm_coords_padded,
        }
        if not protein_only:
            ret["final_embedding_lig_atom"] = offloaded_node_reps["lig_atm"]
            ret["final_coords_lig_atom"] = lig_atm_coords
        else:
            ret["final_embedding_lig_atom"] = None
            ret["final_coords_lig_atom"] = None
        return ret
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_score_head(
    protein_model_cfg: DictConfig,
    score_cfg: DictConfig,
    task_cfg: DictConfig,
    state_dict: Optional[STATE_DICT] = None,
) -> torch.nn.Module:
    """Instantiates an EquivariantStructureDenoisingModule model for protein-ligand complex
    structure denoising.

    :param protein_model_cfg: Protein model configuration.
    :param score_cfg: Score configuration.
    :param task_cfg: Task configuration.
    :param state_dict: Optional (potentially-pretrained) state dictionary.
    :return: EquivariantStructureDenoisingModule model.
    """
    score_head = EquivariantStructureDenoisingModule(
        score_cfg.fiber_dim,
        input_dim=protein_model_cfg.residue_dim,
        input_pair_dim=protein_model_cfg.pair_dim,
        hidden_dim=score_cfg.hidden_dim,
        n_stacks=score_cfg.n_stacks,
        n_heads=protein_model_cfg.n_heads,
        dropout=task_cfg.dropout,
    )
    if score_cfg.from_pretrained and state_dict is not None:
        # Keep only "score_head.*" entries, stripping the leading prefix
        # so keys match this submodule's parameter names.
        pretrained_weights = {
            key.partition(".")[2]: value
            for key, value in state_dict.items()
            if key.startswith("score_head")
        }
        try:
            score_head.load_state_dict(pretrained_weights)
            log.info("Successfully loaded pretrained score weights.")
        except Exception as e:
            log.warning(f"Skipping loading of pretrained score weights due to: {e}.")
    return score_head
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_confidence_head(
    protein_model_cfg: DictConfig,
    confidence_cfg: DictConfig,
    task_cfg: DictConfig,
    state_dict: Optional[STATE_DICT] = None,
) -> Tuple[torch.nn.Module, torch.nn.Module]:
    """Instantiates an EquivariantStructureDenoisingModule model for confidence prediction.

    :param protein_model_cfg: Protein model configuration.
    :param confidence_cfg: Confidence configuration.
    :param task_cfg: Task configuration.
    :param state_dict: Optional (potentially-pretrained) state dictionary.
    :return: EquivariantStructureDenoisingModule model and plDDT gram head weights.
    """
    confidence_head = EquivariantStructureDenoisingModule(
        confidence_cfg.fiber_dim,
        input_dim=protein_model_cfg.residue_dim,
        input_pair_dim=protein_model_cfg.pair_dim,
        hidden_dim=confidence_cfg.hidden_dim,
        n_stacks=confidence_cfg.n_stacks,
        n_heads=protein_model_cfg.n_heads,
        dropout=task_cfg.dropout,
    )
    plddt_gram_head = GELUMLP(
        protein_model_cfg.pair_dim,
        8,
        n_hidden_feats=protein_model_cfg.pair_dim,
        zero_init=True,
    )
    # NOTE(review): both load attempts are assumed to sit under the
    # from_pretrained guard (state_dict.items() would otherwise crash on None).
    if confidence_cfg.from_pretrained and state_dict is not None:
        confidence_weights = {
            key.partition(".")[2]: value
            for key, value in state_dict.items()
            if key.startswith("confidence_head")
        }
        try:
            confidence_head.load_state_dict(confidence_weights)
            log.info("Successfully loaded pretrained confidence weights.")
        except Exception as e:
            log.warning(f"Skipping loading of pretrained confidence weights due to: {e}.")

        plddt_weights = {
            key.partition(".")[2]: value
            for key, value in state_dict.items()
            if key.startswith("plddt_gram_head")
        }
        try:
            plddt_gram_head.load_state_dict(plddt_weights)
            log.info("Successfully loaded pretrained pLDDT gram head weights.")
        except Exception as e:
            log.warning(f"Skipping loading of pretrained pLDDT gram head weights due to: {e}.")
    return confidence_head, plddt_gram_head
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_affinity_head(
    ligand_model_cfg: DictConfig,
    affinity_cfg: DictConfig,
    task_cfg: DictConfig,
    learnable_pooling: bool = True,
    state_dict: Optional[STATE_DICT] = None,
) -> Tuple[torch.nn.Module, torch.nn.Module, torch.nn.Module]:
    """Instantiates an EquivariantStructureDenoisingModule model for affinity prediction.

    :param ligand_model_cfg: Ligand model configuration.
    :param affinity_cfg: Affinity configuration.
    :param task_cfg: Task configuration.
    :param learnable_pooling: Whether to use learnable ligand pooling modules.
    :param state_dict: Optional (potentially-pretrained) state dictionary.
    :return: EquivariantStructureDenoisingModule model as well as a ligand pooling module and
        projection head.
    """
    affinity_head = EquivariantStructureDenoisingModule(
        affinity_cfg.fiber_dim,
        input_dim=ligand_model_cfg.node_channels,
        input_pair_dim=ligand_model_cfg.pair_channels,
        hidden_dim=affinity_cfg.hidden_dim,
        n_stacks=affinity_cfg.n_stacks,
        n_heads=ligand_model_cfg.n_heads,
        dropout=affinity_cfg.dropout if affinity_cfg.get("dropout") else task_cfg.dropout,
    )
    pooling_mode = affinity_cfg.ligand_pooling
    if pooling_mode in ("sum", "add", "summation", "addition"):
        ligand_pooling = SumPooling(learnable=learnable_pooling, hidden_dim=affinity_cfg.fiber_dim)
    elif pooling_mode in ("mean", "avg", "average"):
        ligand_pooling = AveragePooling(
            learnable=learnable_pooling, hidden_dim=affinity_cfg.fiber_dim
        )
    else:
        raise NotImplementedError(
            f"Unsupported ligand pooling method: {affinity_cfg.ligand_pooling}"
        )
    affinity_proj_head = GELUMLP(
        affinity_cfg.fiber_dim,
        1,
        n_hidden_feats=affinity_cfg.fiber_dim,
        zero_init=True,
    )
    if affinity_cfg.from_pretrained and state_dict is not None:

        def _submodule_weights(prefix: str) -> Dict[str, Any]:
            # Select "<prefix>.*" entries and strip the leading prefix.
            return {
                key.partition(".")[2]: value
                for key, value in state_dict.items()
                if key.startswith(prefix)
            }

        try:
            affinity_head.load_state_dict(_submodule_weights("affinity_head"))
            log.info("Successfully loaded pretrained affinity head weights.")
        except Exception as e:
            log.warning(f"Skipping loading of pretrained affinity head weights due to: {e}.")

        if learnable_pooling:
            try:
                ligand_pooling.load_state_dict(_submodule_weights("ligand_pooling"))
                log.info("Successfully loaded pretrained ligand pooling weights.")
            except Exception as e:
                log.warning(f"Skipping loading of pretrained ligand pooling weights due to: {e}.")

        try:
            affinity_proj_head.load_state_dict(_submodule_weights("affinity_proj_head"))
            log.info("Successfully loaded pretrained affinity projection head weights.")
        except Exception as e:
            log.warning(
                f"Skipping loading of pretrained affinity projection head weights due to: {e}."
            )
    return affinity_head, ligand_pooling, affinity_proj_head
|
||||||
2029
flowdock/models/components/flowdock.py
Normal file
2029
flowdock/models/components/flowdock.py
Normal file
File diff suppressed because it is too large
Load Diff
166
flowdock/models/components/hetero_graph.py
Normal file
166
flowdock/models/components/hetero_graph.py
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
# Adapted from: https://github.com/zrqiao/NeuralPLexer
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Dict, List, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Relation:
    """Immutable descriptor of one edge type in a multi-relation (heterogeneous) graph.

    Field meanings follow their usage in ``MultiRelationGraphBatcher`` below.
    """

    edge_type: str  # relation name; key into edge-attribute dictionaries
    edge_rev_name: str  # indexer key holding per-edge source-node indices
    edge_frd_name: str  # indexer key holding per-edge destination-node indices
    src_node_type: str  # node type the edges originate from
    dst_node_type: str  # node type the edges point to
    num_edges: int  # number of edges of this relation in the batch
|
||||||
|
|
||||||
|
|
||||||
|
class MultiRelationGraphBatcher:
    """Collate sub-graphs of different node/edge types into a single instance.

    Nodes of all types are packed into one flat index space via per-type
    offsets (computed in :meth:`_make_offset_dict`), and edges of all
    relations are concatenated in the order of the provided relation forms.

    Returned multi-relation edge indices are stored in LongTensor of shape [2, N_edges].
    """

    def __init__(
        self,
        relation_forms: List["Relation"],
        graph_metadata: Dict[str, int],
    ):
        """Initialize the batcher.

        :param relation_forms: One ``Relation`` descriptor per edge type.
        :param graph_metadata: Must provide a ``num_<node_type>`` entry for
            every node type referenced by ``relation_forms``.
        """
        self._relation_forms = relation_forms
        self._make_offset_dict(graph_metadata)

    def _make_offset_dict(self, graph_metadata):
        """Create offset dictionaries for node and edge types."""
        self._node_chunk_sizes = {}
        self._edge_chunk_sizes = {}
        self._offsets_lower = {}
        self._offsets_upper = {}
        # Record node types in first-seen order. The previous implementation
        # accumulated them in a `set`, whose iteration order depends on string
        # hashing and is therefore not reproducible across processes; a
        # deterministic ordering keeps the packed tensor layout stable.
        node_types_in_order = {}
        for relation in self._relation_forms:
            assert (
                f"num_{relation.src_node_type}" in graph_metadata.keys()
            ), f"Missing metadata: num_{relation.src_node_type}"
            # NOTE: this assertion previously reported the *src* node type in
            # its error message (copy-paste bug); it now reports the dst type.
            assert (
                f"num_{relation.dst_node_type}" in graph_metadata.keys()
            ), f"Missing metadata: num_{relation.dst_node_type}"
            node_types_in_order[relation.src_node_type] = None
            node_types_in_order[relation.dst_node_type] = None
        offset = 0
        # Fix node type ordering
        self.all_node_types = list(node_types_in_order)
        for node_type in self.all_node_types:
            self._offsets_lower[node_type] = offset
            self._node_chunk_sizes[node_type] = graph_metadata[f"num_{node_type}"]
            new_offset = offset + self._node_chunk_sizes[node_type]
            self._offsets_upper[node_type] = new_offset
            offset = new_offset

    def collate_single_relation_graphs(self, indexer, node_attr_dict, edge_attr_dict):
        """Collate sub-graphs of different node/edge types into a single instance.

        :param indexer: Per-relation edge index tensors.
        :param node_attr_dict: Per-node-type attribute tensors.
        :param edge_attr_dict: Per-relation edge attribute tensors.
        :return: Dict with flat ``node_attr``, ``edge_attr``, and ``edge_index``.
        """
        return {
            "node_attr": self.collate_node_attr(node_attr_dict),
            "edge_attr": self.collate_edge_attr(edge_attr_dict),
            "edge_index": self.collate_idx_list(indexer),
        }

    def collate_idx_list(
        self,
        indexer: Dict[str, torch.Tensor],
    ) -> torch.Tensor:
        """Collate edge indices for all relations into the flat node index space.

        :param indexer: Maps each relation's rev/frd index names to 1D LongTensors.
        :return: LongTensor of shape [2, total_num_edges] with offset node indices.
        """
        ret_eidxs_rev, ret_eidxs_frd = [], []
        for relation in self._relation_forms:
            assert relation.edge_rev_name in indexer.keys()
            assert relation.edge_frd_name in indexer.keys()
            assert indexer[relation.edge_rev_name].dim() == 1
            assert indexer[relation.edge_frd_name].dim() == 1
            # Bounds checks: every per-type index must be valid for its chunk.
            assert torch.all(
                indexer[relation.edge_rev_name] < self._node_chunk_sizes[relation.src_node_type]
            ), f"Node index on edge exceeding boundary: {relation.edge_type}, {self._node_chunk_sizes[relation.src_node_type]}, {self._node_chunk_sizes[relation.dst_node_type]}, {max(indexer[relation.edge_rev_name])}, {max(indexer[relation.edge_frd_name])}"
            assert torch.all(
                indexer[relation.edge_frd_name] < self._node_chunk_sizes[relation.dst_node_type]
            ), f"Node index on edge exceeding boundary: {relation.edge_type}, {self._node_chunk_sizes[relation.src_node_type]}, {self._node_chunk_sizes[relation.dst_node_type]}, {max(indexer[relation.edge_rev_name])}, {max(indexer[relation.edge_frd_name])}"
            # Shift per-type indices by the node-type offset into flat space.
            ret_eidxs_rev.append(
                indexer[relation.edge_rev_name] + self._offsets_lower[relation.src_node_type]
            )
            ret_eidxs_frd.append(
                indexer[relation.edge_frd_name] + self._offsets_lower[relation.dst_node_type]
            )
        ret_eidxs_rev = torch.cat(ret_eidxs_rev, dim=0)
        ret_eidxs_frd = torch.cat(ret_eidxs_frd, dim=0)
        return torch.stack([ret_eidxs_rev, ret_eidxs_frd], dim=0)

    def collate_node_attr(self, node_attr_dict: Dict[str, torch.Tensor]):
        """Collate node attributes for all node types into one tensor.

        :param node_attr_dict: Maps node type to its [num_nodes, ...] tensor.
        :return: Concatenation in ``self.all_node_types`` order.
        """
        for node_type in self.all_node_types:
            assert (
                node_attr_dict[node_type].shape[0] == self._node_chunk_sizes[node_type]
            ), f"Node count mismatch: {node_type}, {node_attr_dict[node_type].shape[0]}, {self._node_chunk_sizes[node_type]}"
        return torch.cat([node_attr_dict[node_type] for node_type in self.all_node_types], dim=0)

    def collate_edge_attr(self, edge_attr_dict: Dict[str, torch.Tensor]):
        """Collate edge attributes for all relations into one tensor.

        :param edge_attr_dict: Maps relation ``edge_type`` to its attribute tensor.
        :return: Concatenation in ``self._relation_forms`` order.
        """
        return torch.cat(
            [edge_attr_dict[relation.edge_type] for relation in self._relation_forms],
            dim=0,
        )

    def zero_pad_edge_attr(
        self,
        edge_attr_dict: Dict[str, torch.Tensor],
        embedding_dim: int,
        device: torch.device,
    ):
        """Replace missing (``None``) edge attributes with zero tensors in place.

        :param edge_attr_dict: Maps relation ``edge_type`` to a tensor or ``None``.
        :param embedding_dim: Feature width of the zero padding.
        :param device: Device on which to allocate the padding.
        :return: The (mutated) ``edge_attr_dict``.
        """
        for relation in self._relation_forms:
            if edge_attr_dict[relation.edge_type] is None:
                edge_attr_dict[relation.edge_type] = torch.zeros(
                    (relation.num_edges, embedding_dim),
                    device=device,
                )
        return edge_attr_dict

    def offload_node_attr(self, cat_node_attr: torch.Tensor):
        """Split a flat node-attribute tensor back into per-node-type chunks.

        Inverse of :meth:`collate_node_attr`.
        """
        node_chunk_sizes = [self._node_chunk_sizes[node_type] for node_type in self.all_node_types]
        node_attr_split = torch.split(cat_node_attr, node_chunk_sizes)
        return {
            self.all_node_types[i]: node_attr_split[i] for i in range(len(self.all_node_types))
        }

    def offload_edge_attr(self, cat_edge_attr: torch.Tensor):
        """Split a flat edge-attribute tensor back into per-relation chunks.

        Inverse of :meth:`collate_edge_attr`.
        """
        edge_chunk_sizes = [relation.num_edges for relation in self._relation_forms]
        edge_attr_split = torch.split(cat_edge_attr, edge_chunk_sizes)
        return {
            self._relation_forms[i].edge_type: edge_attr_split[i]
            for i in range(len(self._relation_forms))
        }
|
||||||
|
|
||||||
|
|
||||||
|
def make_multi_relation_graph_batcher(
    list_of_relations: List[Tuple[str, str, str, str, str]],
    indexer,
    metadata,
):
    """Make a multi-relation graph batcher.

    Each entry of ``list_of_relations`` is a 5-tuple
    ``(edge_type, edge_rev_name, edge_frd_name, src_node_type, dst_node_type)``;
    the per-relation edge count is read from the indexer.
    """
    # Use one instantiation of the indexer to compute chunk sizes
    relation_forms = []
    for edge_type, rev_name, frd_name, src_type, dst_type in list_of_relations:
        relation_forms.append(
            Relation(
                edge_type=edge_type,
                edge_rev_name=rev_name,
                edge_frd_name=frd_name,
                src_node_type=src_type,
                dst_node_type=dst_type,
                num_edges=indexer[rev_name].shape[0],
            )
        )
    return MultiRelationGraphBatcher(relation_forms, metadata)
|
||||||
1439
flowdock/models/components/losses.py
Normal file
1439
flowdock/models/components/losses.py
Normal file
File diff suppressed because it is too large
Load Diff
364
flowdock/models/components/mht_encoder.py
Normal file
364
flowdock/models/components/mht_encoder.py
Normal file
@@ -0,0 +1,364 @@
|
|||||||
|
# Adapted from: https://github.com/zrqiao/NeuralPLexer
|
||||||
|
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Any, Dict, Optional, Tuple
|
||||||
|
from omegaconf import DictConfig
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.models.components.hetero_graph import make_multi_relation_graph_batcher
|
||||||
|
from flowdock.models.components.modules import TransformerLayer
|
||||||
|
from flowdock.utils import RankedLogger
|
||||||
|
from flowdock.utils.model_utils import GELUMLP, segment_softmax, segment_sum
|
||||||
|
|
||||||
|
MODEL_BATCH = Dict[str, Any]
|
||||||
|
STATE_DICT = Dict[str, Any]
|
||||||
|
|
||||||
|
log = RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
class PathConvStack(torch.nn.Module):
    """Path integral convolution stack for ligand encoding."""

    def __init__(
        self,
        pair_channels: int,
        n_heads: int = 8,
        max_pi_length: int = 8,
        dropout: float = 0.0,
    ):
        """Initialize PathConvStack model.

        :param pair_channels: Width of the pair representation.
        :param n_heads: Number of propagation heads.
        :param max_pi_length: Number of path-integral propagation steps.
        :param dropout: Dropout rate of the update MLP.
        """
        super().__init__()
        self.pair_channels = pair_channels
        self.max_pi_length = max_pi_length
        self.n_heads = n_heads

        self.prop_value_layer = torch.nn.Linear(pair_channels, n_heads, bias=False)
        self.triangle_pair_kernel_layer = torch.nn.Linear(pair_channels, n_heads, bias=False)
        # One slice of width n_heads per propagation step (plus the initial values).
        self.prop_update_mlp = GELUMLP(
            n_heads * (max_pi_length + 1), pair_channels, dropout=dropout
        )

    def forward(
        self,
        prop_attr: torch.Tensor,
        stereo_attr: torch.Tensor,
        indexer: Dict[str, torch.LongTensor],
        metadata: Dict[str, Any],
    ) -> torch.Tensor:
        """Forward pass for PathConvStack model.

        :param prop_attr: Atom-frame pair attributes.
        :param stereo_attr: Stereochemistry attributes.
        :param indexer: A dictionary of indices.
        :param metadata: A dictionary of metadata.
        :return: Updated atom-frame pair attributes (residual-added).
        """
        # Triangle-pair kernel, softmax-normalized per outgoing-triangle segment.
        kernel_logits = self.triangle_pair_kernel_layer(stereo_attr)
        triangle_alpha = segment_softmax(
            kernel_logits, indexer["gather_idx_ijkl_jkl"], metadata["num_ijk"]
        )
        # Uijk,ijkl->ujkl pair representation update
        gathered_kernel = triangle_alpha[indexer["gather_idx_Uijkl_ijkl"]]
        propagated = [self.prop_value_layer(prop_attr)]
        for _ in range(self.max_pi_length):
            previous_step = propagated[-1][indexer["gather_idx_Uijkl_Uijk"]]
            propagated.append(
                segment_sum(
                    gathered_kernel * previous_step,
                    indexer["gather_idx_Uijkl_ujkl"],
                    metadata["num_Uijk"],
                )
            )
        # Concatenate all propagation steps, project back, and apply a residual.
        return self.prop_update_mlp(torch.cat(propagated, dim=-1)) + prop_attr
|
||||||
|
|
||||||
|
|
||||||
|
class PIFormer(torch.nn.Module):
    """PIFormer model for ligand encoding.

    Interleaves heterogeneous graph-transformer layers (over atom and frame
    nodes) with path-integral convolution stacks, writing the resulting
    ligand representations back into the batch dictionary.
    """

    def __init__(
        self,
        node_channels: int,
        pair_channels: int,
        n_atom_encodings: int,
        n_bond_encodings: int,
        n_atom_pos_encodings: int,
        n_stereo_encodings: int,
        heads: int,
        head_dim: int,
        max_path_length: int = 4,
        n_transformer_stacks: int = 4,
        hidden_dim: Optional[int] = None,
        dropout: float = 0.0,
    ):
        """Initialize PIFormer model.

        :param node_channels: Width of atom/frame node representations.
        :param pair_channels: Width of pair/edge representations.
        :param n_atom_encodings: Input atom feature dimension.
        :param n_bond_encodings: Input bond feature dimension.
        :param n_atom_pos_encodings: Input atom positional feature dimension.
        :param n_stereo_encodings: Input stereochemistry feature dimension.
        :param heads: Number of attention heads per transformer layer.
        :param head_dim: Per-head dimension.
        :param max_path_length: Number of path-integral propagation steps.
        :param n_transformer_stacks: Number of transformer + path-conv blocks.
        :param hidden_dim: Hidden width of the transformer MLPs.
        :param dropout: Dropout rate.
        """
        super().__init__()
        self.node_channels = node_channels
        self.pair_channels = pair_channels
        self.max_pi_length = max_path_length
        self.n_transformer_stacks = n_transformer_stacks
        self.n_atom_encodings = n_atom_encodings
        self.n_bond_encodings = n_bond_encodings
        # NOTE(review): +4 extra atom-pair categories beyond bond encodings —
        # meaning not visible here; confirm against the featurizer.
        self.n_atom_pair_encodings = n_bond_encodings + 4
        self.n_atom_pos_encodings = n_atom_pos_encodings

        self.input_atom_layer = torch.nn.Linear(n_atom_encodings, node_channels)
        self.input_pair_layer = torch.nn.Linear(self.n_atom_pair_encodings, pair_channels)
        self.input_stereo_layer = torch.nn.Linear(n_stereo_encodings, pair_channels)
        self.input_prop_layer = GELUMLP(
            self.n_atom_pair_encodings * 3,
            pair_channels,
        )
        self.path_integral_stacks = torch.nn.ModuleList(
            [
                PathConvStack(
                    pair_channels,
                    max_pi_length=max_path_length,
                    dropout=dropout,
                )
                for _ in range(n_transformer_stacks)
            ]
        )
        self.graph_transformer_stacks = torch.nn.ModuleList(
            [
                TransformerLayer(
                    node_channels,
                    heads,
                    head_dim=head_dim,
                    edge_channels=pair_channels,
                    hidden_dim=hidden_dim,
                    dropout=dropout,
                    edge_update=True,
                )
                for _ in range(n_transformer_stacks)
            ]
        )

    def forward(self, batch: MODEL_BATCH, masking_rate: float = 0.0) -> MODEL_BATCH:
        """Forward pass for PIFormer model.

        Mutates ``batch`` in place (adds ``lig_*`` features and ligand counts
        to metadata) and returns it.

        :param batch: A batch dictionary with ``features``/``indexer``/``metadata``.
        :param masking_rate: Probability of zeroing each input feature row
            (feature dropout-style masking).
        :return: The same batch dictionary, augmented with ligand representations.
        """
        features = batch["features"]
        indexer = batch["indexer"]
        metadata = batch["metadata"]
        # NOTE(review): this line is a no-op self-assignment; presumably a
        # leftover from a removed transformation — confirm and remove upstream.
        features["atom_encodings"] = features["atom_encodings"]
        atom_attr = features["atom_encodings"]
        atom_pair_attr = features["atom_pair_encodings"]
        af_pair_attr = features["atom_frame_pair_encodings"]
        stereo_enc = features["stereo_chemistry_encodings"]
        # Keep detached copies of the raw input tokens for downstream use.
        batch["features"]["lig_atom_token"] = atom_attr.detach().clone()
        batch["features"]["lig_pair_token"] = atom_pair_attr.detach().clone()

        # Independent Bernoulli keep-masks per feature row.
        atom_mask = torch.rand(atom_attr.shape[0], device=atom_attr.device) > masking_rate
        stereo_mask = torch.rand(stereo_enc.shape[0], device=stereo_enc.device) > masking_rate
        atom_pair_mask = (
            torch.rand(atom_pair_attr.shape[0], device=atom_pair_attr.device) > masking_rate
        )
        # NOTE(review): this mask reuses atom_pair_attr.device rather than
        # af_pair_attr.device — fine only if both live on the same device.
        af_pair_mask = (
            torch.rand(af_pair_attr.shape[0], device=atom_pair_attr.device) > masking_rate
        )
        atom_attr = atom_attr * atom_mask[:, None]
        stereo_enc = stereo_enc * stereo_mask[:, None]
        atom_pair_attr = atom_pair_attr * atom_pair_mask[:, None]
        af_pair_attr = af_pair_attr * af_pair_mask[:, None]

        # Embedding blocks
        metadata["num_atom"] = metadata["num_u"]
        metadata["num_frame"] = metadata["num_ijk"]
        atom_attr = self.input_atom_layer(atom_attr)
        atom_pair_attr = self.input_pair_layer(atom_pair_attr)
        # Frame nodes start from zero embeddings.
        triangle_attr = atom_attr.new_zeros(metadata["num_frame"], self.node_channels)
        # Initialize atom-frame pair attributes. Reusing uv indices
        prop_attr = self.input_prop_layer(af_pair_attr)
        stereo_attr = self.input_stereo_layer(stereo_enc)

        # Relations: (edge_type, src index key, dst index key, src type, dst type).
        graph_relations = [
            ("atom_to_atom", "gather_idx_uv_u", "gather_idx_uv_v", "atom", "atom"),
            (
                "atom_to_frame",
                "gather_idx_Uijk_u",
                "gather_idx_Uijk_ijk",
                "atom",
                "frame",
            ),
            (
                "frame_to_atom",
                "gather_idx_Uijk_ijk",
                "gather_idx_Uijk_u",
                "frame",
                "atom",
            ),
            (
                "frame_to_frame",
                "gather_idx_ijkl_ijk",
                "gather_idx_ijkl_jkl",
                "frame",
                "frame",
            ),
        ]

        graph_batcher = make_multi_relation_graph_batcher(graph_relations, indexer, metadata)
        merged_edge_idx = graph_batcher.collate_idx_list(indexer)
        node_reps = {"atom": atom_attr, "frame": triangle_attr}
        # NOTE: atom_to_frame and frame_to_atom share the same prop_attr tensor.
        edge_reps = {
            "atom_to_atom": atom_pair_attr,
            "atom_to_frame": prop_attr,
            "frame_to_atom": prop_attr,
            "frame_to_frame": stereo_attr,
        }

        # Graph path integral recursion
        for block_id in range(self.n_transformer_stacks):
            # Flatten node/edge dicts, run one transformer layer, un-flatten.
            merged_node_attr = graph_batcher.collate_node_attr(node_reps)
            merged_edge_attr = graph_batcher.collate_edge_attr(edge_reps)
            _, merged_node_attr, merged_edge_attr = self.graph_transformer_stacks[block_id](
                merged_node_attr,
                merged_node_attr,
                merged_edge_idx,
                merged_edge_attr,
            )
            node_reps = graph_batcher.offload_node_attr(merged_node_attr)
            edge_reps = graph_batcher.offload_edge_attr(merged_edge_attr)
            prop_attr = edge_reps["atom_to_frame"]
            stereo_attr = edge_reps["frame_to_frame"]
            # NOTE(review): PathConvStack already adds prop_attr internally, so
            # this outer addition applies the residual twice — confirm intent.
            prop_attr = prop_attr + self.path_integral_stacks[block_id](
                prop_attr,
                stereo_attr,
                indexer,
                metadata,
            )
            edge_reps["atom_to_frame"] = prop_attr

        node_reps["sampled_frame"] = node_reps["frame"][indexer["gather_idx_I_ijk"]]

        batch["metadata"]["num_lig_atm"] = metadata["num_u"]
        batch["metadata"]["num_lig_trp"] = metadata["num_I"]

        batch["features"]["lig_atom_attr"] = node_reps["atom"]
        # Downsampled ligand frames
        batch["features"]["lig_trp_attr"] = node_reps["sampled_frame"]
        batch["features"]["lig_atom_pair_attr"] = edge_reps["atom_to_atom"]
        batch["features"]["lig_prop_attr"] = edge_reps["atom_to_frame"]
        edge_reps["sampled_atom_to_sampled_frame"] = edge_reps["atom_to_frame"][
            indexer["gather_idx_UI_Uijk"]
        ]
        batch["features"]["lig_af_pair_attr"] = edge_reps["sampled_atom_to_sampled_frame"]
        return batch
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_ligand_encoder(
    ligand_model_cfg: DictConfig,
    task_cfg: DictConfig,
    state_dict: Optional[STATE_DICT] = None,
) -> torch.nn.Module:
    """Instantiates a PIFormer model for ligand encoding.

    :param ligand_model_cfg: Ligand model configuration.
    :param task_cfg: Task configuration.
    :param state_dict: Optional (potentially-pretrained) state dictionary.
    :return: Ligand encoder model.
    """
    model = PIFormer(
        ligand_model_cfg.node_channels,
        ligand_model_cfg.pair_channels,
        ligand_model_cfg.n_atom_encodings,
        ligand_model_cfg.n_bond_encodings,
        ligand_model_cfg.n_atom_pos_encodings,
        ligand_model_cfg.n_stereo_encodings,
        ligand_model_cfg.n_attention_heads,
        ligand_model_cfg.attention_head_dim,
        hidden_dim=ligand_model_cfg.hidden_dim,
        max_path_length=ligand_model_cfg.max_path_integral_length,
        n_transformer_stacks=ligand_model_cfg.n_transformer_stacks,
        dropout=task_cfg.dropout,
    )
    should_load = ligand_model_cfg.from_pretrained and state_dict is not None
    if should_load:
        try:
            # Keep only checkpoint entries belonging to the ligand encoder and
            # strip their leading module-name prefix.
            encoder_weights = {
                ".".join(key.split(".")[1:]): value
                for key, value in state_dict.items()
                if key.startswith("ligand_encoder")
            }
            model.load_state_dict(encoder_weights)
            log.info(
                "Successfully loaded pretrained ligand Molecular Heat Transformer (MHT) weights."
            )
        except Exception as e:
            log.warning(
                f"Skipping loading of pretrained ligand Molecular Heat Transformer (MHT) weights due to: {e}."
            )
    return model
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_relational_reasoning_module(
    protein_model_cfg: DictConfig,
    ligand_model_cfg: DictConfig,
    relational_reasoning_cfg: DictConfig,
    state_dict: Optional[STATE_DICT] = None,
) -> Tuple[torch.nn.Module, torch.nn.Module, torch.nn.Module]:
    """Instantiates relational reasoning module for ligand encoding.

    :param protein_model_cfg: Protein model configuration.
    :param ligand_model_cfg: Ligand model configuration.
    :param relational_reasoning_cfg: Relational reasoning configuration.
    :param state_dict: Optional (potentially-pretrained) state dictionary.
    :return: Relational reasoning modules for ligand encoding.
    """
    molgraph_single_projector = torch.nn.Linear(
        ligand_model_cfg.node_channels, protein_model_cfg.residue_dim, bias=False
    )
    molgraph_pair_projector = torch.nn.Linear(
        ligand_model_cfg.pair_channels, protein_model_cfg.pair_dim, bias=False
    )
    covalent_embed = torch.nn.Embedding(2, protein_model_cfg.pair_dim)

    def _maybe_load(module: torch.nn.Module, prefix: str, description: str) -> None:
        """Best-effort load of checkpoint entries whose keys start with `prefix`,
        with the leading module-name prefix stripped."""
        try:
            module.load_state_dict(
                {
                    ".".join(key.split(".")[1:]): value
                    for key, value in state_dict.items()
                    if key.startswith(prefix)
                }
            )
            log.info(f"Successfully loaded pretrained {description} weights.")
        except Exception as e:
            log.warning(f"Skipping loading of pretrained {description} weights due to: {e}.")

    if relational_reasoning_cfg.from_pretrained and state_dict is not None:
        _maybe_load(
            molgraph_single_projector,
            "molgraph_single_projector",
            "ligand graph single projector",
        )
        _maybe_load(
            molgraph_pair_projector,
            "molgraph_pair_projector",
            "ligand graph pair projector",
        )
        _maybe_load(covalent_embed, "covalent_embed", "ligand covalent embedding")

    return molgraph_single_projector, molgraph_pair_projector, covalent_embed
|
||||||
423
flowdock/models/components/modules.py
Normal file
423
flowdock/models/components/modules.py
Normal file
@@ -0,0 +1,423 @@
|
|||||||
|
import math
|
||||||
|
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from beartype.typing import Optional, Tuple, Union
|
||||||
|
from openfold.model.primitives import Attention
|
||||||
|
from openfold.utils.tensor_utils import permute_final_dims
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.utils.model_utils import GELUMLP, segment_softmax
|
||||||
|
|
||||||
|
|
||||||
|
class MultiHeadAttentionConv(torch.nn.Module):
    """Native Pytorch implementation.

    Multi-head attention message passing over an explicit edge list
    (source/destination index pairs), with optional per-edge attention bias.
    """

    def __init__(
        self,
        dim: Union[int, Tuple[int, int]],
        head_dim: int,
        edge_dim: Optional[int] = None,
        n_heads: int = 1,
        dropout: float = 0.0,
        edge_lin: bool = True,
        **kwargs,
    ):
        """Multi-Head Attention Convolution layer.

        :param dim: Input feature width, or a (source, target) pair of widths.
        :param head_dim: Per-head dimension.
        :param edge_dim: Edge feature width (required when ``edge_lin`` is True).
        :param n_heads: Number of attention heads.
        :param dropout: Dropout applied to the attention weights.
        :param edge_lin: Whether to project edge attributes to attention biases.
        """
        super().__init__()
        self.dim = dim
        self.head_dim = head_dim
        self.n_heads = n_heads
        self.dropout = dropout
        self.edge_dim = edge_dim
        self.edge_lin = edge_lin
        # Stash for the most recent attention weights (consumed in forward).
        self._alpha = None

        if isinstance(dim, int):
            dim = (dim, dim)

        self.lin_key = torch.nn.Linear(dim[0], n_heads * head_dim, bias=False)
        self.lin_query = torch.nn.Linear(dim[1], n_heads * head_dim, bias=False)
        self.lin_value = torch.nn.Linear(dim[0], n_heads * head_dim, bias=False)
        if edge_lin is True:
            self.lin_edge = torch.nn.Linear(edge_dim, n_heads, bias=False)
        else:
            # register_parameter returns None, so self.lin_edge ends up None
            # while "lin_edge" is still registered on the module.
            self.lin_edge = self.register_parameter("lin_edge", None)

        self.reset_parameters()

    def reset_parameters(self):
        """Reset the parameters of the layer."""
        self.lin_key.reset_parameters()
        self.lin_query.reset_parameters()
        self.lin_value.reset_parameters()
        if self.edge_lin:
            self.lin_edge.reset_parameters()

    def forward(
        self,
        x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]],
        edge_index: torch.Tensor,
        edge_attr: Optional[torch.Tensor] = None,
        return_attention_weights=None,
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]]:
        """Forward pass of the Multi-Head Attention Convolution layer.

        :param x (torch.Tensor or Tuple[torch.Tensor, torch.Tensor]): The input features
            as a single tensor (self-attention) or a (source, target) pair.
        :param edge_index (torch.Tensor): The edge index tensor; row 0 holds
            source indices, row 1 destination indices.
        :param edge_attr (torch.Tensor, optional): The edge attribute tensor.
        :param return_attention_weights (bool, optional): If set to `True`,
            will additionally return the tuple
            `(edge_index, attention_weights)`, holding the computed
            attention weights for each edge. Default is `None`.
        :return: The output features or the tuple
            `(output_features, (edge_index, attention_weights))`.
        """

        H, C = self.n_heads, self.head_dim

        if isinstance(x, torch.Tensor):
            x = (x, x)

        # Project to per-head query/key/value: [..., H, C].
        query = self.lin_query(x[1]).view(*x[1].shape[:-1], H, C)
        key = self.lin_key(x[0]).view(*x[0].shape[:-1], H, C)
        value = self.lin_value(x[0]).view(*x[0].shape[:-1], H, C)

        attended_values = self.message(key, query, value, edge_attr, edge_index)
        out = self.aggregate(attended_values, edge_index[1], query.shape[0])

        # message() stashed the raw logits; read and clear the side channel.
        alpha = self._alpha
        self._alpha = None

        # Merge heads back into one feature dimension.
        out = out.contiguous().view(*out.shape[:-2], H * C)

        if isinstance(return_attention_weights, bool):
            assert alpha is not None
            return out, (edge_index, alpha)
        else:
            return out

    def message(
        self,
        key: torch.Tensor,
        query: torch.Tensor,
        value: torch.Tensor,
        edge_attr: torch.Tensor,
        index: torch.Tensor,
    ) -> torch.Tensor:
        """Add the relative positional encodings to attention scores.

        Computes per-edge scaled dot-product attention logits (optionally
        biased by projected edge attributes), softmax-normalizes them per
        destination node, and weights the gathered values.

        :param key (torch.Tensor): The key tensor.
        :param query (torch.Tensor): The query tensor.
        :param value (torch.Tensor): The value tensor.
        :param edge_attr (torch.Tensor): The edge attribute tensor.
        :param index (torch.Tensor): The edge index tensor.
        :return: The output tensor.
        """
        edge_bias = 0
        if self.lin_edge is not None:
            assert edge_attr is not None
            edge_bias = self.lin_edge(edge_attr)

        # Scaled dot-product logits per edge and head.
        _alpha_z = (query[index[1]] * key[index[0]]).sum(dim=-1) / math.sqrt(
            self.head_dim
        ) + edge_bias
        # NOTE: the stored attention weights are the pre-softmax logits.
        self._alpha = _alpha_z
        # Normalize over edges sharing the same destination node.
        alpha = segment_softmax(_alpha_z, index[1], query.shape[0])
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)

        # Advanced indexing copies, so the in-place multiply below is safe.
        out = value[index[0]]
        out *= alpha.unsqueeze(-1)
        return out

    def aggregate(
        self, src: torch.Tensor, dst_idx: torch.Tensor, dst_size: int
    ) -> torch.Tensor:
        """Aggregate the source tensor to the destination tensor.

        Sum-scatters per-edge messages into a zero-initialized output of
        ``dst_size`` destination rows.

        :param src (torch.Tensor): The source tensor.
        :param dst_idx (torch.Tensor): The destination index tensor.
        :param dst_size (int): The number of destination rows.
        :return: The output tensor.
        """
        out = torch.zeros(
            dst_size,
            *src.shape[1:],
            dtype=src.dtype,
            device=src.device,
        ).index_add_(0, dst_idx, src)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class TransformerLayer(torch.nn.Module):
    """A single layer of a transformer model.

    Wraps :class:`MultiHeadAttentionConv` with residual connections, a
    pre-norm MLP, optional reverse-direction (bidirectional) message passing,
    and an optional edge-representation update.
    """

    def __init__(
        self,
        node_dim: int,
        n_heads: int,
        head_dim: Optional[int] = None,
        hidden_dim: Optional[int] = None,
        bidirectional: bool = False,
        edge_channels: Optional[int] = None,
        dropout: float = 0.0,
        edge_update: bool = False,
    ):
        """Initialize the transformer layer.

        :param node_dim: Node feature width.
        :param n_heads: Number of attention heads.
        :param head_dim: Per-head dimension (defaults to node_dim // n_heads).
        :param hidden_dim: Hidden width of the feed-forward MLP.
        :param bidirectional: Also attend along reversed edges.
        :param edge_channels: Edge feature width (enables edge attention bias).
        :param dropout: Dropout rate.
        :param edge_update: Also update edge attributes from attention weights.
        """
        super().__init__()
        edge_lin = edge_channels is not None
        self.edge_update = edge_update
        if head_dim is None:
            head_dim = node_dim // n_heads
        self.conv = MultiHeadAttentionConv(
            node_dim,
            head_dim,
            edge_dim=edge_channels,
            n_heads=n_heads,
            edge_lin=edge_lin,
            dropout=dropout,
        )
        self.bidirectional = bidirectional
        self.projector = torch.nn.Linear(head_dim * n_heads, node_dim, bias=False)
        self.norm = torch.nn.LayerNorm(node_dim)
        self.mlp = GELUMLP(
            node_dim,
            node_dim,
            n_hidden_feats=hidden_dim,
            dropout=dropout,
            zero_init=True,
        )
        if edge_update:
            # Maps (attention logits ++ edge attrs) to an edge-attr residual.
            self.mlpe = GELUMLP(
                n_heads + edge_channels, edge_channels, dropout=dropout, zero_init=True
            )

    def forward(
        self,
        x_s: torch.Tensor,
        x_a: torch.Tensor,
        edge_index: torch.Tensor,
        edge_attr: Optional[torch.Tensor] = None,
    ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]:
        """Forward pass through the transformer layer.

        :param x_s (torch.Tensor): The source node features.
        :param x_a (torch.Tensor): The target node features.
        :param edge_index (torch.Tensor): The edge index tensor. :param edge_attr (torch.Tensor,
            optional): The edge attribute tensor.
        :return: The output source and target node features (and, when
            ``edge_update`` is enabled, the updated edge attributes).
        """
        # Source-to-target attention; also capture the attention logits for
        # the optional edge update below.
        out_a, (edge_index, alpha) = self.conv(
            (x_s, x_a),
            edge_index,
            edge_attr,
            return_attention_weights=True,
        )
        # Residual attention update followed by a pre-norm MLP residual.
        x_a = x_a + self.projector(out_a)
        x_a = self.mlp(self.norm(x_a)) + x_a
        if self.bidirectional:
            # Reverse pass: same conv/projector/MLP, with edges flipped.
            out_s = self.conv((x_a, x_s), (edge_index[1], edge_index[0]), edge_attr)
            x_s = x_s + self.projector(out_s)
            x_s = self.mlp(self.norm(x_s)) + x_s
        if self.edge_update:
            # NOTE: `alpha` comes from the forward-direction pass only.
            edge_attr = edge_attr + self.mlpe(torch.cat([alpha, edge_attr], dim=-1))
            return x_s, x_a, edge_attr
        else:
            return x_s, x_a
|
||||||
|
|
||||||
|
|
||||||
|
class PointSetAttention(torch.nn.Module):
    """Attention over 3D point sets with scalar and coordinate channels.

    Each node carries one scalar channel plus three coordinate channels per
    head/point. Attention logits combine a scaled scalar dot-product term with
    a squared-distance term between query and key points (softplus-weighted
    per head), optionally biased by edge features.
    """

    def __init__(
        self,
        fiber_dim: int,
        heads: int = 8,
        point_dim: int = 4,
        edge_dim: Optional[int] = None,
        edge_update: bool = False,
        dropout: float = 0.0,
    ):
        """Initialize the PointSetAttention module.

        :param fiber_dim: Width of the per-node input/output features.
        :param heads: Number of attention heads.
        :param point_dim: Number of points (channels) per head.
        :param edge_dim: Width of optional edge features used as a logit bias.
        :param edge_update: Whether ``forward`` also returns updated edge features.
        :param dropout: Dropout probability applied to the attention weights.
        """
        super().__init__()
        self.fiber_dim = fiber_dim
        self.edge_dim = edge_dim
        self.heads = heads
        self.point_dim = point_dim
        self.dropout = dropout
        self.edge_update = edge_update
        self.distance_scaling = 10  # 1 nm

        # num attention contributions (scalar term + point term)
        num_attn_logits = 2

        self.lin_query = torch.nn.Linear(fiber_dim, point_dim * heads, bias=False)
        self.lin_key = torch.nn.Linear(fiber_dim, point_dim * heads, bias=False)
        self.lin_value = torch.nn.Linear(fiber_dim, point_dim * heads, bias=False)
        if edge_dim is not None:
            self.lin_edge = torch.nn.Linear(edge_dim, heads, bias=False)
            if edge_update:
                self.edge_update_mlp = GELUMLP(heads + edge_dim, edge_dim)

        # qkv projection scale for scalar attention (normal dot-product track)
        self.scalar_attn_logits_scale = (num_attn_logits * point_dim) ** -0.5

        # Per-head weights for the point (distance) attention term, parameterized
        # through softplus; initialized so that softplus(w) == 1.
        # BUGFIX: the original code created this parameter twice with identical
        # values; the redundant second initialization has been removed.
        point_weight_init_value = torch.log(torch.exp(torch.full((heads,), 1.0)) - 1.0)
        self.point_weights = torch.nn.Parameter(point_weight_init_value)
        self.point_attn_logits_scale = ((num_attn_logits * point_dim) * (9 / 2)) ** -0.5

        # combine out - scalar + 3 coordinate channels back to fiber_dim
        self.to_out = torch.nn.Linear(heads * point_dim, fiber_dim, bias=False)

    def forward(
        self,
        x_k: torch.Tensor,
        x_q: torch.Tensor,
        edge_index: torch.LongTensor,
        point_centers_k: torch.Tensor,
        point_centers_q: torch.Tensor,
        x_edge: Optional[torch.Tensor] = None,
    ):
        """Forward pass of the PointSetAttention module.

        :param x_k: Key/value node features; channel 0 is the scalar track,
            channels 1:4 carry xyz point offsets.
        :param x_q: Query node features with the same channel layout.
        :param edge_index: ``[2, E]`` edge indices (row 0 gathers keys, row 1 queries).
        :param point_centers_k: Coordinates the key points are expressed around.
        :param point_centers_q: Coordinates the query points are expressed around.
        :param x_edge: Optional ``[E, edge_dim]`` edge features.
        :return: Updated query features (and updated edge features when
            ``edge_update`` is enabled).
        """
        H, P = self.heads, self.point_dim

        q = self.lin_query(x_q)
        k = self.lin_key(x_k)
        v = self.lin_value(x_k)

        # channel 0 carries the scalar attention track
        scalar_q = q[..., 0, :].view(-1, H, P)
        scalar_k = k[..., 0, :].view(-1, H, P)
        scalar_v = v[..., 0, :].view(-1, H, P)

        # channels 1:4 carry per-head xyz point offsets in the local frame
        point_q_local = q[..., 1:, :].view(-1, 3, H, P)
        point_k_local = k[..., 1:, :].view(-1, 3, H, P)
        point_v_local = v[..., 1:, :].view(-1, 3, H, P)

        # shift local points into the shared frame (centers rescaled by 1 nm)
        point_q = point_q_local + point_centers_q[..., None, None] / self.distance_scaling
        point_k = point_k_local + point_centers_k[..., None, None] / self.distance_scaling
        point_v = point_v_local + point_centers_k[..., None, None] / self.distance_scaling

        # optional per-edge, per-head bias from edge features
        if self.edge_dim is not None:
            edge_bias = self.lin_edge(x_edge)
        else:
            edge_bias = 0

        attn_logits, attentions = self.compute_attention(
            scalar_k, scalar_q, point_k, point_q, edge_bias, edge_index
        )
        # scatter-sum the attention-weighted values back onto the query nodes
        res_scalar = self.aggregate(
            attentions[:, :, None] * scalar_v[edge_index[0]],
            edge_index[1],
            scalar_q.shape[0],
        )
        res_points = self.aggregate(
            attentions[:, None, :, None] * point_v[edge_index[0]],
            edge_index[1],
            point_q.shape[0],
        )
        # re-express the aggregated points in the query's local frame
        res_points_local = res_points - point_centers_q[..., None, None] / self.distance_scaling

        # [N, H, P], [N, 3, H, P] -> [N, 4, C]
        res = torch.cat([res_scalar.unsqueeze(-3), res_points_local], dim=-3).flatten(-2, -1)
        out = self.to_out(res)  # [N, 4, C]
        if self.edge_update:
            edge_out = self.edge_update_mlp(torch.cat([attn_logits, x_edge], dim=-1))
            return out, edge_out
        return out

    def compute_attention(self, scalar_k, scalar_q, point_k, point_q, edge_bias, index):
        """Compute per-edge attention logits and normalized attention weights."""
        # gather the per-node features onto the edges
        scalar_q = scalar_q[index[1]]
        scalar_k = scalar_k[index[0]]
        point_q = point_q[index[1]]
        point_k = point_k[index[0]]

        scalar_logits = (scalar_q * scalar_k).sum(dim=-1) * self.scalar_attn_logits_scale
        point_weights = F.softplus(self.point_weights).unsqueeze(0)
        point_logits = (
            torch.square(point_q - point_k).sum(dim=(-3, -1)) * self.point_attn_logits_scale
        )

        # larger q-k distance -> lower logit; edge_bias may be 0 (no edge features)
        logits = scalar_logits - 1 / 2 * point_logits * point_weights + edge_bias
        # NOTE(review): `scalar_q.shape[0]` here is the number of edges (post-gather),
        # not the number of query nodes — presumably `segment_softmax` treats it as
        # an upper bound on the segment count; confirm against its implementation.
        alpha = segment_softmax(logits, index[1], scalar_q.shape[0])
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)
        return logits, alpha

    @staticmethod
    def aggregate(src, dst_idx, dst_size):
        """Scatter-sum rows of `src` into a zero tensor with leading size `dst_size`."""
        out = torch.zeros(
            dst_size,
            *src.shape[1:],
            dtype=src.dtype,
            device=src.device,
        ).index_add_(0, dst_idx, src)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class BiDirectionalTriangleAttention(torch.nn.Module):
    """
    Adapted from https://github.com/aqlaboratory/openfold
    supports rectangular pair representation tensors
    """

    def __init__(self, c_in: int, c_hidden: int, no_heads: int, inf: float = 1e9):
        """Initialize the Bi-Directional Triangle Attention layer.

        :param c_in: Input channel dimension of both node tracks.
        :param c_hidden: Per-head hidden dimension.
        :param no_heads: Number of attention heads.
        :param inf: Large constant used to mask out attention logits.
        """
        super().__init__()

        self.c_in = c_in
        self.c_hidden = c_hidden
        self.no_heads = no_heads
        self.inf = inf

        # projects the pair representation into one bias value per head
        self.linear = torch.nn.Linear(c_in, self.no_heads, bias=False)

        # one attention module per direction; a single layer norm is shared
        self.mha_1 = Attention(self.c_in, self.c_in, self.c_in, self.c_hidden, self.no_heads)
        self.mha_2 = Attention(self.c_in, self.c_in, self.c_in, self.c_hidden, self.no_heads)
        self.layer_norm = torch.nn.LayerNorm(self.c_in)

    def forward(
        self,
        x1: torch.Tensor,
        x2: torch.Tensor,
        x_pair: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
        use_lma: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Attend in both directions across the pair grid and return both tracks."""
        # default to an all-ones mask over the pair grid: [*, I, J, K]
        if mask is None:
            mask = x_pair.new_ones(
                x_pair.shape[:-1],
            )

        x1 = self.layer_norm(x1)  # [*, I, J, C_in]
        x2 = self.layer_norm(x2)  # [*, I, K, C_in]

        # additive mask bias: [*, I, 1, J, K]
        mask_bias = (self.inf * (mask - 1))[..., :, None, :, :]
        # pair-derived bias: [*, I, H, J, K]
        triangle_bias = permute_final_dims(self.linear(x_pair), [0, 3, 1, 2])

        # first direction: x1 queries attend over x2, with a residual update
        x1 = x1 + self.mha_1(
            q_x=x1, kv_x=x2, biases=[mask_bias, triangle_bias], use_lma=use_lma
        )

        # second direction: transpose both biases so x2 can attend over
        # the freshly-updated x1, again with a residual update
        x2 = x2 + self.mha_2(
            q_x=x2,
            kv_x=x1,
            biases=[
                mask_bias.transpose(-2, -1).contiguous(),
                triangle_bias.transpose(-2, -1).contiguous(),
            ],
            use_lma=use_lma,
        )

        return x1, x2
|
||||||
443
flowdock/models/components/noise.py
Normal file
443
flowdock/models/components/noise.py
Normal file
@@ -0,0 +1,443 @@
|
|||||||
|
import numpy as np
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Any, Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.models.components.transforms import LatentCoordinateConverter
|
||||||
|
from flowdock.utils import RankedLogger
|
||||||
|
from flowdock.utils.model_utils import segment_mean
|
||||||
|
|
||||||
|
MODEL_BATCH = Dict[str, Any]
|
||||||
|
|
||||||
|
log = RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
class DiffusionSDE:
    """Diffusion SDE class.

    Adapted from: https://github.com/HannesStark/FlowSite
    """

    def __init__(self, sigma: torch.Tensor, tau_factor: float = 5.0):
        """Store the rate parameter ``lamb = 1 / sigma**2`` and the tau factor."""
        self.lamb = 1 / sigma**2
        self.tau_factor = tau_factor

    def var(self, t: torch.Tensor) -> torch.Tensor:
        """Variance of the diffusion process at time ``t``."""
        lam = self.lamb
        return (1 - torch.exp(-lam * t)) / lam

    def max_t(self) -> float:
        """Maximum diffusion time considered, ``tau_factor / lamb``."""
        return self.tau_factor / self.lamb

    def mu_factor(self, t: torch.Tensor) -> torch.Tensor:
        """Decay factor applied to the process mean at time ``t``."""
        return torch.exp(-self.lamb * t / 2)
|
||||||
|
|
||||||
|
|
||||||
|
class HarmonicSDE:
    """Harmonic SDE class.

    Adapted from: https://github.com/HannesStark/FlowSite

    Diffusion under a harmonic (quadratic) potential defined by a coupling
    matrix ``J``, handled in the eigenbasis ``J = P diag(D) P^T``.
    NOTE(review): `__init__` diagonalizes with NumPy while the static
    `diagonalize` helper uses torch; methods below switch NumPy/torch via
    `self.use_cuda` — confirm which construction path each caller uses.
    """

    def __init__(self, J: Optional[torch.Tensor] = None, diagonalize: bool = True):
        """Initialize the Harmonic SDE class.

        :param J: Optional coupling matrix whose eigendecomposition is computed eagerly.
        :param diagonalize: If ``False``, skip the eigendecomposition entirely.
        """
        # index of the first non-trivial mode (mode 0 is the zero/translation mode)
        self.l_index = 1
        self.use_cuda = False
        if not diagonalize:
            return
        if J is not None:
            # eigenvalues `D` and eigenvectors `P` of the coupling matrix (NumPy path)
            self.D, self.P = np.linalg.eigh(J)
            self.N = self.D.size

    @staticmethod
    def diagonalize(
        N,
        ptr: torch.Tensor,
        edges: Optional[List[Tuple[int, int]]] = None,
        antiedges: Optional[List[Tuple[int, int]]] = None,
        a=1,
        b=0.3,
        lamb: Optional[torch.Tensor] = None,
        device: Optional[Union[str, torch.device]] = None,
    ):
        """Diagonalize using the Harmonic SDE.

        Builds the coupling matrix ``J`` — a linear chain of consecutive nodes
        when `edges` is None, otherwise from the given edges (attractive, weight
        `a`) and antiedges (repulsive, weight `b`) plus the diagonal `lamb` —
        and eigendecomposes it block-wise per the segment boundaries in `ptr`.

        :param N: Total number of nodes.
        :param ptr: Segment boundary indices ``[0, n_1, n_1 + n_2, ...]``.
        :param edges: Optional explicit coupling edges.
        :param antiedges: Optional repulsive edges.
        :param a: Coupling strength for edges / the chain.
        :param b: Coupling strength for antiedges.
        :param lamb: Diagonal term added when `edges` is given.
        :param device: Device for ``J``; defaults to ``ptr.device``.
        :return: Concatenated per-segment eigenvalues (inverted, with the null
            mode zeroed, in the chain case) and the block-diagonal eigenvector matrix.
        """
        device = device or ptr.device
        J = torch.zeros((N, N), device=device)
        if edges is None:
            # default: a linear chain coupling consecutive nodes
            for i, j in zip(np.arange(N - 1), np.arange(1, N)):
                J[i, i] += a
                J[j, j] += a
                J[i, j] = J[j, i] = -a
        else:
            for i, j in edges:
                J[i, i] += a
                J[j, j] += a
                J[i, j] = J[j, i] = -a
        if antiedges is not None:
            for i, j in antiedges:
                J[i, i] -= b
                J[j, j] -= b
                J[i, j] = J[j, i] = b
        if edges is not None:
            J += torch.diag(lamb)

        Ds, Ps = [], []
        # eigendecompose each sample's block independently
        for start, end in zip(ptr[:-1], ptr[1:]):
            D, P = torch.linalg.eigh(J[start:end, start:end])
            D_ = D
            if edges is None:
                # chain case: return inverse eigenvalues, zeroing the null mode
                D_inv = 1 / D
                D_inv[0] = 0
                D_ = D_inv
            Ds.append(D_)
            Ps.append(P)
        return torch.cat(Ds), torch.block_diag(*Ps)

    def eigens(self, t):
        """Calculate the eigenvalues of `sigma_t` using the Harmonic SDE."""
        np_ = torch if self.use_cuda else np
        D = 1 / self.D * (1 - np_.exp(-t * self.D))
        t = torch.tensor(t, device="cuda").float() if self.use_cuda else t
        # zero eigenvalues (null modes) fall back to variance `t` (free diffusion)
        return np_.where(D != 0, D, t)

    def conditional(self, mask, x2):
        """Calculate the conditional distribution using the Harmonic SDE.

        Draws a sample of the unmasked coordinates given the masked ones ``x2``
        (NumPy-only path).
        """
        J_11 = self.J[~mask][:, ~mask]
        J_12 = self.J[~mask][:, mask]
        h = -J_12 @ x2
        mu = np.linalg.inv(J_11) @ h
        D, P = np.linalg.eigh(J_11)
        z = np.random.randn(*mu.shape)
        return (P / D**0.5) @ z + mu

    def A(self, t, invT=False):
        """Calculate the matrix `A` using the Harmonic SDE.

        ``A = P * sqrt(eigens(t))`` so that ``A @ A.T == Sigma(t)``; optionally
        also returns its inverse transpose.
        """
        D = self.eigens(t)
        A = self.P * (D**0.5)
        if not invT:
            return A
        AinvT = self.P / (D**0.5)
        return A, AinvT

    def Sigma_inv(self, t):
        """Calculate the inverse of the covariance matrix `Sigma` using the Harmonic SDE."""
        D = 1 / self.eigens(t)
        return (self.P * D) @ self.P.T

    def Sigma(self, t):
        """Calculate the covariance matrix `Sigma` using the Harmonic SDE."""
        D = self.eigens(t)
        return (self.P * D) @ self.P.T

    @property
    def J(self):
        """Return the matrix `J` reconstructed from its eigendecomposition."""
        return (self.P * self.D) @ self.P.T

    def rmsd(self, t):
        """Calculate the root mean square deviation using the Harmonic SDE."""
        l_index = self.l_index
        D = 1 / self.D * (1 - np.exp(-t * self.D))
        # average over the non-null modes, times 3 spatial dimensions
        return np.sqrt(3 * D[l_index:].mean())

    def sample(self, t, x=None, score=False, k=None, center=True, adj=False):
        """Sample from the Harmonic SDE at time `t`.

        :param t: Diffusion time.
        :param x: Optional starting configuration (defaults to zeros).
        :param score: If truthy, also return the score of the drawn sample.
        :param k: If given, zero out all modes past the first `k` non-null ones.
        :param center: Zero the null (translation) mode.
        :param adj: Adjust the returned score by the drift term.
        :return: The sample (and its score when requested).
        """
        l_index = self.l_index
        np_ = torch if self.use_cuda else np
        if x is None:
            if self.use_cuda:
                x = torch.zeros((self.N, 3), device="cuda").float()
            else:
                x = np.zeros((self.N, 3))
        if t == 0:
            return x
        z = (
            np.random.randn(self.N, 3)
            if not self.use_cuda
            else torch.randn(self.N, 3, device="cuda").float()
        )
        D = self.eigens(t)
        xx = self.P.T @ x  # project the configuration into the eigenbasis
        if center:
            z[0] = 0
            xx[0] = 0
        if k:
            z[k + l_index :] = 0
            xx[k + l_index :] = 0

        # OU-style transition per mode: decayed mean plus scaled Gaussian noise
        out = np_.exp(-t * self.D / 2)[:, None] * xx + np_.sqrt(D)[:, None] * z

        if score:
            score = -(1 / np_.sqrt(D))[:, None] * z
            if adj:
                score = score + self.D[:, None] * out
            return self.P @ out, self.P @ score
        return self.P @ out

    def score_norm(self, t, k=None, adj=False):
        """Calculate the score norm using the Harmonic SDE."""
        if k == 0:
            return 0
        l_index = self.l_index
        np_ = torch if self.use_cuda else np
        k = k or self.N - 1
        D = 1 / self.eigens(t)
        if adj:
            D = D * np_.exp(-self.D * t)
        return (D[l_index : k + l_index].sum() / self.N) ** 0.5

    def inject(self, t, modes):
        """Inject noise along the given modes using the Harmonic SDE."""
        z = (
            np.random.randn(self.N, 3)
            if not self.use_cuda
            else torch.randn(self.N, 3, device="cuda").float()
        )
        z[~modes] = 0  # restrict the noise to the selected modes
        A = self.A(t, invT=False)
        return A @ z

    def score(self, x0, xt, t):
        """Calculate the score of the diffusion kernel using the Harmonic SDE."""
        Sigma_inv = self.Sigma_inv(t)
        # mean of the transition kernel started at x0
        mu_t = (self.P * np.exp(-t * self.D / 2)) @ (self.P.T @ x0)
        return Sigma_inv @ (mu_t - xt)

    def project(self, X, k, center=False):
        """Project onto the first `k` nonzero modes using the Harmonic SDE."""
        l_index = self.l_index
        D = self.P.T @ X
        D[k + l_index :] = 0
        if center:
            D[0] = 0
        return self.P @ D

    def unproject(self, X, mask, k, return_Pinv=False):
        """Find the vector along the first k nonzero modes whose mask is closest to `X`"""
        l_index = self.l_index
        PP = self.P[mask, : k + l_index]
        Pinv = np.linalg.pinv(PP)
        out = self.P[:, : k + l_index] @ Pinv @ X
        if return_Pinv:
            return out, Pinv
        return out

    def energy(self, X):
        """Calculate the energy using the Harmonic SDE."""
        l_index = self.l_index
        # per-mode quadratic energy, summed over the 3 spatial dimensions
        return (self.D[:, None] * (self.P.T @ X) ** 2).sum(-1)[l_index:] / 2

    @property
    def free_energy(self):
        """Calculate the free energy using the Harmonic SDE."""
        l_index = self.l_index
        return 3 * np.log(self.D[l_index:]).sum() / 2

    def KL_H(self, t):
        """Calculate the Kullback-Leibler divergence using the Harmonic SDE."""
        l_index = self.l_index
        D = self.D[l_index:]
        return -3 * 0.5 * (np.log(1 - np.exp(-D * t)) + np.exp(-D * t)).sum(0)
|
||||||
|
|
||||||
|
|
||||||
|
def sample_gaussian_prior(
    x0: torch.Tensor,
    latent_converter: LatentCoordinateConverter,
    sigma: float,
    x0_sigma: float = 1e-4,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Sample noise from a Gaussian prior distribution.

    :param x0: ground-truth tensor
    :param latent_converter: The latent coordinate converter
    :param sigma: standard deviation of the Gaussian noise
    :param x0_sigma: standard deviation of the Gaussian noise for the ground-truth tensor
    :return: tuple of ground-truth and predicted tensors with additive Gaussian prior noise
    """
    noise = torch.randn_like(x0)
    # lightly perturb the ground truth with the same Gaussian draw
    noised_x0 = x0 + noise * x0_sigma
    # split the scaled noise into its Ca / other-atom / ligand segments (this
    # also validates that the segment sizes cover dim 1 exactly), then stitch
    # the segments back together in the original order
    segment_sizes = [
        latent_converter._n_res_per_sample,
        latent_converter._n_cother_per_sample,
        latent_converter._n_ligha_per_sample,
    ]
    ca_seg, cother_seg, lig_seg = torch.split(noise * sigma, segment_sizes, dim=1)
    x_int_1 = torch.cat([ca_seg, cother_seg, lig_seg], dim=1)
    return noised_x0, x_int_1
|
||||||
|
|
||||||
|
|
||||||
|
def sample_protein_harmonic_prior(
    protein_ca_x0: torch.Tensor,
    protein_cother_x0: torch.Tensor,
    batch: MODEL_BATCH,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Sample protein noise from a harmonic prior distribution.
    Adapted from: https://github.com/bjing2016/alphaflow

    Note that this function represents non-Ca atoms as Gaussian noise
    centered around each harmonically-noised Ca atom.

    :param protein_ca_x0: ground-truth protein Ca-atom tensor
    :param protein_cother_x0: ground-truth protein other-atom tensor
    :param batch: A batch dictionary
    :return: tuple of harmonic protein Ca atom noise and Gaussian protein other atom noise
    """
    indexer = batch["indexer"]
    metadata = batch["metadata"]
    struct_ids = indexer["gather_idx_a_structid"]
    num_ca_nodes = protein_ca_x0.size(0) * protein_ca_x0.size(1)

    # per-structure segment boundaries: [0, n_1, n_1 + n_2, ...]
    boundaries = torch.cumsum(torch.bincount(struct_ids), dim=0)
    boundaries = torch.cat((torch.tensor([0], device=struct_ids.device), boundaries))

    try:
        D_inv, P = HarmonicSDE.diagonalize(
            num_ca_nodes,
            boundaries,
            a=3 / (3.8**2),
        )
    except Exception as e:
        log.error(
            f"Failed to call HarmonicSDE.diagonalize() for protein(s) {metadata['sample_ID_per_sample']} due to: {e}"
        )
        raise e

    # harmonic noise for the Ca trace, drawn in the eigenbasis
    z = torch.randn((num_ca_nodes, 3), device=protein_ca_x0.device)
    ca_noise = P @ (torch.sqrt(D_inv)[:, None] * z)
    # Gaussian noise for the remaining atoms, centered on each atom's Ca noise
    cother_noise = (
        torch.randn_like(protein_cother_x0.flatten(0, 1))
        + ca_noise[indexer["gather_idx_a_cotherid"]]
    )
    return (
        ca_noise.view(protein_ca_x0.size()).contiguous(),
        cother_noise.view(protein_cother_x0.size()).contiguous(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def sample_ligand_harmonic_prior(
    lig_x0: torch.Tensor, protein_ca_x0: torch.Tensor, batch: MODEL_BATCH, sigma: float = 1.0
) -> torch.Tensor:
    """
    Sample ligand noise from a harmonic prior distribution.
    Adapted from: https://github.com/HannesStark/FlowSite

    :param lig_x0: ground-truth ligand tensor
    :param protein_ca_x0: ground-truth protein Ca-atom tensor
    :param batch: A batch dictionary
    :param sigma: standard deviation of the harmonic noise
    :return: tensor of harmonic noise
    """
    indexer = batch["indexer"]
    metadata = batch["metadata"]
    lig_num_nodes = lig_x0.size(0) * lig_x0.size(1)
    num_molid_per_sample = max(metadata["num_molid_per_sample"])
    # NOTE: here, we distinguish each ligand chain in a complex for harmonic chain sampling
    lig_bid = indexer["gather_idx_i_molid"]
    # per-structure RMS Ca coordinate, repeated for each ligand chain in that
    # structure — sets the overall scale of the ligand diffusion below
    protein_sigma = (
        segment_mean(
            torch.square(protein_ca_x0).flatten(0, 1),
            indexer["gather_idx_a_structid"],
            metadata["num_structid"],
        ).mean(dim=-1)
        ** 0.5
    ).repeat_interleave(num_molid_per_sample)
    sde = DiffusionSDE(protein_sigma * sigma)
    # undirected bond list from the paired i/j gather indices
    edges = torch.stack(
        (
            indexer["gather_idx_ij_i"],
            indexer["gather_idx_ij_j"],
        )
    )
    edges = edges[:, edges[0] < edges[1]]  # de-duplicate edges
    # per-chain segment boundaries: [0, n_1, n_1 + n_2, ...]
    ptr = torch.cumsum(torch.bincount(lig_bid), dim=0)
    ptr = torch.cat((torch.tensor([0], device=lig_bid.device), ptr))
    try:
        # eigendecomposition of the bond-graph coupling matrix, with the
        # per-chain SDE rate on the diagonal
        D, P = HarmonicSDE.diagonalize(
            lig_num_nodes,
            ptr,
            edges=edges.T,
            lamb=sde.lamb[lig_bid],
        )
    except Exception as e:
        log.error(
            f"Failed to call HarmonicSDE.diagonalize() for ligand(s) {metadata['sample_ID_per_sample']} due to: {e}"
        )
        raise e
    # draw the harmonic sample in the eigenbasis and map back
    noise = torch.randn((lig_num_nodes, 3), device=lig_x0.device)
    prior = P @ (noise / torch.sqrt(D)[:, None])
    return prior.view(lig_x0.size()).contiguous()
|
||||||
|
|
||||||
|
|
||||||
|
def sample_complex_harmonic_prior(
    x0: torch.Tensor,
    latent_converter: LatentCoordinateConverter,
    batch: MODEL_BATCH,
    x0_sigma: float = 1e-4,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Sample protein-ligand complex noise from a harmonic prior distribution.
    From: https://github.com/bjing2016/alphaflow

    :param x0: ground-truth tensor
    :param latent_converter: The latent coordinate converter
    :param batch: A batch dictionary
    :param x0_sigma: standard deviation of the Gaussian noise for the ground-truth tensor
    :return: tuple of ground-truth and predicted tensors with additive Gaussian and harmonic prior noise, respectively
    """
    segment_sizes = [
        latent_converter._n_res_per_sample,
        latent_converter._n_cother_per_sample,
        latent_converter._n_ligha_per_sample,
    ]
    ca_lat, cother_lat, lig_lat = x0.split(segment_sizes, dim=1)

    # harmonic Ca noise plus Ca-centered Gaussian noise for the other protein atoms
    harmonic_ca_lat, gaussian_cother_lat = sample_protein_harmonic_prior(
        ca_lat,
        cother_lat,
        batch,
    )
    # ligand harmonic noise anchored on the noised Ca trace
    harmonic_lig_lat = sample_ligand_harmonic_prior(lig_lat, harmonic_ca_lat, batch)

    # NOTE: the following normalization steps assume that `self.latent_model == "default"`
    x1 = torch.cat(
        [
            harmonic_ca_lat / latent_converter.ca_scale,
            gaussian_cother_lat / latent_converter.other_scale,
            harmonic_lig_lat / latent_converter.other_scale,
        ],
        dim=1,
    )
    # lightly perturb the ground truth with independent Gaussian noise
    return x0 + torch.randn_like(x0) * x0_sigma, x1
|
||||||
|
|
||||||
|
|
||||||
|
def sample_esmfold_prior(
    x0: torch.Tensor, x1: torch.Tensor, sigma: float, x0_sigma: float = 1e-4
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Sample noise from an ESMFold prior distribution.

    A single Gaussian draw perturbs both tensors, so the outputs carry the
    same noise realization at two different scales.

    :param x0: ground-truth tensor
    :param x1: predicted tensor
    :param sigma: standard deviation of the ESMFold prior's additive Gaussian noise
    :param x0_sigma: standard deviation of the Gaussian noise for the ground-truth tensor
    :return: tuple of ground-truth and predicted tensors with additive Gaussian prior noise
    """
    shared_noise = torch.randn_like(x0)
    return x0 + shared_noise * x0_sigma, x1 + shared_noise * sigma
|
||||||
241
flowdock/models/components/transforms.py
Normal file
241
flowdock/models/components/transforms.py
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
# Adapted from: https://github.com/zrqiao/NeuralPLexer
|
||||||
|
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.utils.model_utils import segment_mean
|
||||||
|
|
||||||
|
|
||||||
|
class LatentCoordinateConverter:
    """Transform the batched feature dict to latent coordinate arrays."""

    def __init__(self, config, prot_atom37_namemap, lig_namemap):
        """Store configuration plus the protein/ligand coordinate name maps."""
        super().__init__()
        self.config = config
        self.prot_namemap = prot_atom37_namemap
        self.lig_namemap = lig_namemap
        # populated lazily by subclasses during conversion
        self.cached_noise = None
        self._last_pred_ca_trace = None

    @staticmethod
    def nested_get(dic, keys):
        """Walk `dic` through the sequence of `keys` and return the leaf value."""
        node = dic
        for key in keys:
            node = node[key]
        return node

    @staticmethod
    def nested_set(dic, keys, value):
        """Set `value` at the nested path `keys`, creating dicts along the way."""
        node = dic
        for key in keys[:-1]:
            node = node.setdefault(key, {})
        node[keys[-1]] = value

    def to_latent(self, batch):
        """Convert the batched feature dict to latent coordinates (no-op base)."""
        return None

    def assign_to_batch(self, batch, x_int):
        """Assign the latent coordinates to the batched feature dict (no-op base)."""
        return None
|
||||||
|
|
||||||
|
|
||||||
|
class DefaultPLCoordinateConverter(LatentCoordinateConverter):
|
||||||
|
"""Minimal conversion, using internal coords for sidechains and global coords for others."""
|
||||||
|
|
||||||
|
def __init__(self, config, prot_atom37_namemap, lig_namemap):
|
||||||
|
"""Initialize the converter."""
|
||||||
|
super().__init__(config, prot_atom37_namemap, lig_namemap)
|
||||||
|
# Scale parameters in Angstrom
|
||||||
|
self.ca_scale = config.global_max_sigma
|
||||||
|
self.other_scale = config.internal_max_sigma
|
||||||
|
|
||||||
|
    def to_latent(self, batch: dict):
        """Convert the batched feature dict to latent coordinates.

        Returns a single ``[batch, n_latent, 3]`` tensor laid out as:
        Ca | apo Ca | non-Ca atoms | apo non-Ca atoms
        (and, when ligands are present, additionally:
        Ca centroid | apo Ca centroid | ligand heavy atoms).
        Also caches the per-sample segment sizes and the non-Ca atom mask
        consumed later by `assign_to_batch`.
        """
        indexer = batch["indexer"]
        metadata = batch["metadata"]
        self._batch_size = metadata["num_structid"]
        atom37_mask = batch["features"]["res_atom_mask"].bool()
        # mask of all resolved atoms except Ca (atom37 index 1)
        self._cother_mask = atom37_mask.clone()
        self._cother_mask[:, 1] = False
        atom37_coords = self.nested_get(batch, self.prot_namemap[0])
        try:
            # the apo structure lives under the same path with an "apo_" prefix;
            # missing apo coordinates fall back to zeros
            apo_available = True
            apo_atom37_coords = self.nested_get(
                batch, self.prot_namemap[0][:-1] + ("apo_" + self.prot_namemap[0][-1],)
            )
        except KeyError:
            apo_available = False
            apo_atom37_coords = torch.zeros_like(atom37_coords)
        ca_atom_centroid_coords = segment_mean(
            # NOTE: in contrast to NeuralPLexer, we center all coordinates at the origin using the Ca atom centroids
            atom37_coords[:, 1],
            indexer["gather_idx_a_structid"],
            self._batch_size,
        )
        if apo_available:
            apo_ca_atom_centroid_coords = segment_mean(
                apo_atom37_coords[:, 1],
                indexer["gather_idx_a_structid"],
                self._batch_size,
            )
        else:
            apo_ca_atom_centroid_coords = torch.zeros_like(ca_atom_centroid_coords)
        # Ca coordinates, centered per structure: [batch, n_res, 3]
        ca_coords_glob = (
            (atom37_coords[:, 1] - ca_atom_centroid_coords[indexer["gather_idx_a_structid"]])
            .contiguous()
            .view(self._batch_size, -1, 3)
        )
        if apo_available:
            apo_ca_coords_glob = (
                (
                    apo_atom37_coords[:, 1]
                    - apo_ca_atom_centroid_coords[indexer["gather_idx_a_structid"]]
                )
                .contiguous()
                .view(self._batch_size, -1, 3)
            )
        else:
            apo_ca_coords_glob = torch.zeros_like(ca_coords_glob)
        # non-Ca atoms, likewise centered and flattened: [batch, n_cother, 3]
        cother_coords_int = (
            (atom37_coords - ca_atom_centroid_coords[indexer["gather_idx_a_structid"], None])[
                self._cother_mask
            ]
            .contiguous()
            .view(self._batch_size, -1, 3)
        )
        if apo_available:
            apo_cother_coords_int = (
                (
                    apo_atom37_coords
                    - apo_ca_atom_centroid_coords[indexer["gather_idx_a_structid"], None]
                )[self._cother_mask]
                .contiguous()
                .view(self._batch_size, -1, 3)
            )
        else:
            apo_cother_coords_int = torch.zeros_like(cother_coords_int)
        # cache per-sample segment sizes for `assign_to_batch`
        self._n_res_per_sample = ca_coords_glob.shape[1]
        self._n_cother_per_sample = cother_coords_int.shape[1]
        if batch["misc"]["protein_only"]:
            # protein-only layout: Ca | apo Ca | non-Ca | apo non-Ca
            self._n_ligha_per_sample = 0
            x_int = torch.cat(
                [
                    ca_coords_glob / self.ca_scale,
                    apo_ca_coords_glob / self.ca_scale,
                    cother_coords_int / self.other_scale,
                    apo_cother_coords_int / self.other_scale,
                ],
                dim=1,
            )
            return x_int
        # ligand heavy atoms, centered on their structure's Ca centroid
        lig_ha_coords = self.nested_get(batch, self.lig_namemap[0])
        lig_ha_coords_int = (
            lig_ha_coords - ca_atom_centroid_coords[indexer["gather_idx_i_structid"]]
        )
        lig_ha_coords_int = lig_ha_coords_int.contiguous().view(self._batch_size, -1, 3)
        ca_atom_centroid_coords = ca_atom_centroid_coords.contiguous().view(
            self._batch_size, -1, 3
        )
        apo_ca_atom_centroid_coords = apo_ca_atom_centroid_coords.contiguous().view(
            self._batch_size, -1, 3
        )
        x_int = torch.cat(
            [
                ca_coords_glob / self.ca_scale,
                apo_ca_coords_glob / self.ca_scale,
                cother_coords_int / self.other_scale,
                apo_cother_coords_int / self.other_scale,
                ca_atom_centroid_coords / self.ca_scale,
                apo_ca_atom_centroid_coords / self.ca_scale,
                lig_ha_coords_int / self.other_scale,
            ],
            dim=1,
        )
        # NOTE: since we use the Ca atom centroids for centralization, we have only one molid per sample
        self._n_molid_per_sample = ca_atom_centroid_coords.shape[1]
        self._n_ligha_per_sample = lig_ha_coords_int.shape[1]
        return x_int
|
||||||
|
|
||||||
|
def assign_to_batch(self, batch: dict, x_lat: torch.Tensor):
|
||||||
|
"""Assign the latent coordinates to the batched feature dict."""
|
||||||
|
indexer = batch["indexer"]
|
||||||
|
new_atom37_coords = x_lat.new_zeros(self._batch_size * self._n_res_per_sample, 37, 3)
|
||||||
|
apo_new_atom37_coords = x_lat.new_zeros(self._batch_size * self._n_res_per_sample, 37, 3)
|
||||||
|
if batch["misc"]["protein_only"]:
|
||||||
|
ca_lat, apo_ca_lat, cother_lat, apo_cother_lat = torch.split(
|
||||||
|
x_lat,
|
||||||
|
[
|
||||||
|
self._n_res_per_sample,
|
||||||
|
self._n_res_per_sample,
|
||||||
|
self._n_cother_per_sample,
|
||||||
|
self._n_cother_per_sample,
|
||||||
|
],
|
||||||
|
dim=1,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
(
|
||||||
|
ca_lat,
|
||||||
|
apo_ca_lat,
|
||||||
|
cother_lat,
|
||||||
|
apo_cother_lat,
|
||||||
|
ca_cent_lat,
|
||||||
|
_,
|
||||||
|
lig_lat,
|
||||||
|
) = torch.split(
|
||||||
|
x_lat,
|
||||||
|
[
|
||||||
|
self._n_res_per_sample,
|
||||||
|
self._n_res_per_sample,
|
||||||
|
self._n_cother_per_sample,
|
||||||
|
self._n_cother_per_sample,
|
||||||
|
self._n_molid_per_sample,
|
||||||
|
self._n_molid_per_sample,
|
||||||
|
self._n_ligha_per_sample,
|
||||||
|
],
|
||||||
|
dim=1,
|
||||||
|
)
|
||||||
|
new_ca_glob = (ca_lat * self.ca_scale).contiguous().flatten(0, 1)
|
||||||
|
apo_new_ca_glob = (apo_ca_lat * self.ca_scale).contiguous().flatten(0, 1)
|
||||||
|
new_atom37_coords[self._cother_mask] = (
|
||||||
|
(cother_lat * self.other_scale).contiguous().flatten(0, 1)
|
||||||
|
)
|
||||||
|
apo_new_atom37_coords[self._cother_mask] = (
|
||||||
|
(apo_cother_lat * self.other_scale).contiguous().flatten(0, 1)
|
||||||
|
)
|
||||||
|
new_atom37_coords = new_atom37_coords
|
||||||
|
apo_new_atom37_coords = apo_new_atom37_coords
|
||||||
|
new_atom37_coords[~self._cother_mask] = 0
|
||||||
|
apo_new_atom37_coords[~self._cother_mask] = 0
|
||||||
|
new_atom37_coords[:, 1] = new_ca_glob
|
||||||
|
apo_new_atom37_coords[:, 1] = apo_new_ca_glob
|
||||||
|
self.nested_set(batch, self.prot_namemap[1], new_atom37_coords)
|
||||||
|
self.nested_set(
|
||||||
|
batch,
|
||||||
|
self.prot_namemap[1][:-1] + ("apo_" + self.prot_namemap[1][-1],),
|
||||||
|
apo_new_atom37_coords,
|
||||||
|
)
|
||||||
|
if batch["misc"]["protein_only"]:
|
||||||
|
self.nested_set(batch, self.lig_namemap[1], None)
|
||||||
|
self.empty_cache()
|
||||||
|
return batch
|
||||||
|
new_ligha_coords_int = (lig_lat * self.other_scale).contiguous().flatten(0, 1)
|
||||||
|
new_ligha_coords_cent = (ca_cent_lat * self.ca_scale).contiguous().flatten(0, 1)
|
||||||
|
new_ligha_coords = (
|
||||||
|
new_ligha_coords_int + new_ligha_coords_cent[indexer["gather_idx_i_structid"]]
|
||||||
|
)
|
||||||
|
self.nested_set(batch, self.lig_namemap[1], new_ligha_coords)
|
||||||
|
self.empty_cache()
|
||||||
|
return batch
|
||||||
|
|
||||||
|
def empty_cache(self):
|
||||||
|
"""Empty the cached variables."""
|
||||||
|
self._batch_size = None
|
||||||
|
self._cother_mask = None
|
||||||
|
self._n_res_per_sample = None
|
||||||
|
self._n_cother_per_sample = None
|
||||||
|
self._n_ligha_per_sample = None
|
||||||
|
self._n_molid_per_sample = None
|
||||||
943
flowdock/models/flowdock_fm_module.py
Normal file
943
flowdock/models/flowdock_fm_module.py
Normal file
@@ -0,0 +1,943 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import esm
|
||||||
|
import numpy as np
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Any, Dict, Literal, Optional, Union
|
||||||
|
from lightning import LightningModule
|
||||||
|
from omegaconf import DictConfig
|
||||||
|
from torchmetrics.functional.regression import (
|
||||||
|
mean_absolute_error,
|
||||||
|
mean_squared_error,
|
||||||
|
pearson_corrcoef,
|
||||||
|
spearman_corrcoef,
|
||||||
|
)
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.models.components.losses import (
|
||||||
|
eval_auxiliary_estimation_losses,
|
||||||
|
eval_structure_prediction_losses,
|
||||||
|
)
|
||||||
|
from flowdock.utils import RankedLogger
|
||||||
|
from flowdock.utils.data_utils import pdb_filepath_to_protein, prepare_batch
|
||||||
|
from flowdock.utils.model_utils import extract_esm_embeddings
|
||||||
|
from flowdock.utils.sampling_utils import multi_pose_sampling
|
||||||
|
from flowdock.utils.visualization_utils import (
|
||||||
|
construct_prot_lig_pairs,
|
||||||
|
write_prot_lig_pairs_to_pdb_file,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Type aliases shared by the Lightning module below.
MODEL_BATCH = Dict[str, Any]
MODEL_STAGE = Literal["train", "val", "test", "predict"]

# Supported loss-evaluation modes: list form for runtime membership checks,
# `Literal` form for static type annotations. Keep the two in sync.
LOSS_MODES_LIST = [
    "structure_prediction",
    "auxiliary_estimation",
    "auxiliary_estimation_without_structure_prediction",
]
LOSS_MODES = Literal[
    "structure_prediction",
    "auxiliary_estimation",
    "auxiliary_estimation_without_structure_prediction",
]

# Stages during which auxiliary (confidence/affinity) losses may be evaluated.
AUX_ESTIMATION_STAGES = ["train", "val", "test"]

# Rank-zero-only logger for multi-process (e.g., DDP) runs.
log = RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
class FlowDockFMLitModule(LightningModule):
|
||||||
|
"""A `LightningModule` for geometric flow matching (FM) with FlowDock.
|
||||||
|
|
||||||
|
A `LightningModule` implements 8 key methods:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def __init__(self):
|
||||||
|
# Define initialization code here.
|
||||||
|
|
||||||
|
def setup(self, stage):
|
||||||
|
# Things to setup before each stage, 'fit', 'validate', 'test', 'predict'.
|
||||||
|
# This hook is called on every process when using DDP.
|
||||||
|
|
||||||
|
def training_step(self, batch, batch_idx):
|
||||||
|
# The complete training step.
|
||||||
|
|
||||||
|
def validation_step(self, batch, batch_idx):
|
||||||
|
# The complete validation step.
|
||||||
|
|
||||||
|
def test_step(self, batch, batch_idx):
|
||||||
|
# The complete test step.
|
||||||
|
|
||||||
|
def predict_step(self, batch, batch_idx):
|
||||||
|
# The complete predict step.
|
||||||
|
|
||||||
|
def configure_optimizers(self):
|
||||||
|
# Define and configure optimizers and LR schedulers.
|
||||||
|
```
|
||||||
|
|
||||||
|
Docs:
|
||||||
|
https://lightning.ai/docs/pytorch/latest/common/lightning_module.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
net: torch.nn.Module,
|
||||||
|
optimizer: torch.optim.Optimizer,
|
||||||
|
scheduler: torch.optim.lr_scheduler,
|
||||||
|
compile: bool,
|
||||||
|
cfg: DictConfig,
|
||||||
|
**kwargs: Dict[str, Any],
|
||||||
|
):
|
||||||
|
"""Initialize a `FlowDockFMLitModule`.
|
||||||
|
|
||||||
|
:param net: The model to train.
|
||||||
|
:param optimizer: The optimizer to use for training.
|
||||||
|
:param scheduler: The learning rate scheduler to use for training.
|
||||||
|
:param compile: Whether to compile the model before training.
|
||||||
|
:param cfg: The model configuration.
|
||||||
|
:param kwargs: Additional keyword arguments.
|
||||||
|
"""
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
# the model along with its hyperparameters
|
||||||
|
self.net = net(cfg)
|
||||||
|
|
||||||
|
# this line allows to access init params with 'self.hparams' attribute
|
||||||
|
# also ensures init params will be stored in ckpt
|
||||||
|
self.save_hyperparameters(logger=False, ignore=["net"])
|
||||||
|
|
||||||
|
# for validating input arguments
|
||||||
|
if self.hparams.cfg.task.loss_mode not in LOSS_MODES_LIST:
|
||||||
|
raise ValueError(
|
||||||
|
f"Invalid loss mode: {self.hparams.cfg.task.loss_mode}. Must be one of {LOSS_MODES}."
|
||||||
|
)
|
||||||
|
|
||||||
|
# for inspecting the model's outputs during validation and testing
|
||||||
|
(
|
||||||
|
self.training_step_outputs,
|
||||||
|
self.validation_step_outputs,
|
||||||
|
self.test_step_outputs,
|
||||||
|
self.predict_step_outputs,
|
||||||
|
) = (
|
||||||
|
[],
|
||||||
|
[],
|
||||||
|
[],
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
|
||||||
|
def forward(
|
||||||
|
self,
|
||||||
|
batch: MODEL_BATCH,
|
||||||
|
iter_id: Union[int, str] = 0,
|
||||||
|
observed_block_contacts: Optional[torch.Tensor] = None,
|
||||||
|
contact_prediction: bool = True,
|
||||||
|
infer_geometry_prior: bool = False,
|
||||||
|
score: bool = False,
|
||||||
|
affinity: bool = True,
|
||||||
|
use_template: bool = False,
|
||||||
|
**kwargs: Dict[str, Any],
|
||||||
|
) -> MODEL_BATCH:
|
||||||
|
"""Perform a forward pass through the model.
|
||||||
|
|
||||||
|
:param batch: A batch dictionary.
|
||||||
|
:param iter_id: The current iteration ID.
|
||||||
|
:param observed_block_contacts: Observed block contacts.
|
||||||
|
:param contact_prediction: Whether to predict contacts.
|
||||||
|
:param infer_geometry_prior: Whether to predict using a geometry prior.
|
||||||
|
:param score: Whether to predict a denoised complex structure.
|
||||||
|
:param affinity: Whether to predict ligand binding affinity.
|
||||||
|
:param use_template: Whether to use a template protein structure.
|
||||||
|
:param kwargs: Additional keyword arguments.
|
||||||
|
:return: Batch dictionary with outputs.
|
||||||
|
"""
|
||||||
|
return self.net(
|
||||||
|
batch,
|
||||||
|
iter_id=iter_id,
|
||||||
|
observed_block_contacts=observed_block_contacts,
|
||||||
|
contact_prediction=contact_prediction,
|
||||||
|
infer_geometry_prior=infer_geometry_prior,
|
||||||
|
score=score,
|
||||||
|
affinity=affinity,
|
||||||
|
use_template=use_template,
|
||||||
|
training=self.training,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
|
def model_step(
|
||||||
|
self,
|
||||||
|
batch: MODEL_BATCH,
|
||||||
|
batch_idx: int,
|
||||||
|
stage: MODEL_STAGE,
|
||||||
|
loss_mode: Optional[LOSS_MODES] = None,
|
||||||
|
) -> MODEL_BATCH:
|
||||||
|
"""Perform a single model step on a batch of data.
|
||||||
|
|
||||||
|
:param batch: A batch dictionary.
|
||||||
|
:param batch_idx: The index of the current batch.
|
||||||
|
:param stage: The current model stage (i.e., `train`, `val`, `test`, or `predict`).
|
||||||
|
:param loss_mode: The loss mode to use for training.
|
||||||
|
:return: Batch dictionary with losses.
|
||||||
|
"""
|
||||||
|
prepare_batch(batch)
|
||||||
|
predicting_aux_outputs = (
|
||||||
|
self.hparams.cfg.confidence.enabled or self.hparams.cfg.affinity.enabled
|
||||||
|
)
|
||||||
|
is_aux_loss_stage = stage in AUX_ESTIMATION_STAGES
|
||||||
|
is_aux_batch = batch_idx % self.hparams.cfg.task.aux_batch_freq == 0
|
||||||
|
struct_pred_loss_mode_requested = (
|
||||||
|
loss_mode is not None and loss_mode == "structure_prediction"
|
||||||
|
)
|
||||||
|
should_eval_aux_loss = (
|
||||||
|
predicting_aux_outputs
|
||||||
|
and is_aux_loss_stage
|
||||||
|
and is_aux_batch
|
||||||
|
and not struct_pred_loss_mode_requested
|
||||||
|
and (
|
||||||
|
not self.hparams.cfg.task.freeze_confidence
|
||||||
|
or (
|
||||||
|
not self.hparams.cfg.task.freeze_affinity
|
||||||
|
and batch["features"]["affinity"].any().item()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
eval_aux_loss_mode_requested = (
|
||||||
|
predicting_aux_outputs
|
||||||
|
and loss_mode is not None
|
||||||
|
and "auxiliary_estimation" in loss_mode
|
||||||
|
)
|
||||||
|
if should_eval_aux_loss or eval_aux_loss_mode_requested:
|
||||||
|
return eval_auxiliary_estimation_losses(
|
||||||
|
self, batch, stage, loss_mode, training=self.training
|
||||||
|
)
|
||||||
|
loss_fn = eval_structure_prediction_losses
|
||||||
|
return loss_fn(self, batch, batch_idx, self.device, stage, t_1=1.0)
|
||||||
|
|
||||||
|
def on_train_start(self):
|
||||||
|
"""Lightning hook that is called when training begins."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def training_step(self, batch: MODEL_BATCH, batch_idx: int) -> torch.Tensor:
|
||||||
|
"""Perform a single training step on a batch of data from the training set.
|
||||||
|
|
||||||
|
:param batch: A batch dictionary.
|
||||||
|
:param batch_idx: The index of the current batch.
|
||||||
|
:return: A tensor of losses between model predictions and targets.
|
||||||
|
"""
|
||||||
|
if self.hparams.cfg.task.overfitting_example_name is not None and not all(
|
||||||
|
name == self.hparams.cfg.task.overfitting_example_name
|
||||||
|
for name in batch["metadata"]["sample_ID_per_sample"]
|
||||||
|
):
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
batch = self.model_step(batch, batch_idx, "train")
|
||||||
|
except Exception as e:
|
||||||
|
log.error(
|
||||||
|
f"Failed to perform training step for batch index {batch_idx} due to: {e}. Skipping example."
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
if self.hparams.cfg.affinity.enabled and "affinity_logits" in batch["outputs"]:
|
||||||
|
training_outputs = {
|
||||||
|
"affinity_logits": batch["outputs"]["affinity_logits"],
|
||||||
|
"affinity": batch["features"]["affinity"],
|
||||||
|
}
|
||||||
|
self.training_step_outputs.append(training_outputs)
|
||||||
|
|
||||||
|
# return loss or backpropagation will fail
|
||||||
|
return batch["outputs"]["loss"]
|
||||||
|
|
||||||
|
def on_train_epoch_end(self):
|
||||||
|
"""Lightning hook that is called when a training epoch ends."""
|
||||||
|
if self.hparams.cfg.affinity.enabled and any(
|
||||||
|
"affinity_logits" in output for output in self.training_step_outputs
|
||||||
|
):
|
||||||
|
affinity_logits = torch.cat(
|
||||||
|
[
|
||||||
|
output["affinity_logits"]
|
||||||
|
for output in self.training_step_outputs
|
||||||
|
if "affinity_logits" in output
|
||||||
|
]
|
||||||
|
)
|
||||||
|
affinity = torch.cat(
|
||||||
|
[
|
||||||
|
output["affinity"]
|
||||||
|
for output in self.training_step_outputs
|
||||||
|
if "affinity_logits" in output
|
||||||
|
]
|
||||||
|
)
|
||||||
|
affinity_logits = affinity_logits[~affinity.isnan()]
|
||||||
|
affinity = affinity[~affinity.isnan()]
|
||||||
|
if affinity.numel() > 1:
|
||||||
|
# NOTE: there must be at least two affinity batches to properly score the affinity predictions
|
||||||
|
aff_rmse = torch.sqrt(mean_squared_error(affinity_logits, affinity))
|
||||||
|
aff_mae = mean_absolute_error(affinity_logits, affinity)
|
||||||
|
aff_pearson = pearson_corrcoef(affinity_logits, affinity)
|
||||||
|
aff_spearman = spearman_corrcoef(affinity_logits, affinity)
|
||||||
|
self.log(
|
||||||
|
"train_affinity/RMSE",
|
||||||
|
aff_rmse.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=False,
|
||||||
|
)
|
||||||
|
self.log(
|
||||||
|
"train_affinity/MAE",
|
||||||
|
aff_mae.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=False,
|
||||||
|
)
|
||||||
|
self.log(
|
||||||
|
"train_affinity/Pearson",
|
||||||
|
aff_pearson.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=False,
|
||||||
|
)
|
||||||
|
self.log(
|
||||||
|
"train_affinity/Spearman",
|
||||||
|
aff_spearman.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=False,
|
||||||
|
)
|
||||||
|
self.training_step_outputs.clear() # free memory
|
||||||
|
|
||||||
|
def on_validation_start(self):
|
||||||
|
"""Lightning hook that is called when validation begins."""
|
||||||
|
# create a directory to store model outputs from each validation epoch
|
||||||
|
os.makedirs(
|
||||||
|
os.path.join(self.trainer.default_root_dir, "validation_epoch_outputs"), exist_ok=True
|
||||||
|
)
|
||||||
|
|
||||||
|
def validation_step(self, batch: MODEL_BATCH, batch_idx: int, dataloader_idx: int = 0):
|
||||||
|
"""Perform a single validation step on a batch of data from the validation set.
|
||||||
|
|
||||||
|
:param batch: A batch dictionary.
|
||||||
|
:param batch_idx: The index of the current batch.
|
||||||
|
:param dataloader_idx: The index of the current dataloader.
|
||||||
|
"""
|
||||||
|
if self.hparams.cfg.task.overfitting_example_name is not None and not all(
|
||||||
|
name == self.hparams.cfg.task.overfitting_example_name
|
||||||
|
for name in batch["metadata"]["sample_ID_per_sample"]
|
||||||
|
):
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
prepare_batch(batch)
|
||||||
|
sampling_stats = self.net.sample_pl_complex_structures(
|
||||||
|
batch,
|
||||||
|
sampler="VDODE",
|
||||||
|
sampler_eta=1.0,
|
||||||
|
num_steps=10,
|
||||||
|
start_time=1.0,
|
||||||
|
exact_prior=False,
|
||||||
|
return_all_states=True,
|
||||||
|
eval_input_protein=True,
|
||||||
|
)
|
||||||
|
all_frames = sampling_stats["all_frames"]
|
||||||
|
del sampling_stats["all_frames"]
|
||||||
|
for metric_name in sampling_stats.keys():
|
||||||
|
log_stat = sampling_stats[metric_name].mean().detach()
|
||||||
|
batch_size = sampling_stats[metric_name].shape[0]
|
||||||
|
self.log(
|
||||||
|
f"val_sampling/{metric_name}",
|
||||||
|
log_stat,
|
||||||
|
on_step=True,
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=batch_size,
|
||||||
|
)
|
||||||
|
sampling_stats = self.net.sample_pl_complex_structures(
|
||||||
|
batch,
|
||||||
|
sampler="VDODE",
|
||||||
|
sampler_eta=1.0,
|
||||||
|
num_steps=10,
|
||||||
|
start_time=1.0,
|
||||||
|
exact_prior=False,
|
||||||
|
use_template=False,
|
||||||
|
)
|
||||||
|
for metric_name in sampling_stats.keys():
|
||||||
|
log_stat = sampling_stats[metric_name].mean().detach()
|
||||||
|
batch_size = sampling_stats[metric_name].shape[0]
|
||||||
|
self.log(
|
||||||
|
f"val_sampling_notemplate/{metric_name}",
|
||||||
|
log_stat,
|
||||||
|
on_step=True,
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=batch_size,
|
||||||
|
)
|
||||||
|
sampling_stats = self.net.sample_pl_complex_structures(
|
||||||
|
batch,
|
||||||
|
sampler="VDODE",
|
||||||
|
sampler_eta=1.0,
|
||||||
|
num_steps=10,
|
||||||
|
start_time=1.0,
|
||||||
|
return_summary_stats=True,
|
||||||
|
exact_prior=True,
|
||||||
|
)
|
||||||
|
for metric_name in sampling_stats.keys():
|
||||||
|
log_stat = sampling_stats[metric_name].mean().detach()
|
||||||
|
batch_size = sampling_stats[metric_name].shape[0]
|
||||||
|
self.log(
|
||||||
|
f"val_sampling_trueprior/{metric_name}",
|
||||||
|
log_stat,
|
||||||
|
on_step=True,
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=batch_size,
|
||||||
|
)
|
||||||
|
batch = self.model_step(batch, batch_idx, "val")
|
||||||
|
except Exception as e:
|
||||||
|
log.error(
|
||||||
|
f"Failed to perform validation step for batch index {batch_idx} of dataloader {dataloader_idx} due to: {e}. Skipping example."
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# store model outputs for inspection
|
||||||
|
validation_outputs = {}
|
||||||
|
if self.hparams.cfg.task.visualize_generated_samples:
|
||||||
|
validation_outputs = {
|
||||||
|
"name": batch["metadata"]["sample_ID_per_sample"],
|
||||||
|
"batch_size": batch["metadata"]["num_structid"],
|
||||||
|
"aatype": batch["features"]["res_type"].long().cpu().numpy(),
|
||||||
|
"res_atom_mask": batch["features"]["res_atom_mask"].cpu().numpy(),
|
||||||
|
"protein_coordinates_list": [
|
||||||
|
frame["receptor_padded"].cpu().numpy() for frame in all_frames
|
||||||
|
],
|
||||||
|
"ligand_coordinates_list": [
|
||||||
|
frame["ligands"].cpu().numpy() for frame in all_frames
|
||||||
|
],
|
||||||
|
"ligand_mol": batch["metadata"]["mol_per_sample"],
|
||||||
|
"protein_batch_indexer": batch["indexer"]["gather_idx_a_structid"].cpu().numpy(),
|
||||||
|
"ligand_batch_indexer": batch["indexer"]["gather_idx_i_structid"].cpu().numpy(),
|
||||||
|
"gt_protein_coordinates": batch["features"]["res_atom_positions"].cpu().numpy(),
|
||||||
|
"gt_ligand_coordinates": batch["features"]["sdf_coordinates"].cpu().numpy(),
|
||||||
|
"dataloader_idx": dataloader_idx,
|
||||||
|
}
|
||||||
|
if self.hparams.cfg.affinity.enabled and "affinity_logits" in batch["outputs"]:
|
||||||
|
validation_outputs.update(
|
||||||
|
{
|
||||||
|
"affinity_logits": batch["outputs"]["affinity_logits"],
|
||||||
|
"affinity": batch["features"]["affinity"],
|
||||||
|
"dataloader_idx": dataloader_idx,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if validation_outputs:
|
||||||
|
self.validation_step_outputs.append(validation_outputs)
|
||||||
|
|
||||||
|
def on_validation_epoch_end(self):
|
||||||
|
"Lightning hook that is called when a validation epoch ends."
|
||||||
|
if self.hparams.cfg.task.visualize_generated_samples:
|
||||||
|
for i, outputs in enumerate(self.validation_step_outputs):
|
||||||
|
for batch_index in range(outputs["batch_size"]):
|
||||||
|
prot_lig_pairs = construct_prot_lig_pairs(outputs, batch_index)
|
||||||
|
write_prot_lig_pairs_to_pdb_file(
|
||||||
|
prot_lig_pairs,
|
||||||
|
os.path.join(
|
||||||
|
self.trainer.default_root_dir,
|
||||||
|
"validation_epoch_outputs",
|
||||||
|
f"{outputs['name'][batch_index]}_validation_epoch_{self.current_epoch}_global_step_{self.global_step}_output_{i}_batch_{batch_index}_dataloader_{outputs['dataloader_idx']}.pdb",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
if self.hparams.cfg.affinity.enabled and any(
|
||||||
|
"affinity_logits" in output for output in self.validation_step_outputs
|
||||||
|
):
|
||||||
|
affinity_logits = torch.cat(
|
||||||
|
[
|
||||||
|
output["affinity_logits"]
|
||||||
|
for output in self.validation_step_outputs
|
||||||
|
if "affinity_logits" in output
|
||||||
|
]
|
||||||
|
)
|
||||||
|
affinity = torch.cat(
|
||||||
|
[
|
||||||
|
output["affinity"]
|
||||||
|
for output in self.validation_step_outputs
|
||||||
|
if "affinity_logits" in output
|
||||||
|
]
|
||||||
|
)
|
||||||
|
affinity_logits = affinity_logits[~affinity.isnan()]
|
||||||
|
affinity = affinity[~affinity.isnan()]
|
||||||
|
if affinity.numel() > 1:
|
||||||
|
# NOTE: there must be at least two affinity batches to properly score the affinity predictions
|
||||||
|
aff_rmse = torch.sqrt(mean_squared_error(affinity_logits, affinity))
|
||||||
|
aff_mae = mean_absolute_error(affinity_logits, affinity)
|
||||||
|
aff_pearson = pearson_corrcoef(affinity_logits, affinity)
|
||||||
|
aff_spearman = spearman_corrcoef(affinity_logits, affinity)
|
||||||
|
self.log(
|
||||||
|
"val_affinity/RMSE",
|
||||||
|
aff_rmse.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=True,
|
||||||
|
)
|
||||||
|
self.log(
|
||||||
|
"val_affinity/MAE",
|
||||||
|
aff_mae.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=True,
|
||||||
|
)
|
||||||
|
self.log(
|
||||||
|
"val_affinity/Pearson",
|
||||||
|
aff_pearson.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=True,
|
||||||
|
)
|
||||||
|
self.log(
|
||||||
|
"val_affinity/Spearman",
|
||||||
|
aff_spearman.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=True,
|
||||||
|
)
|
||||||
|
self.validation_step_outputs.clear() # free memory
|
||||||
|
|
||||||
|
def on_test_start(self):
|
||||||
|
"""Lightning hook that is called when testing begins."""
|
||||||
|
# create a directory to store model outputs from each test epoch
|
||||||
|
os.makedirs(
|
||||||
|
os.path.join(self.trainer.default_root_dir, "test_epoch_outputs"), exist_ok=True
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_step(self, batch: MODEL_BATCH, batch_idx: int, dataloader_idx: int = 0):
|
||||||
|
"""Perform a single test step on a batch of data from the test set.
|
||||||
|
|
||||||
|
:param batch: A batch dictionary.
|
||||||
|
:param batch_idx: The index of the current batch.
|
||||||
|
:param dataloader_idx: The index of the current dataloader.
|
||||||
|
"""
|
||||||
|
if self.hparams.cfg.task.overfitting_example_name is not None and not all(
|
||||||
|
name == self.hparams.cfg.task.overfitting_example_name
|
||||||
|
for name in batch["metadata"]["sample_ID_per_sample"]
|
||||||
|
):
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
prepare_batch(batch)
|
||||||
|
if self.hparams.cfg.task.eval_structure_prediction:
|
||||||
|
sampling_stats = self.net.sample_pl_complex_structures(
|
||||||
|
batch,
|
||||||
|
sampler=self.hparams.cfg.task.sampler,
|
||||||
|
sampler_eta=self.hparams.cfg.task.sampler_eta,
|
||||||
|
num_steps=self.hparams.cfg.task.num_steps,
|
||||||
|
start_time=self.hparams.cfg.task.start_time,
|
||||||
|
exact_prior=False,
|
||||||
|
return_all_states=True,
|
||||||
|
eval_input_protein=True,
|
||||||
|
)
|
||||||
|
all_frames = sampling_stats["all_frames"]
|
||||||
|
del sampling_stats["all_frames"]
|
||||||
|
for metric_name in sampling_stats.keys():
|
||||||
|
log_stat = sampling_stats[metric_name].mean().detach()
|
||||||
|
batch_size = sampling_stats[metric_name].shape[0]
|
||||||
|
self.log(
|
||||||
|
f"test_sampling/{metric_name}",
|
||||||
|
log_stat,
|
||||||
|
on_step=True,
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=batch_size,
|
||||||
|
)
|
||||||
|
sampling_stats = self.net.sample_pl_complex_structures(
|
||||||
|
batch,
|
||||||
|
sampler=self.hparams.cfg.task.sampler,
|
||||||
|
sampler_eta=self.hparams.cfg.task.sampler_eta,
|
||||||
|
num_steps=self.hparams.cfg.task.num_steps,
|
||||||
|
start_time=self.hparams.cfg.task.start_time,
|
||||||
|
exact_prior=False,
|
||||||
|
use_template=False,
|
||||||
|
)
|
||||||
|
for metric_name in sampling_stats.keys():
|
||||||
|
log_stat = sampling_stats[metric_name].mean().detach()
|
||||||
|
batch_size = sampling_stats[metric_name].shape[0]
|
||||||
|
self.log(
|
||||||
|
f"test_sampling_notemplate/{metric_name}",
|
||||||
|
log_stat,
|
||||||
|
on_step=True,
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=batch_size,
|
||||||
|
)
|
||||||
|
sampling_stats = self.net.sample_pl_complex_structures(
|
||||||
|
batch,
|
||||||
|
sampler=self.hparams.cfg.task.sampler,
|
||||||
|
sampler_eta=self.hparams.cfg.task.sampler_eta,
|
||||||
|
num_steps=self.hparams.cfg.task.num_steps,
|
||||||
|
start_time=self.hparams.cfg.task.start_time,
|
||||||
|
return_summary_stats=True,
|
||||||
|
exact_prior=True,
|
||||||
|
)
|
||||||
|
for metric_name in sampling_stats.keys():
|
||||||
|
log_stat = sampling_stats[metric_name].mean().detach()
|
||||||
|
batch_size = sampling_stats[metric_name].shape[0]
|
||||||
|
self.log(
|
||||||
|
f"test_sampling_trueprior/{metric_name}",
|
||||||
|
log_stat,
|
||||||
|
on_step=True,
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=batch_size,
|
||||||
|
)
|
||||||
|
batch = self.model_step(
|
||||||
|
batch, batch_idx, "test", loss_mode=self.hparams.cfg.task.loss_mode
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
log.error(
|
||||||
|
f"Failed to perform test step for {batch['metadata']['sample_ID_per_sample']} with batch index {batch_idx} of dataloader {dataloader_idx} due to: {e}."
|
||||||
|
)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
# store model outputs for inspection
|
||||||
|
test_outputs = {}
|
||||||
|
if (
|
||||||
|
self.hparams.cfg.task.visualize_generated_samples
|
||||||
|
and self.hparams.cfg.task.eval_structure_prediction
|
||||||
|
):
|
||||||
|
test_outputs.update(
|
||||||
|
{
|
||||||
|
"name": batch["metadata"]["sample_ID_per_sample"],
|
||||||
|
"batch_size": batch["metadata"]["num_structid"],
|
||||||
|
"aatype": batch["features"]["res_type"].long().cpu().numpy(),
|
||||||
|
"res_atom_mask": batch["features"]["res_atom_mask"].cpu().numpy(),
|
||||||
|
"protein_coordinates_list": [
|
||||||
|
frame["receptor_padded"].cpu().numpy() for frame in all_frames
|
||||||
|
],
|
||||||
|
"ligand_coordinates_list": [
|
||||||
|
frame["ligands"].cpu().numpy() for frame in all_frames
|
||||||
|
],
|
||||||
|
"ligand_mol": batch["metadata"]["mol_per_sample"],
|
||||||
|
"protein_batch_indexer": batch["indexer"]["gather_idx_a_structid"]
|
||||||
|
.cpu()
|
||||||
|
.numpy(),
|
||||||
|
"ligand_batch_indexer": batch["indexer"]["gather_idx_i_structid"]
|
||||||
|
.cpu()
|
||||||
|
.numpy(),
|
||||||
|
"gt_protein_coordinates": batch["features"]["res_atom_positions"]
|
||||||
|
.cpu()
|
||||||
|
.numpy(),
|
||||||
|
"gt_ligand_coordinates": batch["features"]["sdf_coordinates"].cpu().numpy(),
|
||||||
|
"dataloader_idx": dataloader_idx,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if self.hparams.cfg.affinity.enabled and "affinity_logits" in batch["outputs"]:
|
||||||
|
test_outputs.update(
|
||||||
|
{
|
||||||
|
"affinity_logits": batch["outputs"]["affinity_logits"],
|
||||||
|
"affinity": batch["features"]["affinity"],
|
||||||
|
"dataloader_idx": dataloader_idx,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if test_outputs:
|
||||||
|
self.test_step_outputs.append(test_outputs)
|
||||||
|
|
||||||
|
def on_test_epoch_end(self):
|
||||||
|
"""Lightning hook that is called when a test epoch ends."""
|
||||||
|
if (
|
||||||
|
self.hparams.cfg.task.visualize_generated_samples
|
||||||
|
and self.hparams.cfg.task.eval_structure_prediction
|
||||||
|
):
|
||||||
|
for i, outputs in enumerate(self.test_step_outputs):
|
||||||
|
for batch_index in range(outputs["batch_size"]):
|
||||||
|
prot_lig_pairs = construct_prot_lig_pairs(outputs, batch_index)
|
||||||
|
write_prot_lig_pairs_to_pdb_file(
|
||||||
|
prot_lig_pairs,
|
||||||
|
os.path.join(
|
||||||
|
self.trainer.default_root_dir,
|
||||||
|
"test_epoch_outputs",
|
||||||
|
f"{outputs['name'][batch_index]}_test_epoch_{self.current_epoch}_global_step_{self.global_step}_output_{i}_batch_{batch_index}_dataloader_{outputs['dataloader_idx']}.pdb",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
if self.hparams.cfg.affinity.enabled and any(
|
||||||
|
"affinity_logits" in output for output in self.test_step_outputs
|
||||||
|
):
|
||||||
|
affinity_logits = torch.cat(
|
||||||
|
[
|
||||||
|
output["affinity_logits"]
|
||||||
|
for output in self.test_step_outputs
|
||||||
|
if "affinity_logits" in output
|
||||||
|
]
|
||||||
|
)
|
||||||
|
affinity = torch.cat(
|
||||||
|
[
|
||||||
|
output["affinity"]
|
||||||
|
for output in self.test_step_outputs
|
||||||
|
if "affinity_logits" in output
|
||||||
|
]
|
||||||
|
)
|
||||||
|
affinity_logits = affinity_logits[~affinity.isnan()]
|
||||||
|
affinity = affinity[~affinity.isnan()]
|
||||||
|
if affinity.numel() > 1:
|
||||||
|
# NOTE: there must be at least two affinity batches to properly score the affinity predictions
|
||||||
|
aff_rmse = torch.sqrt(mean_squared_error(affinity_logits, affinity))
|
||||||
|
aff_mae = mean_absolute_error(affinity_logits, affinity)
|
||||||
|
aff_pearson = pearson_corrcoef(affinity_logits, affinity)
|
||||||
|
aff_spearman = spearman_corrcoef(affinity_logits, affinity)
|
||||||
|
self.log(
|
||||||
|
"test_affinity/RMSE",
|
||||||
|
aff_rmse.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=True,
|
||||||
|
)
|
||||||
|
self.log(
|
||||||
|
"test_affinity/MAE",
|
||||||
|
aff_mae.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=True,
|
||||||
|
)
|
||||||
|
self.log(
|
||||||
|
"test_affinity/Pearson",
|
||||||
|
aff_pearson.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=True,
|
||||||
|
)
|
||||||
|
self.log(
|
||||||
|
"test_affinity/Spearman",
|
||||||
|
aff_spearman.detach(),
|
||||||
|
on_epoch=True,
|
||||||
|
batch_size=len(affinity),
|
||||||
|
sync_dist=True,
|
||||||
|
)
|
||||||
|
self.test_step_outputs.clear() # free memory
|
||||||
|
|
||||||
|
def on_predict_start(self):
    """Lightning hook that is called when prediction begins.

    Prepares everything `predict_step` needs on CPU:
    the output directory, a pretrained ESM model (for residue embeddings),
    and — unless a usable template structure was supplied for a single
    complex — a pretrained ESMFold model (for apo structure prediction).
    """
    # create a directory to store model outputs from each predict epoch
    os.makedirs(
        os.path.join(self.trainer.default_root_dir, "predict_epoch_outputs"), exist_ok=True
    )

    log.info("Loading pretrained ESM model...")
    esm_model, self.esm_alphabet = esm.pretrained.load_model_and_alphabet_hub(
        self.hparams.cfg.model.cfg.protein_encoder.esm_version
    )
    # full-precision eval mode; kept on CPU so GPU memory is reserved for sampling
    self.esm_model = esm_model.eval().float()
    self.esm_batch_converter = self.esm_alphabet.get_batch_converter()
    self.esm_model.cpu()

    skip_loading_esmfold_weights = (
        # skip loading ESMFold weights if the template protein structure for a single complex input is provided
        self.hparams.cfg.task.csv_path is None
        and self.hparams.cfg.task.input_template is not None
        and os.path.exists(self.hparams.cfg.task.input_template)
    )
    if not skip_loading_esmfold_weights:
        log.info("Loading pretrained ESMFold model...")
        esmfold_model = esm.pretrained.esmfold_v1()
        self.esmfold_model = esmfold_model.eval().float()
        # chunked attention to bound peak memory during folding
        self.esmfold_model.set_chunk_size(self.hparams.cfg.esmfold_chunk_size)
        self.esmfold_model.cpu()
|
||||||
|
|
||||||
|
def predict_step(self, batch: MODEL_BATCH, batch_idx: int, dataloader_idx: int = 0):
    """Perform a single predict step on a batch of data from the predict set.

    Extracts ESM embeddings for the receptor, resolves (or predicts via
    ESMFold) an apo receptor structure, runs multi-pose sampling, and — when
    trajectory visualization is enabled — caches per-sample outputs in
    `self.predict_step_outputs` for `on_predict_epoch_end` to write out.

    :param batch: A batch dictionary.
    :param batch_idx: The index of the current batch.
    :param dataloader_idx: The index of the current dataloader.
    """
    # NOTE(review): every field is unpacked via `[0]`, i.e. this assumes the
    # predict dataloader uses batch_size=1 — confirm against the caller.
    rec_path = batch["rec_path"][0]
    ligand_paths = list(
        path[0] for path in batch["lig_paths"]
    )  # unpack a list of (batched) single-element string tuples
    sample_id = batch["sample_id"][0] if "sample_id" in batch else "sample"
    input_template = batch["input_template"][0] if "input_template" in batch else None

    # per-sample output subdirectory when an explicit sample ID is available
    out_path = (
        os.path.join(self.hparams.cfg.out_path, sample_id)
        if "sample_id" in batch
        else self.hparams.cfg.out_path
    )

    # generate ESM embeddings for the protein
    protein = pdb_filepath_to_protein(rec_path)
    # one masked (resolved-residue) sequence string per chain
    sequences = [
        "".join(np.array(list(chain_seq))[chain_mask])
        for (_, chain_seq, chain_mask) in protein.letter_sequences
    ]
    esm_embeddings = extract_esm_embeddings(
        self.esm_model,
        self.esm_alphabet,
        self.esm_batch_converter,
        sequences,
        device="cpu",
        esm_repr_layer=self.hparams.cfg.model.cfg.protein_encoder.esm_repr_layer,
    )
    # keys carry the chain index so identical chain sequences do not collide
    sequences_to_embeddings = {
        f"{seq}:{i}": esm_embeddings[i].cpu().numpy() for i, seq in enumerate(sequences)
    }

    # generate initial ESMFold-predicted structure for the protein if a template is not provided
    apo_rec_path = None
    if input_template and os.path.exists(input_template):
        # validate that the template's masked sequence matches the input receptor's
        apo_protein = pdb_filepath_to_protein(input_template)
        apo_chain_seq_masked = "".join(
            "".join(np.array(list(chain_seq))[chain_mask])
            for (_, chain_seq, chain_mask) in apo_protein.letter_sequences
        )
        chain_seq_masked = "".join(
            "".join(np.array(list(chain_seq))[chain_mask])
            for (_, chain_seq, chain_mask) in protein.letter_sequences
        )
        if apo_chain_seq_masked != chain_seq_masked:
            log.error(
                f"Provided template protein structure {input_template} does not match the input protein sequence within {rec_path}. Skipping example {sample_id} at batch index {batch_idx} of dataloader {dataloader_idx}."
            )
            # mismatched template: skip this example entirely
            return None
        log.info(f"Starting from provided template protein structure: {input_template}")
        apo_rec_path = input_template
    if apo_rec_path is None and self.hparams.cfg.prior_type == "esmfold":
        # ESMFold expects chains joined by ':' in a single sequence string
        esmfold_sequence = ":".join(sequences)
        apo_rec_path = rec_path.replace(".pdb", "_apo.pdb")
        with torch.no_grad():
            esmfold_pdb_output = self.esmfold_model.infer_pdb(esmfold_sequence)
        with open(apo_rec_path, "w") as f:
            f.write(esmfold_pdb_output)

    _, _, _, _, _, all_frames, batch_all, b_factors, plddt_rankings = multi_pose_sampling(
        rec_path,
        ligand_paths,
        self.hparams.cfg,
        self,
        out_path,
        separate_pdb=self.hparams.cfg.separate_pdb,
        apo_receptor_path=apo_rec_path,
        sample_id=sample_id,
        protein=protein,
        sequences_to_embeddings=sequences_to_embeddings,
        return_all_states=self.hparams.cfg.task.visualize_generated_samples,
        auxiliary_estimation_only=self.hparams.cfg.task.auxiliary_estimation_only,
    )
    # store model outputs for inspection
    if self.hparams.cfg.task.visualize_generated_samples:
        predict_outputs = {
            "name": batch_all["metadata"]["sample_ID_per_sample"],
            "batch_size": batch_all["metadata"]["num_structid"],
            "aatype": batch_all["features"]["res_type"].long().cpu().numpy(),
            "res_atom_mask": batch_all["features"]["res_atom_mask"].cpu().numpy(),
            # one entry per sampled frame/state returned by multi_pose_sampling
            "protein_coordinates_list": [
                frame["receptor_padded"].cpu().numpy() for frame in all_frames
            ],
            "ligand_coordinates_list": [
                frame["ligands"].cpu().numpy() for frame in all_frames
            ],
            "ligand_mol": batch_all["metadata"]["mol_per_sample"],
            # structure-ID scatter indices mapping atoms back to samples
            "protein_batch_indexer": batch_all["indexer"]["gather_idx_a_structid"]
            .cpu()
            .numpy(),
            "ligand_batch_indexer": batch_all["indexer"]["gather_idx_i_structid"]
            .cpu()
            .numpy(),
            "b_factors": b_factors,
            "plddt_rankings": plddt_rankings,
        }
        self.predict_step_outputs.append(predict_outputs)
|
||||||
|
|
||||||
|
def on_predict_epoch_end(self):
    """Lightning hook that is called when a predict epoch ends.

    When sample-trajectory visualization is enabled, writes one PDB file per
    (predict-step output, sample) pair; always clears the cached outputs.
    """
    if self.hparams.cfg.task.visualize_generated_samples:
        out_root = self.hparams.cfg.out_path
        for output_idx, record in enumerate(self.predict_step_outputs):
            for sample_idx in range(record["batch_size"]):
                pairs = construct_prot_lig_pairs(record, sample_idx)
                rank = (
                    record["plddt_rankings"][sample_idx]
                    if "plddt_rankings" in record
                    else None
                )
                # rank tag is 1-based; omitted entirely when no ranking exists
                rank_tag = "" if rank is None else f"_rank{rank + 1}"
                sample_name = record["name"][sample_idx]
                pdb_name = (
                    f"{sample_name}{rank_tag}"
                    f"_predict_epoch_{self.current_epoch}"
                    f"_global_step_{self.global_step}"
                    f"_output_{output_idx}_batch_{sample_idx}.pdb"
                )
                write_prot_lig_pairs_to_pdb_file(
                    pairs,
                    os.path.join(out_root, sample_name, "predict_epoch_outputs", pdb_name),
                )
    # drop cached predictions to free memory
    self.predict_step_outputs.clear()
|
||||||
|
|
||||||
|
def on_after_backward(self):
    """Skip updates in case of unstable gradients.

    Scans all parameter gradients; if any contains `nan`/`inf`, zeroes the
    gradients so the optimizer step becomes a no-op.

    Reference: https://github.com/Lightning-AI/lightning/issues/4956
    """
    # short-circuits on the first offending gradient, like the original loop+break
    found_unstable_grad = any(
        torch.isnan(param.grad).any() or torch.isinf(param.grad).any()
        for _, param in self.named_parameters()
        if param.grad is not None
    )
    if found_unstable_grad:
        log.warning(
            "Detected `inf` or `nan` values in gradients. Not updating model parameters."
        )
        self.zero_grad()
|
||||||
|
|
||||||
|
def optimizer_step(
    self,
    epoch,
    batch_idx,
    optimizer,
    optimizer_closure,
):
    """Override the optimizer step to dynamically update the learning rate.

    Applies a linear learning-rate warm-up over the first 1000 global steps
    after performing the regular optimizer step.

    :param epoch: The current epoch.
    :param batch_idx: The index of the current batch.
    :param optimizer: The optimizer to use for training.
    :param optimizer_closure: The optimizer closure.
    """
    # update params
    # NOTE(review): unwraps the raw optimizer from its wrapper; this assumes
    # Lightning always hands us a wrapped optimizer here — confirm for the
    # configured strategy, as a raw optimizer has no `.optimizer` attribute.
    optimizer = optimizer.optimizer
    optimizer.step(closure=optimizer_closure)

    # warm up learning rate
    # NOTE(review): warm-up horizon of 1000 steps is hard-coded here
    if self.trainer.global_step < 1000:
        # lr_scale ramps linearly from 1/1000 to 1.0
        lr_scale = min(1.0, float(self.trainer.global_step + 1) / 1000.0)
        for pg in optimizer.param_groups:
            # NOTE: `self.hparams.optimizer.keywords["lr"]` refers to the optimizer's initial learning rate
            pg["lr"] = lr_scale * self.hparams.optimizer.keywords["lr"]
|
||||||
|
|
||||||
|
def setup(self, stage: str):
|
||||||
|
"""Lightning hook that is called at the beginning of fit (train + validate), validate,
|
||||||
|
test, or predict.
|
||||||
|
|
||||||
|
This is a good hook when you need to build models dynamically or adjust something about
|
||||||
|
them. This hook is called on every process when using DDP.
|
||||||
|
|
||||||
|
:param stage: Either `"fit"`, `"validate"`, `"test"`, or `"predict"`.
|
||||||
|
"""
|
||||||
|
if self.hparams.compile and stage == "fit":
|
||||||
|
self.net = torch.compile(self.net)
|
||||||
|
|
||||||
|
def configure_optimizers(self) -> Dict[str, Any]:
    """Choose what optimizers and learning-rate schedulers to use in your optimization.
    Normally you'd need one. But in the case of GANs or similar you might have multiple.

    Examples:
        https://lightning.ai/docs/pytorch/latest/common/lightning_module.html#configure-optimizers

    :return: A dict containing the configured optimizers and learning-rate schedulers to be used for training.
    """
    try:
        optimizer = self.hparams.optimizer(params=self.trainer.model.parameters())
    except TypeError:
        # NOTE: strategies such as DeepSpeed require `params` to instead be specified as `model_params`
        optimizer = self.hparams.optimizer(model_params=self.trainer.model.parameters())

    if self.hparams.scheduler is None:
        return {"optimizer": optimizer}

    # scheduler configured per-epoch, monitoring the validation loss
    lr_scheduler_config = {
        "scheduler": self.hparams.scheduler(optimizer=optimizer),
        "monitor": "val/loss",
        "interval": "epoch",
        "frequency": 1,
    }
    return {"optimizer": optimizer, "lr_scheduler": lr_scheduler_config}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # smoke test: ensure the module can be constructed directly
    # (all constructor arguments intentionally None)
    _ = FlowDockFMLitModule(None, None, None, None)
|
||||||
284
flowdock/sample.py
Normal file
284
flowdock/sample.py
Normal file
@@ -0,0 +1,284 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import hydra
|
||||||
|
import lightning as L
|
||||||
|
import lovely_tensors as lt
|
||||||
|
import pandas as pd
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Any, Dict, List, Tuple
|
||||||
|
from lightning import LightningModule, Trainer
|
||||||
|
from lightning.fabric.plugins.environments.cluster_environment import ClusterEnvironment
|
||||||
|
from lightning.pytorch.loggers import Logger
|
||||||
|
from lightning.pytorch.strategies.strategy import Strategy
|
||||||
|
from omegaconf import DictConfig, open_dict
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
|
lt.monkey_patch()
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
# ------------------------------------------------------------------------------------ #
|
||||||
|
# the setup_root above is equivalent to:
|
||||||
|
# - adding project root dir to PYTHONPATH
|
||||||
|
# (so you don't need to force user to install project as a package)
|
||||||
|
# (necessary before importing any local modules e.g. `from flowdock import utils`)
|
||||||
|
# - setting up PROJECT_ROOT environment variable
|
||||||
|
# (which is used as a base for paths in "configs/paths/default.yaml")
|
||||||
|
# (this way all filepaths are the same no matter where you run the code)
|
||||||
|
# - loading environment variables from ".env" in root dir
|
||||||
|
#
|
||||||
|
# you can remove it if you:
|
||||||
|
# 1. either install project as a package or move entry files to project root dir
|
||||||
|
# 2. set `root_dir` to "." in "configs/paths/default.yaml"
|
||||||
|
#
|
||||||
|
# more info: https://github.com/ashleve/rootutils
|
||||||
|
# ------------------------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
from flowdock import register_custom_omegaconf_resolvers, resolve_omegaconf_variable
|
||||||
|
from flowdock.utils import (
|
||||||
|
RankedLogger,
|
||||||
|
extras,
|
||||||
|
instantiate_loggers,
|
||||||
|
log_hyperparameters,
|
||||||
|
task_wrapper,
|
||||||
|
)
|
||||||
|
from flowdock.utils.data_utils import (
|
||||||
|
create_full_pdb_with_zero_coordinates,
|
||||||
|
create_temp_ligand_frag_files,
|
||||||
|
)
|
||||||
|
|
||||||
|
log = RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
AVAILABLE_SAMPLING_TASKS = ["batched_structure_sampling"]
|
||||||
|
|
||||||
|
|
||||||
|
class SamplingDataset(torch.utils.data.Dataset):
    """Dataset for sampling.

    Builds an internal DataFrame with one row per sampling example. Each row
    holds `sample_id`, `rec_path`, `lig_paths`, and optionally `input_template`.
    Inputs come either from a CSV file (batch mode) or from single-complex
    fields on the config.
    """

    @staticmethod
    def _resolve_ligand_paths(input_ligand):
        """Expand one ligand spec into a list of ligand fragment file paths.

        :param input_ligand: An SDF filepath, a `|`-separated list of paths,
            or None for a `null` ligand input.
        :return: A list of ligand filepaths, or None when no ligand is given.
        """
        if input_ligand is None:
            return None  # handle `null` ligand input
        if input_ligand.endswith(".sdf"):
            return create_temp_ligand_frag_files(input_ligand)
        return list(input_ligand.split("|"))

    @staticmethod
    def _resolve_receptor(input_receptor, out_path, pdb_filename):
        """Return a receptor PDB filepath, synthesizing a dummy PDB for raw sequences.

        :param input_receptor: A PDB filepath or a raw protein sequence.
        :param out_path: Output directory for the synthesized dummy PDB.
        :param pdb_filename: Filename to use for the synthesized dummy PDB.
        :return: Filepath of a receptor PDB file.
        """
        if input_receptor.endswith(".pdb"):
            return input_receptor
        log.warning(
            "Assuming the provided receptor input is a protein sequence. Creating a dummy PDB file."
        )
        dummy_pdb_path = os.path.join(out_path, pdb_filename)
        create_full_pdb_with_zero_coordinates(input_receptor, dummy_pdb_path)
        return dummy_pdb_path

    def __init__(self, cfg: "DictConfig"):
        """Initializes the SamplingDataset.

        :param cfg: Sampling configuration; must provide `sampling_task` and
            either `csv_path` (batch mode) or
            `sample_id`/`input_receptor`/`input_ligand` (single-complex mode).
        :raises NotImplementedError: If `cfg.sampling_task` is unsupported.
        """
        if cfg.sampling_task != "batched_structure_sampling":
            raise NotImplementedError(f"Sampling task {cfg.sampling_task} is not implemented.")
        if cfg.csv_path is not None:
            # handle variable CSV inputs: one sampling example per row
            df_rows = []
            self.df = pd.read_csv(cfg.csv_path)
            for _, row in self.df.iterrows():
                assert row.input_receptor is not None, "Receptor path is required for sampling."
                lig_paths = self._resolve_ligand_paths(row.input_ligand)
                rec_path = self._resolve_receptor(
                    row.input_receptor, cfg.out_path, f"input_{row.id}.pdb"
                )
                df_row = {
                    "sample_id": row.id,
                    "rec_path": rec_path,
                    "lig_paths": lig_paths,
                }
                # BUGFIX: empty CSV cells surface as NaN (not None) via pandas;
                # previously NaN passed the `is not None` check and was stored
                # as a (bogus) template path. Skip NaN templates explicitly.
                if row.input_template is not None and not pd.isna(row.input_template):
                    df_row["input_template"] = row.input_template
                df_rows.append(df_row)
            self.df = pd.DataFrame(df_rows)
        else:
            # single-complex mode: build a one-row DataFrame from config fields
            lig_paths = self._resolve_ligand_paths(cfg.input_ligand)
            rec_path = self._resolve_receptor(cfg.input_receptor, cfg.out_path, "input.pdb")
            self.df = pd.DataFrame(
                [
                    {
                        "sample_id": cfg.sample_id,
                        "rec_path": rec_path,
                        "lig_paths": lig_paths,
                    }
                ]
            )
            if cfg.input_template is not None:
                self.df["input_template"] = cfg.input_template

    def __len__(self) -> int:
        """Returns the number of sampling examples."""
        return len(self.df)

    def __getitem__(self, idx: int) -> Dict[str, Any]:
        """Returns the sampling inputs at `idx` as a plain dict.

        NOTE: The return annotation previously claimed `Tuple[str, str]`,
        but the method has always returned a dict of row fields.
        """
        return self.df.iloc[idx].to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
@task_wrapper
def sample(cfg: DictConfig) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Samples using given checkpoint on a datamodule predictset.

    This method is wrapped in optional @task_wrapper decorator, that controls the behavior during
    failure. Useful for multiruns, saving info about the crash, etc.

    :param cfg: DictConfig configuration composed by Hydra.
    :return: Tuple[dict, dict] with metrics and dict with all instantiated objects.
    """
    assert cfg.ckpt_path, "Please provide a checkpoint path with which to sample!"
    assert os.path.exists(cfg.ckpt_path), f"Checkpoint path {cfg.ckpt_path} does not exist!"
    assert (
        cfg.sampling_task in AVAILABLE_SAMPLING_TASKS
    ), f"Sampling task {cfg.sampling_task} is not one of the following available tasks: {AVAILABLE_SAMPLING_TASKS}."
    # NOTE(review): this requires a non-None ligand in single-complex mode even
    # though `SamplingDataset` explicitly handles a `null` ligand input — confirm
    # whether ligand-free sampling should be reachable from this entry point.
    assert (cfg.input_receptor is not None and cfg.input_ligand is not None) or (
        cfg.csv_path is not None and os.path.exists(cfg.csv_path)
    ), "Please provide either an input receptor and ligand or a CSV file with receptor and ligand sequences/filepaths."

    # set seed for random number generators in pytorch, numpy and python.random
    if cfg.get("seed"):
        L.seed_everything(cfg.seed, workers=True)

    log.info(
        f"Setting `float32_matmul_precision` to {cfg.model.cfg.task.float32_matmul_precision}."
    )
    torch.set_float32_matmul_precision(precision=cfg.model.cfg.task.float32_matmul_precision)

    # Establish model input arguments
    # (`open_dict` temporarily unlocks the OmegaConf struct for new keys)
    with open_dict(cfg):
        # NOTE: Structure trajectories will not be visualized when performing auxiliary estimation only
        cfg.model.cfg.prior_type = cfg.prior_type
        cfg.model.cfg.task.detect_covalent = cfg.detect_covalent
        cfg.model.cfg.task.use_template = cfg.use_template
        cfg.model.cfg.task.csv_path = cfg.csv_path
        cfg.model.cfg.task.input_receptor = cfg.input_receptor
        cfg.model.cfg.task.input_ligand = cfg.input_ligand
        cfg.model.cfg.task.input_template = cfg.input_template
        cfg.model.cfg.task.visualize_generated_samples = (
            cfg.visualize_sample_trajectories and not cfg.auxiliary_estimation_only
        )
        cfg.model.cfg.task.auxiliary_estimation_only = cfg.auxiliary_estimation_only
    if cfg.latent_model is not None:
        with open_dict(cfg):
            cfg.model.cfg.latent_model = cfg.latent_model
    with open_dict(cfg):
        # "auto" start time resolves to 1.0 (i.e. start from the prior)
        if cfg.start_time == "auto":
            cfg.start_time = 1.0
        else:
            cfg.start_time = float(cfg.start_time)

    log.info("Converting sampling inputs into a <SamplingDataset>")
    # batch_size must remain 1: `predict_step` unpacks each field via `[0]`
    dataloaders: List[DataLoader] = [
        DataLoader(
            SamplingDataset(cfg),
            batch_size=1,
            shuffle=False,
            num_workers=0,
            pin_memory=False,
        )
    ]

    log.info(f"Instantiating model <{cfg.model._target_}>")
    model: LightningModule = hydra.utils.instantiate(cfg.model)
    model.hparams.cfg.update(cfg)  # update model config with the sampling config

    log.info("Instantiating loggers...")
    logger: List[Logger] = instantiate_loggers(cfg.get("logger"))

    plugins = None
    if "_target_" in cfg.environment:
        log.info(f"Instantiating environment <{cfg.environment._target_}>")
        plugins: ClusterEnvironment = hydra.utils.instantiate(cfg.environment)

    # fall back to any strategy preconfigured on the trainer config
    strategy = getattr(cfg.trainer, "strategy", None)
    if "_target_" in cfg.strategy:
        log.info(f"Instantiating strategy <{cfg.strategy._target_}>")
        strategy: Strategy = hydra.utils.instantiate(cfg.strategy)
        # resolve mixed-precision dtype strings (e.g. for FSDP-style strategies)
        if (
            "mixed_precision" in strategy.__dict__
            and getattr(strategy, "mixed_precision", None) is not None
        ):
            strategy.mixed_precision.param_dtype = (
                resolve_omegaconf_variable(cfg.strategy.mixed_precision.param_dtype)
                if getattr(cfg.strategy.mixed_precision, "param_dtype", None) is not None
                else None
            )
            strategy.mixed_precision.reduce_dtype = (
                resolve_omegaconf_variable(cfg.strategy.mixed_precision.reduce_dtype)
                if getattr(cfg.strategy.mixed_precision, "reduce_dtype", None) is not None
                else None
            )
            strategy.mixed_precision.buffer_dtype = (
                resolve_omegaconf_variable(cfg.strategy.mixed_precision.buffer_dtype)
                if getattr(cfg.strategy.mixed_precision, "buffer_dtype", None) is not None
                else None
            )

    log.info(f"Instantiating trainer <{cfg.trainer._target_}>")
    # only pass `strategy` through when one was resolved, so Lightning's default applies otherwise
    trainer: Trainer = (
        hydra.utils.instantiate(
            cfg.trainer,
            logger=logger,
            plugins=plugins,
            strategy=strategy,
        )
        if strategy is not None
        else hydra.utils.instantiate(
            cfg.trainer,
            logger=logger,
            plugins=plugins,
        )
    )

    object_dict = {
        "cfg": cfg,
        "model": model,
        "logger": logger,
        "trainer": trainer,
    }

    if logger:
        log.info("Logging hyperparameters!")
        log_hyperparameters(object_dict)

    trainer.predict(model=model, dataloaders=dataloaders, ckpt_path=cfg.ckpt_path)

    metric_dict = trainer.callback_metrics

    return metric_dict, object_dict
|
||||||
|
|
||||||
|
|
||||||
|
@hydra.main(version_base="1.3", config_path="../configs", config_name="sample.yaml")
def main(cfg: DictConfig) -> None:
    """Main entry point for sampling.

    :param cfg: DictConfig configuration composed by Hydra.
    """
    # apply extra utilities
    # (e.g. ask for tags if none are provided in cfg, print cfg tree, etc.)
    extras(cfg)

    # run checkpointed sampling; metrics/objects returned by `sample` are unused here
    sample(cfg)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # register custom OmegaConf resolvers before Hydra composes the config
    register_custom_omegaconf_resolvers()
    main()
|
||||||
196
flowdock/train.py
Normal file
196
flowdock/train.py
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import hydra
|
||||||
|
import lightning as L
|
||||||
|
import lovely_tensors as lt
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Any, Dict, List, Optional, Tuple
|
||||||
|
from lightning import Callback, LightningDataModule, LightningModule, Trainer
|
||||||
|
from lightning.fabric.plugins.environments.cluster_environment import ClusterEnvironment
|
||||||
|
from lightning.pytorch.loggers import Logger
|
||||||
|
from lightning.pytorch.strategies.strategy import Strategy
|
||||||
|
from omegaconf import DictConfig
|
||||||
|
|
||||||
|
lt.monkey_patch()
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
# ------------------------------------------------------------------------------------ #
|
||||||
|
# the setup_root above is equivalent to:
|
||||||
|
# - adding project root dir to PYTHONPATH
|
||||||
|
# (so you don't need to force user to install project as a package)
|
||||||
|
# (necessary before importing any local modules e.g. `from flowdock import utils`)
|
||||||
|
# - setting up PROJECT_ROOT environment variable
|
||||||
|
# (which is used as a base for paths in "configs/paths/default.yaml")
|
||||||
|
# (this way all filepaths are the same no matter where you run the code)
|
||||||
|
# - loading environment variables from ".env" in root dir
|
||||||
|
#
|
||||||
|
# you can remove it if you:
|
||||||
|
# 1. either install project as a package or move entry files to project root dir
|
||||||
|
# 2. set `root_dir` to "." in "configs/paths/default.yaml"
|
||||||
|
#
|
||||||
|
# more info: https://github.com/ashleve/rootutils
|
||||||
|
# ------------------------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
from flowdock import register_custom_omegaconf_resolvers, resolve_omegaconf_variable
|
||||||
|
from flowdock.utils import (
|
||||||
|
RankedLogger,
|
||||||
|
extras,
|
||||||
|
get_metric_value,
|
||||||
|
instantiate_callbacks,
|
||||||
|
instantiate_loggers,
|
||||||
|
log_hyperparameters,
|
||||||
|
task_wrapper,
|
||||||
|
)
|
||||||
|
|
||||||
|
log = RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
@task_wrapper
def train(cfg: DictConfig) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Trains the model. Can additionally evaluate on a testset, using best weights obtained during
    training.

    This method is wrapped in optional @task_wrapper decorator, that controls the behavior during
    failure. Useful for multiruns, saving info about the crash, etc.

    :param cfg: A DictConfig configuration composed by Hydra.
    :return: A tuple with metrics and dict with all instantiated objects.
    """
    # set seed for random number generators in pytorch, numpy and python.random
    if cfg.get("seed"):
        L.seed_everything(cfg.seed, workers=True)

    log.info(
        f"Setting `float32_matmul_precision` to {cfg.model.cfg.task.float32_matmul_precision}."
    )
    torch.set_float32_matmul_precision(precision=cfg.model.cfg.task.float32_matmul_precision)

    log.info(f"Instantiating datamodule <{cfg.data._target_}>")
    datamodule: LightningDataModule = hydra.utils.instantiate(cfg.data, stage="fit")

    log.info(f"Instantiating model <{cfg.model._target_}>")
    model: LightningModule = hydra.utils.instantiate(cfg.model)

    log.info("Instantiating callbacks...")
    callbacks: List[Callback] = instantiate_callbacks(cfg.get("callbacks"))

    log.info("Instantiating loggers...")
    logger: List[Logger] = instantiate_loggers(cfg.get("logger"))

    plugins = None
    if "_target_" in cfg.environment:
        log.info(f"Instantiating environment <{cfg.environment._target_}>")
        plugins: ClusterEnvironment = hydra.utils.instantiate(cfg.environment)

    # fall back to any strategy preconfigured on the trainer config
    strategy = getattr(cfg.trainer, "strategy", None)
    if "_target_" in cfg.strategy:
        log.info(f"Instantiating strategy <{cfg.strategy._target_}>")
        strategy: Strategy = hydra.utils.instantiate(cfg.strategy)
        # resolve mixed-precision dtype strings (e.g. for FSDP-style strategies)
        if (
            "mixed_precision" in strategy.__dict__
            and getattr(strategy, "mixed_precision", None) is not None
        ):
            strategy.mixed_precision.param_dtype = (
                resolve_omegaconf_variable(cfg.strategy.mixed_precision.param_dtype)
                if getattr(cfg.strategy.mixed_precision, "param_dtype", None) is not None
                else None
            )
            strategy.mixed_precision.reduce_dtype = (
                resolve_omegaconf_variable(cfg.strategy.mixed_precision.reduce_dtype)
                if getattr(cfg.strategy.mixed_precision, "reduce_dtype", None) is not None
                else None
            )
            strategy.mixed_precision.buffer_dtype = (
                resolve_omegaconf_variable(cfg.strategy.mixed_precision.buffer_dtype)
                if getattr(cfg.strategy.mixed_precision, "buffer_dtype", None) is not None
                else None
            )

    log.info(f"Instantiating trainer <{cfg.trainer._target_}>")
    # only pass `strategy` through when one was resolved, so Lightning's default applies otherwise
    trainer: Trainer = (
        hydra.utils.instantiate(
            cfg.trainer,
            callbacks=callbacks,
            logger=logger,
            plugins=plugins,
            strategy=strategy,
        )
        if strategy is not None
        else hydra.utils.instantiate(
            cfg.trainer,
            callbacks=callbacks,
            logger=logger,
            plugins=plugins,
        )
    )

    object_dict = {
        "cfg": cfg,
        "datamodule": datamodule,
        "model": model,
        "callbacks": callbacks,
        "logger": logger,
        "trainer": trainer,
    }

    if logger:
        log.info("Logging hyperparameters!")
        log_hyperparameters(object_dict)

    if cfg.get("train"):
        log.info("Starting training!")
        ckpt_path = None
        if cfg.get("ckpt_path") and os.path.exists(cfg.get("ckpt_path")):
            ckpt_path = cfg.get("ckpt_path")
        elif cfg.get("ckpt_path"):
            # a checkpoint was requested but is missing: warn and start fresh
            log.warning(
                "`ckpt_path` was given, but the path does not exist. Training with new model weights."
            )
        trainer.fit(model=model, datamodule=datamodule, ckpt_path=ckpt_path)

    train_metrics = trainer.callback_metrics

    if cfg.get("test"):
        log.info("Starting testing!")
        # test with the best checkpoint from this run; fall back to current weights
        ckpt_path = trainer.checkpoint_callback.best_model_path
        if ckpt_path == "":
            log.warning("Best ckpt not found! Using current weights for testing...")
            ckpt_path = None
        trainer.test(model=model, datamodule=datamodule, ckpt_path=ckpt_path)
        log.info(f"Best ckpt path: {ckpt_path}")

    test_metrics = trainer.callback_metrics

    # merge train and test metrics
    metric_dict = {**train_metrics, **test_metrics}

    return metric_dict, object_dict
|
||||||
|
|
||||||
|
|
||||||
|
@hydra.main(version_base="1.3", config_path="../configs", config_name="train.yaml")
def main(cfg: DictConfig) -> Optional[float]:
    """Main entry point for training.

    :param cfg: DictConfig configuration composed by Hydra.
    :return: Optional[float] with optimized metric value.
    """
    # apply extra utilities
    # (e.g. ask for tags if none are provided in cfg, print cfg tree, etc.)
    extras(cfg)

    # train the model
    metric_dict, _ = train(cfg)

    # safely retrieve metric value for hydra-based hyperparameter optimization
    metric_value = get_metric_value(
        metric_dict=metric_dict, metric_name=cfg.get("optimized_metric")
    )

    # return optimized metric
    return metric_value
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # register custom OmegaConf resolvers before Hydra composes the config
    register_custom_omegaconf_resolvers()
    main()
|
||||||
5
flowdock/utils/__init__.py
Normal file
5
flowdock/utils/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
from flowdock.utils.instantiators import instantiate_callbacks, instantiate_loggers
|
||||||
|
from flowdock.utils.logging_utils import log_hyperparameters
|
||||||
|
from flowdock.utils.pylogger import RankedLogger
|
||||||
|
from flowdock.utils.rich_utils import enforce_tags, print_config_tree
|
||||||
|
from flowdock.utils.utils import extras, get_metric_value, task_wrapper
|
||||||
1846
flowdock/utils/data_utils.py
Normal file
1846
flowdock/utils/data_utils.py
Normal file
File diff suppressed because it is too large
Load Diff
77
flowdock/utils/frame_utils.py
Normal file
77
flowdock/utils/frame_utils.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import torch
|
||||||
|
from beartype.typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
class RigidTransform:
    """A rigid transform: a translation paired with a rotation, stored as tensors.

    Attributes:
        t: translation tensor of shape ``(..., 3)``.
        R: rotation tensor of shape ``(..., 3, 3)``.
    """

    def __init__(self, t: torch.Tensor, R: Optional[torch.Tensor] = None):
        """Store the translation and rotation.

        NOTE(review): when ``R`` is omitted the rotation defaults to all zeros
        (degenerate), not the identity — confirm callers rely on this.
        """
        self.t = t
        self.R = t.new_zeros(*t.shape, 3) if R is None else R

    def __getitem__(self, key):
        """Index both the translation and the rotation with ``key``."""
        return RigidTransform(self.t[key], self.R[key])

    def unsqueeze(self, dim):
        """Return a copy with a singleton dimension inserted at ``dim``."""
        return RigidTransform(self.t.unsqueeze(dim), self.R.unsqueeze(dim))

    def squeeze(self, dim):
        """Return a copy with the singleton dimension ``dim`` removed."""
        return RigidTransform(self.t.squeeze(dim), self.R.squeeze(dim))

    def concatenate(self, other, dim=0):
        """Concatenate this transform with ``other`` along ``dim``."""
        t_cat = torch.cat([self.t, other.t], dim=dim)
        R_cat = torch.cat([self.R, other.R], dim=dim)
        return RigidTransform(t_cat, R_cat)
|
||||||
|
|
||||||
|
|
||||||
|
def get_frame_matrix(
    ri: torch.Tensor, rj: torch.Tensor, rk: torch.Tensor, eps: float = 1e-4, strict: bool = False
):
    """Build a local frame at ``rj`` from three points via the regularized
    Gram-Schmidt algorithm.

    Note that this implementation allows for shearing.

    :param ri: First point, shape ``(..., 3)``.
    :param rj: Center point (frame origin), shape ``(..., 3)``.
    :param rk: Third point, shape ``(..., 3)``.
    :param eps: Regularizer added to squared norms in the non-strict branch.
    :param strict: If True, normalize exactly (no eps regularization).
    :return: A ``RigidTransform`` with origin ``rj`` and the stacked basis as rotation.
    """
    v1 = ri - rj
    v2 = rk - rj
    if strict:
        # v1 = v1 + torch.randn_like(rj).mul(eps)
        # v2 = v2 + torch.randn_like(rj).mul(eps)
        # Exact normalization; degenerate inputs yield NaNs, zeroed via nan_to_num below.
        e1 = v1 / v1.norm(dim=-1, keepdim=True)
        # Project out the e1 component, then normalize
        u2 = v2 - e1.mul(e1.mul(v2).sum(-1, keepdim=True))
        e2 = u2 / u2.norm(dim=-1, keepdim=True)
    else:
        # eps-regularized norms keep the division finite near degenerate triples
        e1 = v1 / v1.square().sum(dim=-1, keepdim=True).add(eps).sqrt()
        u2 = v2 - e1.mul(e1.mul(v2).sum(-1, keepdim=True))
        e2 = u2 / u2.square().sum(dim=-1, keepdim=True).add(eps).sqrt()
    e3 = torch.cross(e1, e2, dim=-1)
    # Rows - lab frame, columns - internal frame
    basis = torch.stack([e1, e2, e3], dim=-1)
    return RigidTransform(rj, torch.nan_to_num(basis, 0.0))
|
||||||
|
|
||||||
|
|
||||||
|
def cartesian_to_internal(rs: torch.Tensor, frames: RigidTransform):
    """Express lab-frame points ``rs`` in local frames (right-multiply the pose matrix).

    :param rs: Points in the lab frame, shape ``(..., 3)``.
    :param frames: Frames whose ``t`` is the origin and ``R`` the basis.
    :return: Coordinates of ``rs`` in each local frame.
    """
    centered = (rs - frames.t).unsqueeze(-2)
    return torch.matmul(centered, frames.R).squeeze(-2)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_similarity_transform(
    X: torch.Tensor, R: torch.Tensor, T: torch.Tensor, s: torch.Tensor
) -> torch.Tensor:
    """Apply the batched similarity transform ``s * X @ R + T`` to points ``X``.

    From: https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/ops/points_alignment.html

    :param X: Point clouds, shape ``(B, N, 3)``.
    :param R: Rotation matrices, shape ``(B, 3, 3)``.
    :param T: Translations, shape ``(B, 3)``.
    :param s: Per-batch scale factors, shape ``(B,)``.
    :return: Transformed points, shape ``(B, N, 3)``.
    """
    scaled = torch.bmm(X, R).mul(s[:, None, None])
    return scaled + T[:, None, :]
|
||||||
4
flowdock/utils/generate_wandb_id.py
Normal file
4
flowdock/utils/generate_wandb_id.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
# Standalone helper script: prints a fresh WandB run ID so it can be pasted
# into experiment configs (e.g. to resume a run with a fixed identifier).
import wandb

if __name__ == "__main__":
    # `wandb.util.generate_id()` returns a short random run identifier string.
    print(f"Generated WandB run ID: {wandb.util.generate_id()}")
|
||||||
55
flowdock/utils/inspect_ode_samplers.py
Normal file
55
flowdock/utils/inspect_ode_samplers.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
import argparse
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from beartype import beartype
|
||||||
|
from beartype.typing import Literal
|
||||||
|
|
||||||
|
|
||||||
|
def clamp_tensor(value: torch.Tensor, min: float = 1e-6, max: float = 1 - 1e-6) -> torch.Tensor:
    """Set the upper and lower bounds of a tensor via clamping.

    :param value: The tensor to clamp.
    :param min: The minimum value to clamp to. Default is `1e-6`.
    :param max: The maximum value to clamp to. Default is `1 - 1e-6`.
    :return: The clamped tensor.
    """
    # NOTE: parameter names shadow the builtins but are kept to preserve the
    # keyword-argument interface for existing callers.
    return torch.clamp(value, min=min, max=max)
|
||||||
|
|
||||||
|
|
||||||
|
@beartype
def main(
    start_time: float, num_steps: int, sampler: Literal["ODE", "VDODE"] = "VDODE", eta: float = 1.0
):
    """Inspect different ODE samplers by printing the left hand side (LHS) and right hand side.

    (RHS) of their time ratio schedules. Note that the LHS and RHS are clamped to the range
    `[1e-6, 1 - 1e-6]` by default.

    :param start_time: The start time of the ODE sampler.
    :param num_steps: The number of steps to take.
    :param sampler: The ODE sampler to use.
    :param eta: The variance diminishing factor.
    """
    assert 0 < start_time <= 1.0, "The argument `start_time` must be in the range (0, 1]."
    # Uniform time grid from `start_time` down to 0 (inclusive); consecutive
    # pairs (t, s) are the current and next timesteps.
    time_grid = torch.linspace(start_time, 0, num_steps + 1)
    for t, s in zip(time_grid[:-1], time_grid[1:]):
        if sampler == "ODE":
            # Baseline ODE
            print(
                f"t: {t:.3f}; s: {s:.3f}; LHS -> (1 - t) * x0_hat: {clamp_tensor((1 - t)):.3f}; RHS -> t * xt: {clamp_tensor(t):.3f}"
            )
        elif sampler == "VDODE":
            # Variance Diminishing ODE (VD-ODE)
            print(
                f"t: {t:.3f}; s: {s:.3f}; LHS -> (1 - ((s / t) * eta)) * x0_hat: {clamp_tensor(1 - ((s / t) * eta)):.3f}; RHS -> ((s / t) * eta) * xt: {clamp_tensor((s / t) * eta):.3f}"
            )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI wrapper around `main`; defaults mirror `main`'s keyword defaults.
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--start_time", type=float, default=1.0)
    argparser.add_argument("--num_steps", type=int, default=40)
    argparser.add_argument("--sampler", type=str, choices=["ODE", "VDODE"], default="VDODE")
    argparser.add_argument("--eta", type=float, default=1.0)
    args = argparser.parse_args()
    main(args.start_time, args.num_steps, sampler=args.sampler, eta=args.eta)
|
||||||
58
flowdock/utils/instantiators.py
Normal file
58
flowdock/utils/instantiators.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
import hydra
|
||||||
|
import rootutils
|
||||||
|
from beartype.typing import List
|
||||||
|
from lightning import Callback
|
||||||
|
from lightning.pytorch.loggers import Logger
|
||||||
|
from omegaconf import DictConfig
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.utils import pylogger
|
||||||
|
|
||||||
|
log = pylogger.RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
def instantiate_callbacks(callbacks_cfg: DictConfig) -> List[Callback]:
    """Instantiates callbacks from config.

    :param callbacks_cfg: A DictConfig object containing callback configurations.
    :return: A list of instantiated callbacks.
    :raises TypeError: If ``callbacks_cfg`` is present but not a ``DictConfig``.
    """
    callbacks: List[Callback] = []

    if not callbacks_cfg:
        # FIX: use the same three-dot ellipsis as `instantiate_loggers` for
        # consistent log output.
        log.warning("No callback configs found! Skipping...")
        return callbacks

    if not isinstance(callbacks_cfg, DictConfig):
        raise TypeError("Callbacks config must be a DictConfig!")

    # Only sub-configs that declare a `_target_` key are instantiable callbacks;
    # everything else (e.g. shared options) is ignored.
    for _, cb_conf in callbacks_cfg.items():
        if isinstance(cb_conf, DictConfig) and "_target_" in cb_conf:
            log.info(f"Instantiating callback <{cb_conf._target_}>")
            callbacks.append(hydra.utils.instantiate(cb_conf))

    return callbacks
|
||||||
|
|
||||||
|
|
||||||
|
def instantiate_loggers(logger_cfg: DictConfig) -> List[Logger]:
    """Instantiates loggers from config.

    :param logger_cfg: A DictConfig object containing logger configurations.
    :return: A list of instantiated loggers.
    :raises TypeError: If ``logger_cfg`` is present but not a ``DictConfig``.
    """
    loggers: List[Logger] = []

    if not logger_cfg:
        log.warning("No logger configs found! Skipping...")
        return loggers

    if not isinstance(logger_cfg, DictConfig):
        raise TypeError("Logger config must be a DictConfig!")

    # Only sub-configs declaring a `_target_` key are instantiable loggers.
    for _, lg_conf in logger_cfg.items():
        if isinstance(lg_conf, DictConfig) and "_target_" in lg_conf:
            log.info(f"Instantiating logger <{lg_conf._target_}>")
            loggers.append(hydra.utils.instantiate(lg_conf))

    return loggers
|
||||||
59
flowdock/utils/logging_utils.py
Normal file
59
flowdock/utils/logging_utils.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
import rootutils
|
||||||
|
from beartype.typing import Any, Dict
|
||||||
|
from lightning_utilities.core.rank_zero import rank_zero_only
|
||||||
|
from omegaconf import OmegaConf
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.utils import pylogger
|
||||||
|
|
||||||
|
log = pylogger.RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
@rank_zero_only
def log_hyperparameters(object_dict: Dict[str, Any]) -> None:
    """Controls which config parts are saved by Lightning loggers.

    Additionally saves:
        - Number of model parameters

    :param object_dict: A dictionary containing the following objects:
        - `"cfg"`: A DictConfig object containing the main config.
        - `"model"`: The Lightning model.
        - `"trainer"`: The Lightning trainer.
    """
    hparams = {}

    # resolve the Hydra/OmegaConf config into a plain Python container
    cfg = OmegaConf.to_container(object_dict["cfg"])
    model = object_dict["model"]
    trainer = object_dict["trainer"]

    # nothing to log when no logger is attached to the trainer
    if not trainer.logger:
        log.warning("Logger not found! Skipping hyperparameter logging...")
        return

    hparams["model"] = cfg["model"]

    # save number of model parameters
    hparams["model/params/total"] = sum(p.numel() for p in model.parameters())
    hparams["model/params/trainable"] = sum(
        p.numel() for p in model.parameters() if p.requires_grad
    )
    hparams["model/params/non_trainable"] = sum(
        p.numel() for p in model.parameters() if not p.requires_grad
    )

    hparams["data"] = cfg["data"]
    hparams["trainer"] = cfg["trainer"]

    # optional sections: `.get` tolerates their absence in the config
    hparams["callbacks"] = cfg.get("callbacks")
    hparams["extras"] = cfg.get("extras")

    hparams["task_name"] = cfg.get("task_name")
    hparams["tags"] = cfg.get("tags")
    hparams["ckpt_path"] = cfg.get("ckpt_path")
    hparams["seed"] = cfg.get("seed")

    # send hparams to all loggers
    for logger in trainer.loggers:
        logger.log_hyperparams(hparams)
|
||||||
88
flowdock/utils/metric_utils.py
Normal file
88
flowdock/utils/metric_utils.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
import subprocess # nosec
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from beartype import beartype
|
||||||
|
from beartype.typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
MODEL_BATCH = Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
@beartype
def calculate_usalign_metrics(
    pred_pdb_filepath: str,
    reference_pdb_filepath: str,
    usalign_exec_path: str,
    flags: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Calculates US-align structural metrics between predicted and reference macromolecular
    structures.

    :param pred_pdb_filepath: Filepath to predicted macromolecular structure in PDB format.
    :param reference_pdb_filepath: Filepath to reference macromolecular structure in PDB format.
    :param usalign_exec_path: Path to US-align executable.
    :param flags: Command-line flags to pass to US-align, optional.
    :return: Dictionary containing macromolecular US-align structural metrics and metadata.
    """
    # assemble and run the US-align command, capturing its stdout report
    cmd = [usalign_exec_path, pred_pdb_filepath, reference_pdb_filepath]
    cmd += flags if flags is not None else []
    output = subprocess.check_output(cmd, text=True, stderr=subprocess.PIPE)  # nosec

    # scrape the structural metrics out of US-align's plain-text report
    metrics: Dict[str, Any] = {}
    for raw_line in output.splitlines():
        line = raw_line.strip()
        if line.startswith("Name of Structure_1:"):
            metrics["Name of Structure_1"] = line.split(": ", 1)[1]
        elif line.startswith("Name of Structure_2:"):
            metrics["Name of Structure_2"] = line.split(": ", 1)[1]
        elif line.startswith("Length of Structure_1:"):
            metrics["Length of Structure_1"] = int(line.split(": ")[1].split()[0])
        elif line.startswith("Length of Structure_2:"):
            metrics["Length of Structure_2"] = int(line.split(": ")[1].split()[0])
        elif line.startswith("Aligned length="):
            # expected shape: "Aligned length= N, RMSD= X, Seq_ID=n_identical/n_aligned= Y"
            fields = line.split("=")
            metrics["Aligned length"] = int(fields[1].split(",")[0].strip())
            metrics["RMSD"] = float(fields[2].split(",")[0].strip())
            metrics["Seq_ID"] = float(fields[4].strip())
        elif line.startswith("TM-score="):
            # two TM-score lines appear, one per normalization length
            if "normalized by length of Structure_1" in line:
                metrics["TM-score_1"] = float(line.split("=")[1].split()[0])
            elif "normalized by length of Structure_2" in line:
                metrics["TM-score_2"] = float(line.split("=")[1].split()[0])

    return metrics
|
||||||
|
|
||||||
|
|
||||||
|
def compute_per_atom_lddt(
    batch: MODEL_BATCH, pred_coords: torch.Tensor, target_coords: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Computes per-atom local distance difference test (LDDT) between predicted and target
    coordinates.

    :param batch: Dictionary containing metadata and target coordinates.
    :param pred_coords: Predicted atomic coordinates.
    :param target_coords: Target atomic coordinates.
    :return: Tuple of lDDT and lDDT list.
    """
    # Reshape flat coordinates into (num_structures, num_atoms, 3).
    pred_coords = pred_coords.contiguous().view(batch["metadata"]["num_structid"], -1, 3)
    target_coords = target_coords.contiguous().view(batch["metadata"]["num_structid"], -1, 3)
    # All-pairs distance matrices for target and prediction.
    target_dist = (target_coords[:, :, None] - target_coords[:, None, :]).norm(dim=-1)
    pred_dist = (pred_coords[:, :, None] - pred_coords[:, None, :]).norm(dim=-1)
    # Only pairs within 15 (presumably Angstrom) in the target are scored.
    # NOTE(review): self-pairs (target distance 0) are included in the mask — confirm intended.
    conserved_mask = target_dist < 15.0
    lddt_list = []
    # Bin edges for |pred_dist - target_dist|; the trailing 1e9 edge is an open catch-all bin.
    thresholds = [0, 0.5, 1, 2, 4, 6, 8, 12, 1e9]
    for threshold_idx in range(8):
        distdiff = (pred_dist - target_dist).abs()
        # Pairs whose error falls strictly inside this bin.
        # NOTE(review): strict inequalities mean values exactly on a bin edge
        # (including a perfect 0 error) fall into no bin — confirm intended.
        bin_fraction = (distdiff > thresholds[threshold_idx]) & (
            distdiff < thresholds[threshold_idx + 1]
        )
        # Fraction of scored (conserved) pairs in this bin, per atom.
        lddt_list.append(
            bin_fraction.mul(conserved_mask).long().sum(dim=2) / conserved_mask.long().sum(dim=2)
        )
    lddt_list = torch.stack(lddt_list, dim=-1)
    # lDDT = mean of the cumulative fractions below the first four cutoffs
    # (0.5 / 1 / 2 / 4), computed via cumsum over the first 4 bins.
    lddt = torch.cumsum(lddt_list[:, :, :4], dim=-1).mean(dim=-1)
    return lddt, lddt_list
|
||||||
446
flowdock/utils/model_utils.py
Normal file
446
flowdock/utils/model_utils.py
Normal file
@@ -0,0 +1,446 @@
|
|||||||
|
# Adapted from: https://github.com/zrqiao/NeuralPLexer
|
||||||
|
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from beartype.typing import Any, Dict, List, Optional, Tuple, Union
|
||||||
|
from torch_scatter import scatter_max, scatter_min
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.utils import RankedLogger
|
||||||
|
|
||||||
|
MODEL_BATCH = Dict[str, Any]
|
||||||
|
STATE_DICT = Dict[str, Any]
|
||||||
|
|
||||||
|
log = RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
class GELUMLP(torch.nn.Module):
    """Simple MLP with post-LayerNorm."""

    def __init__(
        self,
        n_in_feats: int,
        n_out_feats: int,
        n_hidden_feats: Optional[int] = None,
        dropout: float = 0.0,
        zero_init: bool = False,
    ):
        """Build the MLP.

        :param n_in_feats: Input feature width.
        :param n_out_feats: Output feature width.
        :param n_hidden_feats: Hidden width; if None, a single hidden layer of
            the input width is used.
        :param dropout: Dropout probability (applied on the input in `forward`
            and, for the deep variant, between hidden layers).
        :param zero_init: Zero the final linear weight (for residual branches).
        """
        super().__init__()
        self.dropout = dropout
        if n_hidden_feats is None:
            # Shallow variant: in -> in -> out with GELU + post-LayerNorm.
            layer_stack = [
                torch.nn.Linear(n_in_feats, n_in_feats),
                torch.nn.GELU(),
                torch.nn.LayerNorm(n_in_feats),
                torch.nn.Linear(n_in_feats, n_out_feats),
            ]
        else:
            # Deep variant: in -> hidden -> hidden -> out with dropout in between.
            layer_stack = [
                torch.nn.Linear(n_in_feats, n_hidden_feats),
                torch.nn.GELU(),
                torch.nn.Dropout(p=self.dropout),
                torch.nn.Linear(n_hidden_feats, n_hidden_feats),
                torch.nn.GELU(),
                torch.nn.LayerNorm(n_hidden_feats),
                torch.nn.Linear(n_hidden_feats, n_out_feats),
            ]
        self.layers = torch.nn.Sequential(*layer_stack)
        torch.nn.init.xavier_uniform_(self.layers[0].weight, gain=1)
        # zero init for residual branches
        if zero_init:
            self.layers[-1].weight.data.fill_(0.0)
        else:
            torch.nn.init.xavier_uniform_(self.layers[-1].weight, gain=1)

    def _zero_init(self, module):
        """Zero-initialize the weight (and bias, if any) of a linear module."""
        if isinstance(module, torch.nn.Linear):
            module.weight.data.zero_()
            if module.bias is not None:
                module.bias.data.zero_()

    def forward(self, x: torch.Tensor):
        """Apply input dropout followed by the layer stack."""
        x = F.dropout(x, p=self.dropout, training=self.training)
        return self.layers(x)
|
||||||
|
|
||||||
|
|
||||||
|
class SumPooling(torch.nn.Module):
    """Sum pooling layer."""

    def __init__(self, learnable: bool, hidden_dim: int = 1):
        """Set up the pooling layer; a linear map is applied post-pooling when learnable."""
        super().__init__()
        if learnable:
            self.pooled_transform = torch.nn.Linear(hidden_dim, hidden_dim)
        else:
            self.pooled_transform = torch.nn.Identity()

    def forward(self, x, dst_idx, dst_size):
        """Segment-sum `x` into `dst_size` buckets keyed by `dst_idx`, then transform."""
        pooled = segment_sum(x, dst_idx, dst_size)
        return self.pooled_transform(pooled)
|
||||||
|
|
||||||
|
|
||||||
|
class AveragePooling(torch.nn.Module):
    """Average pooling layer."""

    def __init__(self, learnable: bool, hidden_dim: int = 1):
        """Set up the pooling layer; a linear map is applied post-pooling when learnable."""
        super().__init__()
        if learnable:
            self.pooled_transform = torch.nn.Linear(hidden_dim, hidden_dim)
        else:
            self.pooled_transform = torch.nn.Identity()

    def forward(self, x, dst_idx, dst_size):
        """Segment-average `x` into `dst_size` buckets keyed by `dst_idx`."""
        totals = x.new_zeros(dst_size, *x.shape[1:]).index_add_(0, dst_idx, x)
        counts = x.new_zeros(dst_size, *x.shape[1:]).index_add_(0, dst_idx, torch.ones_like(x))
        # epsilon guards empty buckets against division by zero
        return self.pooled_transform(totals / (counts + 1e-8))
|
||||||
|
|
||||||
|
|
||||||
|
def init_weights(m):
    """Apply Kaiming-uniform initialization to the weight of any Linear module."""
    if not isinstance(m, torch.nn.Linear):
        return
    torch.nn.init.kaiming_uniform_(m.weight)
|
||||||
|
|
||||||
|
|
||||||
|
def segment_sum(src, dst_idx, dst_size):
    """Computes the sum of each segment in a tensor.

    :param src: Source tensor; rows are assigned to segments via `dst_idx`.
    :param dst_idx: Long tensor mapping each row of `src` to an output segment.
    :param dst_size: Number of output segments.
    :return: Per-segment sums, shape ``(dst_size, *src.shape[1:])``.
    """
    accumulator = src.new_zeros(dst_size, *src.shape[1:])
    return accumulator.index_add_(0, dst_idx, src)
|
||||||
|
|
||||||
|
|
||||||
|
def segment_mean(src, dst_idx, dst_size):
    """Computes the mean value of each segment in a tensor.

    :param src: Source tensor; rows are assigned to segments via `dst_idx`.
    :param dst_idx: Long tensor mapping each row of `src` to an output segment.
    :param dst_size: Number of output segments.
    :return: Per-segment means; empty segments evaluate to ~0 via the epsilon guard.
    """
    totals = src.new_zeros(dst_size, *src.shape[1:]).index_add_(0, dst_idx, src)
    counts = src.new_zeros(dst_size, *src.shape[1:]).index_add_(0, dst_idx, torch.ones_like(src))
    # epsilon keeps empty segments finite instead of dividing by zero
    return totals / (counts + 1e-8)
|
||||||
|
|
||||||
|
|
||||||
|
def segment_argmin(scores, dst_idx, dst_size, randomize: bool = False) -> torch.Tensor:
    """Samples the index of the minimum value in each segment.

    :param scores: Per-row scores; rows are assigned to segments via `dst_idx`.
    :param dst_idx: Long tensor mapping each row to an output segment.
    :param dst_size: Number of output segments.
    :param randomize: If True, perturb scores with Gumbel noise before the argmin.
    """
    if randomize:
        # Gumbel perturbation turns the hard argmin into a stochastic sample
        gumbel = -torch.log(-torch.log(torch.rand_like(scores)))
        scores = scores + gumbel
    _, sampled_idx = scatter_min(scores, dst_idx, dim=0, dim_size=dst_size)
    return sampled_idx
|
||||||
|
|
||||||
|
|
||||||
|
def segment_logsumexp(src, dst_idx, dst_size, extra_dims=None):
    """Computes the logsumexp of each segment in a tensor.

    :param src: Source tensor; rows are assigned to segments via `dst_idx`.
    :param dst_idx: Long tensor mapping each row of `src` to an output segment.
    :param dst_size: Number of output segments.
    :param extra_dims: Optional extra dimensions to also reduce over.
    :return: Per-segment logsumexp of `src`.
    """
    # per-segment max for numerical stability (subtracted before exponentiating)
    src_max, _ = scatter_max(src, dst_idx, dim=0, dim_size=dst_size)
    if extra_dims is not None:
        src_max = torch.amax(src_max, dim=extra_dims, keepdim=True)
    src = src - src_max[dst_idx]
    # accumulate exp(src - max) into each segment
    out = torch.zeros(
        dst_size,
        *src.shape[1:],
        dtype=src.dtype,
        device=src.device,
    ).index_add_(0, dst_idx, torch.exp(src))
    if extra_dims is not None:
        out = torch.sum(out, dim=extra_dims)
    # add the subtracted max back; epsilon guards log(0) for empty segments
    return torch.log(out + 1e-8) + src_max.view(*out.shape)
|
||||||
|
|
||||||
|
|
||||||
|
def segment_softmax(src, dst_idx, dst_size, extra_dims=None, floor_value=None):
    """Computes the softmax of each segment in a tensor.

    :param src: Source tensor; rows are assigned to segments via `dst_idx`.
    :param dst_idx: Long tensor mapping each row of `src` to an output segment.
    :param dst_size: Number of output segments.
    :param extra_dims: Optional extra dimensions to also normalize over.
    :param floor_value: Optional minimum probability; when given, probabilities
        are clamped to this floor and then renormalized per segment.
    """
    # per-segment max for numerical stability
    src_max, _ = scatter_max(src, dst_idx, dim=0, dim_size=dst_size)
    if extra_dims is not None:
        src_max = torch.amax(src_max, dim=extra_dims, keepdim=True)
    src = src - src_max[dst_idx]
    exp1 = torch.exp(src)
    # per-segment normalizer (sum of exponentials)
    exp0 = torch.zeros(
        dst_size,
        *src.shape[1:],
        dtype=src.dtype,
        device=src.device,
    ).index_add_(0, dst_idx, exp1)
    if extra_dims is not None:
        exp0 = torch.sum(exp0, dim=extra_dims, keepdim=True)
    # broadcast each segment's normalizer back onto its rows
    exp0 = torch.index_select(input=exp0, dim=0, index=dst_idx)
    exp = exp1.div(exp0 + 1e-8)
    if floor_value is not None:
        # clamp to the floor, then renormalize so each segment sums to ~1 again
        exp = exp.clamp(min=floor_value)
        exp0 = torch.zeros(
            dst_size,
            *src.shape[1:],
            dtype=src.dtype,
            device=src.device,
        ).index_add_(0, dst_idx, exp)
        if extra_dims is not None:
            exp0 = torch.sum(exp0, dim=extra_dims, keepdim=True)
        exp0 = torch.index_select(input=exp0, dim=0, index=dst_idx)
        exp = exp.div(exp0 + 1e-8)
    return exp
|
||||||
|
|
||||||
|
|
||||||
|
def batched_sample_onehot(logits, dim=0, max_only=False):
    """Implements the Gumbel-Max trick to sample from a one-hot distribution.

    :param logits: Unnormalized log-probabilities.
    :param dim: Dimension along which to sample.
    :param max_only: If True, take the deterministic argmax instead of sampling.
    :return: Boolean one-hot tensor shaped like `logits`.
    """
    if max_only:
        chosen = torch.argmax(logits, dim=dim, keepdim=True)
    else:
        # Gumbel-Max: argmax(logits + Gumbel noise) ~ Categorical(softmax(logits))
        gumbel = -torch.log(-torch.log(torch.rand_like(logits)))
        chosen = torch.argmax(logits + gumbel, dim=dim, keepdim=True)
    onehot = torch.zeros_like(logits, dtype=torch.bool)
    onehot.scatter_(dim=dim, index=chosen, value=1)
    return onehot
|
||||||
|
|
||||||
|
|
||||||
|
def topk_edge_mask_from_logits(scores, k, randomize=False):
    """Samples the top-k edges from a set of logits.

    :param scores: Edge logits of shape [B, N, N].
    :param k: Maximum out-degree per node (capped by the number of columns).
    :param randomize: If True, perturb logits with Gumbel noise first.
    :return: Boolean mask of shape [B, N, N] selecting the chosen edges.
    """
    assert len(scores.shape) == 3, "Scores should have shape [B, N, N]"
    if randomize:
        gumbel = -torch.log(-torch.log(torch.rand_like(scores)))
        scores = scores + gumbel
    # cap the requested degree by the number of available columns
    degree = min(k, scores.shape[2])
    _, chosen_idx = torch.topk(scores, degree, dim=-1, largest=True)
    mask = scores.new_zeros(scores.shape, dtype=torch.bool)
    return mask.scatter_(dim=2, index=chosen_idx, value=1).bool()
|
||||||
|
|
||||||
|
|
||||||
|
def sample_inplace_to_torch(sample):
    """Convert NumPy sample to PyTorch tensors (in place; returns the same dict)."""
    if sample is None:
        return None
    # features become float tensors, indexer entries become long tensors
    for key, caster in (("features", torch.FloatTensor), ("indexer", torch.LongTensor)):
        sample[key] = {k: caster(v) for k, v in sample[key].items()}
    if "labels" in sample.keys():
        sample["labels"] = {k: torch.FloatTensor(v) for k, v in sample["labels"].items()}
    return sample
|
||||||
|
|
||||||
|
|
||||||
|
def inplace_to_device(sample, device):
    """Move every tensor in a sample dict to `device` (in place; returns the same dict).

    :param sample: Dict with "features" and "indexer" tensor dicts, and optionally
        a "labels" tensor dict (see `inplace_to_torch`).
    :param device: Target device (e.g. "cpu", "cuda:0").
    :return: The same dict with all tensors moved to `device`.
    """
    sample["features"] = {k: v.to(device) for k, v in sample["features"].items()}
    sample["indexer"] = {k: v.to(device) for k, v in sample["indexer"].items()}
    if "labels" in sample.keys():
        # BUGFIX: "labels" is a dict of tensors (see `inplace_to_torch`), not a
        # tensor — calling `.to` on the dict itself raised AttributeError.
        sample["labels"] = {k: v.to(device) for k, v in sample["labels"].items()}
    return sample
|
||||||
|
|
||||||
|
|
||||||
|
def inplace_to_torch(sample):
    """Convert a NumPy-backed sample dict to PyTorch tensors (returns the same dict)."""
    if sample is None:
        return None
    sample["features"] = {name: torch.FloatTensor(arr) for name, arr in sample["features"].items()}
    sample["indexer"] = {name: torch.LongTensor(arr) for name, arr in sample["indexer"].items()}
    # labels are optional in a sample
    if "labels" in sample:
        sample["labels"] = {name: torch.FloatTensor(arr) for name, arr in sample["labels"].items()}
    return sample
|
||||||
|
|
||||||
|
|
||||||
|
def distance_to_gaussian_contact_logits(
    x: torch.Tensor, contact_scale: float, cutoff: Optional[float] = None
) -> torch.Tensor:
    """Convert distance to Gaussian contact logits.

    :param x: Distance tensor.
    :param contact_scale: The contact scale.
    :param cutoff: The distance cutoff; defaults to twice the contact scale.
    :return: Gaussian contact logits.
    """
    effective_cutoff = contact_scale * 2 if cutoff is None else cutoff
    # clamp keeps the log finite once x meets or exceeds the cutoff
    decay = torch.clamp(1 - (x / effective_cutoff), min=1e-9)
    return torch.log(decay)
|
||||||
|
|
||||||
|
|
||||||
|
def distogram_to_gaussian_contact_logits(
    dgram: torch.Tensor, dist_bins: torch.Tensor, contact_scale: float
) -> torch.Tensor:
    """Convert a distance histogram (distogram) matrix to a Gaussian contact map.

    :param dgram: A distogram matrix (log-weights over distance bins).
    :param dist_bins: Representative distance of each bin.
    :param contact_scale: Contact scale forwarded to the per-bin logits.
    :return: A Gaussian contact map (logsumexp over the bin dimension).
    """
    per_bin_logits = distance_to_gaussian_contact_logits(dist_bins, contact_scale)
    return torch.logsumexp(dgram + per_bin_logits, dim=-1)
|
||||||
|
|
||||||
|
|
||||||
|
def eval_true_contact_maps(
    batch: MODEL_BATCH, contact_scale: float, **kwargs: Dict[str, Any]
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Evaluate true contact maps.

    :param batch: A batch dictionary.
    :param contact_scale: The contact scale.
    :param kwargs: Additional keyword arguments (forwarded to
        `distance_to_gaussian_contact_logits`, e.g. `cutoff`).
    :return: Residue-ligand distance matrix and the flattened contact logits.
    """
    indexer = batch["indexer"]
    batch_size = batch["metadata"]["num_structid"]
    # labels are fixed targets, so no gradients are needed here
    with torch.no_grad():
        # Residue centroids
        # masked mean of per-residue atom positions; epsilon guards residues
        # with no resolved atoms
        res_cent_coords = (
            batch["features"]["res_atom_positions"]
            .mul(batch["features"]["res_atom_mask"].bool()[:, :, None])
            .sum(dim=1)
            .div(batch["features"]["res_atom_mask"].bool().sum(dim=1)[:, None] + 1e-9)
        )
        # pairwise residue-centroid to ligand-atom distances
        # (ligand coordinates gathered via `gather_idx_U_u`)
        res_lig_dist = (
            res_cent_coords.view(batch_size, -1, 3)[:, :, None]
            - batch["features"]["sdf_coordinates"][indexer["gather_idx_U_u"]].view(
                batch_size, -1, 3
            )[:, None, :]
        ).norm(dim=-1)
        res_lig_contact_logit = distance_to_gaussian_contact_logits(
            res_lig_dist, contact_scale, **kwargs
        )
    return res_lig_dist, res_lig_contact_logit.flatten()
|
||||||
|
|
||||||
|
|
||||||
|
def sample_reslig_contact_matrix(
    batch: MODEL_BATCH, res_lig_logits: torch.Tensor, last: Optional[torch.Tensor] = None
) -> torch.Tensor:
    """Sample residue-ligand contact matrix.

    :param batch: A batch dictionary.
    :param res_lig_logits: Residue-ligand contact logits.
    :param last: The previously sampled contact matrix, or None to start fresh.
    :return: Sampled residue-ligand contact matrix of shape [B, N_res, N_frames].
    """
    metadata = batch["metadata"]
    batch_size = metadata["num_structid"]
    # FIX: removed a dead `max(metadata["num_molid_per_sample"])` expression
    # statement whose result was discarded.
    n_a_per_sample = max(metadata["num_a_per_sample"])
    n_I_per_sample = max(metadata["num_I_per_sample"])
    res_lig_logits = res_lig_logits.view(batch_size, n_a_per_sample, n_I_per_sample)
    # Sampling from unoccupied lattice sites
    if last is None:
        last = torch.zeros_like(res_lig_logits, dtype=torch.bool)
    # Column-graph-wise masking for already sampled ligands
    # sampled_ligand_mask = torch.amax(last, dim=1, keepdim=True)
    sampled_frame_mask = torch.sum(last, dim=1, keepdim=True).contiguous()
    # push already-occupied columns far down so they cannot be re-sampled
    masked_logits = res_lig_logits - sampled_frame_mask * 1e9
    sampled_block_onehot = batched_sample_onehot(masked_logits.flatten(1, 2), dim=1).view(
        batch_size, n_a_per_sample, n_I_per_sample
    )
    new_block_contact_mat = last + sampled_block_onehot
    # Remove non-contact patches (logits at or below -16 count as no contact)
    valid_logit_mask = res_lig_logits > -16.0
    new_block_contact_mat = (new_block_contact_mat * valid_logit_mask).bool()
    return new_block_contact_mat
|
||||||
|
|
||||||
|
|
||||||
|
def merge_res_lig_logits_to_graph(
    batch: MODEL_BATCH,
    res_lig_logits: torch.Tensor,
    single_protein_batch: bool,
) -> torch.Tensor:
    """Patch merging [B, N_res, N_atm] -> [B, N_res, N_graph].

    :param batch: A batch dictionary.
    :param res_lig_logits: Residue-ligand contact logits.
    :param single_protein_batch: Whether to use single protein batch.
    :return: Merged residue-ligand logits.
    """
    assert single_protein_batch, "Only single protein batch is supported."
    metadata = batch["metadata"]
    indexer = batch["indexer"]
    batch_size = metadata["num_structid"]
    # FIX: removed a duplicate bare `max(metadata["num_molid_per_sample"])`
    # expression statement; the value is bound once below.
    n_mol_per_sample = max(metadata["num_molid_per_sample"])
    n_a_per_sample = max(metadata["num_a_per_sample"])
    n_I_per_sample = max(metadata["num_I_per_sample"])
    res_lig_logits = res_lig_logits.view(batch_size, n_a_per_sample, n_I_per_sample)
    # logsumexp-pool frame-level logits into per-molecule (graph) logits,
    # grouping frames by their molecule id
    graph_wise_logits = segment_logsumexp(
        res_lig_logits.permute(2, 0, 1),
        indexer["gather_idx_I_molid"][:n_I_per_sample],
        n_mol_per_sample,
    ).permute(1, 2, 0)
    return graph_wise_logits
|
||||||
|
|
||||||
|
|
||||||
|
def sample_res_rowmask_from_contacts(
    batch: MODEL_BATCH,
    res_ligatm_logits: torch.Tensor,
    single_protein_batch: bool,
) -> torch.Tensor:
    """Sample residue row mask from contacts.

    :param batch: A batch dictionary.
    :param res_ligatm_logits: Residue-ligand atom contact logits.
    :param single_protein_batch: Whether to use single protein batch.
    :return: Sampled residue row mask (one-hot over residues for each ligand graph).
    """
    # [B, N_res, N_graph] -> [B, N_graph, N_res]; `.contiguous()` so the flatten
    # below is a view-compatible reshape.
    lig_wise_logits = (
        merge_res_lig_logits_to_graph(batch, res_ligatm_logits, single_protein_batch)
        .permute(0, 2, 1)
        .contiguous()
    )
    # Draw one residue per (sample, ligand-graph) row.
    sampled_res_onehot_mask = batched_sample_onehot(lig_wise_logits.flatten(0, 1), dim=1)
    return sampled_res_onehot_mask
|
||||||
|
|
||||||
|
|
||||||
|
def extract_esm_embeddings(
    esm_model: torch.nn.Module,
    esm_alphabet: torch.nn.Module,
    esm_batch_converter: torch.nn.Module,
    sequences: List[str],
    device: Union[str, torch.device],
    esm_repr_layer: int = 33,
) -> List[torch.Tensor]:
    """Extract embeddings from ESM model.

    :param esm_model: ESM model.
    :param esm_alphabet: ESM alphabet.
    :param esm_batch_converter: ESM batch converter.
    :param sequences: A list of sequences.
    :param device: Device to use.
    :param esm_repr_layer: ESM representation layer index from which to extract embeddings.
    :return: A corresponding list of embeddings.
    """
    # Disable dropout for deterministic results
    esm_model.eval()

    # Pair each sequence with a unique string label, as required by the ESM batch converter
    data = [(str(i), seq) for i, seq in enumerate(sequences)]
    _, _, batch_tokens = esm_batch_converter(data)
    batch_tokens = batch_tokens.to(device)
    # Per-sequence token counts (non-padding), including BOS/EOS tokens
    batch_lens = (batch_tokens != esm_alphabet.padding_idx).sum(1)

    # Extract per-residue representations; contacts are never consumed here,
    # so skip the (costly) contact-map head entirely
    with torch.no_grad():
        results = esm_model(batch_tokens, repr_layers=[esm_repr_layer], return_contacts=False)
    token_representations = results["representations"][esm_repr_layer]

    # Generate per-residue representations
    # NOTE: token 0 is always a beginning-of-sequence token, so the first residue is token 1.
    sequence_representations = []
    for i, tokens_len in enumerate(batch_lens):
        # Slice off the BOS token (index 0) and the trailing EOS token
        sequence_representations.append(token_representations[i, 1 : tokens_len - 1])

    return sequence_representations
|
||||||
51
flowdock/utils/pylogger.py
Normal file
51
flowdock/utils/pylogger.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
from beartype.typing import Mapping, Optional
|
||||||
|
from lightning_utilities.core.rank_zero import rank_prefixed_message, rank_zero_only
|
||||||
|
|
||||||
|
|
||||||
|
class RankedLogger(logging.LoggerAdapter):
    """A multi-GPU-friendly python command line logger."""

    def __init__(
        self,
        name: str = __name__,
        rank_zero_only: bool = False,
        extra: Optional[Mapping[str, object]] = None,
    ) -> None:
        """Initializes a multi-GPU-friendly python command line logger that logs on all processes
        with their rank prefixed in the log message.

        :param name: The name of the logger. Default is ``__name__``.
        :param rank_zero_only: Whether to force all logs to only occur on the rank zero process. Default is `False`.
        :param extra: (Optional) A dict-like object which provides contextual information. See `logging.LoggerAdapter`.
        """
        super().__init__(logger=logging.getLogger(name), extra=extra)
        self.rank_zero_only = rank_zero_only

    def log(self, level: int, msg: str, rank: Optional[int] = None, *args, **kwargs) -> None:
        """Delegate a log call to the underlying logger, after prefixing its message with the rank
        of the process it's being logged from. If `'rank'` is provided, then the log will only
        occur on that rank/process.

        :param level: The level to log at. Look at `logging.__init__.py` for more information.
        :param msg: The message to log.
        :param rank: The rank to log at.
        :param args: Additional args to pass to the underlying logging function.
        :param kwargs: Any additional keyword args to pass to the underlying logging function.
        """
        if not self.isEnabledFor(level):
            return
        msg, kwargs = self.process(msg, kwargs)
        # NOTE: the bare name `rank_zero_only` here resolves to the imported
        # lightning utility (whose `.rank` attribute is set by Lightning), not
        # the boolean attribute stored on `self`.
        current_rank = getattr(rank_zero_only, "rank", None)
        if current_rank is None:
            raise RuntimeError("The `rank_zero_only.rank` needs to be set before use")
        msg = rank_prefixed_message(msg, current_rank)
        if self.rank_zero_only:
            should_emit = current_rank == 0
        else:
            should_emit = rank is None or current_rank == rank
        if should_emit:
            self.logger.log(level, msg, *args, **kwargs)
|
||||||
102
flowdock/utils/rich_utils.py
Normal file
102
flowdock/utils/rich_utils.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import rich
|
||||||
|
import rich.syntax
|
||||||
|
import rich.tree
|
||||||
|
import rootutils
|
||||||
|
from beartype.typing import Sequence
|
||||||
|
from hydra.core.hydra_config import HydraConfig
|
||||||
|
from lightning_utilities.core.rank_zero import rank_zero_only
|
||||||
|
from omegaconf import DictConfig, OmegaConf, open_dict
|
||||||
|
from rich.prompt import Prompt
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.utils import pylogger
|
||||||
|
|
||||||
|
log = pylogger.RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
@rank_zero_only
def print_config_tree(
    cfg: DictConfig,
    print_order: Sequence[str] = (
        "data",
        "model",
        "callbacks",
        "logger",
        "trainer",
        "paths",
        "extras",
    ),
    resolve: bool = False,
    save_to_file: bool = False,
) -> None:
    """Prints the contents of a DictConfig as a tree structure using the Rich library.

    :param cfg: A DictConfig composed by Hydra.
    :param print_order: Determines in what order config components are printed. Default is ``("data", "model",
        "callbacks", "logger", "trainer", "paths", "extras")``.
    :param resolve: Whether to resolve reference fields of DictConfig. Default is ``False``.
    :param save_to_file: Whether to export config to the hydra output folder. Default is ``False``.
    """
    style = "dim"
    tree = rich.tree.Tree("CONFIG", style=style, guide_style=style)

    queue = []

    # add fields from `print_order` to queue
    for field in print_order:
        if field in cfg:
            queue.append(field)
        else:
            log.warning(
                f"Field '{field}' not found in config. Skipping '{field}' config printing..."
            )

    # add all the other fields to queue (not specified in `print_order`)
    for field in cfg:
        if field not in queue:
            queue.append(field)

    # generate config tree from queue
    for field in queue:
        branch = tree.add(field, style=style, guide_style=style)

        config_group = cfg[field]
        if isinstance(config_group, DictConfig):
            branch_content = OmegaConf.to_yaml(config_group, resolve=resolve)
        else:
            branch_content = str(config_group)

        branch.add(rich.syntax.Syntax(branch_content, "yaml"))

    # print config tree
    rich.print(tree)

    # save config tree to file
    if save_to_file:
        with open(Path(cfg.paths.output_dir, "config_tree.log"), "w") as file:
            rich.print(tree, file=file)
|
||||||
|
|
||||||
|
|
||||||
|
@rank_zero_only
def enforce_tags(cfg: DictConfig, save_to_file: bool = False) -> None:
    """Prompts user to input tags from command line if no tags are provided in config.

    :param cfg: A DictConfig composed by Hydra.
    :param save_to_file: Whether to export tags to the hydra output folder. Default is ``False``.
    """
    if not cfg.get("tags"):
        # Tags are mandatory for multiruns, where interactive prompting is not possible
        if "id" in HydraConfig().cfg.hydra.job:
            raise ValueError("Specify tags before launching a multirun!")

        log.warning("No tags provided in config. Prompting user to input tags...")
        tags = Prompt.ask("Enter a list of comma separated tags", default="dev")
        # Strip BEFORE filtering, so whitespace-only tokens (e.g. "a, ,b") are
        # dropped instead of becoming empty-string tags
        tags = [t.strip() for t in tags.split(",") if t.strip()]

        with open_dict(cfg):
            cfg.tags = tags

        log.info(f"Tags: {cfg.tags}")

        if save_to_file:
            with open(Path(cfg.paths.output_dir, "tags.log"), "w") as file:
                rich.print(cfg.tags, file=file)
|
||||||
427
flowdock/utils/sampling_utils.py
Normal file
427
flowdock/utils/sampling_utils.py
Normal file
@@ -0,0 +1,427 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import rootutils
|
||||||
|
import torch
|
||||||
|
from beartype.typing import Any, Dict, List, Optional, Tuple
|
||||||
|
from lightning import LightningModule
|
||||||
|
from omegaconf import DictConfig
|
||||||
|
from rdkit import Chem
|
||||||
|
from rdkit.Chem import AllChem
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.data.components.mol_features import (
|
||||||
|
collate_numpy_samples,
|
||||||
|
process_mol_file,
|
||||||
|
)
|
||||||
|
from flowdock.utils import RankedLogger
|
||||||
|
from flowdock.utils.data_utils import (
|
||||||
|
FDProtein,
|
||||||
|
merge_protein_and_ligands,
|
||||||
|
pdb_filepath_to_protein,
|
||||||
|
prepare_batch,
|
||||||
|
process_protein,
|
||||||
|
)
|
||||||
|
from flowdock.utils.model_utils import inplace_to_device, inplace_to_torch, segment_mean
|
||||||
|
from flowdock.utils.visualization_utils import (
|
||||||
|
write_conformer_sdf,
|
||||||
|
write_pdb_models,
|
||||||
|
write_pdb_single,
|
||||||
|
)
|
||||||
|
|
||||||
|
log = RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
def featurize_protein_and_ligands(
    rec_path: str,
    lig_paths: List[str],
    n_lig_patches: int,
    apo_rec_path: Optional[str] = None,
    chain_id: Optional[str] = None,
    protein: Optional[FDProtein] = None,
    sequences_to_embeddings: Optional[Dict[str, np.ndarray]] = None,
    enforce_sanitization: bool = False,
    discard_sdf_coords: bool = False,
    **kwargs: Any,
) -> Tuple[Any, Optional[Chem.Mol]]:
    """Featurize a protein-ligand complex.

    :param rec_path: Path to the receptor file.
    :param lig_paths: List of paths to the ligand files.
    :param n_lig_patches: Number of ligand patches.
    :param apo_rec_path: Path to the apo receptor file.
    :param chain_id: Chain ID of the receptor.
    :param protein: Optional protein object.
    :param sequences_to_embeddings: Mapping of sequences to embeddings.
    :param enforce_sanitization: Whether to enforce sanitization.
    :param discard_sdf_coords: Whether to discard SDF coordinates.
    :param kwargs: Additional keyword arguments.
    :return: Featurized protein-ligand complex.
    """
    assert rec_path is not None
    # Normalize `lig_paths` so the loop below can treat it uniformly as a list
    if lig_paths is None:
        lig_paths = []
    if isinstance(lig_paths, str):
        lig_paths = [lig_paths]
    # `out_mol` accumulates all ligand molecules into one combined RDKit Mol
    out_mol = None
    lig_samples = []
    for lig_path in lig_paths:
        try:
            lig_sample, mol_ref = process_mol_file(
                lig_path,
                sanitize=True,
                return_mol=True,
                discard_coords=discard_sdf_coords,
            )
        except Exception as e:
            # Best-effort fallback: retry without RDKit sanitization unless the
            # caller explicitly requires sanitized inputs
            if enforce_sanitization:
                raise
            log.warning(
                f"RDKit sanitization failed for ligand {lig_path} due to: {e}. Loading raw attributes."
            )
            lig_sample, mol_ref = process_mol_file(
                lig_path,
                sanitize=False,
                return_mol=True,
                discard_coords=discard_sdf_coords,
            )
        lig_samples.append(lig_sample)
        if out_mol is None:
            out_mol = mol_ref
        else:
            out_mol = AllChem.CombineMols(out_mol, mol_ref)
    # Parse the receptor lazily only if a pre-parsed protein was not supplied
    protein = protein if protein is not None else pdb_filepath_to_protein(rec_path)
    # When an apo receptor is given, attach sequence embeddings to the apo copy
    # instead of the holo receptor (the apo features below carry them)
    rec_sample = process_protein(
        protein,
        chain_id=chain_id,
        sequences_to_embeddings=None if apo_rec_path is not None else sequences_to_embeddings,
        **kwargs,
    )
    if apo_rec_path is not None:
        apo_protein = pdb_filepath_to_protein(apo_rec_path)
        apo_rec_sample = process_protein(
            apo_protein,
            chain_id=chain_id,
            sequences_to_embeddings=sequences_to_embeddings,
            **kwargs,
        )
        # Merge apo features into the holo sample under "apo_"-prefixed keys
        for key in rec_sample.keys():
            for subkey, value in apo_rec_sample[key].items():
                rec_sample[key]["apo_" + subkey] = value
    merged_sample = merge_protein_and_ligands(
        lig_samples,
        rec_sample,
        n_lig_patches=n_lig_patches,
        label=None,
    )
    return merged_sample, out_mol
|
||||||
|
|
||||||
|
|
||||||
|
def multi_pose_sampling(
    receptor_path: str,
    ligand_path: str,
    cfg: DictConfig,
    lit_module: LightningModule,
    out_path: str,
    save_pdb: bool = True,
    separate_pdb: bool = True,
    chain_id: Optional[str] = None,
    apo_receptor_path: Optional[str] = None,
    sample_id: Optional[str] = None,
    protein: Optional[FDProtein] = None,
    sequences_to_embeddings: Optional[Dict[str, np.ndarray]] = None,
    confidence: bool = True,
    affinity: bool = True,
    return_all_states: bool = False,
    auxiliary_estimation_only: bool = False,
    **kwargs: Any,
) -> Tuple[
    Optional[Chem.Mol],
    Optional[List[float]],
    Optional[List[float]],
    Optional[List[Any]],
    Optional[List[float]],
    Optional[Any],
    Optional[Any],
    Optional[np.ndarray],
    Optional[np.ndarray],
]:
    """Sample multiple poses of a protein-ligand complex.

    :param receptor_path: Path to the receptor file.
    :param ligand_path: Path to the ligand file.
    :param cfg: Config dictionary.
    :param lit_module: LightningModule instance.
    :param out_path: Path to save the output files.
    :param save_pdb: Whether to save PDB files.
    :param separate_pdb: Whether to save separate PDB files for each pose.
    :param chain_id: Chain ID of the receptor.
    :param apo_receptor_path: Path to the optional apo receptor file.
    :param sample_id: Optional sample ID.
    :param protein: Optional protein object.
    :param sequences_to_embeddings: Mapping of sequences to embeddings.
    :param confidence: Whether to estimate confidence scores.
    :param affinity: Whether to estimate affinity scores.
    :param return_all_states: Whether to return all states.
    :param auxiliary_estimation_only: Whether to only estimate auxiliary outputs (e.g., confidence,
        affinity) for the input (generated) samples (potentially derived from external sources).
    :param kwargs: Additional keyword arguments.
    :return: Reference molecule, protein plDDTs, ligand plDDTs, ligand fragment plDDTs, estimated
        binding affinities, structure trajectories, input batch, B-factors, and structure rankings.
    """
    if return_all_states and auxiliary_estimation_only:
        # NOTE: If auxiliary estimation is solely enabled, structure trajectory sampling will be disabled
        return_all_states = False
    # Accumulators across sampling chunks
    struct_res_all, lig_res_all = [], []
    plddt_all, plddt_lig_all, plddt_ligs_all, res_plddt_all = [], [], [], []
    affinity_all, ligs_affinity_all = [], []
    frames_all = []
    chunk_size = cfg.chunk_size
    # NOTE(review): if cfg.n_samples < chunk_size this loop never runs and
    # `mol`/`b_factors` below are undefined — confirm callers guarantee
    # n_samples >= chunk_size.
    for _ in range(cfg.n_samples // chunk_size):
        # Resample anchor node frames
        np_sample, mol = featurize_protein_and_ligands(
            receptor_path,
            ligand_path,
            n_lig_patches=lit_module.hparams.cfg.mol_encoder.n_patches,
            apo_rec_path=apo_receptor_path,
            chain_id=chain_id,
            protein=protein,
            sequences_to_embeddings=sequences_to_embeddings,
            discard_sdf_coords=cfg.discard_sdf_coords and not auxiliary_estimation_only,
            **kwargs,
        )
        # Replicate the featurized sample `chunk_size` times to sample poses in parallel
        np_sample_batched = collate_numpy_samples([np_sample for _ in range(chunk_size)])
        sample = inplace_to_device(inplace_to_torch(np_sample_batched), device=lit_module.device)
        prepare_batch(sample)
        if auxiliary_estimation_only:
            # Predict auxiliary quantities using the provided input protein and ligand structures
            if "num_molid" in sample["metadata"].keys() and sample["metadata"]["num_molid"] > 0:
                sample["misc"]["protein_only"] = False
            else:
                sample["misc"]["protein_only"] = True
            # Use the input coordinates directly instead of sampling new ones
            output_struct = {
                "receptor": sample["features"]["res_atom_positions"].flatten(0, 1),
                "receptor_padded": sample["features"]["res_atom_positions"],
                "ligands": sample["features"]["sdf_coordinates"],
            }
        else:
            output_struct = lit_module.net.sample_pl_complex_structures(
                sample,
                sampler=cfg.sampler,
                sampler_eta=cfg.sampler_eta,
                num_steps=cfg.num_steps,
                return_all_states=return_all_states,
                start_time=cfg.start_time,
                exact_prior=cfg.exact_prior,
            )
        frames_all.append(output_struct.get("all_frames", None))
        if mol is not None:
            ref_mol = AllChem.Mol(mol)
            # Split batched ligand coordinates back into per-pose arrays
            out_x1 = np.split(output_struct["ligands"].cpu().numpy(), cfg.chunk_size)
        out_x2 = np.split(output_struct["receptor_padded"].cpu().numpy(), cfg.chunk_size)
        if confidence and affinity:
            assert (
                lit_module.net.confidence_cfg.enabled
            ), "Confidence estimation must be enabled in the model configuration."
            assert (
                lit_module.net.affinity_cfg.enabled
            ), "Affinity estimation must be enabled in the model configuration."
            plddt, plddt_lig, plddt_ligs = lit_module.net.run_auxiliary_estimation(
                sample,
                output_struct,
                return_avg_stats=True,
                training=False,
            )
            # NOTE(review): unlike the affinity-only branch below, `aff` is not
            # moved to CPU here — confirm this asymmetry is intentional.
            aff = sample["outputs"]["affinity_logits"]
        elif confidence:
            assert (
                lit_module.net.confidence_cfg.enabled
            ), "Confidence estimation must be enabled in the model configuration."
            plddt, plddt_lig, plddt_ligs = lit_module.net.run_auxiliary_estimation(
                sample,
                output_struct,
                return_avg_stats=True,
                training=False,
            )
        elif affinity:
            assert (
                lit_module.net.affinity_cfg.enabled
            ), "Affinity estimation must be enabled in the model configuration."
            lit_module.net.run_auxiliary_estimation(
                sample, output_struct, return_avg_stats=True, training=False
            )
            plddt, plddt_lig = None, None
            aff = sample["outputs"]["affinity_logits"].cpu()

        # Map each molecule ID to the structure (pose) index it belongs to
        mol_idx_i_structid = segment_mean(
            sample["indexer"]["gather_idx_i_structid"],
            sample["indexer"]["gather_idx_i_molid"],
            sample["metadata"]["num_molid"],
        ).long()
        for struct_idx in range(cfg.chunk_size):
            # Assemble an AlphaFold-style result dict for downstream PDB writing
            struct_res = {
                "features": {
                    "asym_id": np_sample["features"]["res_chain_id"],
                    "residue_index": np.arange(len(np_sample["features"]["res_type"])) + 1,
                    "aatype": np_sample["features"]["res_type"],
                },
                "structure_module": {
                    "final_atom_positions": out_x2[struct_idx],
                    "final_atom_mask": sample["features"]["res_atom_mask"].bool().cpu().numpy(),
                },
            }
            struct_res_all.append(struct_res)
            if mol is not None:
                lig_res_all.append(out_x1[struct_idx])
            if confidence:
                plddt_all.append(plddt[struct_idx].item())
                res_plddt_all.append(
                    sample["outputs"]["plddt"][
                        struct_idx, : sample["metadata"]["num_a_per_sample"][0]
                    ]
                    .cpu()
                    .numpy()
                )
                if plddt_lig is None:
                    plddt_lig_all.append(None)
                else:
                    plddt_lig_all.append(plddt_lig[struct_idx].item())
                if plddt_ligs is None:
                    plddt_ligs_all.append(None)
                else:
                    plddt_ligs_all.append(plddt_ligs[mol_idx_i_structid == struct_idx].tolist())
            if affinity:
                # collect the average affinity across all ligands in each complex
                ligs_aff = aff[mol_idx_i_structid == struct_idx]
                affinity_all.append(ligs_aff.mean().item())
                ligs_affinity_all.append(ligs_aff.tolist())
    if confidence and cfg.rank_outputs_by_confidence:
        # NOTE(review): `all(...)` treats a ligand plDDT of exactly 0.0 the same
        # as a missing (None) entry — confirm that is acceptable here.
        plddt_lig_predicted = all(plddt_lig_all)
        if cfg.plddt_ranking_type == "protein":
            struct_plddts = np.array(plddt_all)  # rank outputs using average protein plDDT
        elif cfg.plddt_ranking_type == "ligand":
            struct_plddts = np.array(
                plddt_lig_all if plddt_lig_predicted else plddt_all
            )  # rank outputs using average ligand plDDT if available
            if not plddt_lig_predicted:
                log.warning(
                    "Ligand plDDT not available for all samples, using protein plDDT instead"
                )
        elif cfg.plddt_ranking_type == "protein_ligand":
            struct_plddts = np.array(
                plddt_all + plddt_lig_all if plddt_lig_predicted else plddt_all
            )  # rank outputs using the sum of the average protein and ligand plDDTs if ligand plDDT is available
            if not plddt_lig_predicted:
                log.warning(
                    "Ligand plDDT not available for all samples, using protein plDDT instead"
                )
        struct_plddt_rankings = np.argsort(
            -struct_plddts
        ).argsort()  # ensure that higher plDDTs have a higher rank (e.g., `rank1`)
    receptor_plddt = np.array(res_plddt_all) if confidence else None
    # Broadcast per-residue plDDT over the atom axis to serve as PDB B-factors
    b_factors = (
        np.repeat(
            receptor_plddt[..., None],
            struct_res_all[0]["structure_module"]["final_atom_mask"].shape[-1],
            axis=-1,
        )
        if confidence
        else None
    )
    if save_pdb:
        if separate_pdb:
            for struct_id, struct_res in enumerate(struct_res_all):
                if confidence and cfg.rank_outputs_by_confidence:
                    # File name encodes rank, plDDT and (optionally) affinity
                    write_pdb_single(
                        struct_res,
                        out_path=os.path.join(
                            out_path,
                            f"prot_rank{struct_plddt_rankings[struct_id] + 1}_plddt{struct_plddts[struct_id]:.7f}{f'_affinity{affinity_all[struct_id]:.7f}' if affinity else ''}.pdb",
                        ),
                        b_factors=b_factors[struct_id] if confidence else None,
                    )
                else:
                    write_pdb_single(
                        struct_res,
                        out_path=os.path.join(
                            out_path,
                            f"prot_{struct_id}{f'_affinity{affinity_all[struct_id]:.7f}' if affinity else ''}.pdb",
                        ),
                        b_factors=b_factors[struct_id] if confidence else None,
                    )
        # Always also write a single multi-model PDB with every pose
        write_pdb_models(
            struct_res_all, out_path=os.path.join(out_path, "prot_all.pdb"), b_factors=b_factors
        )
    if mol is not None:
        write_conformer_sdf(ref_mol, None, out_path=os.path.join(out_path, "lig_ref.sdf"))
        lig_res_all = np.array(lig_res_all)
        write_conformer_sdf(mol, lig_res_all, out_path=os.path.join(out_path, "lig_all.sdf"))
        for struct_id in range(len(lig_res_all)):
            if confidence and cfg.rank_outputs_by_confidence:
                write_conformer_sdf(
                    mol,
                    lig_res_all[struct_id : struct_id + 1],
                    out_path=os.path.join(
                        out_path,
                        f"lig_rank{struct_plddt_rankings[struct_id] + 1}_plddt{struct_plddts[struct_id]:.7f}{f'_affinity{affinity_all[struct_id]:.7f}' if affinity else ''}.sdf",
                    ),
                )
            else:
                write_conformer_sdf(
                    mol,
                    lig_res_all[struct_id : struct_id + 1],
                    out_path=os.path.join(
                        out_path,
                        f"lig_{struct_id}{f'_affinity{affinity_all[struct_id]:.7f}' if affinity else ''}.sdf",
                    ),
                )
        if confidence:
            # Persist per-pose auxiliary estimates (plDDT + affinity) as a CSV
            aux_estimation_all_df = pd.DataFrame(
                {
                    "sample_id": [sample_id] * len(struct_res_all),
                    "rank": struct_plddt_rankings + 1 if cfg.rank_outputs_by_confidence else None,
                    "plddt_ligs": plddt_ligs_all,
                    "affinity_ligs": ligs_affinity_all,
                }
            )
            aux_estimation_all_df.to_csv(
                os.path.join(out_path, f"{sample_id if sample_id is not None else 'sample'}_auxiliary_estimation.csv"), index=False
            )
    else:
        ref_mol = None
    # Normalize disabled outputs to None for the return tuple
    if not confidence:
        plddt_all, plddt_lig_all, plddt_ligs_all = None, None, None
    if not affinity:
        affinity_all = None
    if return_all_states:
        if mol is not None:
            np_sample["metadata"]["sample_ID"] = sample_id if sample_id is not None else "sample"
            np_sample["metadata"]["mol"] = ref_mol
        batch_all = inplace_to_torch(
            collate_numpy_samples([np_sample for _ in range(cfg.n_samples)])
        )
        # Concatenate per-chunk trajectory frames along the batch dimension
        merge_frames_all = frames_all[0]
        for frames in frames_all[1:]:
            for frame_index, frame in enumerate(frames):
                for key in frame.keys():
                    merge_frames_all[frame_index][key] = torch.cat(
                        [merge_frames_all[frame_index][key], frame[key]], dim=0
                    )
        frames_all = merge_frames_all
    else:
        frames_all = None
        batch_all = None
    if not (confidence and cfg.rank_outputs_by_confidence):
        struct_plddt_rankings = None
    return (
        ref_mol,
        plddt_all,
        plddt_lig_all,
        plddt_ligs_all,
        affinity_all,
        frames_all,
        batch_all,
        b_factors,
        struct_plddt_rankings,
    )
|
||||||
153
flowdock/utils/utils.py
Normal file
153
flowdock/utils/utils.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
import warnings
|
||||||
|
from importlib.util import find_spec
|
||||||
|
|
||||||
|
import rootutils
|
||||||
|
from beartype.typing import Any, Callable, Dict, List, Optional, Tuple
|
||||||
|
from omegaconf import DictConfig
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.utils import pylogger, rich_utils
|
||||||
|
|
||||||
|
log = pylogger.RankedLogger(__name__, rank_zero_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
def extras(cfg: DictConfig) -> None:
    """Applies optional utilities before the task is started.

    Utilities:
    - Ignoring python warnings
    - Setting tags from command line
    - Rich config printing

    :param cfg: A DictConfig object containing the config tree.
    """
    # bail out early when no `extras` config is present
    extras_cfg = cfg.get("extras")
    if not extras_cfg:
        log.warning("Extras config not found! <cfg.extras=null>")
        return

    # optionally disable python warnings
    if extras_cfg.get("ignore_warnings"):
        log.info("Disabling python warnings! <cfg.extras.ignore_warnings=True>")
        warnings.filterwarnings("ignore")

    # optionally prompt user for tags when none were provided in the config
    if extras_cfg.get("enforce_tags"):
        log.info("Enforcing tags! <cfg.extras.enforce_tags=True>")
        rich_utils.enforce_tags(cfg, save_to_file=True)

    # optionally pretty print the config tree using the Rich library
    if extras_cfg.get("print_config"):
        log.info("Printing config tree with Rich! <cfg.extras.print_config=True>")
        rich_utils.print_config_tree(cfg, resolve=True, save_to_file=True)
|
||||||
|
|
||||||
|
|
||||||
|
def task_wrapper(task_func: Callable) -> Callable:
|
||||||
|
"""Optional decorator that controls the failure behavior when executing the task function.
|
||||||
|
|
||||||
|
This wrapper can be used to:
|
||||||
|
- make sure loggers are closed even if the task function raises an exception (prevents multirun failure)
|
||||||
|
- save the exception to a `.log` file
|
||||||
|
- mark the run as failed with a dedicated file in the `logs/` folder (so we can find and rerun it later)
|
||||||
|
- etc. (adjust depending on your needs)
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```
|
||||||
|
@utils.task_wrapper
|
||||||
|
def train(cfg: DictConfig) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||||||
|
...
|
||||||
|
return metric_dict, object_dict
|
||||||
|
```
|
||||||
|
|
||||||
|
:param task_func: The task function to be wrapped.
|
||||||
|
|
||||||
|
:return: The wrapped task function.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def wrap(cfg: DictConfig) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||||||
|
# execute the task
|
||||||
|
try:
|
||||||
|
metric_dict, object_dict = task_func(cfg=cfg)
|
||||||
|
|
||||||
|
# things to do if exception occurs
|
||||||
|
except Exception as ex:
|
||||||
|
# save exception to `.log` file
|
||||||
|
log.exception("")
|
||||||
|
|
||||||
|
# some hyperparameter combinations might be invalid or cause out-of-memory errors
|
||||||
|
# so when using hparam search plugins like Optuna, you might want to disable
|
||||||
|
# raising the below exception to avoid multirun failure
|
||||||
|
raise ex
|
||||||
|
|
||||||
|
# things to always do after either success or exception
|
||||||
|
finally:
|
||||||
|
# display output dir path in terminal
|
||||||
|
log.info(f"Output dir: {cfg.paths.output_dir}")
|
||||||
|
|
||||||
|
# always close wandb run (even if exception occurs so multirun won't fail)
|
||||||
|
if find_spec("wandb"): # check if wandb is installed
|
||||||
|
import wandb
|
||||||
|
|
||||||
|
if wandb.run:
|
||||||
|
log.info("Closing wandb!")
|
||||||
|
wandb.finish()
|
||||||
|
|
||||||
|
return metric_dict, object_dict
|
||||||
|
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
|
||||||
|
def get_metric_value(metric_dict: Dict[str, Any], metric_name: Optional[str]) -> Optional[float]:
    """Safely retrieves value of the metric logged in LightningModule.

    :param metric_dict: A dict containing metric values.
    :param metric_name: If provided, the name of the metric to retrieve.
    :return: If a metric name was provided, the value of the metric; otherwise ``None``.
    :raises KeyError: If `metric_name` is not a key of `metric_dict`.
    """
    if not metric_name:
        log.info("Metric name is None! Skipping metric value retrieval...")
        return None

    if metric_name not in metric_dict:
        # NOTE: raise a lookup-specific exception instead of a bare `Exception`;
        # callers that catch `Exception` still work since `KeyError` subclasses it
        raise KeyError(
            f"Metric value not found! <metric_name={metric_name}>\n"
            "Make sure metric name logged in LightningModule is correct!\n"
            "Make sure `optimized_metric` name in `hparams_search` config is correct!"
        )

    # `.item()` converts a zero-dim tensor/array metric to a plain Python float
    metric_value = metric_dict[metric_name].item()
    log.info(f"Retrieved metric value! <{metric_name}={metric_value}>")

    return metric_value
|
||||||
|
|
||||||
|
|
||||||
|
def read_strings_from_txt(path: str) -> List[str]:
    """Reads strings from a text file and returns them as a list.

    :param path: Path to the text file.
    :return: List of strings, one per line, with trailing whitespace removed.
    """
    # NOTE: iterating the file object yields one line per element
    with open(path) as txt_file:
        return [raw_line.rstrip() for raw_line in txt_file]
|
||||||
|
|
||||||
|
|
||||||
|
def fasta_to_dict(filename: str) -> Dict[str, str]:
    """Converts a FASTA file to a dictionary where the keys are the sequence IDs and the values are
    the sequences.

    :param filename: Path to the FASTA file.
    :return: Dictionary with sequence IDs as keys and sequences as values.
    :raises ValueError: If sequence data appears before the first `>` header line.
    """
    fasta_dict: Dict[str, str] = {}
    seq_id: Optional[str] = None  # ID of the record currently being accumulated
    with open(filename) as file:
        for line in file:
            line = line.rstrip()  # remove trailing whitespace
            if line.startswith(">"):  # identifier line
                seq_id = line[1:]  # remove the '>' character
                fasta_dict[seq_id] = ""
            elif line:  # sequence line (blank lines are ignored)
                if seq_id is None:
                    # NOTE: previously this raised an opaque `NameError`; fail loudly instead
                    raise ValueError(
                        f"Sequence data found before any FASTA header in {filename}"
                    )
                fasta_dict[seq_id] += line
    return fasta_dict
|
||||||
365
flowdock/utils/visualization_utils.py
Normal file
365
flowdock/utils/visualization_utils.py
Normal file
@@ -0,0 +1,365 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import rootutils
|
||||||
|
from beartype import beartype
|
||||||
|
from beartype.typing import Any, Dict, List, Mapping, Optional, Tuple, Union
|
||||||
|
from openfold.np.protein import Protein as OFProtein
|
||||||
|
from rdkit import Chem
|
||||||
|
from rdkit.Geometry.rdGeometry import Point3D
|
||||||
|
|
||||||
|
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
||||||
|
|
||||||
|
from flowdock.data.components import residue_constants
|
||||||
|
from flowdock.utils.data_utils import (
|
||||||
|
PDB_CHAIN_IDS,
|
||||||
|
PDB_MAX_CHAINS,
|
||||||
|
FDProtein,
|
||||||
|
create_full_prot,
|
||||||
|
get_mol_with_new_conformer_coords,
|
||||||
|
)
|
||||||
|
|
||||||
|
FeatureDict = Mapping[str, np.ndarray]
|
||||||
|
ModelOutput = Mapping[str, Any] # Is a nested dict.
|
||||||
|
PROT_LIG_PAIRS = List[Tuple[OFProtein, Tuple[Chem.Mol, ...]]]
|
||||||
|
|
||||||
|
|
||||||
|
@beartype
def _chain_end(
    atom_index: Union[int, np.int64],
    end_resname: str,
    chain_name: str,
    residue_index: Union[int, np.int64],
) -> str:
    """Returns a PDB `TER` record for the end of a chain.

    Adapted from: https://github.com/jasonkyuyim/se3_diffusion

    :param atom_index: The index of the last atom in the chain.
    :param end_resname: The residue name of the last residue in the chain.
    :param chain_name: The chain name of the last residue in the chain.
    :param residue_index: The residue index of the last residue in the chain.
    :return: A PDB `TER` record.
    """
    record_name = "TER"
    # PDB is columnar: record name, serial, residue name, chain ID, residue number
    ter_record = (
        f"{record_name:<6}{atom_index:>5} {end_resname:>3} "
        f"{chain_name:>1}{residue_index:>4}"
    )
    return ter_record
|
||||||
|
|
||||||
|
|
||||||
|
@beartype
def res_1to3(restypes: List[str], r: Union[int, np.int64]) -> str:
    """Convert a residue type from 1-letter to 3-letter code.

    :param restypes: List of residue types.
    :param r: Residue type index.
    :return: 3-letter code as a string, or `"UNK"` if the 1-letter code is unknown.
    """
    one_letter_code = restypes[r]
    return residue_constants.restype_1to3.get(one_letter_code, "UNK")
|
||||||
|
|
||||||
|
|
||||||
|
@beartype
def to_pdb(
    prot: Union[OFProtein, FDProtein],
    model: int = 1,
    add_end: bool = True,
    add_endmdl: bool = True,
) -> str:
    """Converts a `Protein` instance to a PDB string.

    Adapted from: https://github.com/jasonkyuyim/se3_diffusion

    :param prot: The protein to convert to PDB.
    :param model: The model number to use.
    :param add_end: Whether to add an `END` record.
    :param add_endmdl: Whether to add an `ENDMDL` record.
    :return: PDB string.
    """
    restypes = residue_constants.restypes + ["X"]  # NOTE: "X" denotes an unknown residue type
    atom_types = residue_constants.atom_types

    pdb_lines = []

    # unpack the per-residue arrays held by the protein object
    atom_mask = prot.atom_mask
    aatype = prot.aatype
    atom_positions = prot.atom_positions
    residue_index = prot.residue_index.astype(int)
    chain_index = prot.chain_index.astype(int)
    b_factors = prot.b_factors

    # indices beyond `restype_num` would fall outside the `restypes` lookup table
    if np.any(aatype > residue_constants.restype_num):
        raise ValueError("Invalid aatypes.")

    # construct a mapping from chain integer indices to chain ID strings
    chain_ids = {}
    for i in np.unique(chain_index):  # NOTE: `np.unique` gives sorted output
        if i >= PDB_MAX_CHAINS:
            raise ValueError(f"The PDB format supports at most {PDB_MAX_CHAINS} chains.")
        chain_ids[i] = PDB_CHAIN_IDS[i]

    pdb_lines.append(f"MODEL {model}")
    atom_index = 1  # PDB atom serial numbers start at 1
    last_chain_index = chain_index[0]
    # add all atom sites
    for i in range(aatype.shape[0]):
        # close the previous chain if in a multichain PDB
        if last_chain_index != chain_index[i]:
            pdb_lines.append(
                _chain_end(
                    atom_index,
                    res_1to3(restypes, aatype[i - 1]),
                    chain_ids[chain_index[i - 1]],
                    residue_index[i - 1],
                )
            )
            last_chain_index = chain_index[i]
            atom_index += 1  # NOTE: atom index increases at the `TER` symbol

        res_name_3 = res_1to3(restypes, aatype[i])
        for atom_name, pos, mask, b_factor in zip(
            atom_types, atom_positions[i], atom_mask[i], b_factors[i]
        ):
            # skip atoms that are masked out (absent) for this residue
            if mask < 0.5:
                continue

            record_type = "ATOM"
            # four-character atom names start one column earlier than shorter ones
            name = atom_name if len(atom_name) == 4 else f" {atom_name}"
            alt_loc = ""
            insertion_code = ""
            occupancy = 1.00
            element = atom_name[0]  # NOTE: `Protein` supports only C, N, O, S, this works
            charge = ""
            # NOTE: PDB is a columnar format, every space matters here!
            # NOTE(review): verify the literal space runs in this template against the
            # PDB column spec - whitespace may have been collapsed upstream
            atom_line = (
                f"{record_type:<6}{atom_index:>5} {name:<4}{alt_loc:>1}"
                f"{res_name_3:>3} {chain_ids[chain_index[i]]:>1}"
                f"{residue_index[i]:>4}{insertion_code:>1} "
                f"{pos[0]:>8.3f}{pos[1]:>8.3f}{pos[2]:>8.3f}"
                f"{occupancy:>6.2f}{b_factor:>6.2f} "
                f"{element:>2}{charge:>2}"
            )
            pdb_lines.append(atom_line)
            atom_index += 1

    # close the final chain
    pdb_lines.append(
        _chain_end(
            atom_index,
            res_1to3(restypes, aatype[-1]),
            chain_ids[chain_index[-1]],
            residue_index[-1],
        )
    )
    if add_endmdl:
        pdb_lines.append("ENDMDL")
    if add_end:
        pdb_lines.append("END")

    # pad all lines to 80 characters
    pdb_lines = [line.ljust(80) for line in pdb_lines]
    return "\n".join(pdb_lines) + "\n"  # add terminating newline
|
||||||
|
|
||||||
|
|
||||||
|
@beartype
def construct_prot_lig_pairs(outputs: Dict[str, Any], batch_index: int) -> PROT_LIG_PAIRS:
    """Construct protein-ligand pairs from model outputs.

    Builds one (protein, ligand fragments) pair per entry of
    `outputs["protein_coordinates_list"]` for the batch element selected by
    `batch_index`. If ground-truth coordinates are present in `outputs`, one
    additional pair holding the ground-truth structure is appended last.

    :param outputs: The model outputs.
    :param batch_index: The index of the current batch.
    :return: A list of protein-ligand object pairs.
    """
    # per-row indexers mapping each protein residue / ligand atom to its batch element
    protein_batch_indexer = outputs["protein_batch_indexer"]
    ligand_batch_indexer = outputs["ligand_batch_indexer"]

    # select this batch element's residues and broadcast the per-atom mask to
    # coordinate shape (num_residues, 37, 3)
    # NOTE(review): 37 presumably corresponds to the atom37 representation - confirm
    protein_all_atom_mask = outputs["res_atom_mask"][protein_batch_indexer == batch_index]
    protein_all_atom_coordinates_mask = np.broadcast_to(
        np.expand_dims(protein_all_atom_mask, -1), (protein_all_atom_mask.shape[0], 37, 3)
    )
    protein_aatype = outputs["aatype"][protein_batch_indexer == batch_index]

    # assemble predicted structures
    prot_lig_pairs = []
    for protein_coordinates, ligand_coordinates in zip(
        outputs["protein_coordinates_list"], outputs["ligand_coordinates_list"]
    ):
        # zero out the coordinates of atoms that are masked for this residue type
        protein_all_atom_coordinates = (
            protein_coordinates[protein_batch_indexer == batch_index]
            * protein_all_atom_coordinates_mask
        )
        protein = create_full_prot(
            protein_all_atom_coordinates,
            protein_all_atom_mask,
            protein_aatype,
            b_factors=outputs["b_factors"][batch_index] if "b_factors" in outputs else None,
        )
        ligand = get_mol_with_new_conformer_coords(
            outputs["ligand_mol"][batch_index],
            ligand_coordinates[ligand_batch_indexer == batch_index],
        )
        # split a possibly multi-fragment ligand into separate molecules
        ligands = tuple(Chem.GetMolFrags(ligand, asMols=True, sanitizeFrags=False))
        prot_lig_pairs.append((protein, ligands))

    # assemble ground-truth structures
    if "gt_protein_coordinates" in outputs and "gt_ligand_coordinates" in outputs:
        protein_gt_all_atom_coordinates = (
            outputs["gt_protein_coordinates"][protein_batch_indexer == batch_index]
            * protein_all_atom_coordinates_mask
        )
        gt_protein = create_full_prot(
            protein_gt_all_atom_coordinates,
            protein_all_atom_mask,
            protein_aatype,
        )
        gt_ligand = get_mol_with_new_conformer_coords(
            outputs["ligand_mol"][batch_index],
            outputs["gt_ligand_coordinates"][ligand_batch_indexer == batch_index],
        )
        gt_ligands = tuple(Chem.GetMolFrags(gt_ligand, asMols=True, sanitizeFrags=False))
        prot_lig_pairs.append((gt_protein, gt_ligands))

    return prot_lig_pairs
|
||||||
|
|
||||||
|
|
||||||
|
@beartype
def write_prot_lig_pairs_to_pdb_file(prot_lig_pairs: PROT_LIG_PAIRS, output_filepath: str):
    """Write a list of protein-ligand pairs to a PDB file.

    Each pair is written as one PDB `MODEL`, with the protein first and each
    ligand fragment following as a `TER`-separated chain.

    :param prot_lig_pairs: List of protein-ligand object pairs, where each ligand may consist of
        multiple ligand chains.
    :param output_filepath: Output file path.
    """
    output_dir = os.path.dirname(output_filepath)
    # NOTE: `dirname` is empty for a bare filename; `os.makedirs("")` would raise
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_filepath, "w") as f:
        for model_id, (prot, lig_mols) in enumerate(prot_lig_pairs, start=1):
            f.write(to_pdb(prot, model=model_id, add_end=False, add_endmdl=False))
            for lig_mol in lig_mols:
                # replace the block-terminating `END` with `TER` to
                # enable proper ligand chain separation
                f.write(Chem.MolToPDBBlock(lig_mol).replace("END\n", "TER\n"))
            f.write("END\n")
            f.write("ENDMDL\n")  # add `ENDMDL` line to separate models
|
||||||
|
|
||||||
|
|
||||||
|
def from_prediction(
    features: FeatureDict,
    result: ModelOutput,
    b_factors: Optional[np.ndarray] = None,
    remove_leading_feature_dimension: bool = False,
) -> FDProtein:
    """Assembles a protein from a prediction.

    :param features: Dictionary holding model inputs.
    :param result: Dictionary holding model outputs.
    :param b_factors: (Optional) B-factors to use for the protein.
    :param remove_leading_feature_dimension: Whether to remove the leading dimension
        of the `features` values.
    :return: A protein instance.
    """
    fold_output = result["structure_module"]

    def _squeeze(arr: np.ndarray) -> np.ndarray:
        # strip the leading (e.g., batch) dimension only when requested
        return arr[0] if remove_leading_feature_dimension else arr

    # default to a single chain when no asymmetric unit IDs are provided
    chain_index = (
        _squeeze(features["asym_id"])
        if "asym_id" in features
        else np.zeros_like(_squeeze(features["aatype"]))
    )

    # default to all-zero B-factors matching the atom mask's shape
    if b_factors is None:
        b_factors = np.zeros_like(fold_output["final_atom_mask"])

    return FDProtein(
        letter_sequences=None,
        aatype=_squeeze(features["aatype"]),
        atom_positions=fold_output["final_atom_positions"],
        atom_mask=fold_output["final_atom_mask"],
        residue_index=_squeeze(features["residue_index"]),
        chain_index=chain_index,
        b_factors=b_factors,
        atomtypes=None,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def write_pdb_single(
    result: ModelOutput,
    out_path: str = os.path.join("test_results", "debug.pdb"),
    model: int = 1,
    b_factors: Optional[np.ndarray] = None,
):
    """Write a single model to a PDB file.

    :param result: Model results batch; must contain a `"features"` key.
    :param out_path: Output path.
    :param model: Model ID.
    :param b_factors: Optional B-factors.
    """
    out_dir = os.path.dirname(out_path)
    # NOTE: `dirname` is empty for a bare filename; `os.makedirs("")` would raise
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    protein = from_prediction(result["features"], result, b_factors=b_factors)
    out_string = to_pdb(protein, model=model)
    with open(out_path, "w") as of:
        of.write(out_string)
|
||||||
|
|
||||||
|
|
||||||
|
def write_pdb_models(
    results,
    out_path: str = os.path.join("test_results", "debug.pdb"),
    b_factors: Optional[np.ndarray] = None,
):
    """Write multiple models to a single multi-model PDB file.

    :param results: Iterable of model result batches (each must contain a `"features"` key).
    :param out_path: Output path.
    :param b_factors: Optional B-factors, indexed per model.
    """
    out_dir = os.path.dirname(out_path)
    # NOTE: `dirname` is empty for a bare filename; `os.makedirs("")` would raise
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w") as of:
        for mid, result in enumerate(results):
            protein = from_prediction(
                result["features"],
                result,
                b_factors=b_factors[mid] if b_factors is not None else None,
            )
            # suppress the per-model `END` record; a single `END` belongs at file end only
            out_string = to_pdb(protein, model=mid + 1, add_end=False)
            of.write(out_string)
        of.write("END".ljust(80) + "\n")  # terminate the file with one padded `END` record
|
||||||
|
|
||||||
|
|
||||||
|
def write_conformer_sdf(
    mol: Chem.Mol,
    confs: Optional[np.ndarray] = None,
    out_path: str = os.path.join("test_results", "debug.sdf"),
):
    """Write a molecule with conformers to an SDF file.

    NOTE: when `confs` is provided, `mol` is modified in place - its existing
    conformers are replaced with those built from `confs`.

    :param mol: RDKit molecule.
    :param confs: Optional conformer coordinates of shape `(num_confs, num_atoms, 3)`.
    :param out_path: Output path.
    :return: 0 on success (kept for backwards compatibility with existing callers).
    """
    out_dir = os.path.dirname(out_path)
    # NOTE: `dirname` is empty for a bare filename; `os.makedirs("")` would raise
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    if confs is None:
        w = Chem.SDWriter(out_path)
        w.write(mol)
        w.close()
        return 0

    # install the provided conformers on the molecule
    mol.RemoveAllConformers()
    num_atoms = mol.GetNumAtoms()
    for i in range(len(confs)):
        conf = Chem.Conformer(num_atoms)
        for j in range(num_atoms):
            x, y, z = confs[i, j].tolist()
            conf.SetAtomPosition(j, Point3D(x, y, z))
        mol.AddConformer(conf, assignId=True)

    w = Chem.SDWriter(out_path)
    try:
        for cid in range(len(confs)):
            w.write(mol, confId=cid)
    except Exception:
        # some molecules cannot be kekulized; restart with a *fresh* writer so
        # conformers written before the failure are not duplicated in the output
        w.close()
        w = Chem.SDWriter(out_path)
        w.SetKekulize(False)
        for cid in range(len(confs)):
            w.write(mol, confId=cid)
    finally:
        w.close()
    return 0
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user