Files
digital-patients/Dockerfile_borzoi
Olamide Isreal 9e6a16c19b Initial commit: digital-patients pipeline (clean, no large files)
Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
2026-03-26 15:15:23 +01:00

93 lines
3.5 KiB
Plaintext

# Base image: CUDA 12.2.2 runtime with cuDNN 8 on Ubuntu 22.04.
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
# The package installs and source builds below are run as root.
USER root
# Run every RUN step under bash with pipefail enabled, so that a failed
# download in a `wget ... | gunzip` pipeline fails the build instead of being
# masked by the exit status of the last command in the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# WORKDIR creates the directory when it does not exist, so no separate
# `mkdir -p` step is required.
WORKDIR /home/omic
# Build-time only: keeps apt from prompting. As an ARG it is not persisted into
# the runtime environment of the image.
ARG DEBIAN_FRONTEND=noninteractive
# System packages: compiler toolchain, download tools, TLS root certificates
# and the CUDA 12.2 toolkit. The apt cache and package lists are removed in the
# same layer so they do not end up in the image.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        cuda-toolkit-12-2 \
        curl \
        git \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Install Miniconda into /opt/conda in batch mode (-b), register conda's shell
# hook for login shells and for root's ~/.bashrc, then strip static libraries,
# JS source maps and conda's package cache in the same layer to keep it small.
# NOTE(review): "Miniconda3-latest" is unpinned, so rebuilds can pick up a
# different conda/Python version; consider pinning a specific installer release.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh \
    && bash miniconda.sh -b -p /opt/conda \
    && rm miniconda.sh \
    && ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh \
    && echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc \
    && echo "conda activate base" >> ~/.bashrc \
    && find /opt/conda/ -follow -type f -name '*.a' -delete \
    && find /opt/conda/ -follow -type f -name '*.js.map' -delete \
    && /opt/conda/bin/conda clean -afy
# Put the base conda installation first on PATH for all later build steps and
# at container runtime (key=value form; the space-separated form is deprecated).
ENV PATH=/opt/conda/bin:$PATH
# main conda env (borzoi)
# -y answers the confirmation prompt explicitly so the non-interactive build
# never depends on how conda treats an empty stdin. The package cache is
# cleaned in the same layer; cleaning it in a later RUN would not shrink the
# layer that downloaded the packages.
# NOTE(review): recent Miniconda installers may require the default channels'
# Terms of Service to be accepted before `conda create` can run
# non-interactively — confirm against the installer version actually pulled.
RUN conda create -y -n borzoi python=3.9 \
    && conda clean --all -f -y
# The env's bin directory is appended, so /opt/conda/bin still takes precedence
# for names present in both; later steps call the env's python3 by full path.
ENV PATH="$PATH:/opt/conda/envs/borzoi/bin"
# Activate the borzoi env for shells that source root's ~/.bashrc.
RUN echo "source activate borzoi" >> ~/.bashrc
# Install borzoi
# Both repositories are cloned under /home/omic and installed in editable mode
# (-e) into the borzoi conda env, so the checkouts must stay in the image.
# --no-cache-dir keeps pip's download/wheel cache out of the image layers.
# NOTE(review): both clones track the default branch tip; pin a tag or commit
# if reproducible builds are required.
RUN git clone https://github.com/calico/baskerville.git
WORKDIR /home/omic/baskerville
RUN /opt/conda/envs/borzoi/bin/python3 -m pip install --no-cache-dir -e .
WORKDIR /home/omic
RUN git clone https://github.com/calico/borzoi.git
WORKDIR /home/omic/borzoi
RUN /opt/conda/envs/borzoi/bin/python3 -m pip install --no-cache-dir -e .
# Install pretrained models
# One model0_best.h5 per fold directory (f0-f3) under saved_models/.
# TLS certificate verification is left enabled: ca-certificates is installed
# earlier in this file, and skipping verification (--no-check-certificate)
# would let a tampered download be baked into the image unnoticed.
# Each download stays in its own layer so a failed or changed fold does not
# force the other model files to be fetched again.
WORKDIR /home/omic/borzoi/saved_models
RUN mkdir -p f0 f1 f2 f3
RUN wget -O /home/omic/borzoi/saved_models/f0/model0_best.h5 https://storage.googleapis.com/seqnn-share/borzoi/f0/model0_best.h5
RUN wget -O /home/omic/borzoi/saved_models/f1/model0_best.h5 https://storage.googleapis.com/seqnn-share/borzoi/f1/model0_best.h5
RUN wget -O /home/omic/borzoi/saved_models/f2/model0_best.h5 https://storage.googleapis.com/seqnn-share/borzoi/f2/model0_best.h5
RUN wget -O /home/omic/borzoi/saved_models/f3/model0_best.h5 https://storage.googleapis.com/seqnn-share/borzoi/f3/model0_best.h5
WORKDIR /home/omic/borzoi
# Get hg38
# The FASTA is streamed through gunzip and stored uncompressed. pipefail is
# enabled for this step so a failed or interrupted wget fails the build rather
# than being hidden behind gunzip's exit status.
RUN set -o pipefail \
    && wget -O - https://hgdownload2.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38.fa
# Get other supporting data
# The GTF annotation is decompressed on the fly; the two CSV tables are kept
# gzip-compressed exactly as downloaded.
RUN set -o pipefail \
    && wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > gencode41_basic_nort.gtf
RUN wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz
RUN wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz
# Index the reference
# pyfaidx is installed into the borzoi env (without keeping pip's cache in the
# layer); constructing Faidx on hg38.fa is used here to index the FASTA at
# build time rather than at first use.
RUN /opt/conda/envs/borzoi/bin/python3 -m pip install --no-cache-dir pyfaidx
RUN /opt/conda/envs/borzoi/bin/python3 -c "import pyfaidx; pyfaidx.Faidx('hg38.fa')"
# Build bcftools from source, with htslib cloned next to it.
# Compression and libcurl development headers are installed first; recommended
# packages are skipped and the apt lists are removed in the same layer so they
# do not persist in the image.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        libbz2-dev \
        libcurl4-gnutls-dev \
        liblzma-dev \
        libz-dev \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /home/omic
# htslib is cloned as a sibling of bcftools; presumably the bcftools Makefile
# picks it up from ../htslib — confirm against the bcftools build docs.
RUN git clone --recurse-submodules https://github.com/samtools/htslib.git
RUN git clone https://github.com/samtools/bcftools.git
WORKDIR /home/omic/bcftools
# The following is optional:
#RUN autoheader && autoconf && ./configure --enable-libgsl --enable-perl-filters
RUN make
# An `export` inside RUN only lives for that one shell and is lost once the
# layer is committed, so the variable must be set with ENV to be visible in
# later build steps and in the running container.
# NOTE(review): the value now points at the plugins/ subdirectory, which is
# where the bcftools documentation says BCFTOOLS_PLUGINS should point; the
# previous (ineffective) export named the repository root — confirm.
ENV BCFTOOLS_PLUGINS=/home/omic/bcftools/plugins
WORKDIR /home/omic/borzoi
# Pipeline lookup tables taken from the build context and placed in the borzoi
# working directory: the ncbiRefSeq tables, the MANE data (prot_*) and
# TPM_NO_MUTATIONS.csv.
COPY ncbiRefSeq_bigger.csv \
     ncbiRefSeq_subset.pickle \
     prot_bigger.csv \
     prot_subset.pickle \
     TPM_NO_MUTATIONS.csv \
     ./
# test