Initial commit: digital-patients pipeline (clean, no large files)
Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
This commit is contained in:
92
Dockerfile_borzoi
Normal file
92
Dockerfile_borzoi
Normal file
@@ -0,0 +1,92 @@
|
||||
# Base image: NVIDIA CUDA 12.2.2 with cuDNN 8 (runtime flavour) on Ubuntu 22.04.
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04

# The build steps below install system packages, so they run as root.
USER root

# Run every RUN step under bash. `-o pipefail` makes a pipeline fail when any
# stage fails (for example a download piped into a decompressor), instead of
# reporting only the exit status of the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# WORKDIR creates the directory when it does not exist, so no mkdir is needed.
WORKDIR /home/omic

# Build-time only: stop apt from prompting during package installation.
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# System packages: compiler toolchain, download utilities, git and the CUDA 12.2
# toolkit. The apt cache and package index are removed in the same layer.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        cuda-toolkit-12-2 \
        curl \
        git \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Miniconda into /opt/conda in batch mode, register conda.sh in
# /etc/profile.d and in root's ~/.bashrc (activating the base env), then delete
# static libraries, JS source maps and conda caches within the same layer.
RUN wget --output-document=miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
    && bash miniconda.sh -b -p /opt/conda \
    && rm miniconda.sh \
    && ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh \
    && printf '%s\n' ". /opt/conda/etc/profile.d/conda.sh" "conda activate base" >> ~/.bashrc \
    && find /opt/conda/ -follow -type f \( -name '*.a' -o -name '*.js.map' \) -delete \
    && /opt/conda/bin/conda clean -afy
|
||||
|
||||
# Put the conda base binaries first on PATH. Uses the key=value form; the
# space-separated `ENV key value` form is a deprecated legacy syntax.
ENV PATH=/opt/conda/bin:$PATH
|
||||
|
||||
# main conda env (borzoi)
# `-y` keeps the create step non-interactive. The cache cleanup runs in the same
# RUN because files deleted in a later layer still occupy space in the image.
RUN conda create -y -n borzoi python=3.9 \
    && conda clean --all -f -y

# Append the borzoi env binaries to PATH (after the conda base binaries).
ENV PATH="$PATH:/opt/conda/envs/borzoi/bin"

# Activate the borzoi env from root's ~/.bashrc.
RUN echo "source activate borzoi" >> ~/.bashrc
|
||||
|
||||
# Install borzoi
# baskerville is cloned and installed first, then borzoi. Both are editable
# installs into the borzoi conda env, so the cloned checkouts must stay in place.
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN git clone https://github.com/calico/baskerville.git
WORKDIR /home/omic/baskerville
RUN /opt/conda/envs/borzoi/bin/python3 -m pip install --no-cache-dir -e .

WORKDIR /home/omic
RUN git clone https://github.com/calico/borzoi.git
WORKDIR /home/omic/borzoi
RUN /opt/conda/envs/borzoi/bin/python3 -m pip install --no-cache-dir -e .
|
||||
|
||||
# Install pretrained models
# One model0_best.h5 is fetched into each of the f0-f3 subdirectories. TLS
# certificate verification is left enabled (no --no-check-certificate) so the
# weights cannot be silently replaced by a man-in-the-middle; the CA bundle
# comes from the ca-certificates package installed earlier in this file.
# The downloads stay in separate RUN steps so each large file is cached on its own.
WORKDIR /home/omic/borzoi/saved_models
RUN mkdir -p f0 f1 f2 f3
RUN wget -O /home/omic/borzoi/saved_models/f0/model0_best.h5 https://storage.googleapis.com/seqnn-share/borzoi/f0/model0_best.h5
RUN wget -O /home/omic/borzoi/saved_models/f1/model0_best.h5 https://storage.googleapis.com/seqnn-share/borzoi/f1/model0_best.h5
RUN wget -O /home/omic/borzoi/saved_models/f2/model0_best.h5 https://storage.googleapis.com/seqnn-share/borzoi/f2/model0_best.h5
RUN wget -O /home/omic/borzoi/saved_models/f3/model0_best.h5 https://storage.googleapis.com/seqnn-share/borzoi/f3/model0_best.h5
|
||||
|
||||
# Reference and helper data are stored in the borzoi checkout directory.
WORKDIR /home/omic/borzoi

# Get hg38
# Streamed to stdout and decompressed on the fly; only hg38.fa is written.
RUN wget --output-document=- https://hgdownload2.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz \
    | gzip -dc > hg38.fa

# Get other supporting data
# The GTF is decompressed while downloading; the two CSV archives are saved
# compressed under their remote file names.
RUN wget --output-document=- https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz \
    | gzip -dc > gencode41_basic_nort.gtf
RUN wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz
RUN wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz
|
||||
|
||||
# Index the reference
# pyfaidx goes into the borzoi env; --no-cache-dir keeps pip's download cache
# out of the image layer.
RUN /opt/conda/envs/borzoi/bin/python3 -m pip install --no-cache-dir pyfaidx
# Opening hg38.fa with Faidx is the indexing step; it runs in the current
# working directory, where hg38.fa was downloaded.
RUN /opt/conda/envs/borzoi/bin/python3 -c "import pyfaidx; pyfaidx.Faidx('hg38.fa')"
|
||||
|
||||
# Install htslib and bcftools (built from source)
# Compression and curl development headers for the build. Recommended extras
# are skipped and the package index is removed in the same layer so neither
# persists in the image.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        libbz2-dev \
        libcurl4-gnutls-dev \
        liblzma-dev \
        libz-dev \
    && rm -rf /var/lib/apt/lists/*

# htslib and bcftools are cloned side by side under /home/omic.
WORKDIR /home/omic
RUN git clone --recurse-submodules https://github.com/samtools/htslib.git
RUN git clone https://github.com/samtools/bcftools.git

WORKDIR /home/omic/bcftools
# The following is optional:
#RUN autoheader && autoconf && ./configure --enable-libgsl --enable-perl-filters
RUN make

# A variable exported inside RUN is gone once that build step ends, so ENV is
# used to make it visible to later steps and to containers. The value points at
# the plugins directory of the source tree built above.
# NOTE(review): the previous (ineffective) export used /home/omic/bcftools;
# confirm no caller relies on that exact value.
ENV BCFTOOLS_PLUGINS=/home/omic/bcftools/plugins

WORKDIR /home/omic/borzoi
|
||||
|
||||
# Local data files from the build context, copied into the current WORKDIR.
COPY ncbiRefSeq_bigger.csv ncbiRefSeq_subset.pickle ./

#copy MANE data
COPY prot_bigger.csv prot_subset.pickle TPM_NO_MUTATIONS.csv ./
|
||||
# test
|
||||
Reference in New Issue
Block a user