Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
93 lines
3.5 KiB
Plaintext
93 lines
3.5 KiB
Plaintext
# Base image: CUDA 12.2.2 + cuDNN 8 runtime on Ubuntu 22.04.
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04

# The build steps below (apt, conda, make) are executed as root.
USER root

# Run every RUN step under bash with pipefail enabled, so that a failed
# download on the left-hand side of a pipe (e.g. `wget ... | gunzip`) aborts
# the build instead of silently producing an empty or truncated file.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Project root inside the image. WORKDIR creates the directory when it does
# not exist, so no separate `mkdir -p` step is required.
WORKDIR /home/omic

# Build-time only: stops apt/dpkg from prompting. Declared as ARG (not ENV) so
# it does not end up in the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# OS-level build tooling and download utilities. Packages are listed one per
# line in alphabetical order; the apt caches and package lists are removed in
# the same layer so they do not persist in the image.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        cuda-toolkit-12-2 \
        curl \
        git \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Miniconda into /opt/conda.
#
# MINICONDA_INSTALLER is the installer file name under
# https://repo.anaconda.com/miniconda/. It defaults to the rolling "latest"
# installer; pass `--build-arg MINICONDA_INSTALLER=<file>` to pin a specific
# release when a reproducible build is needed.
ARG MINICONDA_INSTALLER=Miniconda3-latest-Linux-x86_64.sh

# Download, install in batch mode (-b) to the prefix (-p), hook conda into
# login/interactive shells, then strip static libraries, JS source maps and
# conda's package caches in the same layer so they never reach the image.
RUN wget "https://repo.anaconda.com/miniconda/${MINICONDA_INSTALLER}" -O miniconda.sh \
    && bash miniconda.sh -b -p /opt/conda \
    && rm miniconda.sh \
    && ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh \
    && echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc \
    && echo "conda activate base" >> ~/.bashrc \
    && find /opt/conda/ -follow -type f -name '*.a' -delete \
    && find /opt/conda/ -follow -type f -name '*.js.map' -delete \
    && /opt/conda/bin/conda clean -afy

# Make `conda` (and the base environment's binaries) available to later steps.
ENV PATH=/opt/conda/bin:$PATH

# Main conda env (borzoi), Python 3.9.
# -y answers conda's confirmation prompt explicitly (there is no TTY during a
# build). The package cache is cleaned in the same RUN that filled it, because
# a cleanup in a later layer cannot shrink an earlier one. The last command
# makes interactive root shells activate the env.
RUN conda create -y -n borzoi python=3.9 \
    && conda clean --all -f -y \
    && echo "source activate borzoi" >> ~/.bashrc

# The env's bin directory is appended, so /opt/conda/bin keeps precedence on
# PATH; later steps therefore call the env's interpreter by absolute path.
ENV PATH="$PATH:/opt/conda/envs/borzoi/bin"

# Install baskerville and borzoi from source into the borzoi env.
# Both repositories are cloned without a pinned ref, i.e. whatever the default
# branch contains at build time. They are installed in editable mode (-e), so
# the checkouts under /home/omic must stay in the image.
# --no-cache-dir keeps pip's download/wheel cache out of the image layers.
RUN git clone https://github.com/calico/baskerville.git
WORKDIR /home/omic/baskerville
RUN /opt/conda/envs/borzoi/bin/python3 -m pip install --no-cache-dir -e .

WORKDIR /home/omic
RUN git clone https://github.com/calico/borzoi.git
WORKDIR /home/omic/borzoi
RUN /opt/conda/envs/borzoi/bin/python3 -m pip install --no-cache-dir -e .

# Install pretrained models: one model0_best.h5 per fold (f0..f3) under
# /home/omic/borzoi/saved_models/<fold>/.
# WORKDIR creates saved_models if it is missing.
WORKDIR /home/omic/borzoi/saved_models

# TLS certificate verification is left enabled (no --no-check-certificate):
# the ca-certificates package is installed earlier in this image, and skipping
# verification would allow the model weights to be tampered with in transit.
# `|| exit 1` aborts the build on the first fold that fails to download.
RUN for fold in f0 f1 f2 f3; do \
        mkdir -p "${fold}" \
        && wget -O "/home/omic/borzoi/saved_models/${fold}/model0_best.h5" \
            "https://storage.googleapis.com/seqnn-share/borzoi/${fold}/model0_best.h5" \
        || exit 1; \
    done

# Back to the borzoi checkout for the data steps that follow.
WORKDIR /home/omic/borzoi

# Get hg38: stream the gzipped FASTA and decompress it on the fly.
# `set -o pipefail` makes the step fail when wget fails; without it only
# gunzip's exit status counts and a broken download can leave a truncated or
# empty hg38.fa in a build that still "succeeds".
RUN set -o pipefail \
    && wget -O - https://hgdownload2.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz \
        | gunzip -c > hg38.fa

# Get other supporting data.
# The GTF annotation is decompressed the same way (again with pipefail).
RUN set -o pipefail \
    && wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz \
        | gunzip -c > gencode41_basic_nort.gtf

# These two tables are downloaded and kept gzip-compressed.
RUN wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz
RUN wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz

# Index the reference.
# Installs pyfaidx into the borzoi env (no pip cache kept in the layer) and
# constructs a Faidx object on hg38.fa in the current directory
# (/home/omic/borzoi), which builds the FASTA index for it
# (presumably written alongside as hg38.fa.fai - confirm against pyfaidx docs).
RUN /opt/conda/envs/borzoi/bin/python3 -m pip install --no-cache-dir pyfaidx \
    && /opt/conda/envs/borzoi/bin/python3 -c "import pyfaidx; pyfaidx.Faidx('hg38.fa')"

# Build bcftools from source (htslib is cloned next to it).
# Compression and curl development headers needed by the build; recommended
# extras are skipped and the apt lists are removed in the same layer.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        libbz2-dev \
        libcurl4-gnutls-dev \
        liblzma-dev \
        libz-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /home/omic
# htslib sits in a sibling directory of bcftools
# (presumably where the bcftools Makefile looks for it by default - confirm).
RUN git clone --recurse-submodules https://github.com/samtools/htslib.git
RUN git clone https://github.com/samtools/bcftools.git

WORKDIR /home/omic/bcftools
# The following is optional:
#RUN autoheader && autoconf && ./configure --enable-libgsl --enable-perl-filters
RUN make

# Plugin search path for bcftools. This must be an ENV instruction: a variable
# exported inside a RUN step exists only for that step's shell and is gone in
# every later layer and in the running container.
# NOTE(review): points at the plugins/ subdirectory of the checkout, which is
# the location the bcftools install guide gives - confirm the compiled plugins
# land there.
ENV BCFTOOLS_PLUGINS=/home/omic/bcftools/plugins

# Final working directory of the image: the borzoi checkout.
WORKDIR /home/omic/borzoi

# Local data files taken from the build context and placed in the working
# directory. They must be present next to the Dockerfile at build time.
# NCBI RefSeq tables:
COPY ncbiRefSeq_bigger.csv ncbiRefSeq_subset.pickle ./

# MANE data:
COPY prot_bigger.csv prot_subset.pickle TPM_NO_MUTATIONS.csv ./
# test