Initial commit: digital-patients pipeline (clean, no large files)
Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
This commit is contained in:
96
Dockerfile_vcf2prot
Normal file
96
Dockerfile_vcf2prot
Normal file
@@ -0,0 +1,96 @@
|
||||
#FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
# Base image: Rust toolchain image pinned to 1.61; cargo from this image is
# what builds vcf2prot later in this file.
FROM rust:1.61

# All build steps below (apt-get, writes under /opt and /home) run as root.
USER root

# Run every subsequent RUN instruction through bash rather than /bin/sh.
SHELL ["/bin/bash", "-c"]
#SHELL [ "/bin/sh", "-s", ".", "$HOME/.cargo/env" ]

# WORKDIR creates missing directories itself, so the former
# `WORKDIR /home` + `RUN mkdir -p /home/omic` pair (and its extra layer) is
# not needed.
WORKDIR /home/omic

# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG so it is not baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive
# System packages: compiler toolchain, download/VCS tools and the numeric
# libraries (OpenBLAS, GSL). One package per line, sorted alphabetically so
# additions and removals diff cleanly. The apt cache and package lists are
# removed in the same layer that created them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        libgsl-dev \
        libopenblas-dev \
        software-properties-common \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Get Rust
# Disabled: cargo is taken from the rust:1.61 base image, so the rustup
# (nightly toolchain) steps below are kept only for reference.
#RUN curl https://sh.rustup.rs -sSf | bash -s -- --default-toolchain nightly -y
#RUN source $HOME/.cargo/env
#RUN echo 'source $HOME/.cargo/env' >> $HOME/.bashrc
# Build vcf2prot from source.
# Fetch the upstream repository into an explicit target directory
# (same location the default clone name would produce under /home/omic).
RUN git clone https://github.com/ikmb/vcf2prot.git /home/omic/vcf2prot

# Compile the optimised (release profile) binary from the repository root.
WORKDIR /home/omic/vcf2prot
RUN cargo build --release
# Directory that holds the binary produced by the release build.
WORKDIR /home/omic/vcf2prot/target/release

# Switches for vcf2prot (presumably read by the tool at run time; confirm
# against the vcf2prot README).
# These were previously written as `RUN export VAR=TRUE`. An export only
# lives for the single shell that executes that RUN instruction, so the
# variables were never visible to later build steps or to the running
# container. ENV persists them for both.
ENV DEBUG_CPU_EXEC=TRUE \
    INSPECT_TXP=TRUE \
    INSPECT_INS_GEN=TRUE

# Smoke test: fail the image build if the freshly built binary cannot start
# and print its help text.
RUN ./vcf2prot -h
# Back to the repository root to prepare input data.
WORKDIR /home/omic/vcf2prot

# Decompress the example reference sequences in place
# (yields examples/reference_sequences.fasta and removes the .gz).
RUN gzip -d examples/reference_sequences.fasta.gz

# Add reference.fasta from the build context to the repository root.
COPY reference.fasta ./
# Install Miniconda (bcftools itself is installed through conda further down).
WORKDIR /home/omic/

# Install conda into /opt/conda:
#  - run the installer in batch mode (-b) and delete it in the same layer,
#  - expose conda.sh through /etc/profile.d and root's ~/.bashrc,
#  - delete static libraries and JS source maps, then purge conda's caches.
# NOTE(review): "Miniconda3-latest" is unpinned, so the conda version can
# change between builds; consider pinning a dated installer.
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
    mkdir -p /opt && \
    bash miniconda.sh -b -p /opt/conda && \
    rm miniconda.sh && \
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc && \
    find /opt/conda/ -follow -type f -name '*.a' -delete && \
    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
    /opt/conda/bin/conda clean -afy

# key=value form; the space-separated `ENV PATH <value>` form is deprecated.
ENV PATH=/opt/conda/bin:$PATH

## update conda
# Caches are purged in the same layer so the downloaded packages do not
# stay in the image.
RUN conda update -y -n base -c defaults conda && \
    conda clean -afy
# main conda env (vcf2prot)
# -y makes the create step explicitly non-interactive, like the installs below.
RUN conda create -y -n vcf2prot python=3.9

# Append the environment's bin directory after the existing PATH entries
# (key=value form; the space-separated ENV form is deprecated).
ENV PATH="$PATH:/opt/conda/envs/vcf2prot/bin"

# Appended to root's ~/.bashrc, so it only applies to shells that read it.
RUN echo "source activate vcf2prot" >> ~/.bashrc

# Each package keeps its own install command and channel, exactly as before,
# so the solver sees the same requests. They now share one layer, and conda's
# package cache is purged before that layer is committed.
RUN conda install -y -n vcf2prot -c bioconda openssl && \
    conda install -y -n vcf2prot -c conda-forge libgcc-ng && \
    conda install -y -n vcf2prot -c bioconda bcftools==1.20 && \
    conda clean -afy
# Install Beagle (currently disabled)
#RUN wget https://faculty.washington.edu/browning/beagle/beagle.28Jun21.220.jar
# Get data: Ensembl release 112 annotation (GFF3) and GRCh38 top-level genome
# FASTA, written to the current working directory.
# Each download is decompressed in the same RUN instruction. When wget and
# gunzip sit in separate layers, the compressed archive stays stored in the
# download layer even though a later layer deletes it.
RUN wget https://ftp.ensembl.org/pub/release-112/gff3/homo_sapiens/Homo_sapiens.GRCh38.112.gff3.gz && \
    gunzip Homo_sapiens.GRCh38.112.gff3.gz
#RUN wget http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz
#RUN gunzip hg38.fa.gz
RUN wget https://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.toplevel.fa.gz && \
    gunzip Homo_sapiens.GRCh38.dna.toplevel.fa.gz
# Final working directory of the image: the vcf2prot repository root.
WORKDIR /home/omic/vcf2prot

# Python libraries for the vcf2prot environment. conda's package cache is
# purged in the same layer so the downloads do not remain in the image.
RUN conda install -y -n vcf2prot numpy pandas && \
    conda clean -afy

# Copied from the build context; the source name (including its spelling)
# has to match the file on disk, so it is left untouched.
COPY MANE_transcipts_reference.fasta .
Reference in New Issue
Block a user