Initial commit: digital-patients pipeline (clean, no large files)

Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
This commit is contained in:
2026-03-26 15:15:23 +01:00
commit 9e6a16c19b
45 changed files with 7207 additions and 0 deletions

View File

@@ -0,0 +1,79 @@
# CUDA major.minor release; it selects the NVIDIA base image tag below.
ARG CUDA=11.7
FROM nvidia/cuda:${CUDA}.1-cudnn8-devel-ubuntu22.04
USER root
# Every shell-form RUN below is executed through bash instead of /bin/sh.
SHELL ["/bin/bash", "-c"]
# WORKDIR creates the directory when it is missing, so no separate
# `WORKDIR /home` + `RUN mkdir -p /home/omic` pair is needed.
WORKDIR /home/omic
# Build-time only: keeps package configuration (e.g. tzdata) from prompting
# during apt-get install without persisting the setting into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive
# OS-level build tools, alignment utilities (hmmer, kalign) and system Python.
# Packages are listed alphabetically, one per line, to keep diffs small.
# The apt cache and package lists are removed in the same layer that created them.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        hmmer \
        kalign \
        libglib2.0-0 \
        python3-dev \
        python3-opencv \
        python3-pip \
        tzdata \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Install NVIDIA's cuda-keyring package, then the CUDA command-line tools that
# match the base image's CUDA release.
# Re-declaring CUDA (without a value) makes the default set before FROM visible
# inside this stage; bash then rewrites "11.7" to "11-7" for the package name.
ARG CUDA
# Download, install and cleanup share one layer: a file removed in a later RUN
# would still be stored in the layer that fetched it, and the same applies to
# the apt package lists.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
    && dpkg -i cuda-keyring_1.0-1_all.deb \
    && rm cuda-keyring_1.0-1_all.deb \
    && apt-get update -y \
    && apt-get install -y --no-install-recommends "cuda-command-line-tools-${CUDA//./-}" \
    && rm -rf /var/lib/apt/lists/*
# Install Miniconda into /opt/conda in batch mode (-b), expose conda.sh to login
# shells through /etc/profile.d, and make root's interactive bash activate the
# base env. Static libraries and JS source maps are deleted and conda's caches
# are purged in this same layer to keep it small.
# NOTE(review): the "latest" installer is unpinned, so the conda version can
# change between builds - consider pinning a specific installer and checksum.
RUN wget -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
    && bash miniconda.sh -b -p /opt/conda \
    && rm miniconda.sh \
    && ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh \
    && printf '%s\n' ". /opt/conda/etc/profile.d/conda.sh" "conda activate base" >> ~/.bashrc \
    && find /opt/conda/ -follow -type f \( -name '*.a' -o -name '*.js.map' \) -delete \
    && /opt/conda/bin/conda clean -afy
# Put the base conda installation first on PATH so `conda` resolves in later steps.
ENV PATH=/opt/conda/bin:$PATH
# Main conda env (rna2protexpresson).
# -y keeps conda from waiting on a confirmation prompt in a non-interactive build.
# The package cache is purged in the same layer that filled it; a `conda clean`
# in a later RUN would not make the image any smaller.
RUN conda update -y -n base -c defaults conda \
    && conda create -y -n rna2protexpresson python=3.9 \
    && conda clean --all -f -y
# The env's bin directory is appended, so binaries from /opt/conda/bin (and the
# system directories) still take precedence over the env's on PATH.
ENV PATH="$PATH:/opt/conda/envs/rna2protexpresson/bin"
# Activate the env for root's interactive bash sessions.
RUN echo "source activate rna2protexpresson" >> ~/.bashrc
# Make sure the CA certificate package is current and rebuild the trust store.
# Package lists are removed in the same layer so they do not bloat the image.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends ca-certificates \
    && update-ca-certificates \
    && rm -rf /var/lib/apt/lists/*
# Upgrade pip inside the rna2protexpresson env. The env's bin directory is last
# on PATH, so a bare `python3` would resolve to an earlier interpreter (base
# conda or the system python3) and upgrade the wrong pip; every later install
# uses the env interpreter, so this step uses it as well.
RUN /opt/conda/envs/rna2protexpresson/bin/python3 -m pip install --no-cache-dir --upgrade pip
# Install PyTorch, torchvision and torchaudio into the env.
# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): the find-links page targets cu111 wheels while the base image is
# CUDA 11.7, and no versions are pinned - confirm which torch build is intended
# and pin it for reproducible images.
RUN /opt/conda/envs/rna2protexpresson/bin/python3 -m pip install --no-cache-dir torch torchvision torchaudio -f https://download.pytorch.org/whl/cu111/torch_stable.html
# Install packages
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN /opt/conda/envs/rna2protexpresson/bin/python3 -m pip install --no-cache-dir axial_attention thefuzz
# -y avoids the confirmation prompt in a non-interactive build; the conda cache
# is purged in the same layer that populated it.
RUN conda install -y -n rna2protexpresson -c anaconda pandas numpy scikit-learn joblib \
    && conda clean --all -f -y
# argparse is part of the standard library of the env's Python (3.9), so the
# PyPI backport of the same name is intentionally not installed.
# Working directory for the remaining steps; WORKDIR creates it when missing,
# so no separate `RUN mkdir -p` is required.
WORKDIR /home/omic/rna2protexpresson
# Package into python script for running in nextflow.
# NOTE(review): the script is copied to /home/omic/rna2protexpression (with an
# "i"), which is a different directory from the WORKDIR above
# (/home/omic/rna2protexpresson). Paths are left unchanged because external
# callers may rely on them - confirm which location is intended.
COPY rna2protexpression.py /home/omic/rna2protexpression/rna2protexpression.py
RUN chmod +x /home/omic/rna2protexpression/rna2protexpression.py
# Copy the model and preprocessing artifacts into the current WORKDIR.
# Each file gets its own COPY instruction, and therefore its own image layer.
COPY tissue2number.joblib .
COPY ensg2number.joblib .
COPY go_df_work.csv .
COPY go_term_protein_expression_model.pth .