Initial commit: digital-patients pipeline (clean, no large files)

Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
2026-03-26 15:15:23 +01:00
commit 9e6a16c19b
45 changed files with 7207 additions and 0 deletions
--- a/79
+++ b/79
@@ -0,0 +1,79 @@
+# CUDA toolkit release (major.minor) used to select the NVIDIA base image tag.
+ARG CUDA=11.7
+FROM nvidia/cuda:${CUDA}.1-cudnn8-devel-ubuntu22.04
+
+# Build as root, with bash as the shell that executes every RUN instruction.
+USER root
+SHELL ["/bin/bash", "-c"]
+
+# WORKDIR creates the directory when it is missing, so no explicit mkdir is needed.
+WORKDIR /home/omic
+
+# Suppress interactive package-configuration prompts during the build.
+# Declared as ARG (not ENV) so the setting does not persist into the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# System packages, listed alphabetically. The apt cache and package lists are
+# removed in the same layer that created them.
+RUN apt-get update -y \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        cmake \
+        curl \
+        git \
+        hmmer \
+        kalign \
+        libglib2.0-0 \
+        python3-dev \
+        python3-opencv \
+        python3-pip \
+        tzdata \
+        wget \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install NVIDIA's CUDA repository keyring package, then the CUDA 11.7 command-line tools.
+# Download, install and cleanup happen in ONE layer: a file removed in a later RUN
+# still occupies space in the layer that created it. The keyring .deb is staged in
+# /tmp so it never lands in the working directory, and the apt package lists are
+# dropped once the install has finished.
+RUN wget -O /tmp/cuda-keyring.deb \
+        https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
+    && dpkg -i /tmp/cuda-keyring.deb \
+    && rm -f /tmp/cuda-keyring.deb \
+    && apt-get update -y \
+    && apt-get install -y --no-install-recommends cuda-command-line-tools-11-7 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Miniconda into /opt/conda. The installer is deleted after use, conda's
+# shell hook is symlinked into /etc/profile.d and sourced from root's ~/.bashrc
+# (which also activates the base env), and static libraries, JS source maps and
+# the package cache are removed in the same layer.
+# NOTE(review): "Miniconda3-latest" is unpinned, so rebuilds can pick up a different
+# installer; consider pinning a specific installer version and verifying its checksum.
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh \
+    && bash miniconda.sh -b -p /opt/conda \
+    && rm miniconda.sh \
+    && ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh \
+    && echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc \
+    && echo "conda activate base" >> ~/.bashrc \
+    && find /opt/conda/ -follow -type f -name '*.a' -delete \
+    && find /opt/conda/ -follow -type f -name '*.js.map' -delete \
+    && /opt/conda/bin/conda clean -afy
+
+# Make the conda binaries available to later instructions and at runtime.
+# Uses the key=value form; the space-separated ENV form is deprecated.
+ENV PATH=/opt/conda/bin:$PATH
+
+# Update conda itself and purge the package cache in the same layer so the
+# downloaded archives do not stay in the image.
+RUN conda update -y -n base -c defaults conda \
+    && conda clean -afy
+
+# Main conda env (rna2protexpresson) with Python 3.9.
+# -y keeps the build non-interactive. The package cache is purged in the same
+# layer, because cleaning in a later RUN does not shrink the layer that created it.
+RUN conda create -y -n rna2protexpresson python=3.9 \
+    && conda clean --all -f -y
+
+# Prepend the env's bin directory so that a bare `python3` / `pip` resolves to the
+# env interpreter in non-interactive shells too (which never read ~/.bashrc).
+# Appending it after /opt/conda/bin would leave base conda's python as the default.
+# `conda` itself is still found via /opt/conda/bin.
+ENV PATH=/opt/conda/envs/rna2protexpresson/bin:$PATH
+
+# Interactive bash shells additionally activate the env through ~/.bashrc.
+RUN echo "source activate rna2protexpresson" >> ~/.bashrc
+
+# Make sure the ca-certificates package is installed and refresh the system trust store.
+# update + install share one layer, and the package lists are removed afterwards.
+RUN apt-get update -y \
+    && apt-get install -y --no-install-recommends ca-certificates \
+    && update-ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip inside the rna2protexpresson env. The interpreter is addressed by its
+# full path so the upgrade does not depend on which python3 comes first on PATH.
+# --no-cache-dir keeps pip's download cache out of the image.
+RUN /opt/conda/envs/rna2protexpresson/bin/python3 -m pip install --no-cache-dir --upgrade pip
+
+# Python dependencies for the rna2protexpresson env.
+#
+# conda packages are installed first and pip packages afterwards, so that pip sees
+# what conda already provides instead of conda later overwriting pip-installed files.
+# -y keeps the build non-interactive; the package cache is purged in the same layer.
+RUN conda install -y -n rna2protexpresson -c anaconda \
+        joblib \
+        numpy \
+        pandas \
+        scikit-learn \
+    && conda clean -afy
+
+# Install PyTorch, torchvision and torchaudio.
+# NOTE(review): the find-links page is the cu111 (CUDA 11.1) wheel list, while the
+# base image is CUDA 11.7, and none of the packages are version-pinned — confirm
+# which torch build is actually intended and pin it.
+RUN /opt/conda/envs/rna2protexpresson/bin/python3 -m pip install --no-cache-dir \
+        torch torchvision torchaudio \
+        -f https://download.pytorch.org/whl/cu111/torch_stable.html
+
+# Install the remaining pip packages.
+# argparse is deliberately not installed: it is part of the Python 3 standard library.
+RUN /opt/conda/envs/rna2protexpresson/bin/python3 -m pip install --no-cache-dir \
+        axial_attention \
+        thefuzz
+
+# Pipeline working directory; WORKDIR creates it when it does not exist yet.
+WORKDIR /home/omic/rna2protexpresson
+
+# Ship the Python entry point (packaged for running in nextflow) and mark it executable.
+# NOTE(review): the script is copied to /home/omic/rna2protexpression (with an "i"),
+# which is a different directory from the WORKDIR /home/omic/rna2protexpresson —
+# confirm whether this split is intended.
+COPY rna2protexpression.py /home/omic/rna2protexpression/rna2protexpression.py
+RUN chmod +x /home/omic/rna2protexpression/rna2protexpression.py
+
+# Copy the model and preprocessing files into the current WORKDIR.
+COPY tissue2number.joblib \
+     ensg2number.joblib \
+     go_df_work.csv \
+     go_term_protein_expression_model.pth \
+     ./