Files
digital-patients/Dockerfile_synthea
Olamide Isreal 9e6a16c19b Initial commit: digital-patients pipeline (clean, no large files)
Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
2026-03-26 15:15:23 +01:00

65 lines
2.1 KiB
Plaintext

# Base image. An explicit release tag keeps rebuilds reproducible; a floating
# `latest` tag can silently move to a release that no longer ships the apt
# packages installed further down (for example openjdk-8-jdk or 2to3).
# NOTE(review): 24.04 is assumed to be the release `latest` pointed to when
# this file was written - confirm, and consider pinning by digest as well.
FROM ubuntu:24.04
# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG so it does not leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive
# The steps below install system packages and write under /opt and /synthea,
# so they run as root.
USER root
# System toolchain: C/C++ build tools, JDK 8, wget and git for fetching
# sources, the distribution Python with pip, and tabix.
# - `apt-get update` and `apt-get install` share one layer so that a cached,
#   stale package index is never paired with a fresh install request.
# - `apt-get` is used throughout; the `apt` front end is meant for interactive use.
# - ca-certificates is listed explicitly because --no-install-recommends would
#   otherwise leave it to transitive dependencies, and the HTTPS downloads
#   below need it.
# - The package index is intentionally NOT purged here: later steps in this
#   file install further apt packages (vcftools, 2to3) and may rely on it.
# NOTE(review): current Synthea documentation is believed to require JDK 11 or
# newer - confirm that openjdk-8-jdk is still adequate for the Synthea build.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        openjdk-8-jdk \
        python3 \
        python3-pip \
        tabix \
        wget
# Install Miniconda into /opt/conda.
# Everything happens in one layer so the installer script and the files
# removed by the clean-up steps never end up in the image:
#   1. download the installer and run it in batch mode (-b) with prefix /opt/conda
#   2. delete the installer
#   3. expose conda's shell hook system-wide and in root's ~/.bashrc, and
#      activate the base environment in interactive shells
#   4. drop static libraries (*.a) and JavaScript source maps (*.js.map),
#      then purge conda's package caches
# NOTE(review): "Miniconda3-latest" is an unpinned download; consider pinning
# a specific installer version and verifying its checksum.
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
    mkdir -p /opt && \
    sh miniconda.sh -b -p /opt/conda && \
    rm miniconda.sh && \
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc && \
    find /opt/conda/ -follow -type f \( -name '*.a' -o -name '*.js.map' \) -delete && \
    /opt/conda/bin/conda clean -afy
# Put conda's executables first on PATH for all later build steps and at
# runtime (key=value form; the space-separated ENV form is deprecated).
ENV PATH=/opt/conda/bin:$PATH
# Main conda environment ("synthea"). It is created empty; packages are added
# to it by later `conda install -n synthea` steps.
# --yes answers the confirmation prompt explicitly instead of relying on how
# conda behaves when no terminal is attached during `docker build`.
# Interactive bash shells for root also activate the environment via ~/.bashrc.
RUN conda create --yes --name synthea && \
    echo "source activate synthea" >> ~/.bashrc
# The environment's bin directory is appended (not prepended) to PATH, so
# executables found earlier on PATH win when names clash.
ENV PATH="$PATH:/opt/conda/envs/synthea/bin"
# Python libraries: upgrade pip first, then install fuzzywuzzy, numpy, pandas,
# python-Levenshtein and requests.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
# NOTE(review): `python3` is resolved through PATH, so these packages
# presumably land in whichever interpreter comes first there (likely the conda
# base interpreter rather than the "synthea" environment) - confirm this is
# intended. Versions are unpinned; consider pinning them for reproducibility.
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        fuzzywuzzy \
        numpy \
        pandas \
        python-Levenshtein \
        requests
# Install Synthea from the "master-branch-latest" source archive.
# Download, unpack, rename and delete the tarball in a single layer so the
# archive itself is not stored in the image.
# NOTE(review): "master-branch-latest" is a moving tag, so rebuilds may pick up
# different Synthea sources; pin a release tag if reproducibility matters.
RUN wget https://github.com/synthetichealth/synthea/archive/refs/tags/master-branch-latest.tar.gz && \
    tar xzf master-branch-latest.tar.gz && \
    mv synthea-master-branch-latest synthea && \
    rm master-branch-latest.tar.gz
WORKDIR /synthea
# Make the scripts in the Synthea checkout callable from anywhere.
# PATH is set with ENV only: an ARG of the same name would be overridden by
# ENV and therefore has no effect.
ENV PATH=$PATH:/synthea
# Bioinformatics command-line tools.
#
# Conda part: register the bioconda and conda-forge channels, then install
# bedtools, gatk4, samtools and UCSC liftOver into the "synthea" environment.
# - Each `conda config --add channels` puts the channel at the front of the
#   list, so conda-forge (added last) ends up ahead of bioconda in the config.
# - All four tools are requested in one `conda install` so the solver resolves
#   them together, and --yes answers the confirmation prompt explicitly.
# - `conda clean -afy` in the same layer keeps downloaded package archives out
#   of the image.
RUN conda config --add channels bioconda && \
    conda config --add channels conda-forge && \
    conda install --yes -n synthea -c bioconda \
        bedtools \
        gatk4 \
        samtools \
        ucsc-liftover && \
    conda clean -afy
# apt part: vcftools comes from the Ubuntu archive. The package index is
# refreshed in the same layer as the install so it cannot be stale, and it is
# left in place because a later step in this file installs another apt package.
RUN apt-get update && \
    apt-get install -y --no-install-recommends vcftools
# DNA-to-protein translation script (dna2proteins.py).
# The script is fetched from GitHub, placed in /synthea and rewritten in place
# by 2to3 (the Python 2 -> 3 source converter).
# Everything runs in one layer so the temporary git checkout and the apt
# package index are not stored in the image:
#   - the apt index is refreshed right before installing 2to3, then removed
#   - `git clone --depth 1` fetches only the latest commit, since only one
#     file from the repository is kept
#   - `2to3 -w -n` writes the converted file back without leaving a
#     dna2proteins.py.bak backup next to it
# NOTE(review): the clone tracks the repository's default branch; pin a commit
# if the script's content must not change between builds.
WORKDIR /synthea
RUN apt-get update && \
    apt-get install -y --no-install-recommends 2to3 && \
    rm -rf /var/lib/apt/lists/* && \
    git clone --depth 1 https://github.com/prestevez/dna2proteins.git && \
    mv dna2proteins/dna2proteins.py . && \
    rm -r dna2proteins && \
    2to3 -w -n dna2proteins.py
# Smoke test: run Synthea once while the image is being built. A non-zero exit
# status from this RUN step aborts the build.
# NOTE(review): `-p 10` presumably requests a population of 10 synthetic
# patients and `--exporter.csv.export true` presumably enables CSV output -
# confirm against the Synthea documentation. Any files the run produces
# become part of this image layer.
# Gradle-based verification, currently disabled:
# RUN ./gradlew build check test
RUN ./run_synthea -p 10 --exporter.csv.export true
# Further run_synthea options: use -a for an age range (e.g. 30-40) and -g for gender (M/F).