Initial commit: digital-patients pipeline (clean, no large files)
Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
This commit is contained in:
64
Dockerfile_synthea
Normal file
64
Dockerfile_synthea
Normal file
@@ -0,0 +1,64 @@
|
||||
# Base image. The release is a build argument so it is chosen deliberately
# (override with --build-arg UBUNTU_VERSION=...) instead of drifting with the
# moving "latest" tag.
ARG UBUNTU_VERSION=22.04
FROM ubuntu:${UBUNTU_VERSION}

# Build-time only: stops apt/debconf from prompting during package installs.
ARG DEBIAN_FRONTEND=noninteractive

USER root

# OS-level toolchain: compiler suite, JDK 8, download/VCS tools, system Python
# and tabix. No Gradle package is installed here.
# - `apt-get update` and `install` share one layer so the install never runs
#   against a stale cached package index.
# - ca-certificates is listed explicitly because --no-install-recommends would
#   otherwise not guarantee it for HTTPS downloads via wget/git.
# - The package index is intentionally not removed in this layer: later steps
#   in this file install further apt packages.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        openjdk-8-jdk \
        python3 \
        python3-pip \
        tabix \
        wget
|
||||
|
||||
# Install Miniconda into /opt/conda.
# MINICONDA_VERSION selects the installer build from repo.anaconda.com; the
# default "latest" keeps the previous behaviour, pass a concrete build string
# via --build-arg for a reproducible image.
ARG MINICONDA_VERSION=latest

# Download, install and clean up in a single layer so neither the installer
# nor conda's package cache ends up in the image:
# - conda.sh is linked into /etc/profile.d and sourced from root's .bashrc,
#   which then activates the base environment for interactive bash shells.
# - static libraries (*.a) and JavaScript source maps are deleted to save space.
RUN wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh" -O /tmp/miniconda.sh && \
    mkdir -p /opt && \
    sh /tmp/miniconda.sh -b -p /opt/conda && \
    rm /tmp/miniconda.sh && \
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc && \
    find /opt/conda/ -follow -type f -name '*.a' -delete && \
    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
    /opt/conda/bin/conda clean -afy

# Put conda's executables ahead of the system ones for all later steps.
ENV PATH=/opt/conda/bin:$PATH
|
||||
|
||||
# Main conda environment (synthea).
# -y answers conda's confirmation prompt so the step is explicitly
# non-interactive; the .bashrc line activates the environment in interactive
# bash shells.
RUN conda create -y -n synthea && \
    echo "source activate synthea" >> ~/.bashrc

# Expose the environment's executables to non-interactive steps as well.
# Appended, so /opt/conda/bin and the system paths keep precedence.
ENV PATH="$PATH:/opt/conda/envs/synthea/bin"
|
||||
|
||||
# Python libraries: pandas, numpy, fuzzy string matching (fuzzywuzzy with the
# python-Levenshtein speed-up) and requests.
# `python3` is resolved through PATH at build time.
# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): versions are unpinned; pin them once a known-good set is agreed.
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        fuzzywuzzy \
        numpy \
        pandas \
        python-Levenshtein \
        requests
|
||||
|
||||
# Install Synthea from a GitHub tag archive.
# SYNTHEA_REF is the tag to fetch; the default is the rolling
# "master-branch-latest" tag used previously. Pass a fixed release tag via
# --build-arg for a reproducible build.
ARG SYNTHEA_REF=master-branch-latest

# Download, unpack and delete the tarball in one layer so the archive is not
# kept in the image. --strip-components=1 drops the archive's top-level
# directory, so the sources land directly in /synthea whatever the tag is
# called.
RUN wget --quiet "https://github.com/synthetichealth/synthea/archive/refs/tags/${SYNTHEA_REF}.tar.gz" -O /tmp/synthea.tar.gz && \
    mkdir -p /synthea && \
    tar -xzf /tmp/synthea.tar.gz -C /synthea --strip-components=1 && \
    rm /tmp/synthea.tar.gz

WORKDIR /synthea

# Make the scripts in the Synthea checkout callable from any directory.
ENV PATH=$PATH:/synthea
|
||||
|
||||
# Conda channel setup. Each `--add` places the channel at the top of the
# channel list, so conda-forge ends up ahead of bioconda.
RUN conda config --add channels bioconda && \
    conda config --add channels conda-forge

# Genomics command-line tools for the synthea environment: bedtools, GATK4,
# samtools and UCSC liftOver.
# Installed in one transaction so the packages are solved together, with -y to
# keep the step non-interactive and `conda clean` in the same layer so the
# package cache is not stored in the image.
RUN conda install -y -n synthea -c bioconda \
        bedtools \
        gatk4 \
        samtools \
        ucsc-liftover && \
    conda clean -afy
|
||||
# vcftools and the 2to3 converter from the Ubuntu archive.
# The package index is refreshed in this same layer so the install does not
# depend on an index cached by a much earlier layer, and it is removed
# afterwards because no later step uses apt.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        2to3 \
        vcftools && \
    rm -rf /var/lib/apt/lists/*

# DNA to protein sequence code.
# Only dna2proteins.py is kept from the upstream repository; the clone is
# deleted in the same layer so its git history does not persist in the image.
# The script is then rewritten in place by 2to3 (presumably because upstream
# is Python 2 code - confirm).
# NOTE(review): the clone tracks the default branch; pin a commit if
# reproducibility matters.
WORKDIR /synthea
RUN git clone --depth 1 https://github.com/prestevez/dna2proteins.git && \
    mv dna2proteins/dna2proteins.py . && \
    rm -rf dna2proteins && \
    2to3 -w dna2proteins.py
|
||||
|
||||
# Smoke test: run Synthea once at build time for a population of 10 with CSV
# export switched on.
# The full Gradle verification is left disabled:
# RUN ./gradlew build check test
#
# Further run_synthea options: -a for an age range (e.g. 30-40) and -g for
# gender (M/F).
RUN ./run_synthea \
        -p 10 \
        --exporter.csv.export true
|
||||
Reference in New Issue
Block a user