# Large reference/model files are excluded from the repo; they are to be staged to S3 or baked into Docker images.
# (File listing metadata: 65 lines, 2.1 KiB, plaintext.)
# Image for running Synthea together with a set of genomics command-line tools
# (samtools, UCSC liftOver, GATK4, bedtools, vcftools, tabix) and the
# dna2proteins.py helper script. The working directory is /synthea.

# Pinned to a fixed release instead of `latest` so the apt package set used
# below (openjdk-8-jdk, 2to3, tabix, vcftools) does not shift under the build.
# NOTE(review): 22.04 was picked as an LTS that ships these packages - confirm
# it matches the release this image was originally built against.
FROM ubuntu:22.04

# Build-time only: keeps apt from prompting; not persisted into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive

# NOTE(review): every step below, and the final image, runs as root.
USER root

# OS packages: build toolchain, JDK for Gradle/Synthea, Python, and CLI tools.
# `update` and `install` share one layer so a cached package index can never be
# paired with a newer install list, and the index is removed in the same layer.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise leave wget/git without trusted roots for the HTTPS downloads below.
# NOTE(review): current Synthea documentation asks for a newer JDK than 8 -
# confirm openjdk-8-jdk still builds the tarball fetched further down.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        2to3 \
        build-essential \
        ca-certificates \
        git \
        openjdk-8-jdk \
        python3 \
        python3-pip \
        tabix \
        vcftools \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Install Miniconda into /opt/conda, register it for login/interactive shells,
# and strip static libraries, JS source maps and package caches in the same
# layer so they never reach the image.
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
    sh miniconda.sh -b -p /opt/conda && \
    rm miniconda.sh && \
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc && \
    find /opt/conda/ -follow -type f -name '*.a' -delete && \
    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
    /opt/conda/bin/conda clean -afy

# conda's bin directory goes first, so `python3`/`pip` below resolve to conda's.
ENV PATH=/opt/conda/bin:$PATH

# Main conda env (synthea). -y answers the confirmation prompt explicitly, as
# the build has no terminal to answer it from. The .bashrc line only affects
# interactive shells started in the container, not RUN steps.
RUN conda create -y -n synthea && \
    echo "source activate synthea" >> ~/.bashrc

# The env's tools are appended, so they are found after conda base and system.
ENV PATH="$PATH:/opt/conda/envs/synthea/bin"

# Python libraries (installed with whichever python3 is first on PATH).
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        fuzzywuzzy \
        numpy \
        pandas \
        python-Levenshtein \
        requests

# Install Synthea from the rolling "master-branch-latest" release tag.
# Download, unpack and delete the archive in one layer so the tarball is not
# kept in the image; --strip-components drops the archive's top-level folder
# so the sources land directly in /synthea.
RUN wget --quiet -O /tmp/synthea.tar.gz \
        https://github.com/synthetichealth/synthea/archive/refs/tags/master-branch-latest.tar.gz && \
    mkdir -p /synthea && \
    tar xzf /tmp/synthea.tar.gz -C /synthea --strip-components=1 && \
    rm /tmp/synthea.tar.gz

WORKDIR /synthea

# Make scripts in /synthea callable by name. (A former `ARG PATH $PATH:/synthea`
# line was dropped: it is not in ARG's name[=default] form, and this ENV is
# what actually sets PATH.)
ENV PATH=$PATH:/synthea

# Bioinformatics tools in the synthea env: samtools, UCSC liftOver, GATK4 and
# bedtools. One solve/one layer, with the package cache cleaned afterwards.
RUN conda config --add channels bioconda && \
    conda config --add channels conda-forge && \
    conda install -y -n synthea -c bioconda \
        bedtools \
        gatk4 \
        samtools \
        ucsc-liftover && \
    conda clean -afy

# DNA to Protein Sequence code: fetch only dna2proteins.py from its repository
# and convert it in place with 2to3 (-n: do not leave a .bak copy behind).
RUN git clone --depth 1 https://github.com/prestevez/dna2proteins.git /tmp/dna2proteins && \
    mv /tmp/dna2proteins/dna2proteins.py /synthea/dna2proteins.py && \
    rm -r /tmp/dna2proteins && \
    2to3 -w -n /synthea/dna2proteins.py

# Test: run Synthea once at build time with CSV export enabled, so a broken
# toolchain fails the image build instead of surfacing at runtime.
# RUN ./gradlew build check test
RUN ./run_synthea -p 10 --exporter.csv.export true
# use -a for age range e.g. 30-40 and -g for gender M/F