#!/bin/bash
# run_pipeline.sh
#
# This script runs the Nextflow pipeline for generating disease modules and synthetic patients

set -e  # Exit on error

# Default values for every command line option. The argument parser
# overwrites these; the values are later written to params.json.
DISEASE_NAME=""            # --disease (required, no usable default)
FORCE_GENERATE=false       # --force-generate
GENERATE_PATIENTS=false    # --patients
POPULATION=100             # --population
GENDER=0.5                 # --gender (proportion female)
MIN_AGE=0                  # --min-age
MAX_AGE=90                 # --max-age
SEED=""                    # --seed (empty means "no seed entry in params.json")
ANALYZE_DATA=false         # --analyze
REPORT_FORMAT="html"       # --report-format
VERIFY_DOCKER=true         # cleared by --skip-docker-check

#######################################
# Print the usage text to stdout and terminate the script.
# Arguments: none
# Outputs:   usage, option list and an example invocation on stdout
# Returns:   never returns; always exits with status 1
#######################################
show_help() {
    # Unquoted delimiter so that $0 expands to the invoked script name.
    cat << EOF
Usage: $0 [options] --disease "Disease Name"

Options:
 --disease, -d NAME Disease name to generate a module for (required)
 --force-generate, -f Force regeneration of disease module even if it exists
 --patients, -p Generate synthetic patients (default: false)
 --population, -n NUM Number of patients to generate (default: 100)
 --gender, -g VALUE Gender distribution (0-1 for % female, default: 0.5)
 --min-age, -a NUM Minimum patient age (default: 0)
 --max-age, -m NUM Maximum patient age (default: 90)
 --seed, -s SEED Random seed for reproducibility
 --analyze, -A Analyze patient data after generation
 --report-format, -r FMT Report format for analysis (html, json, csv, default: html)
 --skip-docker-check Skip Docker container verification
 --help, -h Show this help message

Example:
 $0 --disease "Multiple Sclerosis" --patients --population 50
EOF
    exit 1
}

# Parse arguments

#######################################
# Abort with a clear message when an option that needs a value is the
# last word on the command line.
# Arguments: $1 - option name as typed by the user
#            $2 - number of remaining positional parameters, counting
#                 the option itself (pass "$#")
# Outputs:   error message on stderr when the value is missing
# Returns:   0 when a value is present; exits 1 otherwise
#######################################
require_value() {
    if (( $2 < 2 )); then
        echo "ERROR: Option $1 requires a value" >&2
        exit 1
    fi
}

while [[ $# -gt 0 ]]; do
    case "$1" in
        --disease|-d)
            # Without this guard `shift 2` fails and `set -e` aborts silently.
            require_value "$1" "$#"
            DISEASE_NAME="$2"
            shift 2
            ;;
        --force-generate|-f)
            FORCE_GENERATE=true
            shift
            ;;
        --patients|-p)
            GENERATE_PATIENTS=true
            shift
            ;;
        --population|-n)
            require_value "$1" "$#"
            POPULATION="$2"
            shift 2
            ;;
        --gender|-g)
            require_value "$1" "$#"
            GENDER="$2"
            shift 2
            ;;
        --min-age|-a)
            require_value "$1" "$#"
            MIN_AGE="$2"
            shift 2
            ;;
        --max-age|-m)
            require_value "$1" "$#"
            MAX_AGE="$2"
            shift 2
            ;;
        --seed|-s)
            require_value "$1" "$#"
            SEED="$2"
            shift 2
            ;;
        --analyze|-A)
            ANALYZE_DATA=true
            shift
            ;;
        --report-format|-r)
            require_value "$1" "$#"
            REPORT_FORMAT="$2"
            shift 2
            ;;
        --skip-docker-check)
            VERIFY_DOCKER=false
            shift
            ;;
        --help|-h)
            # show_help terminates with status 1. Run it in a subshell so an
            # explicit help request finishes with status 0 instead.
            ( show_help ) || true
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            show_help
            ;;
    esac
done

# Check if disease name is provided. show_help prints the usage text and
# terminates the script, so nothing below runs without a disease name.
if [[ -z "$DISEASE_NAME" ]]; then
    echo "ERROR: Disease name is required!"
    show_help
fi

# Check if Docker container is running if we need to generate patients

#######################################
# Report whether `docker ps` lists anything mentioning "synthea".
# Arguments: none
# Returns:   0 when at least one output line of `docker ps` contains
#            "synthea", non-zero otherwise (including when docker fails
#            and prints nothing on stdout)
#######################################
synthea_container_running() {
    # grep reads the whole listing (no -q) so docker never sees a closed pipe.
    docker ps | grep synthea > /dev/null
}

if [[ "$VERIFY_DOCKER" == true && "$GENERATE_PATIENTS" == true ]]; then
    if synthea_container_running; then
        echo "Synthea Docker container is running. Proceeding with pipeline execution."
    else
        echo "No Synthea Docker containers are running!"
        echo "Running environment setup script to start containers..."

        # Prefer the copy under ./scripts, then fall back to the working directory.
        if [[ -f "./scripts/prepare_environment.sh" ]]; then
            ./scripts/prepare_environment.sh
        elif [[ -f "./prepare_environment.sh" ]]; then
            ./prepare_environment.sh
        else
            echo "ERROR: prepare_environment.sh not found!"
            exit 1
        fi
    fi
fi

# Create params.json file with our configuration

#######################################
# Escape a string so it can be placed between double quotes in JSON.
# Handles backslash, double quote, newline, carriage return and tab.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
json_escape() {
    local text=$1
    text=${text//\\/\\\\}      # backslash first, so later escapes are not doubled
    text=${text//\"/\\\"}
    text=${text//$'\n'/\\n}
    text=${text//$'\r'/\\r}
    text=${text//$'\t'/\\t}
    printf '%s' "$text"
}

#######################################
# Abort unless a value matches the given extended regular expression.
# Arguments: $1 - human readable name of the setting
#            $2 - value to check
#            $3 - regular expression the whole value must match
#            $4 - description of the expected form, used in the message
# Outputs:   error message on stderr on mismatch
# Returns:   0 on match; exits 1 otherwise
#######################################
require_json_value() {
    if [[ ! $2 =~ $3 ]]; then
        echo "ERROR: $1 must be $4 (got '$2')" >&2
        exit 1
    fi
}

#######################################
# Validate the pipeline settings and write them as a JSON object.
# Globals:   DISEASE_NAME, GENERATE_PATIENTS, POPULATION, GENDER, MIN_AGE,
#            MAX_AGE, ANALYZE_DATA, REPORT_FORMAT, FORCE_GENERATE, SEED
#            (all read; unset ones fall back to the values listed below)
# Arguments: $1 - path of the file to write
# Outputs:   the JSON document in $1; error message on stderr when a
#            value cannot be represented as the JSON type it is emitted as
# Returns:   0 on success; exits 1 before touching $1 on invalid values
#######################################
write_params_json() {
    local out_file=$1
    local generate_patients=${GENERATE_PATIENTS-false}
    local population=${POPULATION-100}
    local gender=${GENDER-0.5}
    local min_age=${MIN_AGE-0}
    local max_age=${MAX_AGE-90}
    local analyze_data=${ANALYZE_DATA-false}
    local report_format=${REPORT_FORMAT-html}
    local force_generate=${FORCE_GENERATE-false}
    local seed=${SEED-}

    local bool_re='^(true|false)$'
    local uint_re='^(0|[1-9][0-9]*)$'
    local int_re='^-?(0|[1-9][0-9]*)$'
    local decimal_re='^(0|[1-9][0-9]*)(\.[0-9]+)?$'

    # These values are written unquoted, so they must be valid JSON literals.
    require_json_value "generate_patients" "$generate_patients" "$bool_re" "true or false"
    require_json_value "analyze_patient_data" "$analyze_data" "$bool_re" "true or false"
    require_json_value "force_generate" "$force_generate" "$bool_re" "true or false"
    require_json_value "--population" "$population" "$uint_re" "a non-negative integer"
    require_json_value "--gender" "$gender" "$decimal_re" "a non-negative decimal number"
    require_json_value "--min-age" "$min_age" "$uint_re" "a non-negative integer"
    require_json_value "--max-age" "$max_age" "$uint_re" "a non-negative integer"

    # The seed entry is optional and only emitted when a seed was given.
    local seed_entry=""
    if [[ -n "$seed" ]]; then
        require_json_value "--seed" "$seed" "$int_re" "an integer"
        seed_entry=$',\n  "seed": '"${seed}"
    fi

    local disease_json report_json
    disease_json=$(json_escape "${DISEASE_NAME-}")
    report_json=$(json_escape "$report_format")

    cat > "$out_file" << EOF
{
  "disease_name": "${disease_json}",
  "modules_dir": "modules",
  "output_dir": "output",
  "generate_patients": ${generate_patients},
  "population": ${population},
  "gender": ${gender},
  "min_age": ${min_age},
  "max_age": ${max_age},
  "analyze_patient_data": ${analyze_data},
  "report_format": "${report_json}",
  "force_generate": ${force_generate},
  "publish_dir": "published_output"${seed_entry}
}
EOF
}

echo "Creating params.json file with pipeline configuration..."
write_params_json params.json

# Display execution details

#######################################
# Print a banner describing the run that is about to start.
# Globals:   DISEASE_NAME, GENERATE_PATIENTS, POPULATION, GENDER, MIN_AGE,
#            MAX_AGE, SEED, FORCE_GENERATE, ANALYZE_DATA (all read)
# Arguments: none
# Outputs:   the banner on stdout; patient settings are listed only when
#            patient generation is enabled, the seed only when one is set
#######################################
print_run_summary() {
    local rule="=================================================="

    echo "$rule"
    echo "Running Synthea Pipeline for: $DISEASE_NAME"
    echo "Generate patients: $GENERATE_PATIENTS"
    if [[ "$GENERATE_PATIENTS" == true ]]; then
        echo "Population: $POPULATION"
        echo "Gender ratio (proportion female): $GENDER"
        echo "Age range: $MIN_AGE-$MAX_AGE"
        if [[ -n "$SEED" ]]; then
            echo "Random seed: $SEED"
        fi
    fi
    echo "Force module generation: $FORCE_GENERATE"
    echo "Analyze patient data: $ANALYZE_DATA"
    echo "$rule"
}

print_run_summary

# Execute the Nextflow command

#######################################
# Tell the user where the pipeline results are expected to be.
# Globals:   GENERATE_PATIENTS, ANALYZE_DATA (read)
#            DISEASE_NAME_NORMALIZED (read, optional)
# Arguments: none
# Outputs:   completion message and result locations on stdout
#######################################
print_completion_summary() {
    # DISEASE_NAME_NORMALIZED is not assigned anywhere in this script. When it
    # is not provided by the environment, print a placeholder rather than a
    # malformed path such as 'published_output//analysis'.
    local results_dir="published_output/${DISEASE_NAME_NORMALIZED:-<normalized-disease-name>}"

    echo ""
    echo "Pipeline execution complete!"
    if [[ "${GENERATE_PATIENTS:-false}" == true ]]; then
        echo "Check the 'output' directory and '${results_dir}' for generated patient data."
        if [[ "${ANALYZE_DATA:-false}" == true ]]; then
            echo "Analysis reports can be found in '${results_dir}/analysis'."
        fi
    fi
    echo "Generated modules can be found in the 'modules' directory and 'published_output/modules'."
    echo ""
}

echo "Starting Nextflow pipeline..."
# NOTE(review): params.json is written by this script but is not passed to
# Nextflow here (e.g. via -params-file). Presumably main.nf or nextflow.config
# loads it on its own; confirm before changing this command.
nextflow run main.nf

print_completion_summary