Trying to fix basic functionality again.
This commit is contained in:
182
scripts/run_pipeline.sh
Executable file
182
scripts/run_pipeline.sh
Executable file
@@ -0,0 +1,182 @@
|
||||
#!/bin/bash
# run_pipeline.sh
#
# This script runs the Nextflow pipeline for generating disease modules and
# synthetic patients.
#
# Usage: run_pipeline.sh [options] --disease "Disease Name"
# Run with --help for the full list of options.

set -e # Exit on error

# Default values. Each one can be overridden by the command-line option
# named in the trailing comment.
DISEASE_NAME=""         # --disease, -d (required)
FORCE_GENERATE=false    # --force-generate, -f
GENERATE_PATIENTS=false # --patients, -p
POPULATION=100          # --population, -n
GENDER=0.5              # --gender, -g (proportion female, 0-1)
MIN_AGE=0               # --min-age, -a
MAX_AGE=90              # --max-age, -m
SEED=""                 # --seed, -s (empty means no seed is written)
ANALYZE_DATA=false      # --analyze, -A
REPORT_FORMAT="html"    # --report-format, -r
VERIFY_DOCKER=true      # cleared by --skip-docker-check

#######################################
# Print the usage text for this script, then terminate the shell.
# Arguments:
#   $1 - exit status to terminate with (optional, default: 1)
# Outputs:
#   Writes the usage text to stdout.
# Returns:
#   Never returns; always calls exit.
#######################################
function show_help {
  local exit_status="${1:-1}"

  printf '%s\n' \
    "Usage: $0 [options] --disease \"Disease Name\"" \
    "" \
    "Options:" \
    " --disease, -d NAME Disease name to generate a module for (required)" \
    " --force-generate, -f Force regeneration of disease module even if it exists" \
    " --patients, -p Generate synthetic patients (default: false)" \
    " --population, -n NUM Number of patients to generate (default: 100)" \
    " --gender, -g VALUE Gender distribution (0-1 for % female, default: 0.5)" \
    " --min-age, -a NUM Minimum patient age (default: 0)" \
    " --max-age, -m NUM Maximum patient age (default: 90)" \
    " --seed, -s SEED Random seed for reproducibility" \
    " --analyze, -A Analyze patient data after generation" \
    " --report-format, -r FMT Report format for analysis (html, json, csv, default: html)" \
    " --skip-docker-check Skip Docker container verification" \
    " --help, -h Show this help message" \
    "" \
    "Example:" \
    " $0 --disease \"Multiple Sclerosis\" --patients --population 50"

  exit "$exit_status"
}

# Parse arguments

#######################################
# Abort with a usage error when an option that needs a value is the last
# word on the command line (a bare "shift 2" would otherwise fail silently).
# Arguments:
#   The remaining command-line words, option first (pass "$@").
# Outputs:
#   Error message and usage text on stderr.
#######################################
require_option_value() {
  if [[ $# -lt 2 ]]; then
    echo "ERROR: Option $1 requires a value." >&2
    show_help >&2
    exit 1 # safety net in case show_help returns
  fi
}

#######################################
# Translate command-line options into the script's configuration variables.
# Globals (written):
#   DISEASE_NAME, FORCE_GENERATE, GENERATE_PATIENTS, POPULATION, GENDER,
#   MIN_AGE, MAX_AGE, SEED, ANALYZE_DATA, REPORT_FORMAT, VERIFY_DOCKER
# Arguments:
#   The script's command-line words.
# Returns:
#   Exits 0 after printing help for --help/-h; exits non-zero on an unknown
#   option or a missing option value.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --disease | -d)
        require_option_value "$@"
        DISEASE_NAME="$2"
        shift 2
        ;;
      --force-generate | -f)
        FORCE_GENERATE=true
        shift
        ;;
      --patients | -p)
        GENERATE_PATIENTS=true
        shift
        ;;
      --population | -n)
        require_option_value "$@"
        POPULATION="$2"
        shift 2
        ;;
      --gender | -g)
        require_option_value "$@"
        GENDER="$2"
        shift 2
        ;;
      --min-age | -a)
        require_option_value "$@"
        MIN_AGE="$2"
        shift 2
        ;;
      --max-age | -m)
        require_option_value "$@"
        MAX_AGE="$2"
        shift 2
        ;;
      --seed | -s)
        require_option_value "$@"
        SEED="$2"
        shift 2
        ;;
      --analyze | -A)
        ANALYZE_DATA=true
        shift
        ;;
      --report-format | -r)
        require_option_value "$@"
        REPORT_FORMAT="$2"
        shift 2
        ;;
      --skip-docker-check)
        VERIFY_DOCKER=false
        shift
        ;;
      --help | -h)
        # Help was asked for explicitly, so this is a successful run.
        # show_help terminates the shell it runs in, hence the subshell.
        (show_help) || true
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        show_help >&2
        exit 1 # safety net in case show_help returns
        ;;
    esac
  done
}

parse_args "$@"

# Check if disease name is provided
# The disease name has no default; without it the run cannot proceed, so
# report the problem on stderr together with the usage text and stop.
if [[ -z "$DISEASE_NAME" ]]; then
  echo "ERROR: Disease name is required!" >&2
  show_help >&2
  exit 1 # safety net in case show_help returns
fi

# Check if Docker container is running if we need to generate patients

#######################################
# Make sure a Synthea container shows up in "docker ps"; if none does, run
# prepare_environment.sh to start the environment.
# Globals (read):
#   VERIFY_DOCKER, GENERATE_PATIENTS
# Outputs:
#   Progress messages on stdout, errors on stderr.
# Returns:
#   Exits 1 when no container is listed and prepare_environment.sh cannot be
#   found in ./scripts or the current directory.
#######################################
ensure_synthea_container() {
  local container_listing

  # The check only runs when patients are generated and it was not disabled.
  if [[ "$VERIFY_DOCKER" != true || "$GENERATE_PATIENTS" != true ]]; then
    return 0
  fi

  # Capture the listing first so a failing "docker ps" is simply treated as
  # "nothing running" instead of aborting the script under "set -e".
  container_listing=$(docker ps) || container_listing=""

  if grep -q synthea <<<"$container_listing"; then
    echo "Synthea Docker container is running. Proceeding with pipeline execution."
    return 0
  fi

  echo "No Synthea Docker containers are running!"
  echo "Running environment setup script to start containers..."

  # The setup script is looked up relative to the current directory, so the
  # script works when started from the repository root or from scripts/.
  if [[ -f "./scripts/prepare_environment.sh" ]]; then
    ./scripts/prepare_environment.sh
  elif [[ -f "./prepare_environment.sh" ]]; then
    ./prepare_environment.sh
  else
    echo "ERROR: prepare_environment.sh not found!" >&2
    exit 1
  fi
}

ensure_synthea_container

# Create params.json file with our configuration

#######################################
# Escape a string so it can be placed between double quotes in JSON.
# Backslash, double quote, newline, carriage return and tab are escaped;
# any remaining control characters are dropped.
# Arguments:
#   $1 - raw string
# Outputs:
#   Escaped string on stdout (no trailing newline).
#######################################
json_escape() {
  local text="$1"
  text=${text//\\/\\\\} # must run first so later escapes are not doubled
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  text=${text//[[:cntrl:]]/}
  printf '%s' "$text"
}

#######################################
# Report an invalid pipeline parameter on stderr and stop the script.
# Arguments:
#   $* - message
#######################################
params_error() {
  echo "ERROR: $*" >&2
  exit 1
}

#######################################
# Write the pipeline configuration as a JSON object.
# Values that are emitted as bare JSON tokens (numbers, booleans) are
# validated first, because an unchecked value would corrupt the file.
# Globals (read):
#   DISEASE_NAME, GENERATE_PATIENTS, POPULATION, GENDER, MIN_AGE, MAX_AGE,
#   ANALYZE_DATA, REPORT_FORMAT, FORCE_GENERATE, SEED
# Arguments:
#   $1 - output file (optional, default: params.json)
# Returns:
#   Exits 1 when a value is not valid for its JSON type.
#######################################
write_params_json() {
  local out_file="${1:-params.json}"
  # Fallbacks mirror the script's default values so an unset or empty
  # variable never produces an empty JSON token.
  local population="${POPULATION:-100}"
  local gender="${GENDER:-0.5}"
  local min_age="${MIN_AGE:-0}"
  local max_age="${MAX_AGE:-90}"
  local report_format="${REPORT_FORMAT:-html}"
  local seed="${SEED:-}"
  local int_re='^(0|[1-9][0-9]*)$' # JSON forbids leading zeros
  local flag_name flag_value

  [[ "$population" =~ $int_re ]] \
    || params_error "--population must be a non-negative integer, got '$population'"
  [[ "$min_age" =~ $int_re ]] \
    || params_error "--min-age must be a non-negative integer, got '$min_age'"
  [[ "$max_age" =~ $int_re ]] \
    || params_error "--max-age must be a non-negative integer, got '$max_age'"
  ((min_age <= max_age)) \
    || params_error "--min-age ($min_age) must not exceed --max-age ($max_age)"
  [[ "$gender" =~ ^(0(\.[0-9]+)?|1(\.0+)?)$ ]] \
    || params_error "--gender must be a number between 0 and 1, got '$gender'"
  if [[ -n "$seed" ]]; then
    [[ "$seed" =~ ^-?(0|[1-9][0-9]*)$ ]] \
      || params_error "--seed must be an integer, got '$seed'"
  fi
  for flag_name in GENERATE_PATIENTS ANALYZE_DATA FORCE_GENERATE; do
    flag_value="${!flag_name:-false}"
    [[ "$flag_value" == true || "$flag_value" == false ]] \
      || params_error "$flag_name must be true or false, got '$flag_value'"
  done

  echo "Creating params.json file with pipeline configuration..."
  {
    printf '{\n'
    printf '  "disease_name": "%s",\n' "$(json_escape "${DISEASE_NAME:-}")"
    printf '  "modules_dir": "modules",\n'
    printf '  "output_dir": "output",\n'
    printf '  "generate_patients": %s,\n' "${GENERATE_PATIENTS:-false}"
    printf '  "population": %s,\n' "$population"
    printf '  "gender": %s,\n' "$gender"
    printf '  "min_age": %s,\n' "$min_age"
    printf '  "max_age": %s,\n' "$max_age"
    printf '  "analyze_patient_data": %s,\n' "${ANALYZE_DATA:-false}"
    printf '  "report_format": "%s",\n' "$(json_escape "$report_format")"
    printf '  "force_generate": %s,\n' "${FORCE_GENERATE:-false}"
    # Add seed if provided; the comma placement depends on it.
    if [[ -n "$seed" ]]; then
      printf '  "publish_dir": "published_output",\n'
      printf '  "seed": %s\n' "$seed"
    else
      printf '  "publish_dir": "published_output"\n'
    fi
    printf '}\n'
  } >"$out_file"
}

write_params_json

# Display execution details
|
||||
echo "=================================================="
|
||||
echo "Running Synthea Pipeline for: $DISEASE_NAME"
|
||||
echo "Generate patients: $GENERATE_PATIENTS"
|
||||
if [ "$GENERATE_PATIENTS" = true ]; then
|
||||
echo "Population: $POPULATION"
|
||||
echo "Gender ratio (proportion female): $GENDER"
|
||||
echo "Age range: $MIN_AGE-$MAX_AGE"
|
||||
if [ ! -z "$SEED" ]; then
|
||||
echo "Random seed: $SEED"
|
||||
fi
|
||||
fi
|
||||
echo "Force module generation: $FORCE_GENERATE"
|
||||
echo "Analyze patient data: $ANALYZE_DATA"
|
||||
echo "=================================================="
|
||||
|
||||
# Execute the Nextflow command

#######################################
# Launch the Nextflow pipeline with the generated parameter file.
# Arguments:
#   $1 - parameter file handed to Nextflow (optional, default: params.json)
# Outputs:
#   A start message on stdout, followed by Nextflow's own output.
# Returns:
#   The exit status of nextflow.
#######################################
run_nextflow() {
  local params_file="${1:-params.json}"

  echo "Starting Nextflow pipeline..."
  # The configuration written to params.json only takes effect when it is
  # handed to Nextflow explicitly.
  # NOTE(review): assumes main.nf does not load params.json on its own - confirm.
  nextflow run main.nf -params-file "$params_file"
}

run_nextflow

#######################################
# Tell the user where the results of the finished run can be found.
# Globals (read):
#   GENERATE_PATIENTS, ANALYZE_DATA, DISEASE_NAME_NORMALIZED
# Outputs:
#   The summary on stdout.
#######################################
print_completion_summary() {
  # DISEASE_NAME_NORMALIZED is not assigned anywhere in this script; fall
  # back to a placeholder so the printed paths never degrade to
  # 'published_output/' or 'published_output//analysis'.
  local disease_dir="${DISEASE_NAME_NORMALIZED:-<normalized-disease-name>}"

  echo ""
  echo "Pipeline execution complete!"
  if [[ "$GENERATE_PATIENTS" == true ]]; then
    echo "Check the 'output' directory and 'published_output/${disease_dir}' for generated patient data."
    if [[ "$ANALYZE_DATA" == true ]]; then
      echo "Analysis reports can be found in 'published_output/${disease_dir}/analysis'."
    fi
  fi
  echo "Generated modules can be found in the 'modules' directory and 'published_output/modules'."
  echo ""
}

print_completion_summary

Reference in New Issue
Block a user