#!/usr/bin/env nextflow

nextflow.enable.dsl=2

/*
 * Synthea Disease Module Generator Pipeline
 *
 * A Nextflow pipeline to generate and manage Synthea disease modules.
 *
 * Flow:
 *   1. checkAndGetModule - reuse the module JSON shipped in the container, or
 *                          generate one with module_generator.py.
 *   2. generatePatients  - run Synthea with that module and sort the FHIR
 *                          bundles into m/ and f/ by patient gender.
 */

// Load the API key from a local .env file if one exists.
// Lines are parsed as KEY=VALUE; blank lines and '#' comments are skipped, the
// split happens on the FIRST '=' only (so values may contain '='), and one
// layer of surrounding quotes is removed.
// NOTE(review): params.anthropic_api_key is set here but is not referenced by
// any process in this file - confirm the container receives the key some other
// way (e.g. via nextflow.config) before relying on module generation.
def envFile = file('.env')
if (envFile.exists()) {
    envFile.eachLine { line ->
        def entry = line.trim()
        if (!entry || entry.startsWith('#') || !entry.contains('=')) {
            return  // acts as 'continue' inside the eachLine closure
        }
        def parts = entry.split('=', 2)
        def key = parts[0].trim()
        def value = parts[1].trim().replaceAll('^["\']|["\']$', '')
        if (key == 'ANTHROPIC_API_KEY' && value) {
            params.anthropic_api_key = value
        }
    }
}

// Default parameters
params.disease_name = null                         // Disease name to generate patients for
params.output_dir   = "output"                     // Output directory
params.modules_dir  = "src/main/resources/modules" // Directory for module files
params.population   = 100                          // Number of patients to generate
params.gender       = 0.5                          // Decimal representing proportion female (0.0-1.0)
params.min_age      = 0                            // Minimum age of generated patients
params.max_age      = 90                           // Maximum age of generated patients
params.seed         = null                         // Random seed for reproducibility
params.help         = false                        // Show help message

// Show help message
if (params.help) {
    log.info """
    Synthea Patient Generator
    ========================

    Usage:
      nextflow run main.nf --disease_name "Disease Name"

    Required Arguments:
      --disease_name    Disease name to generate patients for

    Optional Arguments:
      --modules_dir     Module directory (default: src/main/resources/modules)
      --output_dir      Output directory (default: output)
      --population      Number of patients (default: 100)
      --gender          Gender ratio - female proportion 0.0-1.0 (default: 0.5)
      --min_age         Minimum age (default: 0)
      --max_age         Maximum age (default: 90)
      --seed            Random seed (default: random)
    """
    exit 0
}

// Validate required parameters
if (!params.disease_name && !params.help) {
    error "Disease name is required. Please specify with --disease_name"
}

/*
 * Check whether a module for the disease already exists inside the container
 * and copy it into the task directory; otherwise generate it.
 *
 * Input:  diseaseName - free-text disease name; lower-cased with spaces
 *                       replaced by underscores to form "<name>.json".
 * Output: module_file - the module JSON, also published to params.modules_dir.
 * Fails (exit 1) when the generator does not produce the expected file.
 */
process checkAndGetModule {
    container 'synthea-module-generator'
    publishDir "${params.modules_dir}", mode: 'copy'

    input:
    val diseaseName

    output:
    path "*.json", emit: module_file

    script:
    def moduleFilename = diseaseName.toLowerCase().replaceAll(' ', '_') + '.json'
    def fullPath = "/app/src/main/resources/modules/${moduleFilename}"
    """
    echo "Looking for module at ${fullPath}"

    if [ -f "${fullPath}" ]; then
        echo "Module exists, copying..."
        cp "${fullPath}" .
    else
        echo "Module not found, generating..."
        python3 /app/module_generator/module_generator.py --disease "${diseaseName}" --output "${moduleFilename}"

        if [ -f "${moduleFilename}" ]; then
            echo "Successfully generated module"
        else
            echo "Error: Failed to generate module"
            exit 1
        fi
    fi
    """
}

/*
 * Run Synthea with the given module and collect the generated FHIR bundles.
 *
 * Inputs:  diseaseName - disease name (used for the module name and the
 *                        publish sub-directory).
 *          moduleFile  - module JSON emitted by checkAndGetModule.
 * Outputs: m/* and f/* - FHIR JSON files grouped by patient gender; both are
 *                        optional because a single-gender run leaves one empty.
 *
 * params.gender is reduced to a single choice: below 0.5 -> males only,
 * above 0.5 -> females only, exactly 0.5 -> no gender restriction.
 */
process generatePatients {
    container 'synthea-module-generator'
    publishDir "${params.output_dir}/${diseaseName.toLowerCase().replaceAll(' ', '_')}", mode: 'copy'

    input:
    val diseaseName
    path moduleFile

    output:
    path "m/*", optional: true
    path "f/*", optional: true

    script:
    def moduleBasename = diseaseName.toLowerCase().replaceAll(' ', '_')
    // Synthea's -g option takes M or F; for a mixed population the flag is
    // omitted entirely rather than passing an unsupported value.
    def genderFlag = params.gender < 0.5 ? "-g M" : (params.gender > 0.5 ? "-g F" : "")
    // Explicit null check so that a user-supplied seed of 0 is honoured
    // (0 is falsy in Groovy and would be discarded by the elvis operator).
    def seedValue = params.seed != null ? params.seed : new Random().nextInt(1000000)
    """
    # Copy module and run Synthea
    cp "${moduleFile}" /app/modules/
    cd /app && ./run_synthea -p ${params.population} ${genderFlag} -m ${moduleBasename} -a ${params.min_age}-${params.max_age} -s ${seedValue}

    # Organize output by gender
    # The pattern tolerates whitespace around ':' so that both compact and
    # pretty-printed JSON are matched.
    mkdir -p m f
    find /app/output/fhir -type f -name "*.json" ! -name "*hospital*" ! -name "*practitioner*" | xargs grep -lE '"gender"[[:space:]]*:[[:space:]]*"male"' | xargs -I{} cp {} m/
    find /app/output/fhir -type f -name "*.json" ! -name "*hospital*" ! -name "*practitioner*" | xargs grep -lE '"gender"[[:space:]]*:[[:space:]]*"female"' | xargs -I{} cp {} f/
    """
}

// Define workflow
workflow {
    // First check if the module exists (generating it when it does not)
    checkAndGetModule(params.disease_name)

    // Then generate patients using the resolved module file
    generatePatients(params.disease_name, checkAndGetModule.out.module_file)
}