#!/usr/bin/env nextflow

nextflow.enable.dsl=2

/*
 * Synthea Disease Module Generator Pipeline
 *
 * A Nextflow pipeline to generate and manage Synthea disease modules.
 *
 * Stages:
 *   1. checkAndGetModule - reuse the module JSON bundled in the container if
 *                          present, otherwise generate it with the Python
 *                          generator script.
 *   2. generatePatients  - stage the module plus a README and placeholder
 *                          m/ and f/ directories in the per-disease output
 *                          directory.
 */

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/*
 * Turn a disease name into a filesystem-safe identifier: lower-case, spaces
 * replaced by underscores, every character outside [a-z0-9_] removed.
 * The argument is stringified first so non-String parameter values (for
 * example a purely numeric --disease_name) do not fail on toLowerCase().
 */
def sanitizeDiseaseName(name) {
    return name.toString().toLowerCase().replaceAll(' ', '_').replaceAll('[^a-z0-9_]', '')
}

/*
 * Quote a value as a single bash word. Wrapping in single quotes disables all
 * shell expansion; embedded single quotes are emitted as '\'' (close quote,
 * escaped quote, reopen quote).
 */
def shellQuote(value) {
    return "'" + value.toString().replace("'", "'\\''") + "'"
}

// ---------------------------------------------------------------------------
// Load API key from .env file if it exists
// ---------------------------------------------------------------------------
// NOTE(review): params.anthropic_api_key is not referenced anywhere else in
// this script - confirm whether the generator process is expected to receive it.
def envFile = file('.env')
if (envFile.exists()) {
    envFile.eachLine { rawLine ->
        def line = rawLine.trim()
        // Skip blanks, comments and lines that are not KEY=VALUE pairs
        if (!line || line.startsWith('#') || !line.contains('=')) {
            return
        }
        // Split on the first '=' only so values that contain '=' stay intact
        def parts = line.split('=', 2)
        def key = parts[0].trim()
        def value = parts[1].trim()
        // Drop one pair of matching surrounding quotes, as .env files commonly use
        def doubleQuoted = value.startsWith('"') && value.endsWith('"')
        def singleQuoted = value.startsWith("'") && value.endsWith("'")
        if (value.length() >= 2 && (doubleQuoted || singleQuoted)) {
            value = value.substring(1, value.length() - 1)
        }
        if (key == 'ANTHROPIC_API_KEY' && value) {
            params.anthropic_api_key = value
        }
    }
}

// ---------------------------------------------------------------------------
// Default parameters
// ---------------------------------------------------------------------------
// NOTE(review): population, gender, min_age, max_age and seed are declared
// here and listed in the help text, but no process in this script reads them.
params.disease_name = null                          // Disease name to generate patients for
params.output_dir   = "/mnt/OmicNAS/private/old/olamide/synthea/output/new" // Output directory
params.modules_dir  = "src/main/resources/modules"  // Directory for module files
params.population   = 100                           // Number of patients to generate
params.gender       = 0.5                           // Decimal representing proportion female (0.0-1.0)
params.min_age      = 0                             // Minimum age of generated patients
params.max_age      = 90                            // Maximum age of generated patients
params.seed         = null                          // Random seed for reproducibility
params.help         = false                         // Show help message

// Show help message. Defaults are interpolated from the params above so the
// help text cannot drift from the values actually in effect.
if (params.help) {
    log.info """
    Synthea Patient Generator
    ========================

    Usage:
      nextflow run main.nf --disease_name "Disease Name"

    Required Arguments:
      --disease_name    Disease name to generate patients for

    Optional Arguments:
      --modules_dir     Module directory (default: ${params.modules_dir})
      --output_dir      Output directory (default: ${params.output_dir})
      --population      Number of patients (default: ${params.population})
      --gender          Gender ratio - female proportion 0.0-1.0 (default: ${params.gender})
      --min_age         Minimum age (default: ${params.min_age})
      --max_age         Maximum age (default: ${params.max_age})
      --seed            Random seed (default: ${params.seed ?: 'random'})
    """
    exit 0
}

// Validate required parameters. A Boolean value means the flag was given
// without a name, which is treated the same as a missing name.
if (!params.disease_name || params.disease_name instanceof Boolean) {
    error "Disease name is required. Please specify with --disease_name"
}

// A name made only of characters that sanitisation strips would yield a
// module file called ".json" and an output directory with an empty name.
if (!sanitizeDiseaseName(params.disease_name)) {
    error "Disease name '${params.disease_name}' contains no usable characters (a-z, 0-9, space, underscore)"
}

/*
 * Process to check if module exists and generate it if needed.
 *
 * Input:  diseaseName - disease name as supplied by the user
 * Output: module_file - the module JSON, published to params.modules_dir
 *
 * The task exits with status 1 when the generator does not produce the
 * expected file.
 */
process checkAndGetModule {
    container 'synthea-module-generator'
    publishDir "${params.modules_dir}", mode: 'copy'

    input:
    val diseaseName

    output:
    path "*.json", emit: module_file

    script:
    // Sanitised name is limited to [a-z0-9_], so it is safe inside double quotes
    def moduleFilename = sanitizeDiseaseName(diseaseName) + '.json'
    def fullPath = "/app/src/main/resources/modules/${moduleFilename}"
    // The raw name is user-supplied free text: quote it so bash treats it as one literal word
    def quotedDisease = shellQuote(diseaseName)
    """
    echo "Looking for module at ${fullPath}"
    if [ -f "${fullPath}" ]; then
        echo "Module exists, copying..."
        cp "${fullPath}" .
    else
        echo "Module not found, generating..."
        # Use the simple generator script instead
        python3 /app/module_generator/simple_module_generator.py --disease ${quotedDisease} --output "${moduleFilename}"
        if [ -f "${moduleFilename}" ]; then
            echo "Successfully generated module"
        else
            echo "Error: Failed to generate module"
            exit 1
        fi
    fi
    """
}

/*
 * Process to generate synthetic patients.
 *
 * As written, this stages the module as module.json, writes a README with the
 * command to run for patient generation, and creates placeholder m/ and f/
 * directories; it does not invoke a patient generator itself.
 *
 * Inputs:  diseaseName - disease name as supplied by the user
 *          moduleFile  - module JSON emitted by checkAndGetModule
 * Outputs: m/, f/, module.json, README.txt (all optional), published under
 *          params.output_dir/<sanitised disease name>
 */
process generatePatients {
    container 'synthea-module-generator'
    publishDir "${params.output_dir}/${sanitizeDiseaseName(diseaseName)}", mode: 'copy', failOnError: false

    input:
    val diseaseName
    path moduleFile

    output:
    path "m", optional: true
    path "f", optional: true
    path "module.json", optional: true
    path "README.txt", optional: true

    script:
    def moduleBasename = sanitizeDiseaseName(diseaseName)
    // The heredoc delimiter is quoted so bash copies the README text verbatim;
    // all substitution is done by Groovy before the script runs, so characters
    // such as \$ or backticks in the disease name are never expanded by the shell.
    """
    # Create directories
    mkdir -p m f

    # Copy the module file for reference
    cp "${moduleFile}" module.json

    # Create a README file with instructions
    cat > README.txt << 'EOF'
    This directory contains the module for ${diseaseName}.
    To generate patients, run:
    ./generate_patients.sh ${moduleBasename} ${params.output_dir}/${moduleBasename}/patients 20
    EOF

    # Create marker files
    touch m/.keep
    touch f/.keep

    # Always exit successfully
    exit 0
    """
}

// Define workflow
workflow {
    // First check if the module exists
    checkAndGetModule(params.disease_name)

    // Then generate patients
    generatePatients(params.disease_name, checkAndGetModule.out.module_file)
}