#!/usr/bin/env nextflow

// Use the DSL2 syntax (process definitions plus an explicit workflow block).
nextflow.enable.dsl=2

/*
 * Synthea Disease Module Generator Pipeline
 *
 * A Nextflow pipeline to generate and manage Synthea disease modules
 */

// Load API key from .env file if it exists
//
// Only the ANTHROPIC_API_KEY entry is read; every other line is ignored.
// The value is stored in params.anthropic_api_key.
def envFile = file('.env')
if (envFile.exists()) {
    envFile.eachLine { line ->
        def entry = line.trim()

        // Skip blank lines, comments and anything that is not KEY=VALUE
        if (!entry || entry.startsWith('#') || !entry.contains('=')) {
            return
        }

        // Split on the FIRST '=' only, so a value that itself contains '='
        // is kept intact instead of being truncated.
        def (key, value) = entry.split('=', 2).collect { it.trim() }

        // Accept the common .env form KEY="value" / KEY='value'
        value = value.replaceAll(/^(["'])(.*)\1$/, '$2')

        if (key == 'ANTHROPIC_API_KEY' && value) {
            params.anthropic_api_key = value
        }
    }
}

/*
 * Pipeline parameter defaults.
 */

// Disease name to generate patients for (no default)
params.disease_name = null

// Where results are published, and where module files are published
params.output_dir = "/mnt/OmicNAS/private/old/olamide/synthea/output/new"
params.modules_dir = "src/main/resources/modules"

// Cohort settings: number of patients, proportion female (0.0-1.0),
// and the minimum / maximum age of generated patients
params.population = 100
params.gender = 0.5
params.min_age = 0
params.max_age = 90

// Random seed for reproducibility (null = none given)
params.seed = null

// Set to true to print the help message
params.help = false

// Show help message
//
// Prints the usage text and exits with status 0 when --help is given.
// The defaults shown are read from `params`, so the text cannot drift from
// the values actually assigned in this script. If an option is also passed
// on the command line together with --help, the value shown is that override.
if (params.help) {
    log.info """
        Synthea Patient Generator
        ========================
        Usage: nextflow run main.nf --disease_name "Disease Name"

        Required Arguments:
          --disease_name    Disease name to generate patients for

        Optional Arguments:
          --modules_dir     Module directory (default: ${params.modules_dir})
          --output_dir      Output directory (default: ${params.output_dir})
          --population      Number of patients (default: ${params.population})
          --gender          Gender ratio - female proportion 0.0-1.0 (default: ${params.gender})
          --min_age         Minimum age (default: ${params.min_age})
          --max_age         Maximum age (default: ${params.max_age})
          --seed            Random seed (default: random)
        """.stripIndent()
    exit 0
}

// Abort unless a disease name was supplied (or help was requested).
if (!(params.disease_name || params.help)) {
    error "Disease name is required. Please specify with --disease_name"
}

// Process to check if module exists and generate it if needed
//
// Input:
//   diseaseName  - free-text disease name (value input)
// Output:
//   module_file  - the module JSON file: copied from the container's modules
//                  directory when present there, otherwise produced by
//                  simple_module_generator.py. Published to params.modules_dir.
// Fails when:
//   - the disease name contains no characters usable in a filename
//   - the generator does not produce the expected file
process checkAndGetModule {
    container 'synthea-module-generator'
    publishDir "${params.modules_dir}", mode: 'copy'

    input:
    val diseaseName

    output:
    path "*.json", emit: module_file

    script:
    // toString() guards against a non-string value (e.g. a purely numeric
    // --disease_name), which would have no toLowerCase().
    def rawName = diseaseName.toString()

    // Use sanitized disease name for filenames - replace spaces with underscores and remove special chars
    def moduleBasename = rawName.toLowerCase().replaceAll(' ', '_').replaceAll('[^a-z0-9_]', '')
    if (!moduleBasename) {
        // An empty basename would produce the hidden file ".json"
        throw new IllegalArgumentException(
            "Disease name '${rawName}' contains no characters usable in a module filename (a-z, 0-9, _)")
    }
    def moduleFilename = moduleBasename + '.json'
    def fullPath = "/app/src/main/resources/modules/${moduleFilename}"

    // Single-quote the raw name for the shell so that quotes, \$, backticks
    // and backslashes in it are passed to the generator literally.
    def quotedDiseaseName = "'" + rawName.replace("'", "'\\''") + "'"
    """
    echo "Looking for module at ${fullPath}"
    if [ -f "${fullPath}" ]; then
        echo "Module exists, copying..."
        cp "${fullPath}" .
    else
        echo "Module not found, generating..."
        # Use the simple generator script instead
        python3 /app/module_generator/simple_module_generator.py --disease ${quotedDiseaseName} --output "${moduleFilename}"
        if [ -f "${moduleFilename}" ]; then
            echo "Successfully generated module"
        else
            echo "Error: Failed to generate module" >&2
            exit 1
        fi
    fi
    """
}

// Process to generate synthetic patients
//
// As written, this process does not run Synthea itself. It stages:
//   - module.json : a copy of the input module file
//   - README.txt  : the command to run to generate patients
//   - m/ and f/   : directories containing only a .keep marker file
// Everything is published under <params.output_dir>/<sanitized disease name>.
//
// Input:
//   diseaseName - free-text disease name (value input)
//   moduleFile  - module JSON file for that disease
//
// NOTE(review): the README command ends in a hard-coded 20 while
// params.population defaults to 100 - presumably a patient count; confirm
// against generate_patients.sh before wiring the parameter in.
process generatePatients {
    container 'synthea-module-generator'
    publishDir "${params.output_dir}/${diseaseName.toString().toLowerCase().replaceAll(' ', '_').replaceAll('[^a-z0-9_]', '')}", mode: 'copy', failOnError: false

    input:
    val diseaseName
    path moduleFile

    output:
    path "m", optional: true
    path "f", optional: true
    path "module.json", optional: true
    path "README.txt", optional: true

    script:
    // Same sanitization as used for the module filename; toString() guards
    // against a non-string (e.g. numeric) disease name.
    def moduleBasename = diseaseName.toString().toLowerCase().replaceAll(' ', '_').replaceAll('[^a-z0-9_]', '')
    """
    # Create directories
    mkdir -p m f

    # Copy the module file for reference
    cp "${moduleFile}" module.json

    # Create a README file with instructions.
    # The delimiter is quoted so the shell writes the text literally and does
    # not expand \$ or backticks that may appear in the name or path.
    cat > README.txt << 'EOF'
    This directory contains the module for ${diseaseName}.
    To generate patients, run:
    ./generate_patients.sh ${moduleBasename} ${params.output_dir}/${moduleBasename}/patients 20
    EOF

    # Create marker files
    touch m/.keep
    touch f/.keep

    # Always exit successfully
    exit 0
    """
}

// Entry workflow: obtain the disease module, then stage the patient output.
workflow {
    def disease = params.disease_name

    // Step 1 - look up the module, generating it when it does not exist
    checkAndGetModule(disease)

    // Step 2 - pass the module file on to the patient generation step
    def moduleJson = checkAndGetModule.out.module_file
    generatePatients(disease, moduleJson)
}