Configure synthea-alldiseases for WES execution
- Rewrite params.json to match WES tool registry format
- Update main.nf to use Harbor container image
- Add k8s profile to nextflow.config for WES/Kubernetes execution
- Use s3://omic/eureka paths for output
This commit is contained in:
146
main.nf
146
main.nf
@@ -2,127 +2,69 @@
|
||||
|
||||
nextflow.enable.dsl=2
|
||||
|
||||
/*
|
||||
* Synthea Disease Module Generator Pipeline
|
||||
*
|
||||
* A Nextflow pipeline to generate and manage Synthea disease modules
|
||||
*/
|
||||
|
||||
// Load API key from .env file if it exists
|
||||
def envFile = file('.env')
|
||||
if (envFile.exists()) {
|
||||
envFile.eachLine { line ->
|
||||
def (key, value) = line.tokenize('=')
|
||||
if (key && value && key.trim() == 'ANTHROPIC_API_KEY') {
|
||||
params.anthropic_api_key = value.trim()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Default parameters
|
||||
params.disease_name = null // Disease name to generate patients for
|
||||
params.output_dir = "/mnt/OmicNAS/private/old/olamide/synthea/output/new" // Output directory
|
||||
params.modules_dir = "src/main/resources/modules" // Directory for module files
|
||||
params.population = 100 // Number of patients to generate
|
||||
params.gender = 0.5 // Decimal representing proportion female (0.0-1.0)
|
||||
params.min_age = 0 // Minimum age of generated patients
|
||||
params.max_age = 90 // Maximum age of generated patients
|
||||
params.seed = null // Random seed for reproducibility
|
||||
params.help = false // Show help message
|
||||
|
||||
// Show help message
|
||||
if (params.help) {
|
||||
log.info """
|
||||
Synthea Patient Generator
|
||||
========================
|
||||
Usage: nextflow run main.nf --disease_name "Disease Name"
|
||||
|
||||
Required Arguments:
|
||||
--disease_name Disease name to generate patients for
|
||||
|
||||
Optional Arguments:
|
||||
--modules_dir Module directory (default: modules)
|
||||
--output_dir Output directory (default: output)
|
||||
--population Number of patients (default: 100)
|
||||
--gender Gender ratio - female proportion 0.0-1.0 (default: 0.5)
|
||||
--min_age Minimum age (default: 0)
|
||||
--max_age Maximum age (default: 90)
|
||||
--seed Random seed (default: random)
|
||||
"""
|
||||
exit 0
|
||||
}
|
||||
params.disease_name = null
|
||||
params.outdir = null
|
||||
params.population = 10
|
||||
params.gender = 0.5
|
||||
params.min_age = 0
|
||||
params.max_age = 90
|
||||
params.seed = null
|
||||
|
||||
// Validate required parameters
|
||||
if (!params.disease_name && !params.help) {
|
||||
if (!params.disease_name) {
|
||||
error "Disease name is required. Please specify with --disease_name"
|
||||
}
|
||||
|
||||
// Process to check if module exists and generate it if needed
|
||||
process checkAndGetModule {
|
||||
container 'synthea-module-generator'
|
||||
publishDir "${params.modules_dir}", mode: 'copy'
|
||||
|
||||
input:
|
||||
val diseaseName
|
||||
|
||||
output:
|
||||
path "*.json", emit: module_file
|
||||
|
||||
script:
|
||||
def moduleFilename = diseaseName.toLowerCase().replaceAll(' ', '_') + '.json'
|
||||
def fullPath = "/app/src/main/resources/modules/${moduleFilename}"
|
||||
"""
|
||||
echo "Looking for module at ${fullPath}"
|
||||
if [ -f "${fullPath}" ]; then
|
||||
echo "Module exists, copying..."
|
||||
cp "${fullPath}" .
|
||||
else
|
||||
echo "Module not found, generating..."
|
||||
python3 /app/module_generator/module_generator.py --disease "${diseaseName}" --output "${moduleFilename}"
|
||||
if [ -f "${moduleFilename}" ]; then
|
||||
echo "Successfully generated module"
|
||||
else
|
||||
echo "Error: Failed to generate module"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
"""
|
||||
if (!params.outdir) {
|
||||
error "Output directory is required. Please specify with --outdir"
|
||||
}
|
||||
|
||||
// Process to generate synthetic patients
|
||||
process generatePatients {
|
||||
container 'synthea-module-generator'
|
||||
publishDir "${params.output_dir}/${diseaseName.toLowerCase().replaceAll(' ', '_')}", mode: 'copy'
|
||||
|
||||
container 'harbor.cluster.omic.ai/omic/synthea-alldiseases:latest'
|
||||
publishDir params.outdir, mode: 'copy'
|
||||
|
||||
input:
|
||||
val diseaseName
|
||||
path moduleFile
|
||||
|
||||
|
||||
output:
|
||||
path "m/*", optional: true
|
||||
path "f/*", optional: true
|
||||
|
||||
path "fhir/*.json", optional: true, emit: fhir_output
|
||||
path "run.log", emit: log_file
|
||||
|
||||
script:
|
||||
def moduleBasename = diseaseName.toLowerCase().replaceAll(' ', '_')
|
||||
def genderArg = params.gender < 0.5 ? "M" : (params.gender > 0.5 ? "F" : "B")
|
||||
def seedValue = params.seed ?: new Random().nextInt(1000000)
|
||||
def seedArg = params.seed ? "-s ${params.seed}" : ""
|
||||
"""
|
||||
# Copy module and run Synthea
|
||||
cp "${moduleFile}" /app/modules/
|
||||
cd /app && ./run_synthea -p ${params.population} -g ${genderArg} -m ${moduleBasename} -a ${params.min_age}-${params.max_age} -s ${seedValue}
|
||||
|
||||
# Organize output by gender
|
||||
mkdir -p m f
|
||||
find /app/output/fhir -type f -name "*.json" ! -name "*hospital*" ! -name "*practitioner*" | xargs grep -l '"gender":"male"' | xargs -I{} cp {} m/
|
||||
find /app/output/fhir -type f -name "*.json" ! -name "*hospital*" ! -name "*practitioner*" | xargs grep -l '"gender":"female"' | xargs -I{} cp {} f/
|
||||
# Check if a custom module exists, otherwise use built-in Synthea modules
|
||||
MODULE_FILE="/app/src/main/resources/modules/${moduleBasename}.json"
|
||||
if [ -f "\${MODULE_FILE}" ]; then
|
||||
echo "Found custom module: \${MODULE_FILE}" | tee run.log
|
||||
else
|
||||
echo "Using built-in Synthea modules for: ${diseaseName}" | tee run.log
|
||||
fi
|
||||
|
||||
# Run Synthea patient generation
|
||||
cd /app && ./run_synthea \
|
||||
-p ${params.population} \
|
||||
-g ${genderArg} \
|
||||
-a ${params.min_age}-${params.max_age} \
|
||||
${seedArg} \
|
||||
-- ${diseaseName} 2>&1 | tee -a run.log
|
||||
|
||||
# Collect FHIR output
|
||||
mkdir -p fhir
|
||||
if [ -d /app/output/fhir ]; then
|
||||
cp /app/output/fhir/*.json fhir/ 2>/dev/null || true
|
||||
echo "Copied \$(ls fhir/*.json 2>/dev/null | wc -l) FHIR bundles" | tee -a run.log
|
||||
else
|
||||
echo "Warning: No FHIR output generated" | tee -a run.log
|
||||
fi
|
||||
"""
|
||||
}
|
||||
|
||||
// Define workflow
|
||||
// Workflow
|
||||
workflow {
|
||||
// First check if the module exists
|
||||
checkAndGetModule(params.disease_name)
|
||||
|
||||
// Then generate patients
|
||||
generatePatients(params.disease_name, checkAndGetModule.out.module_file)
|
||||
generatePatients(params.disease_name)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user