diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 46218d9..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,47 +0,0 @@ -version: '3.8' - -services: - synthea: - build: - context: . - dockerfile: Dockerfile - volumes: - - ./modules:/app/modules:ro # Mount modules directory read-only - - ./output:/app/output # Mount output directory for patient data - - ./.env:/app/.env:ro # Mount environment variables file - environment: - - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} - working_dir: /app - command: tail -f /dev/null # Keep container running - healthcheck: - test: ["CMD", "/app/healthcheck.sh"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 5s - restart: unless-stopped - ports: - - "8080:8080" # Only needed if you want to access the Synthea web interface - - module-generator: - build: - context: . - dockerfile: Dockerfile - volumes: - - ./modules:/app/modules # Mount modules directory for writing - - ./module_generator:/app/module_generator - - ./src:/app/src - - ./scripts:/app/scripts - - ./.env:/app/.env:ro # Mount environment variables file - environment: - - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} - working_dir: /app - command: python3 /app/module_generator/run_module_generator.py --batch-size 5 --max-modules 10 --prioritize - depends_on: - - synthea - profiles: - - generator # This service won't start by default, only when explicitly requested - -volumes: - synthea-output: - driver: local \ No newline at end of file diff --git a/generate_patients.sh b/generate_patients.sh deleted file mode 100755 index 4eb0372..0000000 --- a/generate_patients.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash - -# Script to generate synthetic patients directly using modules created by the pipeline -# Usage: ./generate_patients.sh - -MODULE_NAME=$1 -OUTPUT_DIR=$2 -POPULATION=${3:-10} - -if [ -z "$MODULE_NAME" ] || [ -z "$OUTPUT_DIR" ]; then - echo "Usage: $0 [population_size]" - echo "Example: $0 diabetes /path/to/output 20" - exit 1 -fi - -# Create output directory -mkdir -p "$OUTPUT_DIR/m" "$OUTPUT_DIR/f" - -# Location of module file -MODULE_PATH="/data/olamide/synthea-alldiseases/modules/${MODULE_NAME}.json" - -if [ ! -f "$MODULE_PATH" ]; then - echo "Module file not found: $MODULE_PATH" - exit 1 -fi - -# Create a temporary directory for the container output -TEMP_DIR=$(mktemp -d) -echo "Created temporary directory: $TEMP_DIR" - -# Run for male patients -echo "Generating male patients..." -docker run --rm \ - -v "$MODULE_PATH:/app/modules/${MODULE_NAME}.json" \ - -v "$TEMP_DIR:/app/output" \ - synthea-module-generator \ - bash -c "cd /app && ./run_synthea -p $((POPULATION/2)) -g M -m ${MODULE_NAME} Massachusetts" - -# Copy male patient files to the output directory -echo "Copying male patient files..." -find "$TEMP_DIR/fhir" -name "*.json" ! -name "*hospital*" ! -name "*practitioner*" | while read file; do - # Check if it's a patient file by looking for gender field - if grep -q '"gender"' "$file"; then - cp "$file" "$OUTPUT_DIR/m/" - fi -done - -# Clear the temp directory -rm -rf "$TEMP_DIR/fhir"/* - -# Run for female patients -echo "Generating female patients..." -docker run --rm \ - -v "$MODULE_PATH:/app/modules/${MODULE_NAME}.json" \ - -v "$TEMP_DIR:/app/output" \ - synthea-module-generator \ - bash -c "cd /app && ./run_synthea -p $((POPULATION/2)) -g F -m ${MODULE_NAME} Massachusetts" - -# Copy female patient files to the output directory -echo "Copying female patient files..." -find "$TEMP_DIR/fhir" -name "*.json" ! -name "*hospital*" ! -name "*practitioner*" | while read file; do - # Check if it's a patient file by looking for gender field - if grep -q '"gender"' "$file"; then - cp "$file" "$OUTPUT_DIR/f/" - fi -done - -# Count the results -male_count=$(find "$OUTPUT_DIR/m" -type f -name "*.json" | wc -l) -female_count=$(find "$OUTPUT_DIR/f" -type f -name "*.json" | wc -l) - -# Report results -echo "Patient generation complete. Results saved to $OUTPUT_DIR" -echo "Male patients: $male_count" -echo "Female patients: $female_count" - -# Clean up temp directory -rm -rf "$TEMP_DIR" diff --git a/main.nf b/main.nf index a3c5033..74162f1 100644 --- a/main.nf +++ b/main.nf @@ -2,7 +2,6 @@ nextflow.enable.dsl=2 -// Default parameters params.disease_name = null params.outdir = null params.population = 10 @@ -11,7 +10,6 @@ params.min_age = 0 params.max_age = 90 params.seed = null -// Validate required parameters if (!params.disease_name) { error "Disease name is required. Please specify with --disease_name" } @@ -20,7 +18,6 @@ if (!params.outdir) { error "Output directory is required. Please specify with --outdir" } -// Process to generate synthetic patients process generatePatients { container 'harbor.cluster.omic.ai/omic/synthea-alldiseases:v3' publishDir params.outdir, mode: 'copy' @@ -33,14 +30,13 @@ process generatePatients { path "run.log", emit: log_file script: - def moduleBasename = diseaseName.toLowerCase().replaceAll(' ', '_') def genderArg = params.gender < 0.5 ? "-g M" : (params.gender > 0.5 ? "-g F" : "") def seedArg = params.seed ? "-s ${params.seed}" : "" """ set +e WORKDIR=\$(pwd) - # Use pre-built jar directly (bypasses Gradle which needs write access to .gradle) + # Run Synthea via pre-built jar (Gradle is not writable in K8s) cd /app java -jar /app/build/libs/synthea-with-dependencies.jar \ -p ${params.population} \ @@ -49,18 +45,13 @@ process generatePatients { ${seedArg} 2>&1 | tee \${WORKDIR}/run.log JAVA_EXIT=\${PIPESTATUS[0]} - # Collect FHIR output back into Nextflow work dir cd \${WORKDIR} mkdir -p fhir if [ -d /app/output/fhir ]; then cp /app/output/fhir/*.json fhir/ 2>/dev/null || true - FHIR_COUNT=\$(ls fhir/*.json 2>/dev/null | wc -l) - echo "Copied \${FHIR_COUNT} FHIR bundles" | tee -a run.log - else - echo "Warning: No FHIR output directory found" | tee -a run.log fi - # Exit 0 if we got FHIR output, regardless of java exit code + # Succeed if FHIR output was produced if [ -n "\$(ls fhir/*.json 2>/dev/null)" ]; then exit 0 else @@ -69,7 +60,6 @@ process generatePatients { """ } -// Workflow workflow { generatePatients(params.disease_name) } diff --git a/simple.nf b/simple.nf deleted file mode 100644 index 82a8d94..0000000 --- a/simple.nf +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl=2 - -// Default parameters -params.disease_name = "Diabetes" // Default disease name -params.output_dir = "/mnt/OmicNAS/private/old/olamide/synthea/output/new" // Output directory -params.modules_dir = "modules" // Directory for module files - -// Process to generate synthetic patients -process generatePatients { - publishDir "${params.output_dir}/${params.disease_name.toLowerCase().replaceAll(' ', '_')}", mode: 'copy' - - input: - path moduleFile - - output: - path "**" - - script: - """ - echo "Module file: ${moduleFile}" - echo "Disease: ${params.disease_name}" - - # Check if Docker is available - if command -v docker &>/dev/null; then - echo "Docker is available, looking for Synthea container..." - - # Find the Synthea container - container_id=\$(docker ps --format '{{.ID}}' --filter "name=synthea" | head -1) - - if [ -n "\$container_id" ]; then - echo "Using Synthea container \$container_id" - - # Copy module to container - docker exec \$container_id mkdir -p /app/modules - docker cp "${moduleFile}" \$container_id:/app/modules/ - - # Run Synthea with minimal parameters - docker exec \$container_id bash -c "cd /app && ./run_synthea -p 1 -m ${params.disease_name.toLowerCase().replaceAll(' ', '_')}" - - # Copy output from container - docker cp \$container_id:/app/output/fhir ./ || mkdir -p ./fhir - docker cp \$container_id:/app/output/metadata ./ || mkdir -p ./metadata - - echo "Completed patient generation" - else - echo "No Synthea container found, creating mock output for testing" - mkdir -p ./fhir ./metadata - echo "Mock FHIR data for ${params.disease_name}" > ./fhir/mock_patient.json - echo "Mock metadata for ${params.disease_name}" > ./metadata/mock_stats.json - fi - else - echo "Docker not available, creating mock output for testing" - mkdir -p ./fhir ./metadata - echo "Mock FHIR data for ${params.disease_name}" > ./fhir/mock_patient.json - echo "Mock metadata for ${params.disease_name}" > ./metadata/mock_stats.json - fi - """ -} - -// Define workflow -workflow { - // Prepare module file - moduleFilename = params.disease_name.toLowerCase().replaceAll(' ', '_') + '.json' - moduleFile = file("${params.modules_dir}/${moduleFilename}") - - if (!moduleFile.exists()) { - error "Module file not found: ${moduleFile}" - } - - // Create a channel with the module file - moduleChannel = Channel.fromPath(moduleFile) - - // Generate patients - generatePatients(moduleChannel) -} diff --git a/test.nf b/test.nf deleted file mode 100644 index dfa0df5..0000000 --- a/test.nf +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl=2 - -/* - * Synthea Disease Module Generator Pipeline - * - * A Nextflow pipeline to generate and manage Synthea disease modules - */ - -// Load API key from .env file if it exists -def envFile = file('.env') -if (envFile.exists()) { - envFile.eachLine { line -> - def (key, value) = line.tokenize('=') - if (key && value && key.trim() == 'ANTHROPIC_API_KEY') { - params.anthropic_api_key = value.trim() - } - } -} - -// Default parameters -params.disease_name = null // Disease name to generate patients for -params.output_dir = "/mnt/OmicNAS/private/old/olamide/synthea/output/new" // Output directory -params.modules_dir = "src/main/resources/modules" // Directory for module files -params.population = 100 // Number of patients to generate -params.gender = 0.5 // Decimal representing proportion female (0.0-1.0) -params.min_age = 0 // Minimum age of generated patients -params.max_age = 90 // Maximum age of generated patients -params.seed = null // Random seed for reproducibility -params.help = false // Show help message - -// Show help message -if (params.help) { - log.info """ - Synthea Patient Generator - ======================== - Usage: nextflow run main.nf --disease_name "Disease Name" - - Required Arguments: - --disease_name Disease name to generate patients for - - Optional Arguments: - --modules_dir Module directory (default: modules) - --output_dir Output directory (default: output) - --population Number of patients (default: 100) - --gender Gender ratio - female proportion 0.0-1.0 (default: 0.5) - --min_age Minimum age (default: 0) - --max_age Maximum age (default: 90) - --seed Random seed (default: random) - """ - exit 0 -} - -// Validate required parameters -if (!params.disease_name && !params.help) { - error "Disease name is required. Please specify with --disease_name" -} - -// Process to check if module exists and generate it if needed -process checkAndGetModule { - container 'synthea-module-generator' - publishDir "${params.modules_dir}", mode: 'copy' - - input: - val diseaseName - - output: - path "*.json", emit: module_file - - script: - // Use sanitized disease name for filenames - replace spaces with underscores and remove special chars - def moduleFilename = diseaseName.toLowerCase().replaceAll(' ', '_').replaceAll('[^a-z0-9_]', '') + '.json' - def fullPath = "/app/src/main/resources/modules/${moduleFilename}" - """ - echo "Looking for module at ${fullPath}" - if [ -f "${fullPath}" ]; then - echo "Module exists, copying..." - cp "${fullPath}" . - else - echo "Module not found, generating..." - # Use the simple generator script instead - python3 /app/module_generator/simple_module_generator.py --disease "${diseaseName}" --output "${moduleFilename}" - if [ -f "${moduleFilename}" ]; then - echo "Successfully generated module" - else - echo "Error: Failed to generate module" - exit 1 - fi - fi - """ -} - -// Process to generate synthetic patients -process generatePatients { - container 'synthea-module-generator' - publishDir "${params.output_dir}/${diseaseName.toLowerCase().replaceAll(' ', '_').replaceAll('[^a-z0-9_]', '')}", mode: 'copy', failOnError: false - - input: - val diseaseName - path moduleFile - - output: - path "m", optional: true - path "f", optional: true - path "module.json", optional: true - path "README.txt", optional: true - - script: - def moduleBasename = diseaseName.toLowerCase().replaceAll(' ', '_').replaceAll('[^a-z0-9_]', '') - """ - # Create directories - mkdir -p m f - - # Copy the module file for reference - cp "${moduleFile}" module.json - - # Create a README file with instructions - cat > README.txt << EOF -This directory contains the module for ${diseaseName}. -To generate patients, run: -./generate_patients.sh ${moduleBasename} ${params.output_dir}/${moduleBasename}/patients 20 -EOF - - # Create marker files - touch m/.keep - touch f/.keep - - # Always exit successfully - exit 0 - """ -} - -// Define workflow -workflow { - // First check if the module exists - checkAndGetModule(params.disease_name) - - // Then generate patients - generatePatients(params.disease_name, checkAndGetModule.out.module_file) -} diff --git a/test_synthea.sh b/test_synthea.sh deleted file mode 100755 index fde8b3d..0000000 --- a/test_synthea.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# Set up environment and variables -MODULE_NAME="diabetes" -JSON_PATH="$(pwd)/modules/${MODULE_NAME}.json" - -# Make sure we have the module file -if [ ! -f "$JSON_PATH" ]; then - echo "Module file not found: $JSON_PATH" - exit 1 -fi - -# Run Synthea directly in a container -docker run --rm -v "${JSON_PATH}:/app/modules/${MODULE_NAME}.json" \ - -v "$(pwd)/test_output:/app/output" \ - synthea-module-generator \ - bash -c "cd /app && ./run_synthea -p 10 -g B -m ${MODULE_NAME} -a 0-90 -s 12345 | tee /app/output/synthea_run.log" - -# Check the output -echo "Checking output directory:" -find test_output -type f | sort diff --git a/trace.txt b/trace.txt deleted file mode 100644 index fc0c124..0000000 --- a/trace.txt +++ /dev/null @@ -1,2 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar -1 48/6b3902 74409 checkAndGetModule FAILED 1 2025-03-23 11:46:23.178 1.1s 995ms - - - - -