#!/usr/bin/env python3
"""
Synthea Module Condition Structure Validator

This script scans all existing Synthea disease modules to check if any violate
the expected condition structure format, particularly looking for nested
condition_type objects that can cause errors during simulation.

Usage:
    python check_condition_structure.py [--modules_dir DIRECTORY] [--verbose] [--fix]

Arguments:
    --modules_dir DIRECTORY  Path to the modules directory
                             (default: src/main/resources/modules)
    --verbose                Enable verbose output with detailed issue descriptions
    --fix                    Attempt to automatically fix simple structure issues
                             (experimental; rewrites the module files in place)

Example:
    python check_condition_structure.py --modules_dir ../modules --verbose
"""

import os
import sys
import json
import glob
import argparse
import logging
from typing import Dict, List, Any, Optional, Tuple

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

# Per-file outcomes reported by _process_module to main().
_STATUS_VALID = "valid"
_STATUS_INVALID = "invalid"
_STATUS_FIXED = "fixed"


def _parse_module(module_json: Any) -> Any:
    """Return the module as Python objects, decoding raw JSON text first."""
    if isinstance(module_json, (str, bytes, bytearray)):
        return json.loads(module_json)
    return module_json


def _has_nested_condition_type(obj: Dict[Any, Any]) -> bool:
    """Tell whether obj["condition"]["condition_type"] is a dict (the bad shape)."""
    condition = obj.get("condition")
    return (
        isinstance(condition, dict)
        and isinstance(condition.get("condition_type"), dict)
    )


def _collect_condition_issues(obj: Any, path: Tuple[str, ...] = ()) -> List[str]:
    """Walk obj depth-first and describe every nested condition_type found.

    path holds the keys / "[index]" markers leading to obj; it is a tuple so
    that no mutable default is shared between calls.
    """
    issues: List[str] = []
    if isinstance(obj, dict):
        if _has_nested_condition_type(obj):
            issue_path = '.'.join(path + ("condition", "condition_type"))
            issues.append(
                f"Found nested condition_type in a condition object at path: {issue_path}"
            )
        for key, value in obj.items():
            # str() keeps the join above safe for already-parsed dicts whose
            # keys are not strings.
            issues.extend(_collect_condition_issues(value, path + (str(key),)))
    elif isinstance(obj, list):
        for index, item in enumerate(obj):
            issues.extend(_collect_condition_issues(item, path + (f"[{index}]",)))
    return issues


def validate_condition_format(module_json: Any) -> Tuple[bool, List[str]]:
    """Validate that conditions in the module follow Synthea's expected format.

    Args:
        module_json: Either an already-parsed module (dict/list) or raw JSON
            text (str, bytes or bytearray), which is decoded first.

    Returns:
        A ``(valid, issues)`` tuple. ``valid`` is True when no "condition"
        object contains a dict-valued "condition_type"; ``issues`` lists one
        message per offending location. Any error raised while parsing or
        walking the module is reported as a single "Validation error: ..."
        issue instead of being raised.
    """
    try:
        issues = _collect_condition_issues(_parse_module(module_json))
        return len(issues) == 0, issues
    except Exception as e:
        # Deliberate catch-all: callers rely on this function never raising.
        return False, [f"Validation error: {str(e)}"]


def _flatten_condition_types(obj: Any) -> bool:
    """Replace nested condition_type dicts in place; return True if any changed."""
    if isinstance(obj, dict):
        changed = False
        if _has_nested_condition_type(obj):
            condition = obj["condition"]
            nested = condition["condition_type"]
            # An empty dict offers no key to promote, so it is left untouched
            # rather than aborting the whole fix pass.
            if nested:
                condition["condition_type"] = next(iter(nested))
                changed = True
        children = list(obj.values())
    elif isinstance(obj, list):
        changed = False
        children = obj
    else:
        return False

    for child in children:
        if _flatten_condition_types(child):
            changed = True
    return changed


def fix_condition_structure(module_json: Any) -> Tuple[Any, bool]:
    """Attempt to fix common condition structure issues.

    Every dict-valued "condition_type" inside a "condition" object is replaced
    by its first key (as a plain value). Empty nested dicts cannot be fixed
    this way and are left as they are.

    Args:
        module_json: An already-parsed module, which is modified in place, or
            raw JSON text (str, bytes or bytearray), which is decoded first.

    Returns:
        A ``(module, fixed)`` tuple: the (possibly modified) parsed module and
        whether at least one replacement was made. If an error occurs it is
        logged and ``(module_json, False)`` is returned.
    """
    try:
        module_dict = _parse_module(module_json)
        fixed = _flatten_condition_types(module_dict)
        return module_dict, fixed
    except Exception as e:
        # Deliberate catch-all: a failed fix must not abort the caller's scan.
        logger.error("Error fixing module structure: %s", e)
        return module_json, False


def _log_issues(issues: List[str], verbose: bool) -> None:
    """Log each issue as a warning when verbose output is enabled."""
    if verbose:
        for issue in issues:
            logger.warning(" - %s", issue)


def _process_module(module_path: str, module_name: str, *,
                    fix: bool, verbose: bool) -> str:
    """Validate (and optionally fix) one module file; return its status.

    Raises whatever reading, decoding or writing the file raises; main()
    turns those into a logged error for that file.
    """
    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(module_path, 'r', encoding='utf-8') as f:
        module_json = json.load(f)

    valid, issues = validate_condition_format(module_json)
    if valid:
        if verbose:
            logger.info("✅ %s: Valid condition structure", module_name)
        return _STATUS_VALID

    if not fix:
        logger.warning("⚠️ %s: Invalid condition structure", module_name)
        _log_issues(issues, verbose)
        return _STATUS_INVALID

    fixed_module, was_fixed = fix_condition_structure(module_json)
    if not was_fixed:
        logger.warning("⚠️ %s: Could not fix condition structure issues", module_name)
        _log_issues(issues, verbose)
        return _STATUS_INVALID

    # Serialize before opening the file so a serialization failure cannot
    # leave a truncated module behind; ensure_ascii=False keeps non-ASCII
    # text readable instead of rewriting it as \uXXXX escapes.
    serialized = json.dumps(fixed_module, indent=2, ensure_ascii=False)
    with open(module_path, 'w', encoding='utf-8') as f:
        f.write(serialized)

    # Validate again to confirm the rewrite removed every issue.
    valid_after_fix, remaining_issues = validate_condition_format(fixed_module)
    if valid_after_fix:
        logger.info("🔧 %s: Fixed condition structure issues", module_name)
        return _STATUS_FIXED

    logger.warning("⚠️ %s: Could not fix all condition structure issues", module_name)
    _log_issues(remaining_issues, verbose)
    return _STATUS_INVALID


def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point: check every ``*.json`` module in a directory.

    Args:
        argv: Argument list to parse; when None, argparse reads ``sys.argv``.

    Exits with status 1 when the modules directory does not exist. Problems
    with individual files are logged and counted as invalid modules.
    """
    parser = argparse.ArgumentParser(
        description='Validate condition structure in Synthea modules')
    parser.add_argument('--modules_dir', type=str,
                        default='src/main/resources/modules',
                        help='Path to the modules directory')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable verbose output with detailed issue descriptions')
    parser.add_argument('--fix', action='store_true',
                        help='Attempt to automatically fix simple structure issues (experimental)')
    args = parser.parse_args(argv)

    # Check if modules directory exists (a regular file does not qualify)
    if not os.path.isdir(args.modules_dir):
        logger.error("Modules directory not found: %s", args.modules_dir)
        sys.exit(1)

    # Get all module files; escape the directory so characters such as "["
    # in its name are not treated as glob patterns, and sort for a stable order.
    pattern = os.path.join(glob.escape(args.modules_dir), "*.json")
    module_files = sorted(glob.glob(pattern))
    logger.info("Found %d module files to check", len(module_files))

    # Track statistics
    valid_modules = 0
    invalid_modules = 0
    fixed_modules = 0

    # Check each module
    for module_path in module_files:
        module_name = os.path.basename(module_path)
        try:
            status = _process_module(
                module_path, module_name, fix=args.fix, verbose=args.verbose)
        except json.JSONDecodeError as e:
            status = _STATUS_INVALID
            logger.error("❌ %s: Invalid JSON format - %s", module_name, e)
        except Exception as e:
            # Per-file boundary: one unreadable module must not stop the scan.
            status = _STATUS_INVALID
            logger.error("❌ %s: Error processing file - %s", module_name, e)

        # Each file is counted exactly once, whatever path it took above.
        if status == _STATUS_VALID:
            valid_modules += 1
        else:
            invalid_modules += 1
            if status == _STATUS_FIXED:
                fixed_modules += 1

    # Print summary
    logger.info("\nSummary:")
    logger.info("Total modules checked: %d", len(module_files))
    logger.info("Valid modules: %d", valid_modules)
    logger.info("Invalid modules: %d", invalid_modules)
    if args.fix:
        logger.info("Modules fixed: %d", fixed_modules)

    # Modules that were repaired no longer pose a problem, so the warning is
    # driven by what is still broken rather than by what was found broken.
    unresolved_modules = invalid_modules - fixed_modules
    if unresolved_modules > 0:
        logger.warning("Some modules have condition structure issues that may cause problems in Synthea")
        if not args.fix:
            logger.info("Run with --fix to attempt automatic fixes for the issues")
    elif invalid_modules > 0:
        logger.info("All detected condition structure issues were fixed")
    else:
        logger.info("All modules have valid condition structure")


if __name__ == "__main__":
    main()