Trying to fix basic functionality again.

This commit is contained in:
2025-03-23 11:53:47 -07:00
parent ebda48190a
commit 2141e81f42
406 changed files with 173963 additions and 69 deletions

View File

@@ -0,0 +1,223 @@
#!/usr/bin/env python3
"""
Synthea Module Condition Structure Validator
This script scans all existing Synthea disease modules to check if any violate
the expected condition structure format, particularly looking for nested condition_type
objects that can cause errors during simulation.
Usage:
python check_condition_structure.py [--modules_dir DIRECTORY] [--verbose] [--fix]
Arguments:
--modules_dir DIRECTORY Path to the modules directory (default: src/main/resources/modules)
--verbose Enable verbose output with detailed issue descriptions
--fix Attempt to automatically fix simple structure issues (experimental)
Example:
python check_condition_structure.py --modules_dir ../modules --verbose
"""
import os
import sys
import json
import glob
import argparse
import logging
from typing import Dict, List, Any, Tuple
# Logging setup: every record goes to stdout at INFO level and above, tagged
# with timestamp, logger name and severity.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=[_stdout_handler])

# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)
def validate_condition_format(module_json: Any) -> Tuple[bool, List[str]]:
    """Validate that conditions in the module follow Synthea's expected format.

    The whole structure is walked recursively. An issue is recorded for every
    dict that has a ``"condition"`` entry whose ``"condition_type"`` value is
    itself a dict instead of a plain value.

    Args:
        module_json: The module, either as a JSON string (parsed here) or as
            an already-parsed structure of dicts and lists.

    Returns:
        A ``(is_valid, issues)`` tuple. ``is_valid`` is True only when no
        issue was found; ``issues`` holds one message per offending location,
        each ending in the dotted path to the nested ``condition_type``.
        If parsing or traversal raises, the result is
        ``(False, ["Validation error: <message>"])`` rather than an exception.
    """
    def check_conditions(obj, path=()):
        # ``path`` is an immutable tuple so the default value can never be
        # shared or mutated between calls.
        issues = []
        if isinstance(obj, dict):
            condition = obj.get("condition")
            if isinstance(condition, dict) and isinstance(condition.get("condition_type"), dict):
                issue_path = '.'.join(path + ("condition", "condition_type"))
                issues.append(f"Found nested condition_type in a condition object at path: {issue_path}")
            # Descend into every value, including the condition just checked,
            # so deeper nested conditions are reported as well.
            for key, value in obj.items():
                issues.extend(check_conditions(value, path + (key,)))
        elif isinstance(obj, list):
            # List positions appear in the path as "[index]" segments.
            for i, item in enumerate(obj):
                issues.extend(check_conditions(item, path + (f"[{i}]",)))
        return issues

    try:
        module_dict = json.loads(module_json) if isinstance(module_json, str) else module_json
        issues = check_conditions(module_dict)
        return len(issues) == 0, issues
    except Exception as e:
        # Deliberate catch-all: callers expect a (False, [message]) result for
        # unparseable or unwalkable input, never a raised exception.
        return False, [f"Validation error: {str(e)}"]
def fix_condition_structure(module_json):
    """Attempt to fix common condition structure issues.

    Every ``"condition"`` dict whose ``"condition_type"`` is a non-empty dict
    has that dict replaced by its first key, as a plain string. An empty
    nested dict offers no key to promote, so it is left untouched and the
    traversal continues with the rest of the module.

    Args:
        module_json: The module, either as a JSON string (parsed here) or as
            an already-parsed structure. A parsed structure is modified in
            place and is the same object that is returned.

    Returns:
        A ``(module, fixed)`` tuple: the (possibly modified) parsed module and
        True when at least one replacement was made. If an error occurs it is
        logged and ``(module_json, False)`` is returned with the original
        argument.
    """
    try:
        # Parse the module if it's a string
        module_dict = json.loads(module_json) if isinstance(module_json, str) else module_json
        fixed = False

        def fix_conditions(obj):
            nonlocal fixed
            if isinstance(obj, dict):
                condition = obj.get("condition")
                if isinstance(condition, dict):
                    nested = condition.get("condition_type")
                    # Guard against an empty dict: there is no key to promote,
                    # and indexing into it would abort the whole fix run.
                    if isinstance(nested, dict) and nested:
                        # NOTE(review): only the first key survives; whatever
                        # was stored under it is discarded. Confirm that this
                        # is the intended repair for Synthea modules.
                        condition["condition_type"] = next(iter(nested))
                        fixed = True
                children = obj.values()
            elif isinstance(obj, list):
                children = obj
            else:
                return
            # Only containers can hold further conditions.
            for child in children:
                if isinstance(child, (dict, list)):
                    fix_conditions(child)

        # Fix the entire module
        fix_conditions(module_dict)
        # Return the fixed module and whether changes were made
        return module_dict, fixed
    except Exception as e:
        # Best-effort contract: report the failure and hand the input back.
        logger.error(f"Error fixing module structure: {e}")
        return module_json, False
def _log_issues(issues):
    """Emit one warning line per issue message."""
    for issue in issues:
        logger.warning(f" - {issue}")


def _process_module(module_path, fix, verbose):
    """Validate one module file and, when ``fix`` is set, try to repair it.

    Returns "valid", "fixed" or "invalid". Read, parse and write errors are
    not handled here; they propagate to the caller.
    """
    module_name = os.path.basename(module_path)

    # Explicit encoding so the result does not depend on the platform locale.
    with open(module_path, 'r', encoding='utf-8') as f:
        module_json = json.loads(f.read())

    valid, issues = validate_condition_format(module_json)
    if valid:
        if verbose:
            logger.info(f"{module_name}: Valid condition structure")
        return "valid"

    if not fix:
        logger.warning(f"⚠️ {module_name}: Invalid condition structure")
        if verbose:
            _log_issues(issues)
        return "invalid"

    fixed_module, was_fixed = fix_condition_structure(module_json)
    if not was_fixed:
        logger.warning(f"⚠️ {module_name}: Could not fix condition structure issues")
        if verbose:
            _log_issues(issues)
        return "invalid"

    # Write the repaired module back over the original file.
    with open(module_path, 'w', encoding='utf-8') as f:
        json.dump(fixed_module, f, indent=2)

    # Validate again to confirm that nothing is left to fix.
    valid_after_fix, remaining_issues = validate_condition_format(fixed_module)
    if valid_after_fix:
        logger.info(f"🔧 {module_name}: Fixed condition structure issues")
        return "fixed"

    logger.warning(f"⚠️ {module_name}: Could not fix all condition structure issues")
    if verbose:
        _log_issues(remaining_issues)
    return "invalid"


def main():
    """Command-line entry point: check every module file and print a summary.

    Parses ``--modules_dir``, ``--verbose`` and ``--fix``, then validates each
    ``*.json`` file directly inside the modules directory (the glob is not
    recursive). With ``--fix``, modules that could be repaired are rewritten
    in place with 2-space-indented JSON. Exits with status 1 when the modules
    directory does not exist or is not a directory.
    """
    parser = argparse.ArgumentParser(description='Validate condition structure in Synthea modules')
    parser.add_argument('--modules_dir', type=str, default='src/main/resources/modules',
                        help='Path to the modules directory')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable verbose output with detailed issue descriptions')
    parser.add_argument('--fix', action='store_true',
                        help='Attempt to automatically fix simple structure issues (experimental)')
    args = parser.parse_args()

    # A regular file would otherwise be accepted and reported as "0 modules,
    # all valid", so require an actual directory.
    if not os.path.isdir(args.modules_dir):
        logger.error(f"Modules directory not found: {args.modules_dir}")
        sys.exit(1)

    # Sorted so the log output is reproducible between runs.
    module_files = sorted(glob.glob(os.path.join(args.modules_dir, "*.json")))
    logger.info(f"Found {len(module_files)} module files to check")

    # Track statistics. A repaired module still counts as invalid (it was
    # invalid when found) and additionally as fixed.
    valid_modules = 0
    invalid_modules = 0
    fixed_modules = 0

    for module_path in module_files:
        module_name = os.path.basename(module_path)
        try:
            outcome = _process_module(module_path, args.fix, args.verbose)
        except json.JSONDecodeError as e:
            outcome = "invalid"
            logger.error(f"{module_name}: Invalid JSON format - {e}")
        except Exception as e:
            # Per-file boundary: one unreadable or unwritable file must not
            # stop the scan of the remaining modules.
            outcome = "invalid"
            logger.error(f"{module_name}: Error processing file - {e}")

        # Counting happens exactly once per file, after the outcome is known,
        # so a failure midway through processing cannot be counted twice.
        if outcome == "valid":
            valid_modules += 1
        else:
            invalid_modules += 1
            if outcome == "fixed":
                fixed_modules += 1

    # Print summary
    logger.info("\nSummary:")
    logger.info(f"Total modules checked: {len(module_files)}")
    logger.info(f"Valid modules: {valid_modules}")
    logger.info(f"Invalid modules: {invalid_modules}")
    if args.fix:
        logger.info(f"Modules fixed: {fixed_modules}")

    # Only modules that are still broken after any repairs warrant a warning.
    unresolved_modules = invalid_modules - fixed_modules
    if unresolved_modules > 0:
        logger.warning("Some modules have condition structure issues that may cause problems in Synthea")
        if not args.fix:
            logger.info("Run with --fix to attempt automatic fixes for the issues")
    else:
        logger.info("All modules have valid condition structure")
# Run the validator only when this file is executed as a script, so that
# importing it (e.g. to reuse the helper functions) has no side effects.
if __name__ == "__main__":
    main()