Trying to fix basic functionality again.
This commit is contained in:
223
scripts/check_condition_structure.py
Executable file
223
scripts/check_condition_structure.py
Executable file
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Synthea Module Condition Structure Validator
|
||||
|
||||
This script scans all existing Synthea disease modules to check if any violate
|
||||
the expected condition structure format, particularly looking for nested condition_type
|
||||
objects that can cause errors during simulation.
|
||||
|
||||
Usage:
|
||||
python check_condition_structure.py [--modules_dir DIRECTORY] [--verbose] [--fix]
|
||||
|
||||
Arguments:
|
||||
--modules_dir DIRECTORY Path to the modules directory (default: src/main/resources/modules)
|
||||
--verbose Enable verbose output with detailed issue descriptions
|
||||
--fix Attempt to automatically fix simple structure issues (experimental)
|
||||
|
||||
Example:
|
||||
python check_condition_structure.py --modules_dir ../modules --verbose
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import glob
|
||||
import argparse
|
||||
import logging
|
||||
from typing import Dict, List, Any, Tuple
|
||||
|
||||
# Logging setup: INFO-level records go to stdout with a timestamped format.
_stdout_handler = logging.StreamHandler(sys.stdout)
_log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_log_format, handlers=[_stdout_handler])

# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)
|
||||
|
||||
def validate_condition_format(module_json):
    """Check a Synthea module for nested ``condition_type`` objects.

    The module is walked recursively. Every dict stored under a
    ``"condition"`` key is inspected, and an issue is recorded when its
    ``"condition_type"`` value is itself a dict.

    Args:
        module_json: The module either as an already-parsed JSON value
            (dict or list) or as raw JSON text (``str``, ``bytes`` or
            ``bytearray``), which is parsed first.

    Returns:
        A ``(is_valid, issues)`` tuple. ``is_valid`` is True when no issue
        was found and ``issues`` is the list of human-readable messages.
        Any exception raised while parsing or walking the module is reported
        as a single ``"Validation error: ..."`` issue with ``is_valid`` False.
    """

    def check_conditions(obj, path=()):
        """Return the issues found in ``obj``; ``path`` locates it in the module."""
        # ``path`` is an immutable tuple so the default value can never be
        # shared or mutated between calls.
        issues = []

        if isinstance(obj, dict):
            condition = obj.get("condition")
            if isinstance(condition, dict) and isinstance(condition.get("condition_type"), dict):
                issue_path = '.'.join(path + ("condition", "condition_type"))
                issues.append(f"Found nested condition_type in a condition object at path: {issue_path}")

            # Descend into every value, including the condition just inspected.
            for key, value in obj.items():
                issues.extend(check_conditions(value, path + (key,)))

        elif isinstance(obj, list):
            for index, item in enumerate(obj):
                issues.extend(check_conditions(item, path + (f"[{index}]",)))

        return issues

    try:
        # Raw JSON text (including bytes read straight from a file) must be
        # parsed; anything else is assumed to be parsed JSON already.
        if isinstance(module_json, (str, bytes, bytearray)):
            module_dict = json.loads(module_json)
        else:
            module_dict = module_json

        issues = check_conditions(module_dict)
    except Exception as e:
        # Callers rely on a (False, [message]) result rather than an exception.
        return False, [f"Validation error: {str(e)}"]

    return not issues, issues
|
||||
|
||||
def fix_condition_structure(module_json):
    """Flatten nested ``condition_type`` objects in a Synthea module.

    Wherever a dict stored under a ``"condition"`` key has a
    ``"condition_type"`` value that is itself a non-empty dict, that value is
    replaced by the nested dict's first key. An empty nested dict has no key
    to promote, so it is left untouched for the validator to report.

    Args:
        module_json: The module either as an already-parsed JSON value
            (dict or list) or as raw JSON text (``str``, ``bytes`` or
            ``bytearray``), which is parsed first. A parsed value is
            modified in place.

    Returns:
        A ``(module, fixed)`` tuple: the (possibly modified) parsed module and
        a bool telling whether any replacement was made. If an unexpected
        error occurs, the error is logged and ``(module_json, False)`` is
        returned with the original argument.
    """

    def fix_conditions(obj):
        """Fix ``obj`` in place and return True if anything below it changed."""
        changed = False

        if isinstance(obj, dict):
            condition = obj.get("condition")
            if isinstance(condition, dict):
                nested = condition.get("condition_type")
                # Skip empty dicts: indexing their first key would raise and
                # abort the whole walk halfway through.
                if isinstance(nested, dict) and nested:
                    condition["condition_type"] = next(iter(nested))
                    changed = True
            # Snapshot the values so the walk is independent of any edits.
            children = list(obj.values())
        elif isinstance(obj, list):
            children = obj
        else:
            return False

        for child in children:
            if isinstance(child, (dict, list)):
                # Call first so recursion is never short-circuited away.
                changed = fix_conditions(child) or changed

        return changed

    try:
        if isinstance(module_json, (str, bytes, bytearray)):
            module_dict = json.loads(module_json)
        else:
            module_dict = module_json

        fixed = fix_conditions(module_dict)
        return module_dict, fixed

    except Exception as e:
        logger.error(f"Error fixing module structure: {e}")
        return module_json, False
|
||||
|
||||
def _log_issues(issues):
    """Log each issue message as its own warning line."""
    for issue in issues:
        logger.warning(f" - {issue}")


def main():
    """Validate, and optionally fix, the condition structure of module files.

    Parses ``--modules_dir``, ``--verbose`` and ``--fix`` from the command
    line, checks every ``*.json`` file directly inside the modules directory
    with ``validate_condition_format`` and, when ``--fix`` is given, rewrites
    files that ``fix_condition_structure`` managed to change. A summary of
    the counts is logged at the end.

    Exits with status 1 when the modules directory does not exist or is not
    a directory.
    """
    parser = argparse.ArgumentParser(description='Validate condition structure in Synthea modules')
    parser.add_argument('--modules_dir', type=str, default='src/main/resources/modules',
                        help='Path to the modules directory')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable verbose output with detailed issue descriptions')
    parser.add_argument('--fix', action='store_true',
                        help='Attempt to automatically fix simple structure issues (experimental)')
    args = parser.parse_args()

    # isdir (not exists) so that a regular file is rejected as well.
    if not os.path.isdir(args.modules_dir):
        logger.error(f"Modules directory not found: {args.modules_dir}")
        sys.exit(1)

    # Sorted so the report order is stable from run to run.
    module_files = sorted(glob.glob(os.path.join(args.modules_dir, "*.json")))
    logger.info(f"Found {len(module_files)} module files to check")

    valid_modules = 0
    invalid_modules = 0
    fixed_modules = 0

    for module_path in module_files:
        module_name = os.path.basename(module_path)

        try:
            # JSON is read as UTF-8 explicitly instead of relying on the
            # platform's default encoding.
            with open(module_path, 'r', encoding='utf-8') as f:
                module_json = json.load(f)

            valid, issues = validate_condition_format(module_json)

            if valid:
                valid_modules += 1
                if args.verbose:
                    logger.info(f"✅ {module_name}: Valid condition structure")
                continue

            invalid_modules += 1

            if not args.fix:
                logger.warning(f"⚠️ {module_name}: Invalid condition structure")
                if args.verbose:
                    _log_issues(issues)
                continue

            fixed_module, was_fixed = fix_condition_structure(module_json)

            if not was_fixed:
                logger.warning(f"⚠️ {module_name}: Could not fix condition structure issues")
                if args.verbose:
                    _log_issues(issues)
                continue

            # Persist the changes, then validate again to confirm the result.
            with open(module_path, 'w', encoding='utf-8') as f:
                json.dump(fixed_module, f, indent=2)

            valid_after_fix, remaining_issues = validate_condition_format(fixed_module)

            if valid_after_fix:
                fixed_modules += 1
                logger.info(f"🔧 {module_name}: Fixed condition structure issues")
            else:
                logger.warning(f"⚠️ {module_name}: Could not fix all condition structure issues")
                if args.verbose:
                    _log_issues(remaining_issues)

        except json.JSONDecodeError as e:
            invalid_modules += 1
            logger.error(f"❌ {module_name}: Invalid JSON format - {str(e)}")

        except Exception as e:
            # Per-file boundary: one unreadable file must not stop the scan.
            invalid_modules += 1
            logger.error(f"❌ {module_name}: Error processing file - {str(e)}")

    logger.info("\nSummary:")
    logger.info(f"Total modules checked: {len(module_files)}")
    logger.info(f"Valid modules: {valid_modules}")
    logger.info(f"Invalid modules: {invalid_modules}")

    if args.fix:
        logger.info(f"Modules fixed: {fixed_modules}")

    # Only modules that are still broken after any fix attempt justify the
    # warning; fully repaired modules do not.
    unresolved_modules = invalid_modules - fixed_modules
    if unresolved_modules > 0:
        logger.warning("Some modules have condition structure issues that may cause problems in Synthea")
        if not args.fix:
            logger.info("Run with --fix to attempt automatic fixes for the issues")
    elif invalid_modules > 0:
        logger.info("All detected condition structure issues were fixed")
    else:
        logger.info("All modules have valid condition structure")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user