#!/usr/bin/env python3
"""
Disease Module Validator for Synthea

This script validates a Synthea disease module for JSON correctness
and checks for common issues in the module structure.

Usage:
    python validate_module.py <module_path>
"""

import json
import sys
import os
import re


# Transition keys whose value is a list of objects carrying a 'transition' key.
_LIST_TRANSITION_TYPES = (
    'distributed_transition',
    'conditional_transition',
    'complex_transition',
    'lookup_table_transition',
)

# Every key that counts as "this state declares a transition".
# NOTE(review): 'type_of_care_transition' is only recognised as present; its
# targets are not validated because its exact schema is not confirmed here.
_TRANSITION_TYPES = (
    ('direct_transition',)
    + _LIST_TRANSITION_TYPES
    + ('type_of_care_transition',)
)

# Accepted shape of the 'code' value for each coding system we look for.
_CODE_FORMATS = {
    'SNOMED-CT': re.compile(r'[0-9]+'),
    'LOINC': re.compile(r'[0-9-]+'),
    'RxNorm': re.compile(r'[0-9]+'),
    # Letter + digit, optional further characters, optional dotted suffix
    # (accepts both "E11" and "E11.9").
    'ICD-10': re.compile(r'[A-Z][0-9][0-9A-Z]*(?:\.[0-9A-Z]+)?'),
}


def color_text(text, color_code):
    """Wrap *text* in the ANSI escape sequence for *color_code*."""
    return f"\033[{color_code}m{text}\033[0m"


def red(text):
    """Return *text* coloured bright red (ANSI code 91)."""
    return color_text(text, "91")


def green(text):
    """Return *text* coloured bright green (ANSI code 92)."""
    return color_text(text, "92")


def yellow(text):
    """Return *text* coloured bright yellow (ANSI code 93)."""
    return color_text(text, "93")


def check_required_fields(module_json):
    """Return the required top-level fields that are absent from the module.

    The required fields are 'name', 'states' and 'gmf_version'; the result
    keeps that order and is empty when nothing is missing.
    """
    required_fields = ['name', 'states', 'gmf_version']
    return [field for field in required_fields if field not in module_json]


def _iter_transition_targets(state_config):
    """Yield ``(label, target)`` for each transition target a state declares.

    *label* is the text used in error messages (the transition key, or
    'complex_transition distribution' for nested distribution entries).
    Malformed containers (non-list values, non-dict entries) are skipped
    rather than raising.
    """
    if 'direct_transition' in state_config:
        yield 'direct_transition', state_config['direct_transition']

    for transition_type in _LIST_TRANSITION_TYPES:
        entries = state_config.get(transition_type)
        if not isinstance(entries, list):
            continue
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            if 'transition' in entry:
                yield transition_type, entry['transition']
            if transition_type != 'complex_transition':
                continue
            distributions = entry.get('distributions')
            if not isinstance(distributions, list):
                continue
            for dist in distributions:
                if isinstance(dist, dict) and 'transition' in dist:
                    yield ('complex_transition distribution',
                           dist['transition'])


def check_transitions(module_json):
    """Check that every non-Terminal state transitions to an existing state.

    Returns a list of human-readable error strings: one per transition whose
    target is not a key of ``module_json['states']``, and one per
    non-Terminal state that declares no transition at all.
    """
    if 'states' not in module_json:
        return ["No 'states' field found"]

    states = module_json['states']
    if not isinstance(states, dict):
        return ["'states' field must be a JSON object"]

    state_names = set(states)
    errors = []

    for state_name, state_config in states.items():
        if not isinstance(state_config, dict):
            errors.append(f"State '{state_name}' is not a JSON object")
            continue

        # Terminal states end the module and need no outgoing transition.
        if state_config.get('type') == 'Terminal':
            continue

        if not any(key in state_config for key in _TRANSITION_TYPES):
            errors.append(f"State '{state_name}' has no transition defined")
            continue

        for label, target in _iter_transition_targets(state_config):
            # The isinstance test also protects the set lookup from
            # unhashable targets (e.g. a dict where a name was expected).
            if not isinstance(target, str) or target not in state_names:
                errors.append(
                    f"State '{state_name}' has invalid {label} "
                    f"to non-existent state '{target}'"
                )

    return errors


def _iter_dicts(node):
    """Yield every dict nested anywhere inside *node* (iteratively)."""
    pending = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, dict):
            yield current
            pending.extend(current.values())
        elif isinstance(current, list):
            pending.extend(current)


def check_codes(module_json):
    """Warn about coding systems for which no well-formed code is present.

    Walks the whole module looking for objects that have string 'system'
    and 'code' entries (in any key order) and checks the code against the
    expected format of its system. Returns one warning per system in
    SNOMED-CT, LOINC, RxNorm, ICD-10 order that has no matching code.
    """
    systems_found = set()
    for obj in _iter_dicts(module_json):
        system = obj.get('system')
        code = obj.get('code')
        if not (isinstance(system, str) and isinstance(code, str)):
            continue
        pattern = _CODE_FORMATS.get(system)
        if pattern is not None and pattern.fullmatch(code):
            systems_found.add(system)

    return [
        f"No {code_type} codes found. "
        "This may be normal depending on the module."
        for code_type in _CODE_FORMATS
        if code_type not in systems_found
    ]


def check_prevalence(module_json):
    """Warn when the module mentions no prevalence-like data.

    The check is textual: it serialises the module and looks for the
    substrings '"prevalence"', 'incidence' or 'probability' anywhere in it.
    """
    serialized = json.dumps(module_json)
    markers = ('"prevalence"', 'incidence', 'probability')
    if any(marker in serialized for marker in markers):
        return []
    return [
        "No prevalence, incidence, or probability data found. "
        "This may affect realism."
    ]


def check_circular_references(module_json):
    """Report cycles in the state-transition graph.

    Builds a directed graph from every transition target (including
    complex_transition distributions) and runs a depth-first search from
    each unvisited state. Targets that are not defined states are ignored
    here; check_transitions reports those. Returns a list of
    "Circular reference detected: A -> B -> A" strings.
    """
    if 'states' not in module_json:
        return []

    states = module_json['states']
    if not isinstance(states, dict):
        return []

    graph = {state_name: [] for state_name in states}
    for state_name, state_config in states.items():
        if not isinstance(state_config, dict):
            continue
        targets = [
            target
            for _label, target in _iter_transition_targets(state_config)
            if isinstance(target, str) and target in graph
        ]
        # De-duplicate while keeping declaration order so a repeated edge
        # does not report the same cycle twice.
        graph[state_name] = list(dict.fromkeys(targets))

    errors = []
    visited = set()

    for start in graph:
        if start in visited:
            continue

        # Iterative DFS: 'path' is the current chain of states, 'on_path'
        # mirrors it as a set for O(1) membership tests, and 'stack' holds
        # the partially consumed neighbour iterator of each state on the path.
        visited.add(start)
        path = [start]
        on_path = {start}
        stack = [iter(graph[start])]

        while stack:
            for neighbor in stack[-1]:
                if neighbor in on_path:
                    cycle_start = path.index(neighbor)
                    cycle = path[cycle_start:] + [neighbor]
                    errors.append(
                        f"Circular reference detected: {' -> '.join(cycle)}"
                    )
                elif neighbor not in visited:
                    visited.add(neighbor)
                    path.append(neighbor)
                    on_path.add(neighbor)
                    stack.append(iter(graph[neighbor]))
                    break
            else:
                # Neighbours exhausted: backtrack one level.
                stack.pop()
                on_path.discard(path.pop())

    return errors


def _repair_json_text(content):
    """Return *content* with common JSON mistakes textually patched.

    Removes trailing commas before '}' / ']' and balances the number of
    curly braces by appending or trimming braces at the end of the text.
    The patching is purely textual, so braces or commas inside string
    values are counted too; the caller must re-parse the result.
    """
    fixed_content = re.sub(r',\s*}', '}', content)
    fixed_content = re.sub(r',\s*]', ']', fixed_content)

    open_braces = fixed_content.count('{')
    close_braces = fixed_content.count('}')

    if open_braces > close_braces:
        missing = open_braces - close_braces
        print(yellow(f"Adding {missing} missing closing braces"))
        fixed_content += '}' * missing
    elif close_braces > open_braces:
        excess = close_braces - open_braces
        print(yellow(f"Removing {excess} excess closing braces"))
        for _ in range(excess):
            trimmed = fixed_content.rstrip()
            if not trimmed.endswith('}'):
                break
            # Drop exactly one brace per pass; str.rstrip('}') would strip
            # every trailing brace and unbalance the document again.
            fixed_content = trimmed[:-1]

    return fixed_content


def _require_object(module_json):
    """Exit with an error unless the parsed module is a JSON object."""
    if not isinstance(module_json, dict):
        print(red("✗ Top-level JSON value must be an object"))
        sys.exit(1)


def _load_module(module_path):
    """Read and parse the module file, attempting a repair on bad JSON.

    When the file is invalid JSON but the textual repair succeeds, the
    original text is saved to '<module_path>.bak' and the file is rewritten
    with the repaired, re-indented JSON. Exits with status 1 when the JSON
    cannot be repaired or is not an object.
    """
    with open(module_path, 'r', encoding='utf-8') as f:
        content = f.read()

    try:
        module_json = json.loads(content)
    except json.JSONDecodeError as e:
        print(red(f"✗ Invalid JSON: {e}"))
        print(yellow("Attempting to fix common JSON issues..."))
        fixed_content = _repair_json_text(content)

        try:
            module_json = json.loads(fixed_content)
        except json.JSONDecodeError as fix_error:
            print(red(f"✗ Could not fix JSON: {fix_error}"))
            sys.exit(1)

        print(green("✓ Fixed JSON issues successfully"))
        _require_object(module_json)

        # Keep the untouched original: the repair is heuristic and the
        # rewrite below replaces the user's file.
        backup_path = module_path + '.bak'
        with open(backup_path, 'w', encoding='utf-8') as f:
            f.write(content)
        print(yellow(f"Original file saved to {backup_path}"))

        with open(module_path, 'w', encoding='utf-8') as f:
            f.write(json.dumps(module_json, indent=2, ensure_ascii=False))
    else:
        print(green("✓ Valid JSON structure"))
        _require_object(module_json)

    return module_json


def _report(problems, ok_message, header, colorize):
    """Print *ok_message* in green, or *header* plus each problem."""
    if not problems:
        print(green(ok_message))
        return
    print(colorize(header))
    for problem in problems:
        print(colorize(f" - {problem}"))


def _run_checks(module_json):
    """Run every check on the parsed module and print the results."""
    missing_fields = check_required_fields(module_json)
    if missing_fields:
        print(red(f"✗ Missing required fields: {', '.join(missing_fields)}"))
    else:
        print(green("✓ All required fields present"))

    _report(check_transitions(module_json),
            "✓ All transitions valid",
            "✗ Invalid transitions found:", red)
    _report(check_circular_references(module_json),
            "✓ No circular references detected",
            "✗ Circular references found:", red)
    _report(check_codes(module_json),
            "✓ Medical codes look good",
            "⚠ Possible code issues:", yellow)
    _report(check_prevalence(module_json),
            "✓ Prevalence information looks good",
            "⚠ Possible prevalence issues:", yellow)

    print("\nModule Statistics:")
    print(f"- Name: {module_json.get('name', 'Unknown')}")
    print(f"- GMF Version: {module_json.get('gmf_version', 'Unknown')}")
    print(f"- States: {len(module_json.get('states', {}))}")
    print(f"- Remarks: {len(module_json.get('remarks', []))}")


def main():
    """Validate the module file named by the single command-line argument.

    Exits with status 1 on bad usage, a missing file, unrecoverable JSON,
    or any unexpected error; validation findings themselves are only
    printed and do not change the exit status.
    """
    if len(sys.argv) != 2:
        print(f"Usage: python {sys.argv[0]} <module_path>")
        sys.exit(1)

    module_path = sys.argv[1]

    if not os.path.exists(module_path):
        print(red(f"Error: File {module_path} does not exist"))
        sys.exit(1)

    # Top-level boundary: report any unexpected failure as a one-line error.
    # sys.exit() calls inside pass through, since SystemExit is not an
    # Exception subclass.
    try:
        module_json = _load_module(module_path)
        _run_checks(module_json)
    except Exception as e:
        print(red(f"Error: {e}"))
        sys.exit(1)


if __name__ == "__main__":
    main()