#!/usr/bin/env python3
"""
Disease Module Validator for Synthea

This script validates a Synthea disease module for JSON correctness and
checks for common issues in the module structure.

Usage:
    python validate_module.py <path_to_module.json>
"""
|
|
|
|
import json
|
|
import sys
|
|
import os
|
|
import re
|
|
|
|
def color_text(text, color_code):
    """Wrap ``text`` in an ANSI escape sequence for colored terminal output.

    Args:
        text: The value to display; it is interpolated into an f-string.
        color_code: The parameter placed between ``ESC[`` and ``m``
            (for example ``"91"``).

    Returns:
        The text preceded by the color sequence and followed by the
        reset sequence ``ESC[0m``.
    """
    start = f"\033[{color_code}m"
    reset = "\033[0m"
    return f"{start}{text}{reset}"
|
|
|
|
def red(text):
    """Return ``text`` wrapped by ``color_text`` with ANSI code 91."""
    code = "91"
    return color_text(text, code)
|
|
|
|
def green(text):
    """Return ``text`` wrapped by ``color_text`` with ANSI code 92."""
    code = "92"
    return color_text(text, code)
|
|
|
|
def yellow(text):
    """Return ``text`` wrapped by ``color_text`` with ANSI code 93."""
    code = "93"
    return color_text(text, code)
|
|
|
|
def check_required_fields(module_json):
    """Report which mandatory top-level keys are absent from a module.

    Args:
        module_json: The parsed module; membership is tested with ``in``.

    Returns:
        The names among ``name``, ``states`` and ``gmf_version`` that are
        missing, in that order. The list is empty when all are present.
    """
    mandatory = ('name', 'states', 'gmf_version')
    return [key for key in mandatory if key not in module_json]
|
|
|
|
def check_transitions(module_json):
    """Verify that every non-Terminal state transitions to an existing state.

    For each state whose ``type`` is not ``Terminal`` the function looks for
    ``direct_transition``, ``distributed_transition``,
    ``conditional_transition``, ``complex_transition`` and
    ``lookup_table_transition``. Every target named by a ``transition`` key
    (and, for complex transitions, by each entry of ``distributions``) must
    be a key of ``states``.

    Args:
        module_json: The parsed module (a dict).

    Returns:
        A list of human-readable error strings; empty when nothing is wrong.
        If the module has no ``states`` key the list holds the single entry
        ``"No 'states' field found"``.
    """
    if 'states' not in module_json:
        return ["No 'states' field found"]

    states = module_json['states']
    if not isinstance(states, dict):
        return ["'states' must be a JSON object mapping state names to states"]

    state_names = set(states)
    errors = []

    # Transition kinds whose value is a list of entries carrying a
    # 'transition' key. The order fixes the order of reported errors.
    list_transition_types = (
        'distributed_transition',
        'conditional_transition',
        'complex_transition',
        'lookup_table_transition',
    )

    def is_unknown(target):
        # State names are JSON object keys and therefore strings; checking
        # the type first also avoids a TypeError on unhashable values.
        return not isinstance(target, str) or target not in state_names

    for state_name, state_config in states.items():
        if not isinstance(state_config, dict):
            errors.append(f"State '{state_name}' is not a JSON object")
            continue

        # Terminal states are not expected to define transitions.
        if state_config.get('type') == 'Terminal':
            continue

        transition_found = False

        if 'direct_transition' in state_config:
            transition_found = True
            target = state_config['direct_transition']
            if is_unknown(target):
                errors.append(f"State '{state_name}' has invalid direct_transition to non-existent state '{target}'")

        for transition_type in list_transition_types:
            if transition_type not in state_config:
                continue
            transition_found = True

            for entry in state_config[transition_type] or ():
                if not isinstance(entry, dict):
                    continue

                if 'transition' in entry and is_unknown(entry['transition']):
                    errors.append(f"State '{state_name}' has invalid {transition_type} to non-existent state '{entry['transition']}'")

                # Only complex transitions nest a further list of targets.
                if transition_type != 'complex_transition':
                    continue
                for dist in entry.get('distributions') or ():
                    if isinstance(dist, dict) and 'transition' in dist and is_unknown(dist['transition']):
                        errors.append(f"State '{state_name}' has invalid complex_transition distribution to non-existent state '{dist['transition']}'")

        if not transition_found:
            errors.append(f"State '{state_name}' has no transition defined")

    return errors
|
|
|
|
def check_codes(module_json):
    """Warn about code systems for which no well-formed code is present.

    The module is walked recursively. Any JSON object that has both a
    ``system`` and a ``code`` key counts towards its system when the code
    string fully matches that system's expected shape. The keys may appear
    in any order and may be separated by other keys such as ``display``.

    Args:
        module_json: The parsed module (any JSON-compatible structure).

    Returns:
        One warning string per system (SNOMED-CT, LOINC, RxNorm, ICD-10, in
        that order) for which no well-formed code was found.
    """
    code_formats = {
        'SNOMED-CT': r'[0-9]+',
        'LOINC': r'[0-9-]+',
        'RxNorm': r'[0-9]+',
        # A letter, a digit, further alphanumerics and an optional decimal
        # part, so that codes such as "E11.9" are accepted.
        'ICD-10': r'[A-Z][0-9][0-9A-Z]*(?:\.[0-9A-Z]+)?',
    }

    systems_found = set()

    # Explicit stack instead of recursion so that nesting depth is not
    # bounded by the interpreter's recursion limit.
    pending = [module_json]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            system = node.get('system')
            code = node.get('code')
            if (
                isinstance(system, str)
                and isinstance(code, str)
                and system in code_formats
                and re.fullmatch(code_formats[system], code)
            ):
                systems_found.add(system)
            pending.extend(node.values())
        elif isinstance(node, list):
            pending.extend(node)

    return [
        f"No {code_type} codes found. This may be normal depending on the module."
        for code_type in code_formats
        if code_type not in systems_found
    ]
|
|
|
|
def check_prevalence(module_json):
    """Warn when the module text mentions no prevalence-like term.

    The module is serialized with ``json.dumps`` and searched for the
    substrings ``"prevalence"`` (including the quotes), ``incidence`` and
    ``probability``. A hit anywhere in the text, whether in a key or a
    value, counts.

    Args:
        module_json: The parsed module.

    Returns:
        An empty list when any marker occurs, otherwise a list holding one
        warning string.
    """
    serialized = json.dumps(module_json)
    markers = ('"prevalence"', 'incidence', 'probability')

    for marker in markers:
        if marker in serialized:
            return []

    return ["No prevalence, incidence, or probability data found. This may affect realism."]
|
|
|
|
def check_circular_references(module_json):
    """Detect cycles in the state-transition graph of a module.

    An edge is recorded for every target named by ``direct_transition`` and
    by the ``transition`` key of each entry in ``distributed_transition``,
    ``conditional_transition``, ``complex_transition`` (including its nested
    ``distributions``) and ``lookup_table_transition``. Targets that are not
    defined states are ignored here, so a dangling reference cannot crash
    the traversal.

    Args:
        module_json: The parsed module (a dict).

    Returns:
        A list of strings of the form
        ``"Circular reference detected: A -> B -> A"``, one per back edge
        met during a depth-first traversal; empty when no cycle is found or
        the module has no usable ``states`` mapping.
    """
    if 'states' not in module_json:
        return []

    states = module_json['states']
    if not isinstance(states, dict):
        return []

    errors = []
    graph = {state_name: [] for state_name in states}

    def add_edge(source, target):
        # Skip undefined or non-string targets: they are not graph nodes.
        if isinstance(target, str) and target in graph:
            graph[source].append(target)

    list_transition_types = (
        'distributed_transition',
        'conditional_transition',
        'complex_transition',
        'lookup_table_transition',
    )

    for state_name, state_config in states.items():
        if not isinstance(state_config, dict):
            continue

        if 'direct_transition' in state_config:
            add_edge(state_name, state_config['direct_transition'])

        for transition_type in list_transition_types:
            for entry in state_config.get(transition_type) or ():
                if not isinstance(entry, dict):
                    continue
                if 'transition' in entry:
                    add_edge(state_name, entry['transition'])
                if transition_type == 'complex_transition':
                    for dist in entry.get('distributions') or ():
                        if isinstance(dist, dict) and 'transition' in dist:
                            add_edge(state_name, dist['transition'])

    visited = set()
    path = []        # current DFS path, in order, for building messages
    on_path = set()  # same members as `path`, for O(1) membership tests

    def enter(node, stack):
        visited.add(node)
        path.append(node)
        on_path.add(node)
        stack.append(iter(graph[node]))

    # Iterative depth-first search: an explicit stack of neighbor iterators
    # avoids hitting the recursion limit on long chains of states.
    for start in states:
        if start in visited:
            continue

        stack = []
        enter(start, stack)
        while stack:
            for neighbor in stack[-1]:
                if neighbor in on_path:
                    cycle_start = path.index(neighbor)
                    errors.append(f"Circular reference detected: {' -> '.join(path[cycle_start:] + [neighbor])}")
                elif neighbor not in visited:
                    enter(neighbor, stack)
                    break
            else:
                # All neighbors handled: leave this node.
                stack.pop()
                on_path.discard(path.pop())

    return errors
|
|
|
|
def _repair_json_text(content):
    """Apply best-effort textual repairs to malformed JSON.

    Returns a ``(repaired_text, notes)`` tuple where ``notes`` lists the
    brace adjustments that were made, for the caller to display.
    """
    notes = []

    # Drop trailing commas before a closing brace or bracket.
    # NOTE: these substitutions and the brace counts below are not
    # string-aware, so braces or commas inside string values are affected
    # as well.
    repaired = re.sub(r',\s*}', '}', content)
    repaired = re.sub(r',\s*]', ']', repaired)

    open_braces = repaired.count('{')
    close_braces = repaired.count('}')

    if open_braces > close_braces:
        missing = open_braces - close_braces
        notes.append(f"Adding {missing} missing closing braces")
        repaired += '}' * missing
    elif close_braces > open_braces:
        excess = close_braces - open_braces
        notes.append(f"Removing {excess} excess closing braces")
        # Remove exactly one trailing brace per surplus brace; stripping
        # every trailing brace would also discard the legitimate ones.
        for _ in range(excess):
            trimmed = repaired.rstrip()
            if not trimmed.endswith('}'):
                break
            repaired = trimmed[:-1]

    return repaired, notes


def _print_findings(findings, paint, header, ok_message):
    """Print a header plus one bullet per finding, or a green OK line."""
    if findings:
        print(paint(header))
        for finding in findings:
            print(paint(f" - {finding}"))
    else:
        print(green(ok_message))


def main():
    """Validate the module file named on the command line and print a report.

    Expects exactly one argument, the path to a module JSON file. If the
    file is not valid JSON, a textual repair is attempted; when the repair
    parses, the file is overwritten with the re-serialized JSON. The
    structural checks are then run and their findings printed, followed by
    a short statistics summary.

    Exits with status 1 on a usage error, a missing file, unrepairable JSON,
    a non-object JSON root, or any unexpected exception.
    """
    if len(sys.argv) != 2:
        print(f"Usage: python {sys.argv[0]} <path_to_module.json>")
        sys.exit(1)

    module_path = sys.argv[1]

    if not os.path.exists(module_path):
        print(red(f"Error: File {module_path} does not exist"))
        sys.exit(1)

    try:
        # Read explicitly as UTF-8 rather than the platform default.
        with open(module_path, 'r', encoding='utf-8') as f:
            content = f.read()

        try:
            module_json = json.loads(content)
            print(green("✓ Valid JSON structure"))
        except json.JSONDecodeError as e:
            print(red(f"✗ Invalid JSON: {e}"))
            print(yellow("Attempting to fix common JSON issues..."))

            fixed_content, notes = _repair_json_text(content)
            for note in notes:
                print(yellow(note))

            try:
                module_json = json.loads(fixed_content)
            except json.JSONDecodeError as repair_error:
                print(red(f"✗ Could not fix JSON: {repair_error}"))
                sys.exit(1)

            print(green("✓ Fixed JSON issues successfully"))

            # Persist the repaired module in place of the broken original.
            with open(module_path, 'w', encoding='utf-8') as f:
                f.write(json.dumps(module_json, indent=2))

        # The checks below assume a JSON object at the top level.
        if not isinstance(module_json, dict):
            print(red("✗ Module root must be a JSON object"))
            sys.exit(1)

        missing_fields = check_required_fields(module_json)
        if missing_fields:
            print(red(f"✗ Missing required fields: {', '.join(missing_fields)}"))
        else:
            print(green("✓ All required fields present"))

        _print_findings(
            check_transitions(module_json), red,
            "✗ Invalid transitions found:", "✓ All transitions valid",
        )
        _print_findings(
            check_circular_references(module_json), red,
            "✗ Circular references found:", "✓ No circular references detected",
        )
        _print_findings(
            check_codes(module_json), yellow,
            "⚠ Possible code issues:", "✓ Medical codes look good",
        )
        _print_findings(
            check_prevalence(module_json), yellow,
            "⚠ Possible prevalence issues:", "✓ Prevalence information looks good",
        )

        print("\nModule Statistics:")
        print(f"- Name: {module_json.get('name', 'Unknown')}")
        print(f"- GMF Version: {module_json.get('gmf_version', 'Unknown')}")
        print(f"- States: {len(module_json.get('states', {}))}")
        print(f"- Remarks: {len(module_json.get('remarks', []))}")

    except Exception as e:
        # Top-level boundary: report any unexpected failure and exit non-zero.
        print(red(f"Error: {e}"))
        sys.exit(1)
|
|
|
|
if __name__ == "__main__":
    # Run the validator only when executed as a script, not when imported.
    main()