Trying to fix basic functionality again.
This commit is contained in:
305
scripts/validate_module.py
Executable file
305
scripts/validate_module.py
Executable file
@@ -0,0 +1,305 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Disease Module Validator for Synthea
|
||||
|
||||
This script validates a Synthea disease module for JSON correctness and
|
||||
checks for common issues in the module structure.
|
||||
|
||||
Usage:
|
||||
python validate_module.py <path_to_module.json>
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
def color_text(text, color_code):
    """Return *text* wrapped in an ANSI escape sequence for terminal output.

    The sequence starts with the SGR code given by *color_code* and ends
    with the reset code ``0`` so following output is unaffected.
    """
    start = f"\033[{color_code}m"
    reset = "\033[0m"
    return f"{start}{text}{reset}"
|
||||
|
||||
def red(text):
    """Return *text* colored with ANSI code 91 (used for error output)."""
    return color_text(color_code="91", text=text)
|
||||
|
||||
def green(text):
    """Return *text* colored with ANSI code 92 (used for success output)."""
    return color_text(color_code="92", text=text)
|
||||
|
||||
def yellow(text):
    """Return *text* colored with ANSI code 93 (used for warning output)."""
    return color_text(color_code="93", text=text)
|
||||
|
||||
def check_required_fields(module_json):
    """Return the required top-level keys that are absent from *module_json*.

    The result lists the missing names in the fixed order ``name``,
    ``states``, ``gmf_version``; an empty list means nothing is missing.
    """
    mandatory = ('name', 'states', 'gmf_version')
    return [key for key in mandatory if key not in module_json]
|
||||
|
||||
# Transition keys recognised on a state. All but ``direct_transition`` hold a
# list of entries that each name their target under the ``transition`` key.
_TRANSITION_TYPES = (
    'direct_transition',
    'distributed_transition',
    'conditional_transition',
    'complex_transition',
    'lookup_table_transition',
)


def _iter_transition_targets(transition_type, transition_value):
    """Yield ``(label, target)`` for every state referenced by one transition.

    *label* is the wording used in error messages: the transition type, or
    ``'complex_transition distribution'`` for targets nested under the
    ``distributions`` list of a complex transition entry. Malformed entries
    (non-list containers, non-dict items) are skipped rather than raising.
    """
    if transition_type == 'direct_transition':
        # A direct transition is the target itself, not a list of entries.
        yield transition_type, transition_value
        return

    if not isinstance(transition_value, list):
        return

    for entry in transition_value:
        if not isinstance(entry, dict):
            continue
        if 'transition' in entry:
            yield transition_type, entry['transition']
        if transition_type == 'complex_transition':
            distributions = entry.get('distributions')
            if not isinstance(distributions, list):
                continue
            for dist in distributions:
                if isinstance(dist, dict) and 'transition' in dist:
                    yield 'complex_transition distribution', dist['transition']


def check_transitions(module_json):
    """Check that every non-Terminal state transitions to states that exist.

    Args:
        module_json: The parsed module (a mapping with a ``states`` entry).

    Returns:
        A list of human-readable error strings; empty when all transitions
        are valid. States of type ``Terminal`` are skipped entirely. A state
        that defines none of the recognised transition keys is reported as
        having no transition.
    """
    if 'states' not in module_json:
        return ["No 'states' field found"]

    states = module_json['states']
    if not isinstance(states, dict):
        # Without a name -> definition mapping nothing below can be checked.
        return ["'states' must be an object mapping state names to state definitions"]

    state_names = set(states)
    errors = []

    for state_name, state_config in states.items():
        if not isinstance(state_config, dict):
            errors.append(f"State '{state_name}' is not a JSON object")
            continue

        # Terminal states end the module and need no outgoing transition.
        if state_config.get('type') == 'Terminal':
            continue

        transition_found = False
        for transition_type in _TRANSITION_TYPES:
            if transition_type not in state_config:
                continue
            transition_found = True

            targets = _iter_transition_targets(transition_type, state_config[transition_type])
            for label, target in targets:
                # The isinstance test keeps unhashable targets (e.g. a list)
                # from raising TypeError on the set lookup.
                if not isinstance(target, str) or target not in state_names:
                    errors.append(
                        f"State '{state_name}' has invalid {label} to non-existent state '{target}'"
                    )

        if not transition_found:
            errors.append(f"State '{state_name}' has no transition defined")

    return errors
|
||||
|
||||
def _iter_code_entries(node):
    """Yield every dict nested in *node* that has both ``system`` and ``code`` keys.

    The walk is iterative so deeply nested modules cannot exhaust the
    recursion limit; traversal order is unspecified.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, dict):
            if 'system' in current and 'code' in current:
                yield current
            pending.extend(current.values())
        elif isinstance(current, list):
            pending.extend(current)


def check_codes(module_json):
    """Warn about code systems for which the module has no well-formed code.

    The module is walked structurally, so a code entry is recognised
    regardless of the order of its ``system`` and ``code`` keys.

    Args:
        module_json: The parsed module (any JSON-compatible structure).

    Returns:
        One warning string per code system (SNOMED-CT, LOINC, RxNorm,
        ICD-10, in that order) for which no entry with a matching code
        format was found. An empty list means every system is represented.
    """
    # Expected shape of the ``code`` value for each ``system`` value.
    code_patterns = {
        'SNOMED-CT': r'[0-9]+',
        'LOINC': r'[0-9-]+',
        'RxNorm': r'[0-9]+',
        # Allow an optional dotted subdivision such as "E11.9".
        'ICD-10': r'[A-Z][0-9]+(\.[0-9A-Z]+)?',
    }

    systems_seen = set()
    for entry in _iter_code_entries(module_json):
        system = entry['system']
        code = entry['code']
        if not isinstance(system, str) or not isinstance(code, str):
            continue
        pattern = code_patterns.get(system)
        if pattern is not None and re.fullmatch(pattern, code):
            systems_seen.add(system)

    return [
        f"No {code_type} codes found. This may be normal depending on the module."
        for code_type in code_patterns
        if code_type not in systems_seen
    ]
|
||||
|
||||
def check_prevalence(module_json):
    """Warn when the serialized module never mentions prevalence-style data.

    The module is dumped to a JSON string and searched for the substrings
    ``"prevalence"`` (including the quotes), ``incidence`` and
    ``probability``. Returns a one-element warning list when none of them
    occurs, otherwise an empty list.
    """
    serialized = json.dumps(module_json)
    markers = ('"prevalence"', 'incidence', 'probability')

    for marker in markers:
        if marker in serialized:
            # A single hit anywhere in the text is enough.
            return []

    return ["No prevalence, incidence, or probability data found. This may affect realism."]
|
||||
|
||||
def _outgoing_targets(state_config, known_states):
    """Return the distinct, existing states that *state_config* can move to.

    Targets are collected from ``direct_transition`` and from the
    ``transition`` key of each entry in the list-shaped transition kinds,
    including entries nested under ``distributions`` of a complex
    transition. Targets that are not names in *known_states* are dropped,
    as they cannot take part in a cycle. First-seen order is preserved.
    """
    if not isinstance(state_config, dict):
        return []

    candidates = []
    if 'direct_transition' in state_config:
        candidates.append(state_config['direct_transition'])

    list_kinds = (
        'distributed_transition',
        'conditional_transition',
        'complex_transition',
        'lookup_table_transition',
    )
    for kind in list_kinds:
        entries = state_config.get(kind)
        if not isinstance(entries, list):
            continue
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            if 'transition' in entry:
                candidates.append(entry['transition'])
            distributions = entry.get('distributions')
            if isinstance(distributions, list):
                for dist in distributions:
                    if isinstance(dist, dict) and 'transition' in dist:
                        candidates.append(dist['transition'])

    # The isinstance test also protects the membership lookup from
    # unhashable values; dict.fromkeys de-duplicates while keeping order.
    valid = (t for t in candidates if isinstance(t, str) and t in known_states)
    return list(dict.fromkeys(valid))


def check_circular_references(module_json):
    """Report cycles in the state transition graph.

    Args:
        module_json: The parsed module; its ``states`` entry maps state
            names to state definitions.

    Returns:
        A list of strings of the form
        ``"Circular reference detected: A -> B -> A"``, one per back edge
        found by a depth-first search started from each unvisited state in
        definition order. Returns an empty list when ``states`` is missing
        or is not a mapping. Transitions to undefined states are ignored.
    """
    states = module_json.get('states') if isinstance(module_json, dict) else None
    if not isinstance(states, dict):
        return []

    graph = {name: _outgoing_targets(config, states) for name, config in states.items()}

    errors = []
    visited = set()

    for root in graph:
        if root in visited:
            continue

        # Iterative DFS: an explicit stack of neighbor iterators replaces
        # recursion so long state chains cannot hit the recursion limit.
        visited.add(root)
        path = [root]
        on_path = {root}  # set mirror of `path` for O(1) membership tests
        stack = [iter(graph[root])]

        while stack:
            for neighbor in stack[-1]:
                if neighbor in on_path:
                    # Back edge: the cycle runs from the neighbor's position
                    # on the current path back to itself.
                    cycle_start = path.index(neighbor)
                    cycle = path[cycle_start:] + [neighbor]
                    errors.append(f"Circular reference detected: {' -> '.join(cycle)}")
                elif neighbor not in visited:
                    visited.add(neighbor)
                    path.append(neighbor)
                    on_path.add(neighbor)
                    stack.append(iter(graph[neighbor]))
                    break  # descend; the parent iterator resumes later
            else:
                # All neighbors of the top node are processed: backtrack.
                stack.pop()
                on_path.discard(path.pop())

    return errors
|
||||
|
||||
def _repair_json_text(content):
    """Return *content* with a few common JSON mistakes patched textually.

    Trailing commas before ``}`` or ``]`` are removed, and the number of
    closing braces is adjusted to match the number of opening braces. The
    repair is purely textual: braces and commas inside string values are
    counted and rewritten too, so the caller must re-parse the result.
    """
    fixed = re.sub(r',\s*}', '}', content)
    fixed = re.sub(r',\s*]', ']', fixed)

    open_braces = fixed.count('{')
    close_braces = fixed.count('}')

    if open_braces > close_braces:
        missing = open_braces - close_braces
        print(yellow(f"Adding {missing} missing closing braces"))
        fixed += '}' * missing
    elif close_braces > open_braces:
        excess = close_braces - open_braces
        print(yellow(f"Removing {excess} excess closing braces"))
        for _ in range(excess):
            trimmed = fixed.rstrip()
            if not trimmed.endswith('}'):
                # The surplus brace is not at the end; nothing safe to drop.
                break
            # Drop exactly one brace per iteration. str.rstrip('}') would
            # strip every trailing brace at once and corrupt the document.
            fixed = trimmed[:-1]

    return fixed


def _print_findings(findings, failure_header, success_message, colorize):
    """Print one report section: a colored header and bullets, or a green OK line."""
    if findings:
        print(colorize(failure_header))
        for finding in findings:
            print(colorize(f" - {finding}"))
    else:
        print(green(success_message))


def main():
    """Validate the module file named on the command line and print a report.

    Exits with status 1 on a usage error, a missing file, JSON that cannot
    be parsed even after the automatic repair attempt, a JSON root that is
    not an object, or any unexpected exception. When the repair succeeds
    the file is rewritten in place with the re-serialized JSON.
    """
    if len(sys.argv) != 2:
        print(f"Usage: python {sys.argv[0]} <path_to_module.json>")
        sys.exit(1)

    module_path = sys.argv[1]

    if not os.path.exists(module_path):
        print(red(f"Error: File {module_path} does not exist"))
        sys.exit(1)

    try:
        # JSON text is UTF-8 by specification; do not rely on the locale.
        with open(module_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # First check for valid JSON
        try:
            module_json = json.loads(content)
            print(green("✓ Valid JSON structure"))
        except json.JSONDecodeError as parse_error:
            print(red(f"✗ Invalid JSON: {parse_error}"))

            # Try to fix common issues
            print(yellow("Attempting to fix common JSON issues..."))
            fixed_content = _repair_json_text(content)

            try:
                module_json = json.loads(fixed_content)
            except json.JSONDecodeError as repair_error:
                print(red(f"✗ Could not fix JSON: {repair_error}"))
                sys.exit(1)

            print(green("✓ Fixed JSON issues successfully"))

            # Write the fixed content back to the file
            with open(module_path, 'w', encoding='utf-8') as f:
                f.write(json.dumps(module_json, indent=2))

        if not isinstance(module_json, dict):
            # The statistics below need a mapping at the root.
            print(red("✗ Module root must be a JSON object"))
            sys.exit(1)

        # Check required fields
        missing_fields = check_required_fields(module_json)
        if missing_fields:
            print(red(f"✗ Missing required fields: {', '.join(missing_fields)}"))
        else:
            print(green("✓ All required fields present"))

        _print_findings(
            check_transitions(module_json),
            "✗ Invalid transitions found:",
            "✓ All transitions valid",
            red,
        )
        _print_findings(
            check_circular_references(module_json),
            "✗ Circular references found:",
            "✓ No circular references detected",
            red,
        )
        _print_findings(
            check_codes(module_json),
            "⚠ Possible code issues:",
            "✓ Medical codes look good",
            yellow,
        )
        _print_findings(
            check_prevalence(module_json),
            "⚠ Possible prevalence issues:",
            "✓ Prevalence information looks good",
            yellow,
        )

        # Display module stats
        print("\nModule Statistics:")
        print(f"- Name: {module_json.get('name', 'Unknown')}")
        print(f"- GMF Version: {module_json.get('gmf_version', 'Unknown')}")
        print(f"- States: {len(module_json.get('states', {}))}")
        print(f"- Remarks: {len(module_json.get('remarks', []))}")

    except Exception as e:
        # Top-level boundary: report and fail. The sys.exit() calls above
        # raise SystemExit, which is not an Exception and passes through.
        print(red(f"Error: {e}"))
        sys.exit(1)
|
||||
|
||||
# Run the validator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user