Trying to fix basic functionality again.
This commit is contained in:
262
scripts/cleanup.sh
Executable file
262
scripts/cleanup.sh
Executable file
@@ -0,0 +1,262 @@
|
||||
#!/bin/bash

# Synthea-All-Diseases Repository Cleanup Script
# This script removes unnecessary files and keeps only the essential ones for the Synthea module generator
#
# Every path used below is relative to the current working directory, so the
# script has to be started from the repository root.

echo "Starting repository cleanup..."

# Create the backup tree up front. Later steps delete src/ and other
# directories, so stop here if the backup location cannot be created.
backup_dirs=(
  backup
  backup/scripts           # Python scripts
  backup/modules           # modules
  backup/module_generator  # module generator scripts
)
if ! mkdir -p "${backup_dirs[@]}"; then
  echo "ERROR: could not create the backup directories; aborting before anything is deleted." >&2
  exit 1
fi

# 1. Keep essential Nextflow files, remove others
echo "Handling Nextflow files..."

# main.nf is copied (not moved): it stays in place and is also kept in backup/.
cp main.nf backup/

# Workflow files that are deleted outright; rm -f ignores the ones that do not exist.
discarded_workflows=(
  build_synthea.nf
  synthea_module_generator_old.nf
  minimal_test.nf
  fresh_synthea.nf
  simple_synthea.nf
  test_workflow.nf
  new_synthea.nf
  minimal_working.nf
)
rm -f "${discarded_workflows[@]}"

# synthea_module_generator.nf is no longer used, but is parked in backup/ for reference.
mv synthea_module_generator.nf backup/

# 2. Keep essential Python scripts, Docker files, and configuration files
echo "Keeping essential files..."

# src/ is removed in step 4, so these two copies are the only surviving
# versions of the generator scripts and the modules: abort if either fails.
#
# The trailing "/." copies the *contents* of the directory. Copying the
# directory itself would create backup/module_generator/python/, and the
# restore step would then produce module_generator/python/, which does not
# match the "src/main/python/ -> module_generator/" rewrites applied to the
# Dockerfile and main.nf in steps 9 and 10.
if [ -d "src/main/python" ]; then
  if ! cp -r src/main/python/. backup/module_generator/; then
    echo "ERROR: failed to back up src/main/python; aborting before src/ is removed." >&2
    exit 1
  fi
fi
if [ -d "src/main/resources/modules" ]; then
  # "/." (unlike "/*") also succeeds when the directory is empty.
  if ! cp -r src/main/resources/modules/. backup/modules/; then
    echo "ERROR: failed to back up src/main/resources/modules; aborting before src/ is removed." >&2
    exit 1
  fi
fi

# Root-level configuration and documentation. These files are not deleted by
# the later steps, so a missing one only produces a cp diagnostic.
cp Dockerfile docker-compose.yml CLAUDE.md .env.example nextflow.config README.md backup/
cp DOCKER_README.md SYNTHEA_GUIDE.md backup/ 2>/dev/null || :  # Keep additional README files (best effort)

# Backup .env file if it exists
if [ -f ".env" ]; then
  cp .env backup/
fi

# 3. Back up all Python scripts in the root directory
echo "Backing up Python scripts..."

# Key scripts are the ones restored to the repository root in step 8.
# The only key script kept in the root is check_condition_structure.py, for convenience.
key_scripts=("check_condition_structure.py")

# is_key_script NAME
# Returns 0 when NAME is exactly equal to one entry of the global key_scripts
# array, 1 otherwise. An element-by-element comparison is used instead of
# matching against the space-joined array, which misfires for names that
# contain spaces.
is_key_script() {
  local candidate=$1 key
  for key in "${key_scripts[@]}"; do
    [[ "$key" == "$candidate" ]] && return 0
  done
  return 1
}

for script in "${key_scripts[@]}"; do
  if [ -f "$script" ]; then
    cp "$script" backup/
    echo " Backed up key script for root: $script"
  fi
done

# All other root-level Python scripts go to backup/scripts/ and, in step 8, to
# scripts/. They are copied, not moved: nothing in this script deletes the
# originals from the root.
for script in *.py; do
  # The -f test also skips the literal "*.py" left behind when nothing matches.
  if [ -f "$script" ] && ! is_key_script "$script"; then
    cp "$script" backup/scripts/
    echo " Backed up utility script: $script"
  fi
done

# 4. Remove ALL Synthea code - we'll clone it in Docker
echo "Removing ALL Synthea source code..."
echo "This will be cloned during Docker build based on Dockerfile..."

# Directories: the entire src tree first, then build/run output directories.
for synthea_dir in src/ build/ output/ simulator/ lib/ logs/; do
  rm -rf "$synthea_dir"
done

# Run files and jars in the repository root.
for synthea_file in run_synthea run_synthea.bat synthea *.jar; do
  rm -f "$synthea_file"
done

# 5. Remove other experimental or temporary files
echo "Removing experimental and temporary files..."

# NOTE(review): *.txt matches every root-level .txt file (trace.txt and
# error_output.txt are already covered by it) - confirm that no .txt file in
# the root, e.g. a requirements or license file, is supposed to survive.
temporary_files=(
  *.log *.txt trace.txt
  error_output.txt generate_module.sh run_synthetic_data_generation.sh
  build_docker.sh entrypoint.sh run_module_generator.sh generate_samples.sh test_run.sh
  run_flexporter .DS_Store
)
rm -f "${temporary_files[@]}"

# 6. Remove gradle files and other unnecessary files
echo "Removing additional unnecessary files..."

# Note: .git, .gitignore and .github are not listed anywhere below, so version
# control data is kept. LICENSE files are kept as well; the other
# documentation listed here will be included from Synthea.
unneeded_files=(
  build.gradle gradlew gradlew.bat settings.gradle
  CODE_OF_CONDUCT.md NOTICE
  *.bak *~
)
unneeded_trees=(
  gradle/ .gradle/
  .nextflow/ .nextflow.log* nextflow-*.zip
)
rm -f "${unneeded_files[@]}"
rm -rf "${unneeded_trees[@]}"

# 7. Create clean directory structure for the repository
echo "Creating clean directory structure..."

#   modules/           top-level modules directory
#   module_generator/  module generator directory (renamed from python)
#   scripts/           utility scripts directory
mkdir -p modules module_generator scripts

# 8. Restore files to the cleaned structure
echo "Restoring files to cleaned structure..."

# Modules and core generator scripts. The copies are best effort (errors are
# silenced), so the confirmation is printed whenever the backup directory exists.
if [ -d "backup/modules" ]; then
  cp -r backup/modules/* modules/ 2>/dev/null || :
  echo " Restored modules to modules/ directory"
fi

if [ -d "backup/module_generator" ]; then
  cp -r backup/module_generator/* module_generator/ 2>/dev/null || :
  echo " Restored core generator scripts to module_generator/ directory"
fi

# Key Python scripts go back to the repository root.
for script in "${key_scripts[@]}"; do
  if [ -f "backup/$script" ]; then
    cp "backup/$script" ./
    echo " Restored key script to root: $script"
  fi
done

# Utility Python scripts go to scripts/.
if [ -d "backup/scripts" ]; then
  cp backup/scripts/* scripts/ 2>/dev/null || :
  echo " Restored utility scripts to scripts/ directory"
fi

# Root files: each name is prefixed with backup/ and copied back in one
# best-effort cp call.
root_files=(main.nf Dockerfile docker-compose.yml CLAUDE.md nextflow.config .env.example README.md)
cp "${root_files[@]/#/backup/}" ./ 2>/dev/null || :

# Optional documentation, restored only when it was backed up.
for optional_doc in DOCKER_README.md SYNTHEA_GUIDE.md; do
  if [ -f "backup/$optional_doc" ]; then
    cp "backup/$optional_doc" ./
  fi
done

# Restore .env if it existed
if [ -f "backup/.env" ]; then
  cp backup/.env ./
  echo " Restored .env file"
fi

# 9. Update the Dockerfile to reference the new directory structure
echo "Updating Dockerfile references if needed..."
if [ -f "Dockerfile" ]; then
  # A single in-place pass: sed applies the expressions to each line in the
  # order listed, which gives the same result as running them one after
  # another over the file. The .bak copy sed leaves behind is removed afterwards.
  sed -i.bak \
    -e 's|COPY src/main/python/|COPY module_generator/|g' \
    -e 's|COPY python/|COPY module_generator/|g' \
    -e 's|COPY src/main/resources/modules/|COPY modules/|g' \
    -e 's|src/main/python/|module_generator/|g' \
    -e 's|src/main/resources/modules|modules|g' \
    -e 's|/app/src/main/python/|/app/module_generator/|g' \
    Dockerfile
  rm -f Dockerfile.bak
fi

# 10. Update the main.nf file to reference the new directory structure
echo "Updating main.nf references if needed..."
if [ -f "main.nf" ]; then
  # One in-place pass with the expressions applied in order on every line.
  # NOTE(review): the last expression rewrites every remaining "python/"
  # substring, not only paths to the old python directory - confirm main.nf
  # has no other occurrences that must stay untouched.
  sed -i.bak \
    -e 's|src/main/resources/modules|modules|g' \
    -e 's|src/main/python|module_generator|g' \
    -e 's|python/|module_generator/|g' \
    main.nf
  rm -f main.nf.bak
fi

# 11. Create a simple README for the scripts directory
echo "Creating README for scripts directory..."

# The quoted delimiter keeps the text literal (no expansion of backticks or $).
# Any existing scripts/README.md is overwritten.
cat << 'EOF' > scripts/README.md
# Utility Scripts

This directory contains utility scripts for working with Synthea modules:

- `analyze_patient_data.py` - Analyzes patient data generated by Synthea
- `check_json.py` - Validates JSON structure of module files
- `validate_module.py` - Performs comprehensive validation of modules
- `test_module_exists.py` - Checks if a module exists for a given disease
- `run_module.py` - Script to run a specific module with Synthea
- Other utility scripts for module generation and testing

These scripts complement the core module generator scripts found in the `module_generator/` directory.
EOF

# Update README.md with instructions on using the cleaned repo
echo "Updating README..."

# This replaces the README.md in the repository root; the previous version was
# copied to backup/README.md in step 2. The quoted delimiter keeps the text
# literal (no expansion of backticks or $).
cat << 'EOF' > README.md
# Synthea All Diseases

This repository contains a workflow for generating Synthea disease modules using Claude AI and generating synthetic patient data.

## Repository Structure

- `module_generator/` - Core module generation scripts
- `modules/` - Generated disease modules
- `scripts/` - Utility Python scripts and tools
- `main.nf` - Nextflow workflow for module generation and patient data generation
- `Dockerfile` and `docker-compose.yml` - Docker configuration

## Key Files

- `check_condition_structure.py` - Validates condition structure in modules
- `main.nf` - Main workflow file for generating modules and patient data

## Running the Pipeline

### Prerequisites

- Docker and Docker Compose
- Nextflow
- Anthropic API key (for Claude AI)

### Setup

1. Clone this repository
2. Copy `.env.example` to `.env` and add your Anthropic API key
3. Run with Docker Compose:

```
docker-compose up
```

### Running the Workflow

```
nextflow run main.nf --disease_name "Disease Name" [OPTIONS]
```

For a full list of options, run:

```
nextflow run main.nf --help
```

## Key Features

- Generate modules for diseases that don't exist in Synthea
- Validate condition structure in generated modules
- Generate synthetic patient data using the modules
- Analyze generated patient data

## Documentation

- See `CLAUDE.md` for additional development guidelines
- See `DOCKER_README.md` for Docker setup and usage
- See `SYNTHEA_GUIDE.md` for detailed usage examples and scenarios
- See `scripts/README.md` for information about utility scripts

## License

This project uses the same license as Synthea.
EOF

# 12. Update docker-compose.yml to reference the new directory structure
echo "Updating docker-compose.yml references if needed..."
# Nothing is rewritten here (the file uses relative paths and volume mounts);
# the step only reports that when the file is present.
if [ -f "docker-compose.yml" ]; then
  echo " No changes needed for docker-compose.yml"
fi

# Closing summary, printed verbatim (quoted delimiter, no expansion).
cat << 'EOF'
Cleanup complete! All essential files have been kept, and unnecessary files have been removed.
Repository structure has been simplified to:
 - module_generator/ (core module generation scripts)
 - modules/ (disease modules)
 - scripts/ (utility scripts)
 - check_condition_structure.py (in root for easy access)

The following important files were preserved:
 - .git/ (version control repository)
 - .gitignore (version control configuration)
 - .env (environment configuration if it existed)
 - .github/ (GitHub configuration if it existed)

Optional: Remove the backup directory after verifying everything works: rm -rf backup
EOF
Reference in New Issue
Block a user