Files
digital-patients/stage_data.nf

90 lines
3.4 KiB
Plaintext

// Select Nextflow DSL 2 syntax for this script.
nextflow.enable.dsl=2
/*
 * STAGE_DATA
 *
 * Staging task that populates /omic/eureka/digital-patients (called "PVC" in
 * the log output) with reference files copied via scp from nucleus.omic.ai.
 *
 * Steps performed by the task script:
 *   1. Delete every directory named "*_parts" below the base directory.
 *   2. List the current contents of the base directory and its sub-folders.
 *   3. Install sshpass (best effort) when it is not already available.
 *   4. Copy each required file unless it already exists at its destination.
 *   5. List the staged files as a final verification.
 *
 * The process declares no inputs and no outputs; its only effect is on the
 * filesystem below the base directory.
 */
process STAGE_DATA {
    container 'harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf'
    memory '2 GB'

    script:
    """
    BASE="/omic/eureka/digital-patients"

    echo "=== Cleaning up old _parts directories ==="
    # -prune keeps find from descending into a directory that is being removed.
    # Errors are ignored on purpose: the base directory may not exist yet.
    find "\$BASE" -name "*_parts" -type d -prune -exec rm -rf -- {} + 2>/dev/null || true
    echo "Cleanup done"

    echo "=== Current state of PVC ==="
    ls -la "\$BASE/" 2>/dev/null || echo "Base dir missing"
    ls -la "\$BASE/imputed/" 2>/dev/null || echo "imputed dir empty/missing"
    ls -la "\$BASE/healthy/" 2>/dev/null || echo "healthy dir empty/missing"
    ls -la "\$BASE/supporting-data/genome/" 2>/dev/null || echo "genome dir empty/missing"

    echo "=== Downloading files from alien server ==="
    # Best-effort install: skipped when sshpass already exists, and a failure is
    # only fatal later if a download is actually required.
    if ! command -v sshpass > /dev/null 2>&1; then
        if ! { apt-get update -qq && apt-get install -y -qq sshpass > /dev/null 2>&1; }; then
            echo "WARNING: could not install sshpass; downloads will fail if any file is missing" >&2
        fi
    fi

    # download REMOTE LOCAL_PATH
    #   Copies REMOTE from the data server to LOCAL_PATH via sshpass + scp.
    #   The transfer goes to LOCAL_PATH.part first and is renamed only after scp
    #   succeeds, so an interrupted copy never leaves a truncated file that a
    #   later run would mistake for a complete one.
    #   Returns non-zero (after removing the partial file) on any failure.
    download() {
        local remote="\$1"
        local local_path="\$2"
        local partial="\$local_path.part"

        if ! command -v sshpass > /dev/null 2>&1; then
            echo "ERROR: sshpass is not available; cannot download \$remote" >&2
            return 1
        fi

        mkdir -p "\$(dirname "\$local_path")" || return 1
        echo "Downloading: \$remote -> \$local_path"

        # NOTE(review): the password is hard-coded in the pipeline source. It
        # should be rotated and supplied through a secret store instead (for
        # example Nextflow secrets). It is passed through the SSHPASS
        # environment variable (sshpass -e) rather than -p so that it does not
        # appear in the process argument list.
        # NOTE(review): StrictHostKeyChecking=no disables host key verification;
        # consider pinning the server key in known_hosts.
        if ! SSHPASS='bl3rg3r5' sshpass -e scp -o StrictHostKeyChecking=no -P 9100 "omic@nucleus.omic.ai:\$remote" "\$partial"; then
            rm -f -- "\$partial"
            echo "ERROR: download failed: \$remote" >&2
            return 1
        fi

        mv -- "\$partial" "\$local_path" || return 1
        ls -lh "\$local_path"
    }

    # stage LABEL REMOTE LOCAL_PATH
    #   Downloads REMOTE to LOCAL_PATH unless LOCAL_PATH already exists as a
    #   regular file. Aborts the task when the download fails.
    stage() {
        local label="\$1"
        local remote="\$2"
        local local_path="\$3"

        if [ -f "\$local_path" ]; then
            echo "SKIP: \$label already present"
        else
            download "\$remote" "\$local_path" || exit 1
        fi
    }

    # Imputed files (schizophrenia)
    stage "F5_SCHIZO both_sexes" "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz"
    stage "F5_SCHIZO female" "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz"
    stage "F5_SCHIZO male" "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz"

    # Gnomad files
    stage "gnomad female" "/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.female.txt" "\$BASE/healthy/gnomad.genomes.v4.1.sites.female.txt"
    stage "gnomad male" "/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.male.txt" "\$BASE/healthy/gnomad.genomes.v4.1.sites.male.txt"

    # hg38.fa (3.3GB)
    stage "hg38.fa" "/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.fa" "\$BASE/supporting-data/genome/hg38.fa"

    echo "=== Final verification ==="
    echo "Imputed:"
    ls -lh "\$BASE"/imputed/*.bgz 2>/dev/null || echo " NONE"
    echo "Healthy:"
    ls -lh "\$BASE"/healthy/*.txt 2>/dev/null || echo " NONE"
    echo "Genome:"
    ls -lh "\$BASE"/supporting-data/genome/hg38.* 2>/dev/null || echo " NONE"
    echo "Small files:"
    ls -lh "\$BASE"/MANE* "\$BASE"/regulon* "\$BASE"/LM22* "\$BASE"/ukbb* 2>/dev/null || echo " NONE"
    echo "=== STAGING COMPLETE ==="
    """
}
// Entry workflow: runs the single STAGE_DATA task.
workflow {
    STAGE_DATA()
}