From d1c471fa2a1018ed4f8fc478f4df7bef91691149 Mon Sep 17 00:00:00 2001
From: Olamide Isreal
Date: Wed, 1 Apr 2026 09:10:53 +0100
Subject: [PATCH] Add stage_data.nf to download large files directly to PVC from alien server

---
 stage_data.nf | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 stage_data.nf

diff --git a/stage_data.nf b/stage_data.nf
new file mode 100644
index 0000000..0125987
--- /dev/null
+++ b/stage_data.nf
@@ -0,0 +1,120 @@
+nextflow.enable.dsl=2
+
+/*
+ * STAGE_DATA copies large reference files from the remote host onto the PVC.
+ *
+ * Steps: remove leftover _parts directories under BASE, list what is already
+ * on the PVC, download every file that is missing, then print a final listing.
+ * Files that already exist at their destination are skipped.
+ *
+ * Requirements:
+ *   - SSHPASS must be set in the task environment (for example injected from
+ *     a cluster secret); the password is deliberately not stored in this file.
+ *   - sshpass must be in the container image or installable with apt-get.
+ */
+process STAGE_DATA {
+    container 'harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf'
+    memory '2 GB'
+
+    script:
+    """
+    set -euo pipefail
+
+    BASE="/omic/eureka/digital-patients"
+    REMOTE_HOST="omic@nucleus.omic.ai"
+    REMOTE_PORT=9100
+
+    echo "=== Cleaning up old _parts directories ==="
+    # Best effort: BASE may not exist yet. -prune keeps find from descending
+    # into a directory that rm is about to delete.
+    find "\$BASE" -type d -name "*_parts" -prune -exec rm -rf -- {} + 2>/dev/null || true
+    echo "Cleanup done"
+
+    echo "=== Current state of PVC ==="
+    ls -la "\$BASE/" 2>/dev/null || echo "Base dir missing"
+    ls -la "\$BASE/imputed/" 2>/dev/null || echo "imputed dir empty/missing"
+    ls -la "\$BASE/healthy/" 2>/dev/null || echo "healthy dir empty/missing"
+    ls -la "\$BASE/supporting-data/genome/" 2>/dev/null || echo "genome dir empty/missing"
+
+    echo "=== Downloading files from alien server ==="
+
+    # Make sure sshpass is available, installing it only when it is missing.
+    ensure_sshpass() {
+        if command -v sshpass > /dev/null 2>&1; then
+            return 0
+        fi
+        apt-get update -qq && apt-get install -y -qq sshpass > /dev/null 2>&1 || true
+        if ! command -v sshpass > /dev/null 2>&1; then
+            echo "ERROR: sshpass is missing and could not be installed" >&2
+            return 1
+        fi
+    }
+
+    # Copy one remote file (first argument) to a PVC path (second argument).
+    # The data goes to "<dest>.part" and is renamed only after scp succeeds, so
+    # an interrupted transfer is never mistaken for a complete file on re-run.
+    download() {
+        local remote="\$1"
+        local local_path="\$2"
+        local partial="\${local_path}.part"
+
+        # Fail with a clear message rather than an scp authentication error.
+        : "\${SSHPASS:?SSHPASS must be set in the task environment}"
+        ensure_sshpass
+
+        mkdir -p "\$(dirname "\$local_path")"
+        echo "Downloading: \$remote -> \$local_path"
+        # sshpass -e reads the password from SSHPASS, keeping it out of argv.
+        # NOTE(review): StrictHostKeyChecking=no disables host key verification;
+        # consider providing a known_hosts file instead.
+        if ! sshpass -e scp -o StrictHostKeyChecking=no -P "\$REMOTE_PORT" "\${REMOTE_HOST}:\${remote}" "\$partial"; then
+            rm -f -- "\$partial"
+            echo "ERROR: download failed: \$remote" >&2
+            return 1
+        fi
+        mv -f -- "\$partial" "\$local_path"
+        ls -lh "\$local_path"
+    }
+
+    # Download remote path (first argument) to destination (second argument)
+    # unless the destination exists; the third argument labels the SKIP message.
+    stage_file() {
+        if [ ! -f "\$2" ]; then
+            download "\$1" "\$2"
+        else
+            echo "SKIP: \$3 already present"
+        fi
+    }
+
+    # Imputed files (schizophrenia)
+    for sex in both_sexes female male; do
+        name="F5_SCHIZO.gwas.imputed_v3.\${sex}.tsv.bgz"
+        stage_file "/mnt/Avatar/imputed/ukbb/imputed/\$name" "\$BASE/imputed/\$name" "F5_SCHIZO \$sex"
+    done
+
+    # Gnomad files
+    for sex in female male; do
+        name="gnomad.genomes.v4.1.sites.\${sex}.txt"
+        stage_file "/mnt/Avatar/digital_patient/\$name" "\$BASE/healthy/\$name" "gnomad \$sex"
+    done
+
+    # hg38.fa (3.3GB)
+    stage_file "/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.fa" "\$BASE/supporting-data/genome/hg38.fa" "hg38.fa"
+
+    echo "=== Final verification ==="
+    echo "Imputed:"
+    ls -lh "\$BASE"/imputed/*.bgz 2>/dev/null || echo " NONE"
+    echo "Healthy:"
+    ls -lh "\$BASE"/healthy/*.txt 2>/dev/null || echo " NONE"
+    echo "Genome:"
+    ls -lh "\$BASE"/supporting-data/genome/hg38.* 2>/dev/null || echo " NONE"
+    echo "Small files:"
+    ls -lh "\$BASE"/MANE* "\$BASE"/regulon* "\$BASE"/LM22* "\$BASE"/ukbb* 2>/dev/null || echo " NONE"
+
+    echo "=== STAGING COMPLETE ==="
+    """
+}
+
+workflow {
+    STAGE_DATA()
+}