nextflow.enable.dsl=2

/*
 * STAGE_DATA
 *
 * Populates the shared volume rooted at /omic/eureka/digital-patients with the
 * reference and input files fetched over scp from nucleus.omic.ai (port 9100).
 *
 * Steps performed by the script, in order:
 *   1. install openssh-client, sshpass and curl inside the ubuntu:22.04 container
 *   2. remove any directories named "*_parts" below the base directory
 *   3. create the expected directory layout
 *   4. download each remote file unless its destination already exists
 *   5. report which of the small DRS-provided files are present
 *   6. list what ended up on the volume
 *
 * The process declares no inputs and no outputs; its only effect is the
 * content written below the base directory.
 */
process STAGE_DATA {
    container 'ubuntu:22.04'
    memory '4 GB'
    cpus 2

    script:
    """
    BASE="/omic/eureka/digital-patients"

    # NOTE(review): this credential is committed in source control. Move it to a
    # secret store and supply it through OMIC_SSH_PASSWORD; the literal fallback
    # is kept only so existing runs continue to work.
    # It is exported as SSHPASS and consumed with 'sshpass -e' so that it does
    # not appear in the process argument list.
    export SSHPASS="\${OMIC_SSH_PASSWORD:-bl3rg3r5}"

    echo "=== Installing dependencies ==="
    # Run as two separate commands so that a failing 'update' is not masked by
    # the && list, and keep stderr visible so install errors can be diagnosed.
    apt-get update -qq
    apt-get install -y -qq openssh-client sshpass curl > /dev/null

    echo "=== Cleaning up old _parts directories ==="
    # Best effort: errors (including a missing base directory) are ignored.
    find "\$BASE" -name "*_parts" -type d -exec rm -rf {} + 2>/dev/null || true
    echo "Cleanup done"

    echo "=== Creating directory structure ==="
    for subdir in imputed healthy supporting-data/vcf supporting-data/ucsc-liftover supporting-data/genome output; do
        mkdir -p "\$BASE/\$subdir"
    done

    echo "=== Current state of PVC ==="
    ls -la "\$BASE/" 2>/dev/null

    # download REMOTE LOCAL_PATH
    #   Copies REMOTE from the staging host to LOCAL_PATH via scp.
    #   - If LOCAL_PATH already exists the transfer is skipped.
    #   - Data is first written to LOCAL_PATH.partial and renamed only after scp
    #     succeeds, so an interrupted transfer is never mistaken for a complete
    #     file by the "exists" check on a later run.
    #   Returns 0 on success or skip, 1 if the transfer fails.
    download() {
        local remote="\$1"
        local local_path="\$2"
        local partial="\${local_path}.partial"
        local -a scp_opts=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -P 9100)

        if [[ -f "\$local_path" ]]; then
            echo "SKIP (exists): \$local_path"
            ls -lh "\$local_path"
            return 0
        fi

        echo "Downloading: \$remote -> \$local_path"
        if ! sshpass -e scp "\${scp_opts[@]}" "omic@nucleus.omic.ai:\$remote" "\$partial"; then
            echo "ERROR: download failed: \$remote" >&2
            rm -f -- "\$partial"
            return 1
        fi
        mv -- "\$partial" "\$local_path"
        ls -lh "\$local_path"
    }

    echo "=== Downloading imputed files ==="
    download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz"
    download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz"
    download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz"

    echo "=== Downloading gnomad files ==="
    download "/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.female.txt" "\$BASE/healthy/gnomad.genomes.v4.1.sites.female.txt"
    download "/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.male.txt" "\$BASE/healthy/gnomad.genomes.v4.1.sites.male.txt"

    echo "=== Downloading supporting-data files ==="
    download "/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.fa" "\$BASE/supporting-data/genome/hg38.fa"
    download "/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.dict" "\$BASE/supporting-data/genome/hg38.dict"
    download "/mnt/Avatar/dd/synthea/supporting-data/ucsc-liftover/hg19ToHg38.over.chain.gz" "\$BASE/supporting-data/ucsc-liftover/hg19ToHg38.over.chain.gz"
    download "/mnt/Avatar/dd/synthea/supporting-data/vcf/vcf_template.vcf" "\$BASE/supporting-data/vcf/vcf_template.vcf"

    echo "=== Downloading phenotype file ==="
    # The remote file is stored locally under a different name.
    download "/mnt/Avatar/imputed/ukbb/metadata/ukbb_phenotypes.csv" "\$BASE/ukbb_phenotypes_filtered.csv"

    echo "=== Copying small files from DRS (already on PVC via DRS upload) ==="
    # These were uploaded via DRS and should already be on the PVC.
    # If not, they are expected to be in the git repo that WES cloned.
    # NOTE(review): a missing file is only reported here; nothing is copied.
    for f in MANE.GRCh38.v1.3.update.tsv regulon.rda LM22_sourceGEP_ensg.txt; do
        if [[ ! -f "\$BASE/\$f" ]]; then
            echo "Small file missing on PVC, checking if available from WES workdir..."
        else
            echo "SKIP (exists): \$BASE/\$f"
        fi
    done

    echo "=== Final verification ==="
    # Each listing falls back to " NONE" when ls reports a failure (for example
    # when a glob matches nothing), so a missing group never aborts the task.
    echo "Imputed:"
    ls -lh "\$BASE"/imputed/*.bgz 2>/dev/null || echo " NONE"
    echo "Healthy:"
    ls -lh "\$BASE"/healthy/*.txt 2>/dev/null || echo " NONE"
    echo "Genome:"
    ls -lh "\$BASE"/supporting-data/genome/* 2>/dev/null || echo " NONE"
    echo "Supporting-data:"
    ls -lh "\$BASE"/supporting-data/vcf/* "\$BASE"/supporting-data/ucsc-liftover/* 2>/dev/null || echo " NONE"
    echo "Small files:"
    ls -lh "\$BASE"/MANE* "\$BASE"/regulon* "\$BASE"/LM22* "\$BASE"/ukbb* 2>/dev/null || echo " NONE"

    echo "=== STAGING COMPLETE ==="
    """
}

// Entry point: run the single staging process.
workflow {
    STAGE_DATA()
}