Files
digital-patients/stage_data.nf

83 lines
3.5 KiB
Plaintext

// Select the DSL2 syntax for this script.
nextflow.enable.dsl = 2
// Stages the digital-patients reference data under /omic/eureka/digital-patients.
//
// The task removes leftover "*_parts" directories, creates the expected
// directory layout, installs sshpass with conda when it is missing, and then
// copies each required file from nucleus.omic.ai (scp, port 9100) unless the
// file is already present. It takes no inputs and declares no outputs; its
// only effect is the files it writes below BASE.
//
// Authentication: the scp password is read from the SSHPASS environment
// variable of the task (sshpass -e). It is only needed when at least one file
// still has to be downloaded. Provide it through the task environment (for
// example the Nextflow `env` config scope or a secret) and never commit it.
// NOTE(review): a password was previously committed in this file; treat it as
// compromised and rotate it.
//
// The task exits non-zero when any download fails, so an incomplete staging
// area is never reported as a success.
process STAGE_DATA {
    container 'harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf'
    memory '4 GB'
    cpus 2

    script:
    """
    # errexit stays off on purpose: every file is attempted even when an
    # earlier one fails. Failures are counted and turned into the exit status
    # at the end of the script.
    set +e
    BASE="/omic/eureka/digital-patients"
    failures=0

    echo "=== Cleaning up old _parts directories ==="
    # -prune keeps find from descending into directories that are about to be
    # deleted. Errors are silenced because this is best-effort cleanup (for
    # example BASE may not exist yet; it is created just below).
    find "\$BASE" -type d -name "*_parts" -prune -exec rm -rf -- {} + 2>/dev/null
    echo "Cleanup done"

    echo "=== Creating directory structure ==="
    mkdir -p "\$BASE/imputed" "\$BASE/healthy" "\$BASE/supporting-data/vcf" "\$BASE/supporting-data/ucsc-liftover" "\$BASE/supporting-data/genome" "\$BASE/output"
    echo "Dirs created"

    echo "=== Current PVC state ==="
    ls -la "\$BASE/" 2>/dev/null || echo "Base dir issue"

    echo "=== Installing sshpass ==="
    # Only reach for conda when the tool is not already on PATH.
    if ! command -v sshpass >/dev/null 2>&1; then
        conda install -y -c conda-forge sshpass 2>&1 | tail -3
    fi
    if command -v sshpass; then
        echo "sshpass OK"
    else
        echo "sshpass NOT FOUND"
    fi

    # download REMOTE_PATH LOCAL_PATH
    #   Copies REMOTE_PATH from nucleus.omic.ai to LOCAL_PATH unless LOCAL_PATH
    #   already exists. The transfer goes to LOCAL_PATH.part first and is
    #   renamed only after scp succeeds, so an interrupted copy can never be
    #   mistaken for a complete file on this or a later run.
    #   Returns 0 when the file is present afterwards; otherwise increments the
    #   global failures counter and returns 1.
    download() {
        local remote="\$1"
        local local_path="\$2"
        local partial="\${local_path}.part"

        if [ -f "\$local_path" ]; then
            echo "SKIP (exists): \$local_path"
            return 0
        fi

        # The password is required only once a transfer is really needed.
        if [ -z "\${SSHPASS:-}" ]; then
            echo "FAILED (SSHPASS is not set): \$local_path" >&2
            failures=\$((failures + 1))
            return 1
        fi

        echo "Downloading: \$remote -> \$local_path"
        rm -f -- "\$partial"
        # sshpass -e takes the password from the environment instead of argv,
        # which keeps it out of the process list.
        # NOTE(review): host key checking is disabled, so the server identity
        # is not verified; pin the host key if one can be distributed.
        if SSHPASS="\$SSHPASS" sshpass -e scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -P 9100 "omic@nucleus.omic.ai:\$remote" "\$partial" 2>&1 &&
            mv -f -- "\$partial" "\$local_path"; then
            ls -lh "\$local_path"
            return 0
        fi

        rm -f -- "\$partial"
        echo "FAILED: \$local_path"
        failures=\$((failures + 1))
        return 1
    }

    echo "=== Downloading imputed files ==="
    download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz"
    download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz"
    download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz"

    echo "=== Downloading gnomad files ==="
    download "/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.female.txt" "\$BASE/healthy/gnomad.genomes.v4.1.sites.female.txt"
    download "/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.male.txt" "\$BASE/healthy/gnomad.genomes.v4.1.sites.male.txt"

    echo "=== Downloading supporting-data files ==="
    download "/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.fa" "\$BASE/supporting-data/genome/hg38.fa"
    download "/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.dict" "\$BASE/supporting-data/genome/hg38.dict"
    download "/mnt/Avatar/dd/synthea/supporting-data/ucsc-liftover/hg19ToHg38.over.chain.gz" "\$BASE/supporting-data/ucsc-liftover/hg19ToHg38.over.chain.gz"
    download "/mnt/Avatar/dd/synthea/supporting-data/vcf/vcf_template.vcf" "\$BASE/supporting-data/vcf/vcf_template.vcf"

    echo "=== Downloading phenotype file ==="
    # NOTE(review): the local name (..._filtered.csv) differs from the remote
    # name; confirm that this rename is intentional.
    download "/mnt/Avatar/imputed/ukbb/metadata/ukbb_phenotypes.csv" "\$BASE/ukbb_phenotypes_filtered.csv"

    echo "=== Final verification ==="
    # Informational listing only; a missing group prints NONE instead of failing.
    echo "Imputed:"
    ls -lh "\$BASE"/imputed/*.bgz 2>/dev/null || echo " NONE"
    echo "Healthy:"
    ls -lh "\$BASE"/healthy/*.txt 2>/dev/null || echo " NONE"
    echo "Genome:"
    ls -lh "\$BASE"/supporting-data/genome/* 2>/dev/null || echo " NONE"
    echo "Support:"
    ls -lh "\$BASE"/supporting-data/vcf/* "\$BASE"/supporting-data/ucsc-liftover/* 2>/dev/null || echo " NONE"
    echo "Small:"
    ls -lh "\$BASE"/MANE* "\$BASE"/regulon* "\$BASE"/LM22* "\$BASE"/ukbb* 2>/dev/null || echo " NONE"

    if [ "\$failures" -gt 0 ]; then
        echo "=== STAGING INCOMPLETE: \$failures download(s) failed ===" >&2
        exit 1
    fi
    echo "=== STAGING COMPLETE ==="
    """
}
// Entry workflow: the pipeline consists of the single staging process.
workflow {
    STAGE_DATA()
}