Fix stage_data.nf: use ubuntu container with apt-get, create dirs on PVC
This commit is contained in:
@@ -1,74 +1,69 @@
|
|||||||
nextflow.enable.dsl=2
|
nextflow.enable.dsl=2
|
||||||
|
|
||||||
process STAGE_DATA {
|
process STAGE_DATA {
|
||||||
container 'harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf'
|
container 'ubuntu:22.04'
|
||||||
memory '2 GB'
|
memory '4 GB'
|
||||||
|
cpus 2
|
||||||
|
|
||||||
script:
|
script:
|
||||||
"""
|
"""
|
||||||
BASE="/omic/eureka/digital-patients"
|
BASE="/omic/eureka/digital-patients"
|
||||||
|
|
||||||
|
echo "=== Installing dependencies ==="
|
||||||
|
apt-get update -qq && apt-get install -y -qq openssh-client sshpass curl > /dev/null 2>&1
|
||||||
|
|
||||||
echo "=== Cleaning up old _parts directories ==="
|
echo "=== Cleaning up old _parts directories ==="
|
||||||
find \$BASE -name "*_parts" -type d -exec rm -rf {} + 2>/dev/null || true
|
find \$BASE -name "*_parts" -type d -exec rm -rf {} + 2>/dev/null || true
|
||||||
echo "Cleanup done"
|
echo "Cleanup done"
|
||||||
|
|
||||||
|
echo "=== Creating directory structure ==="
|
||||||
|
mkdir -p \$BASE/imputed \$BASE/healthy \$BASE/supporting-data/vcf \$BASE/supporting-data/ucsc-liftover \$BASE/supporting-data/genome \$BASE/output
|
||||||
|
|
||||||
echo "=== Current state of PVC ==="
|
echo "=== Current state of PVC ==="
|
||||||
ls -la \$BASE/ 2>/dev/null || echo "Base dir missing"
|
ls -la \$BASE/ 2>/dev/null
|
||||||
ls -la \$BASE/imputed/ 2>/dev/null || echo "imputed dir empty/missing"
|
|
||||||
ls -la \$BASE/healthy/ 2>/dev/null || echo "healthy dir empty/missing"
|
|
||||||
ls -la \$BASE/supporting-data/genome/ 2>/dev/null || echo "genome dir empty/missing"
|
|
||||||
|
|
||||||
echo "=== Downloading files from alien server ==="
|
|
||||||
apt-get update -qq && apt-get install -y -qq sshpass > /dev/null 2>&1 || true
|
|
||||||
|
|
||||||
# Function to download via sshpass+scp
|
|
||||||
download() {
|
download() {
|
||||||
local remote="\$1"
|
local remote="\$1"
|
||||||
local local_path="\$2"
|
local local_path="\$2"
|
||||||
mkdir -p \$(dirname "\$local_path")
|
if [ -f "\$local_path" ]; then
|
||||||
|
echo "SKIP (exists): \$local_path"
|
||||||
|
ls -lh "\$local_path"
|
||||||
|
return
|
||||||
|
fi
|
||||||
echo "Downloading: \$remote -> \$local_path"
|
echo "Downloading: \$remote -> \$local_path"
|
||||||
sshpass -p 'bl3rg3r5' scp -o StrictHostKeyChecking=no -P 9100 "omic@nucleus.omic.ai:\$remote" "\$local_path"
|
sshpass -p 'bl3rg3r5' scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -P 9100 "omic@nucleus.omic.ai:\$remote" "\$local_path"
|
||||||
ls -lh "\$local_path"
|
ls -lh "\$local_path"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Imputed files (schizophrenia)
|
echo "=== Downloading imputed files ==="
|
||||||
if [ ! -f "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz" ]; then
|
|
||||||
download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz"
|
download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz"
|
||||||
else
|
|
||||||
echo "SKIP: F5_SCHIZO both_sexes already present"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -f "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz" ]; then
|
|
||||||
download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz"
|
download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz"
|
||||||
else
|
|
||||||
echo "SKIP: F5_SCHIZO female already present"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -f "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz" ]; then
|
|
||||||
download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz"
|
download "/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz" "\$BASE/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz"
|
||||||
else
|
|
||||||
echo "SKIP: F5_SCHIZO male already present"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Gnomad files
|
echo "=== Downloading gnomad files ==="
|
||||||
if [ ! -f "\$BASE/healthy/gnomad.genomes.v4.1.sites.female.txt" ]; then
|
|
||||||
download "/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.female.txt" "\$BASE/healthy/gnomad.genomes.v4.1.sites.female.txt"
|
download "/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.female.txt" "\$BASE/healthy/gnomad.genomes.v4.1.sites.female.txt"
|
||||||
else
|
|
||||||
echo "SKIP: gnomad female already present"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -f "\$BASE/healthy/gnomad.genomes.v4.1.sites.male.txt" ]; then
|
|
||||||
download "/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.male.txt" "\$BASE/healthy/gnomad.genomes.v4.1.sites.male.txt"
|
download "/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.male.txt" "\$BASE/healthy/gnomad.genomes.v4.1.sites.male.txt"
|
||||||
else
|
|
||||||
echo "SKIP: gnomad male already present"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# hg38.fa (3.3GB)
|
echo "=== Downloading supporting-data files ==="
|
||||||
if [ ! -f "\$BASE/supporting-data/genome/hg38.fa" ]; then
|
|
||||||
download "/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.fa" "\$BASE/supporting-data/genome/hg38.fa"
|
download "/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.fa" "\$BASE/supporting-data/genome/hg38.fa"
|
||||||
|
download "/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.dict" "\$BASE/supporting-data/genome/hg38.dict"
|
||||||
|
download "/mnt/Avatar/dd/synthea/supporting-data/ucsc-liftover/hg19ToHg38.over.chain.gz" "\$BASE/supporting-data/ucsc-liftover/hg19ToHg38.over.chain.gz"
|
||||||
|
download "/mnt/Avatar/dd/synthea/supporting-data/vcf/vcf_template.vcf" "\$BASE/supporting-data/vcf/vcf_template.vcf"
|
||||||
|
|
||||||
|
echo "=== Downloading phenotype file ==="
|
||||||
|
download "/mnt/Avatar/imputed/ukbb/metadata/ukbb_phenotypes.csv" "\$BASE/ukbb_phenotypes_filtered.csv"
|
||||||
|
|
||||||
|
echo "=== Copying small files from DRS (already on PVC via DRS upload) ==="
|
||||||
|
# These were uploaded via DRS and should already be on PVC
|
||||||
|
# If not, they're in the git repo that WES cloned
|
||||||
|
for f in MANE.GRCh38.v1.3.update.tsv regulon.rda LM22_sourceGEP_ensg.txt; do
|
||||||
|
if [ ! -f "\$BASE/\$f" ]; then
|
||||||
|
echo "Small file missing on PVC, checking if available from WES workdir..."
|
||||||
|
# WES clones the repo, so the file might be in the current workdir's repo
|
||||||
else
|
else
|
||||||
echo "SKIP: hg38.fa already present"
|
echo "SKIP (exists): \$BASE/\$f"
|
||||||
fi
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
echo "=== Final verification ==="
|
echo "=== Final verification ==="
|
||||||
echo "Imputed:"
|
echo "Imputed:"
|
||||||
@@ -76,7 +71,9 @@ process STAGE_DATA {
|
|||||||
echo "Healthy:"
|
echo "Healthy:"
|
||||||
ls -lh \$BASE/healthy/*.txt 2>/dev/null || echo " NONE"
|
ls -lh \$BASE/healthy/*.txt 2>/dev/null || echo " NONE"
|
||||||
echo "Genome:"
|
echo "Genome:"
|
||||||
ls -lh \$BASE/supporting-data/genome/hg38.* 2>/dev/null || echo " NONE"
|
ls -lh \$BASE/supporting-data/genome/* 2>/dev/null || echo " NONE"
|
||||||
|
echo "Supporting-data:"
|
||||||
|
ls -lh \$BASE/supporting-data/vcf/* \$BASE/supporting-data/ucsc-liftover/* 2>/dev/null || echo " NONE"
|
||||||
echo "Small files:"
|
echo "Small files:"
|
||||||
ls -lh \$BASE/MANE* \$BASE/regulon* \$BASE/LM22* \$BASE/ukbb* 2>/dev/null || echo " NONE"
|
ls -lh \$BASE/MANE* \$BASE/regulon* \$BASE/LM22* \$BASE/ukbb* 2>/dev/null || echo " NONE"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user