Fix reassemble.nf: use find to discover _parts dirs dynamically
This commit is contained in:
@@ -1,62 +1,49 @@
|
||||
#!/usr/bin/env nextflow
|
||||
nextflow.enable.dsl=2
|
||||
|
||||
// One-time script to reassemble chunked files on the PVC
|
||||
// Run once, then delete. Not part of the main pipeline.
|
||||
|
||||
// Root directory under which the REASSEMBLE process looks for chunked
// `*_parts` directories; each reassembled file is written beside its
// `_parts` directory.
params.base_dir = '/omic/eureka/digital-patients'
|
||||
// NOTE(review): outdir is not referenced anywhere in the visible part of
// this file — confirm whether it is still needed.
params.outdir = '/omic/eureka/digital-patients'
|
||||
|
||||
// REASSEMBLE: one-off task that rebuilds every chunked file found under
// params.base_dir.
//
// For each directory whose name ends in `_parts`, all `part_*` files inside
// it are concatenated into a sibling file named like the directory without
// the `_parts` suffix (e.g. `hg38.fa_parts/` -> `hg38.fa`).
//
// Inputs/outputs: none declared; the process works directly on the
// filesystem path given by params.base_dir.
//
// Inside the script string every `\$` is a bash variable; the unescaped
// `${params.base_dir}` is substituted by Nextflow before bash runs.
process REASSEMBLE {
    container 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
    memory '8 GB'

    script:
    """
    echo "=== Reassembling chunked files ==="
    BASE="${params.base_dir}"

    echo "=== Listing _parts directories ==="
    find "\$BASE" -name "*_parts" -type d 2>/dev/null

    echo "=== Reassembling all chunked files ==="
    # Directory names are passed NUL-delimited so that paths containing
    # spaces or glob characters are never word-split or expanded.
    find "\$BASE" -name "*_parts" -type d -print0 2>/dev/null |
    while IFS= read -r -d '' parts_dir; do
        # Derive the output filename by stripping the _parts suffix
        out_file="\${parts_dir%_parts}"
        echo "Reassembling: \$parts_dir -> \$out_file"

        # Count parts with a glob into an array instead of parsing ls output.
        # nullglob makes an empty directory yield an empty array.
        shopt -s nullglob
        parts=("\$parts_dir"/part_*)
        shopt -u nullglob
        n_parts=\${#parts[@]}
        echo " Found \$n_parts parts"

        if [ "\$n_parts" -gt 0 ]; then
            # NOTE(review): parts are joined in the shell's sorted glob order,
            # so part names must sort in chunk order (e.g. fixed-width or
            # zero-padded suffixes) - confirm against how they were split.
            # Write to a scratch name first so an interrupted run never leaves
            # a truncated file under the final name.
            cat -- "\${parts[@]}" > "\$out_file.partial"
            mv -f -- "\$out_file.partial" "\$out_file"
            out_size=\$(du -h "\$out_file" | cut -f1)
            echo " Created: \$out_file (\$out_size)"
        else
            echo " WARNING: No parts found in \$parts_dir"
        fi
    done

    echo "=== Verification ==="
    echo "Imputed files:"
    ls -lh "\$BASE"/imputed/*.bgz 2>/dev/null || echo " None"
    echo "Healthy files:"
    ls -lh "\$BASE"/healthy/*.txt 2>/dev/null || echo " None"
    echo "Genome files:"
    ls -lh "\$BASE"/supporting-data/genome/hg38.* 2>/dev/null || echo " None"
    echo "Small files:"
    ls -lh "\$BASE"/MANE* "\$BASE"/regulon* "\$BASE"/LM22* "\$BASE"/ukbb* 2>/dev/null || echo " None"

    echo "=== REASSEMBLY COMPLETE ==="
    """
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user