From 45b634acf08becb2d3dd3611c0d3e0a4e1a2c2f0 Mon Sep 17 00:00:00 2001
From: Olamide Isreal
Date: Thu, 26 Mar 2026 19:33:44 +0100
Subject: [PATCH] Fix reassemble.nf: use find to discover _parts dirs dynamically

---
Review notes (below the separator, so not part of the commit message):
 * Iterate over NUL-delimited find output; the former for-in-$(find) loop
   word-split paths containing whitespace.
 * Count parts with a glob array instead of parsing ls output.
 * Quote every expansion of BASE, parts_dir and out_file.
 * Stop discarding find's stderr, so a missing or unreadable base directory
   is reported before the task aborts.
 * Assemble into "<file>.tmp" and rename, so an interrupted run cannot
   leave a truncated file under the final name.
 * Indentation of context/removed lines was reconstructed (4 spaces); apply
   with --ignore-whitespace if the target file differs. Blob hashes on the
   index line are carried over from the first revision.

 reassemble.nf | 81 ++++++++++++++++++++++++----------------------------
 1 file changed, 38 insertions(+), 43 deletions(-)

diff --git a/reassemble.nf b/reassemble.nf
index 86fab43..08559ab 100644
--- a/reassemble.nf
+++ b/reassemble.nf
@@ -1,62 +1,57 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl=2
 
-// One-time script to reassemble chunked files on the PVC
-// Run once, then delete. Not part of the main pipeline.
-
 params.base_dir = '/omic/eureka/digital-patients'
-params.outdir = '/omic/eureka/digital-patients'
 
+// Rebuilds every chunked file below params.base_dir: for each directory
+// named "<file>_parts", its part_* files are concatenated in glob order
+// into "<file>" next to that directory.
 process REASSEMBLE {
     container 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
+    memory '8 GB'
 
     script:
     """
-    echo "=== Reassembling chunked files ==="
+    BASE="${params.base_dir}"
 
-    # hg38.fa (35 parts)
-    if [ -d "${params.base_dir}/supporting-data/genome/hg38.fa_parts" ]; then
-        echo "Reassembling hg38.fa..."
-        cat ${params.base_dir}/supporting-data/genome/hg38.fa_parts/part_* > ${params.base_dir}/supporting-data/genome/hg38.fa
-        echo " Done: \$(du -h ${params.base_dir}/supporting-data/genome/hg38.fa | cut -f1)"
-    fi
+    echo "=== Listing _parts directories ==="
+    find "\$BASE" -type d -name '*_parts'
 
-    # gnomad male (12 parts)
-    if [ -d "${params.base_dir}/healthy/gnomad.genomes.v4.1.sites.male.txt_parts" ]; then
-        echo "Reassembling gnomad male..."
-        cat ${params.base_dir}/healthy/gnomad.genomes.v4.1.sites.male.txt_parts/part_* > ${params.base_dir}/healthy/gnomad.genomes.v4.1.sites.male.txt
-        echo " Done: \$(du -h ${params.base_dir}/healthy/gnomad.genomes.v4.1.sites.male.txt | cut -f1)"
-    fi
+    echo "=== Reassembling all chunked files ==="
+    # NUL-delimited find output keeps paths containing whitespace intact.
+    while IFS= read -r -d '' parts_dir; do
+        # The output file is the directory path minus its _parts suffix.
+        out_file="\${parts_dir%_parts}"
+        echo "Reassembling: \$parts_dir -> \$out_file"
 
-    # gnomad female (12 parts)
-    if [ -d "${params.base_dir}/healthy/gnomad.genomes.v4.1.sites.female.txt_parts" ]; then
-        echo "Reassembling gnomad female..."
-        cat ${params.base_dir}/healthy/gnomad.genomes.v4.1.sites.female.txt_parts/part_* > ${params.base_dir}/healthy/gnomad.genomes.v4.1.sites.female.txt
-        echo " Done: \$(du -h ${params.base_dir}/healthy/gnomad.genomes.v4.1.sites.female.txt | cut -f1)"
-    fi
+        # An unmatched glob stays literal, so probe the first entry.
+        parts=( "\$parts_dir"/part_* )
+        if [ -e "\${parts[0]}" ]; then n_parts=\${#parts[@]}; else n_parts=0; fi
+        echo " Found \$n_parts parts"
 
-    # F5_SCHIZO male (7 parts)
-    if [ -d "${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz_parts" ]; then
-        echo "Reassembling F5_SCHIZO male..."
-        cat ${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz_parts/part_* > ${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz
-        echo " Done: \$(du -h ${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz | cut -f1)"
-    fi
+        if [ "\$n_parts" -gt 0 ]; then
+            # Write to a temp name and rename, so an interrupted run never
+            # leaves a truncated file under the final name.
+            cat -- "\${parts[@]}" > "\$out_file.tmp"
+            mv -f -- "\$out_file.tmp" "\$out_file"
+            out_size=\$(du -h "\$out_file" | cut -f1)
+            echo " Created: \$out_file (\$out_size)"
+        else
+            echo " WARNING: No parts found in \$parts_dir"
+        fi
+    done < <(find "\$BASE" -type d -name '*_parts' -print0)
 
-    # F5_SCHIZO female (7 parts)
-    if [ -d "${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz_parts" ]; then
-        echo "Reassembling F5_SCHIZO female..."
-        cat ${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz_parts/part_* > ${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz
-        echo " Done: \$(du -h ${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz | cut -f1)"
-    fi
+    echo "=== Verification ==="
+    echo "Imputed files:"
+    ls -lh "\$BASE"/imputed/*.bgz 2>/dev/null || echo " None"
+    echo "Healthy files:"
+    ls -lh "\$BASE"/healthy/*.txt 2>/dev/null || echo " None"
+    echo "Genome files:"
+    ls -lh "\$BASE"/supporting-data/genome/hg38.* 2>/dev/null || echo " None"
+    echo "Small files:"
+    ls -lh "\$BASE"/MANE* "\$BASE"/regulon* "\$BASE"/LM22* "\$BASE"/ukbb* 2>/dev/null || echo " None"
 
-    # F5_SCHIZO both_sexes (7 parts)
-    if [ -d "${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz_parts" ]; then
-        echo "Reassembling F5_SCHIZO both_sexes..."
-        cat ${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz_parts/part_* > ${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz
-        echo " Done: \$(du -h ${params.base_dir}/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz | cut -f1)"
-    fi
-
-    echo "=== All reassembly complete ==="
+    echo "=== REASSEMBLY COMPLETE ==="
     """
 }
 