#!/usr/bin/env nextflow

/*
 * Reassemble chunked files.
 *
 * Searches params.base_dir recursively for directories whose name ends in
 * "_parts", concatenates the "part_*" files found directly inside each one,
 * and writes the result next to the directory under the same name with the
 * "_parts" suffix removed (e.g. ".../foo.bgz_parts" -> ".../foo.bgz").
 * Afterwards it lists a set of expected output locations for a quick
 * visual check in the task log.
 *
 * Parameters:
 *   params.base_dir  Root directory to scan (override with --base_dir).
 */

nextflow.enable.dsl=2

params.base_dir = '/omic/eureka/digital-patients'

process REASSEMBLE {
    container 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
    memory '8 GB'
    // The script uses Bash-only features (arrays, process substitution), so
    // pin the interpreter and make failures in commands/pipelines fatal.
    shell '/bin/bash', '-euo', 'pipefail'

    script:
    // NOTE: inside this block Bash variables are written with an escaped
    // dollar sign; an unescaped one is interpolated by Nextflow/Groovy.
    """
    BASE="${params.base_dir}"

    # Fail loudly instead of letting find's error vanish into /dev/null.
    if [[ ! -d "\$BASE" ]]; then
        echo "ERROR: base directory not found: \$BASE" >&2
        exit 1
    fi

    echo "=== Listing _parts directories ==="
    find "\$BASE" -name "*_parts" -type d 2>/dev/null

    echo "=== Reassembling all chunked files ==="
    # NUL-delimited traversal keeps paths containing spaces or glob
    # characters intact (a plain for-loop over find output would split them).
    while IFS= read -r -d '' parts_dir; do
        # Derive the output filename by stripping the _parts suffix
        out_file="\${parts_dir%_parts}"
        echo "Reassembling: \$parts_dir -> \$out_file"

        # Collect the parts with a glob. nullglob is enabled only for this
        # expansion so an unmatched pattern yields an empty array; it is
        # switched off again so the ls globs further down keep their
        # default behaviour.
        shopt -s nullglob
        parts=( "\$parts_dir"/part_* )
        shopt -u nullglob
        n_parts=\${#parts[@]}
        echo " Found \$n_parts parts"

        if (( n_parts > 0 )); then
            # Parts are concatenated in glob (sorted) order, so part names
            # are assumed to sort in the intended order -- TODO confirm the
            # naming used when the files were chunked.
            if ! cat -- "\${parts[@]}" > "\$out_file"; then
                # Do not leave a truncated file that looks like a result.
                rm -f -- "\$out_file"
                echo "ERROR: failed to reassemble \$out_file" >&2
                exit 1
            fi
            out_size=\$(du -h -- "\$out_file" | cut -f1)
            echo " Created: \$out_file (\$out_size)"
        else
            echo " WARNING: No parts found in \$parts_dir"
        fi
    done < <(find "\$BASE" -name "*_parts" -type d -print0 2>/dev/null)

    echo "=== Verification ==="
    # Each listing is best-effort: a missing file makes ls fail, which is
    # reported as " None" rather than failing the task.
    echo "Imputed files:"
    ls -lh "\$BASE"/imputed/*.bgz 2>/dev/null || echo " None"
    echo "Healthy files:"
    ls -lh "\$BASE"/healthy/*.txt 2>/dev/null || echo " None"
    echo "Genome files:"
    ls -lh "\$BASE"/supporting-data/genome/hg38.* 2>/dev/null || echo " None"
    echo "Small files:"
    ls -lh "\$BASE"/MANE* "\$BASE"/regulon* "\$BASE"/LM22* "\$BASE"/ukbb* 2>/dev/null || echo " None"

    echo "=== REASSEMBLY COMPLETE ==="
    """
}

workflow {
    REASSEMBLE()
}