#!/usr/bin/env nextflow

// Utility pipeline: rebuilds files that were stored as chunk directories
// (named "<file>_parts", each holding "part_*" pieces) under params.base_dir.
nextflow.enable.dsl=2

// Root directory scanned by the REASSEMBLE process for "*_parts" directories.
// Can be overridden on the command line with --base_dir <path>.
params.base_dir = '/omic/eureka/digital-patients'

/*
 * REASSEMBLE
 *
 * Walks params.base_dir, and for every directory whose name ends in "_parts"
 * concatenates its "part_*" files into a sibling file named like the
 * directory minus the "_parts" suffix. Afterwards it lists a few expected
 * result locations as a visual sanity check.
 *
 * Takes no inputs and declares no outputs: all work happens in place on the
 * filesystem under params.base_dir.
 *
 * Note: inside the script string, "\$" yields a literal "$" for Bash, while
 * "${params.base_dir}" is interpolated by Nextflow before the script runs.
 */
process REASSEMBLE {
    container 'harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf'
    memory '8 GB'

    script:
    """
    BASE="${params.base_dir}"

    # List whichever of the given paths exist; print " None" when none do.
    # Unmatched glob patterns reach this function as literal strings, so they
    # are filtered out here instead of making ls fail for the whole group.
    list_or_none() {
        local existing=()
        local candidate
        for candidate in "\$@"; do
            if [ -e "\$candidate" ] || [ -L "\$candidate" ]; then
                existing+=( "\$candidate" )
            fi
        done
        if [ "\${#existing[@]}" -gt 0 ]; then
            ls -lh -- "\${existing[@]}" 2>/dev/null || echo " None"
        else
            echo " None"
        fi
    }

    echo "=== Listing _parts directories ==="
    find "\$BASE" -name "*_parts" -type d 2>/dev/null

    echo "=== Reassembling all chunked files ==="
    # NUL-delimited find output keeps paths containing spaces or glob
    # characters intact (a plain for-loop over find output would split them).
    while IFS= read -r -d '' parts_dir; do
        # Derive the output filename by stripping the _parts suffix
        out_file="\${parts_dir%_parts}"
        echo "Reassembling: \$parts_dir -> \$out_file"

        # Collect parts via the glob itself rather than parsing ls output.
        # Without nullglob an unmatched pattern stays literal, so verify that
        # the first entry really exists before counting.
        parts=( "\$parts_dir"/part_* )
        if [ ! -e "\${parts[0]}" ]; then
            parts=()
        fi
        n_parts=\${#parts[@]}
        echo " Found \$n_parts parts"

        if [ "\$n_parts" -gt 0 ]; then
            # Parts are joined in glob order, i.e. sorted by name. This assumes
            # part names sort in their intended sequence (e.g. zero-padded
            # numbers) - TODO confirm against whatever produced the chunks.
            #
            # Write to a temporary name first so that a failed or interrupted
            # concatenation never leaves a truncated file at the final path.
            tmp_file="\${out_file}.partial"
            if cat -- "\${parts[@]}" > "\$tmp_file"; then
                mv -f -- "\$tmp_file" "\$out_file"
            else
                rm -f -- "\$tmp_file"
                echo " ERROR: failed to reassemble \$out_file" >&2
                exit 1
            fi
            out_size=\$(du -h "\$out_file" | cut -f1)
            echo " Created: \$out_file (\$out_size)"
        else
            echo " WARNING: No parts found in \$parts_dir"
        fi
    done < <(find "\$BASE" -name "*_parts" -type d -print0 2>/dev/null)

    echo "=== Verification ==="
    echo "Imputed files:"
    list_or_none "\$BASE"/imputed/*.bgz
    echo "Healthy files:"
    list_or_none "\$BASE"/healthy/*.txt
    echo "Genome files:"
    list_or_none "\$BASE"/supporting-data/genome/hg38.*
    echo "Small files:"
    list_or_none "\$BASE"/MANE* "\$BASE"/regulon* "\$BASE"/LM22* "\$BASE"/ukbb*

    echo "=== REASSEMBLY COMPLETE ==="
    """
}

// Entry workflow: invokes the REASSEMBLE process once, with no input channels.
workflow {
    REASSEMBLE()
}