nextflow.enable.dsl=2

/*
 * STAGE_DATA
 *
 * Populates the shared volume at /omic/eureka/digital-patients with the
 * reference files listed in `files` below by copying them over SCP from
 * nucleus.omic.ai (port 9100). Takes no inputs and declares no outputs; the
 * task fails (exit 1) when any file could not be staged.
 *
 * Escaping inside the script block: it is a Groovy GString, so every literal
 * backslash is doubled and every literal dollar sign is backslash-escaped.
 */
process STAGE_DATA {
    container 'harbor.cluster.omic.ai/omic/digital-patients/synthea:cudf'
    memory '4 GB'
    cpus 2

    script:
    """
    #!/opt/conda/envs/synthea/bin/python3
    import os
    import shlex
    import shutil
    import subprocess
    import sys

    BASE = "/omic/eureka/digital-patients"
    REMOTE_HOST = "omic@nucleus.omic.ai"

    # Options shared by both transfer paths (sshpass and expect).
    SCP_OPTS = [
        "-o", "StrictHostKeyChecking=no",
        "-o", "UserKnownHostsFile=/dev/null",
        "-P", "9100",
    ]

    # NOTE(review): this credential is committed to source control. Rotate it
    # and provide NUCLEUS_SCP_PASSWORD to the task (for example through a
    # Nextflow secret); the literal default only keeps existing runs working.
    SCP_PASSWORD = os.environ.get("NUCLEUS_SCP_PASSWORD", "bl3rg3r5")

    # Expect fallback used when sshpass is unavailable. Source, destination and
    # password are read from the environment so nothing needs quoting here and
    # the password never shows up in the logged command line. The script exits
    # with the exit status of scp (4th element of the list returned by wait).
    EXPECT_SCRIPT = "\\n".join([
        'set timeout 3600',
        'spawn scp ' + ' '.join(SCP_OPTS) + ' \$env(SCP_SRC) \$env(SCP_DST)',
        'expect {',
        r'    "password:" { send -- "\$env(SSHPASS)\\r"; exp_continue }',
        '    timeout { exit 124 }',
        '    eof',
        '}',
        'exit [lindex [wait] 3]',
    ])


    def run(cmd, env=None):
        # Run a command, echo its stdout/stderr, and return its exit code.
        # A str is executed through the shell; a list is executed directly
        # (no shell, so arguments need no quoting). env, when given, replaces
        # the environment of the child process.
        use_shell = isinstance(cmd, str)
        shown = cmd if use_shell else " ".join(shlex.quote(part) for part in cmd)
        print(f"RUN: {shown}", flush=True)
        try:
            r = subprocess.run(cmd, shell=use_shell, capture_output=True, text=True, env=env)
        except OSError as err:
            # Executable missing or not runnable: report it like the shell would.
            print(f"  cannot execute: {err}", flush=True)
            return 127
        if r.stdout:
            print(r.stdout, flush=True)
        if r.stderr:
            print(r.stderr, flush=True)
        return r.returncode


    print("=== Cleaning up old _parts directories ===", flush=True)
    run(f"find {BASE} -name '*_parts' -type d -exec rm -rf {{}} + 2>/dev/null || true")

    print("=== Creating directory structure ===", flush=True)
    for d in ["imputed", "healthy", "supporting-data/vcf", "supporting-data/ucsc-liftover",
              "supporting-data/genome", "output"]:
        os.makedirs(f"{BASE}/{d}", exist_ok=True)

    print("=== Current PVC state ===", flush=True)
    run(f"ls -la {BASE}/")

    print("=== Installing sshpass ===", flush=True)
    if shutil.which("sshpass") is None:
        # Try conda first, then apt, then pip. The apt pair is grouped so it
        # only runs when conda failed (|| and && have equal precedence in sh).
        run("conda install -y -c conda-forge sshpass 2>/dev/null"
            " || (apt-get update -qq && apt-get install -y -qq sshpass 2>/dev/null)"
            " || pip install sshpass 2>/dev/null")

    has_sshpass = shutil.which("sshpass") is not None
    if not has_sshpass:
        print("sshpass not found, trying expect", flush=True)


    def download(remote, local_path):
        # Copy one remote file to local_path; return True when it is in place.
        # The transfer goes to a temporary .part file that is renamed only
        # after scp succeeded, so an interrupted copy is never mistaken for a
        # finished one on the next run.
        if os.path.isfile(local_path) and os.path.getsize(local_path) > 0:
            size = os.path.getsize(local_path)
            print(f"SKIP (exists, {size} bytes): {local_path}", flush=True)
            return True

        print(f"Downloading: {remote} -> {local_path}", flush=True)
        tmp_path = local_path + ".part"
        source = f"{REMOTE_HOST}:{remote}"
        env = dict(os.environ, SSHPASS=SCP_PASSWORD)

        if has_sshpass:
            # -e makes sshpass read the password from the SSHPASS variable.
            rc = run(["sshpass", "-e", "scp"] + SCP_OPTS + [source, tmp_path], env=env)
        else:
            env.update(SCP_SRC=source, SCP_DST=tmp_path)
            rc = run(["expect", "-c", EXPECT_SCRIPT], env=env)

        if rc == 0 and os.path.isfile(tmp_path):
            os.replace(tmp_path, local_path)
            size = os.path.getsize(local_path)
            print(f"  OK: {size} bytes", flush=True)
            return True

        # Drop whatever a failed transfer left behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        print(f"  FAILED (rc={rc})", flush=True)
        return False


    # (remote path, local destination) pairs to stage.
    files = [
        ("/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz",
         f"{BASE}/imputed/F5_SCHIZO.gwas.imputed_v3.both_sexes.tsv.bgz"),
        ("/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz",
         f"{BASE}/imputed/F5_SCHIZO.gwas.imputed_v3.female.tsv.bgz"),
        ("/mnt/Avatar/imputed/ukbb/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz",
         f"{BASE}/imputed/F5_SCHIZO.gwas.imputed_v3.male.tsv.bgz"),
        ("/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.female.txt",
         f"{BASE}/healthy/gnomad.genomes.v4.1.sites.female.txt"),
        ("/mnt/Avatar/digital_patient/gnomad.genomes.v4.1.sites.male.txt",
         f"{BASE}/healthy/gnomad.genomes.v4.1.sites.male.txt"),
        ("/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.fa",
         f"{BASE}/supporting-data/genome/hg38.fa"),
        ("/mnt/Avatar/dd/synthea/supporting-data/genome/hg38.dict",
         f"{BASE}/supporting-data/genome/hg38.dict"),
        ("/mnt/Avatar/dd/synthea/supporting-data/ucsc-liftover/hg19ToHg38.over.chain.gz",
         f"{BASE}/supporting-data/ucsc-liftover/hg19ToHg38.over.chain.gz"),
        ("/mnt/Avatar/dd/synthea/supporting-data/vcf/vcf_template.vcf",
         f"{BASE}/supporting-data/vcf/vcf_template.vcf"),
        ("/mnt/Avatar/imputed/ukbb/metadata/ukbb_phenotypes.csv",
         f"{BASE}/ukbb_phenotypes_filtered.csv"),
    ]

    print("=== Downloading files ===", flush=True)
    ok = 0
    fail = 0
    for remote, local in files:
        if download(remote, local):
            ok += 1
        else:
            fail += 1

    print(f"\\n=== Download results: {ok} ok, {fail} failed ===", flush=True)

    print("\\n=== Final verification ===", flush=True)
    run(f"echo 'Imputed:' && ls -lh {BASE}/imputed/*.bgz 2>/dev/null || echo '  NONE'")
    run(f"echo 'Healthy:' && ls -lh {BASE}/healthy/*.txt 2>/dev/null || echo '  NONE'")
    run(f"echo 'Genome:' && ls -lh {BASE}/supporting-data/genome/* 2>/dev/null || echo '  NONE'")
    run(f"echo 'Support:' && ls -lh {BASE}/supporting-data/vcf/* {BASE}/supporting-data/ucsc-liftover/* 2>/dev/null || echo '  NONE'")
    run(f"echo 'Small:' && ls -lh {BASE}/MANE* {BASE}/regulon* {BASE}/LM22* {BASE}/ukbb* 2>/dev/null || echo '  NONE'")

    if fail > 0:
        print("STAGING FAILED - some files missing", flush=True)
        sys.exit(1)

    print("=== STAGING COMPLETE ===", flush=True)
    """
}

workflow {
    STAGE_DATA()
}