#!/usr/bin/env nextflow

nextflow.enable.dsl=2

// Pipeline parameters (all overridable with --pdb / --outdir / --model-path / --debug)
params.pdb        = 's3://omic/eureka/Pocketminer/1HSG.pdb'
params.outdir     = 's3://omic/eureka/Pocketminer/output'
params.model_path = '/workspace/gvp/models/pocketminer'
params.debug      = false

/*
 * Run PocketMiner pocket prediction on a single PDB file.
 *
 * The input is passed as a `val` (a path string), not a `path`, because the
 * file may live on a PVC mount or in MinIO/S3 — the script below tries, in
 * order: the literal path, the s3://→/ PVC translation, `aws s3 cp` against
 * the MinIO endpoint, and finally a raw curl download.
 */
process POCKETMINER {
    container 'harbor.cluster.omic.ai/omic/pocketminer:latest'
    publishDir params.outdir, mode: 'copy'

    input:
    val pdb_path

    output:
    path "*-preds.npy",       emit: predictions_npy
    path "*-predictions.txt", emit: predictions_txt
    path "*-summary.json",    emit: summary
    // Debug tensors are only written when --debug is set, hence optional.
    path "*_X.npy",    optional: true, emit: features_debug
    path "*_S.npy",    optional: true, emit: sequence_debug
    path "*_mask.npy", optional: true, emit: mask_debug
    path "run.log",    emit: log

    script:
    def debug_flag = params.debug ? '--debug' : ''
    """
    # Fail fast, treat unset vars as errors, and make 'cmd | tee' reflect
    # cmd's exit status instead of tee's. Without pipefail, a crash in the
    # python entrypoint (or a failed download) was masked by tee exiting 0,
    # so the task would "succeed" with missing outputs.
    set -euo pipefail

    touch run.log
    echo "=== Environment Debug ===" | tee -a run.log
    echo "Input path: ${pdb_path}" | tee -a run.log
    echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" | tee -a run.log
    echo "AWS_ACCESS_KEY_ID set: \${AWS_ACCESS_KEY_ID:+yes}" | tee -a run.log

    # Try multiple methods to get the PDB file
    PDB_FILE=""

    # Method 1: Direct PVC path (if mounted)
    if [ -f "${pdb_path}" ]; then
        echo "Found file at PVC path: ${pdb_path}" | tee -a run.log
        cp "${pdb_path}" input.pdb
        PDB_FILE="input.pdb"
    fi

    # Method 2: Convert s3://omic/eureka/... to /omic/eureka/... (PVC mount)
    if [ -z "\$PDB_FILE" ]; then
        PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')"
        echo "Trying PVC path: \$PVC_PATH" | tee -a run.log
        if [ -f "\$PVC_PATH" ]; then
            echo "Found file at PVC mount: \$PVC_PATH" | tee -a run.log
            cp "\$PVC_PATH" input.pdb
            PDB_FILE="input.pdb"
        fi
    fi

    # Method 3: Download from MinIO via S3 API.
    # NOTE: the '|| true' chain is a deliberate best-effort install; the
    # '&& PDB_FILE=...' tail only marks success when the whole download
    # pipeline (now covered by pipefail) actually succeeded.
    if [ -z "\$PDB_FILE" ]; then
        echo "PVC not available, downloading from MinIO S3..." | tee -a run.log
        pip install awscli 2>/dev/null || conda install -n base -c conda-forge awscli -y 2>/dev/null || true
        if command -v aws &> /dev/null; then
            aws --endpoint-url "\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}" \\
                s3 cp "${pdb_path}" input.pdb 2>&1 | tee -a run.log && PDB_FILE="input.pdb"
        fi
    fi

    # Method 4: Download via curl from MinIO (anonymous HTTP GET; -f keeps
    # HTTP error bodies from being written to input.pdb)
    if [ -z "\$PDB_FILE" ]; then
        echo "Trying curl download from MinIO..." | tee -a run.log
        S3_PATH="\$(echo '${pdb_path}' | sed 's|^s3://||')"
        curl -sf "http://datalake-hl.datalake.svc.cluster.local:9000/\$S3_PATH" -o input.pdb 2>&1 | tee -a run.log && PDB_FILE="input.pdb"
    fi

    if [ -z "\$PDB_FILE" ] || [ ! -f input.pdb ]; then
        echo "ERROR: Could not retrieve PDB file from any source" | tee -a run.log
        exit 1
    fi

    echo "PDB file ready (\$(wc -c < input.pdb) bytes)" | tee -a run.log

    PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"

    # With pipefail set above, a non-zero exit from the entrypoint now fails
    # the task instead of being swallowed by tee.
    # ${debug_flag} is intentionally unquoted: it expands to nothing when
    # params.debug is false.
    python /workspace/entrypoint.py \\
        --pdb input.pdb \\
        --output-folder . \\
        --output-name "\$PDB_BASENAME" \\
        --model-path "${params.model_path}" \\
        ${debug_flag} 2>&1 | tee -a run.log

    echo "Pipeline completed successfully" | tee -a run.log
    """
}

workflow {
    POCKETMINER(params.pdb)
}