# Review notes for this patch (unified diff against main.nf; both hunks sit
# inside `process POCKETMINER`). The diff text below has lost its original
# line breaks, so each " + " / " - " marker begins an added / removed line.
#
# What the patch changes in the process script:
#   * Adds `set +e` at the top and `set -e` just before the final
#     "could not retrieve" check, i.e. exit-on-error is switched off while the
#     retrieval methods are tried.
#   * Log messages are appended with `>> run.log` instead of `| tee -a run.log`,
#     so they no longer go to stdout; on retrieval failure run.log is printed
#     with `cat` before `exit 1`.
#   * Adds an `ls /omic/eureka/` probe to the debug section.
#   * `cp ... && PDB_FILE="input.pdb"`: PDB_FILE is now set only when the copy
#     succeeds (Methods 1 and 2).
#   * Removes the awscli install / `aws s3 cp` method. Method 3 becomes a plain
#     `curl -sf` against the hard-coded MinIO host. Method 4 (new) repeats the
#     download with `-u KEY:SECRET` against AWS_ENDPOINT_URL (falling back to
#     the same host) and only runs when AWS_ACCESS_KEY_ID is non-empty.
#   * Both curl methods keep input.pdb only if it exists and is non-empty, and
#     delete it otherwise.
#
# NOTE(review): `curl -u` sends HTTP basic credentials; S3-compatible servers
#   usually expect signed requests, so Method 4 may never succeed -- confirm
#   against the MinIO deployment (curl's --aws-sigv4 may be what is needed).
# NOTE(review): the secret key is passed on curl's command line -- check
#   whether that is acceptable in this environment.
# NOTE(review): Method 4 tests AWS_ACCESS_KEY_ID but expands
#   AWS_SECRET_ACCESS_KEY without a default -- confirm both are always set
#   together.
# NOTE(review): the python step is still piped through `tee`; unless pipefail
#   is enabled elsewhere, the pipeline's status is tee's, so "Pipeline
#   completed successfully" could be logged after a failed run -- TODO confirm.
diff --git a/main.nf b/main.nf index ee7163b..b8094f0 100644 --- a/main.nf +++ b/main.nf @@ -27,58 +27,74 @@ process POCKETMINER { script: def debug_flag = params.debug ? '--debug' : '' """ + set +e touch run.log - echo "=== Environment Debug ===" | tee -a run.log - echo "Input path: ${pdb_path}" | tee -a run.log - echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" | tee -a run.log - echo "AWS_ACCESS_KEY_ID set: \${AWS_ACCESS_KEY_ID:+yes}" | tee -a run.log + echo "=== Environment Debug ===" >> run.log + echo "Input path: ${pdb_path}" >> run.log + echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" >> run.log + echo "AWS_ACCESS_KEY_ID set: \${AWS_ACCESS_KEY_ID:+yes}" >> run.log + ls /omic/eureka/ >> run.log 2>&1 || echo "/omic/eureka not mounted" >> run.log + echo "=== End Debug ===" >> run.log - # Try multiple methods to get the PDB file PDB_FILE="" - # Method 1: Direct PVC path (if mounted) + # Method 1: Direct path (if PVC mounted or local) if [ -f "${pdb_path}" ]; then - echo "Found file at PVC path: ${pdb_path}" | tee -a run.log - cp "${pdb_path}" input.pdb - PDB_FILE="input.pdb" + echo "Found at direct path" >> run.log + cp "${pdb_path}" input.pdb && PDB_FILE="input.pdb" fi - # Method 2: Convert s3://omic/eureka/... to /omic/eureka/... (PVC mount) + # Method 2: Convert s3://omic/... to /omic/... (PVC mount path) if [ -z "\$PDB_FILE" ]; then PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')" - echo "Trying PVC path: \$PVC_PATH" | tee -a run.log if [ -f "\$PVC_PATH" ]; then - echo "Found file at PVC mount: \$PVC_PATH" | tee -a run.log - cp "\$PVC_PATH" input.pdb - PDB_FILE="input.pdb" + echo "Found at PVC path: \$PVC_PATH" >> run.log + cp "\$PVC_PATH" input.pdb && PDB_FILE="input.pdb" + else + echo "Not found at PVC path: \$PVC_PATH" >> run.log fi fi - # Method 3: Download from MinIO via S3 API + # Method 3: Download from MinIO via curl (no auth needed for public) if [ -z "\$PDB_FILE" ]; then - echo "PVC not available, downloading from MinIO S3..." 
| tee -a run.log - pip install awscli 2>/dev/null || conda install -n base -c conda-forge awscli -y 2>/dev/null || true - - if command -v aws &> /dev/null; then - aws --endpoint-url \${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000} \\ - s3 cp "${pdb_path}" input.pdb 2>&1 | tee -a run.log && PDB_FILE="input.pdb" - fi - fi - - # Method 4: Download via curl from MinIO - if [ -z "\$PDB_FILE" ]; then - echo "Trying curl download from MinIO..." | tee -a run.log S3_PATH="\$(echo '${pdb_path}' | sed 's|^s3://||')" - curl -sf "http://datalake-hl.datalake.svc.cluster.local:9000/\$S3_PATH" -o input.pdb 2>&1 | tee -a run.log && PDB_FILE="input.pdb" + MINIO_URL="http://datalake-hl.datalake.svc.cluster.local:9000/\$S3_PATH" + echo "Downloading from MinIO: \$MINIO_URL" >> run.log + curl -sf "\$MINIO_URL" -o input.pdb 2>> run.log + if [ -f input.pdb ] && [ -s input.pdb ]; then + echo "Downloaded from MinIO (\$(wc -c < input.pdb) bytes)" >> run.log + PDB_FILE="input.pdb" + else + echo "MinIO download failed or empty" >> run.log + rm -f input.pdb + fi fi + # Method 4: Download from MinIO using AWS env vars + if [ -z "\$PDB_FILE" ] && [ -n "\${AWS_ACCESS_KEY_ID:-}" ]; then + S3_PATH="\$(echo '${pdb_path}' | sed 's|^s3://||')" + ENDPOINT="\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}" + echo "Downloading with AWS creds from: \$ENDPOINT/\$S3_PATH" >> run.log + curl -sf -u "\${AWS_ACCESS_KEY_ID}:\${AWS_SECRET_ACCESS_KEY}" "\$ENDPOINT/\$S3_PATH" -o input.pdb 2>> run.log + if [ -f input.pdb ] && [ -s input.pdb ]; then + echo "Downloaded with AWS creds (\$(wc -c < input.pdb) bytes)" >> run.log + PDB_FILE="input.pdb" + else + echo "AWS cred download failed" >> run.log + rm -f input.pdb + fi + fi + + set -e + if [ -z "\$PDB_FILE" ] || [ ! 
-f input.pdb ]; then - echo "ERROR: Could not retrieve PDB file from any source" | tee -a run.log + echo "ERROR: Could not retrieve PDB file from any source" >> run.log + cat run.log exit 1 fi - echo "PDB file ready (\$(wc -c < input.pdb) bytes)" | tee -a run.log + echo "PDB file ready (\$(wc -c < input.pdb) bytes)" >> run.log PDB_BASENAME="\$(basename '${pdb_path}' .pdb)" python /workspace/entrypoint.py \\ @@ -88,7 +104,7 @@ process POCKETMINER { --model-path ${params.model_path} \\ ${debug_flag} 2>&1 | tee -a run.log - echo "Pipeline completed successfully" | tee -a run.log + echo "Pipeline completed successfully" >> run.log """ }