Fix input resolution: disable bash strict mode, quiet logging, add auth fallback

This commit is contained in:
2026-03-23 15:50:52 +01:00
parent 35dd4ab976
commit e74a9441a4

78
main.nf
View File

@@ -27,58 +27,74 @@ process POCKETMINER {
script: script:
def debug_flag = params.debug ? '--debug' : '' def debug_flag = params.debug ? '--debug' : ''
""" """
set +e
touch run.log touch run.log
echo "=== Environment Debug ===" | tee -a run.log echo "=== Environment Debug ===" >> run.log
echo "Input path: ${pdb_path}" | tee -a run.log echo "Input path: ${pdb_path}" >> run.log
echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" | tee -a run.log echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" >> run.log
echo "AWS_ACCESS_KEY_ID set: \${AWS_ACCESS_KEY_ID:+yes}" | tee -a run.log echo "AWS_ACCESS_KEY_ID set: \${AWS_ACCESS_KEY_ID:+yes}" >> run.log
ls /omic/eureka/ >> run.log 2>&1 || echo "/omic/eureka not mounted" >> run.log
echo "=== End Debug ===" >> run.log
# Try multiple methods to get the PDB file
PDB_FILE="" PDB_FILE=""
# Method 1: Direct PVC path (if mounted) # Method 1: Direct path (if PVC mounted or local)
if [ -f "${pdb_path}" ]; then if [ -f "${pdb_path}" ]; then
echo "Found file at PVC path: ${pdb_path}" | tee -a run.log echo "Found at direct path" >> run.log
cp "${pdb_path}" input.pdb cp "${pdb_path}" input.pdb && PDB_FILE="input.pdb"
PDB_FILE="input.pdb"
fi fi
# Method 2: Convert s3://omic/eureka/... to /omic/eureka/... (PVC mount) # Method 2: Convert s3://omic/... to /omic/... (PVC mount path)
if [ -z "\$PDB_FILE" ]; then if [ -z "\$PDB_FILE" ]; then
PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')" PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')"
echo "Trying PVC path: \$PVC_PATH" | tee -a run.log
if [ -f "\$PVC_PATH" ]; then if [ -f "\$PVC_PATH" ]; then
echo "Found file at PVC mount: \$PVC_PATH" | tee -a run.log echo "Found at PVC path: \$PVC_PATH" >> run.log
cp "\$PVC_PATH" input.pdb cp "\$PVC_PATH" input.pdb && PDB_FILE="input.pdb"
PDB_FILE="input.pdb" else
echo "Not found at PVC path: \$PVC_PATH" >> run.log
fi fi
fi fi
# Method 3: Download from MinIO via S3 API # Method 3: Download from MinIO via curl (no auth needed for public)
if [ -z "\$PDB_FILE" ]; then if [ -z "\$PDB_FILE" ]; then
echo "PVC not available, downloading from MinIO S3..." | tee -a run.log
pip install awscli 2>/dev/null || conda install -n base -c conda-forge awscli -y 2>/dev/null || true
if command -v aws &> /dev/null; then
aws --endpoint-url \${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000} \\
s3 cp "${pdb_path}" input.pdb 2>&1 | tee -a run.log && PDB_FILE="input.pdb"
fi
fi
# Method 4: Download via curl from MinIO
if [ -z "\$PDB_FILE" ]; then
echo "Trying curl download from MinIO..." | tee -a run.log
S3_PATH="\$(echo '${pdb_path}' | sed 's|^s3://||')" S3_PATH="\$(echo '${pdb_path}' | sed 's|^s3://||')"
curl -sf "http://datalake-hl.datalake.svc.cluster.local:9000/\$S3_PATH" -o input.pdb 2>&1 | tee -a run.log && PDB_FILE="input.pdb" MINIO_URL="http://datalake-hl.datalake.svc.cluster.local:9000/\$S3_PATH"
echo "Downloading from MinIO: \$MINIO_URL" >> run.log
curl -sf "\$MINIO_URL" -o input.pdb 2>> run.log
if [ -f input.pdb ] && [ -s input.pdb ]; then
echo "Downloaded from MinIO (\$(wc -c < input.pdb) bytes)" >> run.log
PDB_FILE="input.pdb"
else
echo "MinIO download failed or empty" >> run.log
rm -f input.pdb
fi
fi fi
# Method 4: Download from MinIO using AWS env vars
if [ -z "\$PDB_FILE" ] && [ -n "\${AWS_ACCESS_KEY_ID:-}" ]; then
S3_PATH="\$(echo '${pdb_path}' | sed 's|^s3://||')"
ENDPOINT="\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}"
echo "Downloading with AWS creds from: \$ENDPOINT/\$S3_PATH" >> run.log
curl -sf -u "\${AWS_ACCESS_KEY_ID}:\${AWS_SECRET_ACCESS_KEY}" "\$ENDPOINT/\$S3_PATH" -o input.pdb 2>> run.log
if [ -f input.pdb ] && [ -s input.pdb ]; then
echo "Downloaded with AWS creds (\$(wc -c < input.pdb) bytes)" >> run.log
PDB_FILE="input.pdb"
else
echo "AWS cred download failed" >> run.log
rm -f input.pdb
fi
fi
set -e
if [ -z "\$PDB_FILE" ] || [ ! -f input.pdb ]; then if [ -z "\$PDB_FILE" ] || [ ! -f input.pdb ]; then
echo "ERROR: Could not retrieve PDB file from any source" | tee -a run.log echo "ERROR: Could not retrieve PDB file from any source" >> run.log
cat run.log
exit 1 exit 1
fi fi
echo "PDB file ready (\$(wc -c < input.pdb) bytes)" | tee -a run.log echo "PDB file ready (\$(wc -c < input.pdb) bytes)" >> run.log
PDB_BASENAME="\$(basename '${pdb_path}' .pdb)" PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"
python /workspace/entrypoint.py \\ python /workspace/entrypoint.py \\
@@ -88,7 +104,7 @@ process POCKETMINER {
--model-path ${params.model_path} \\ --model-path ${params.model_path} \\
${debug_flag} 2>&1 | tee -a run.log ${debug_flag} 2>&1 | tee -a run.log
echo "Pipeline completed successfully" | tee -a run.log echo "Pipeline completed successfully" >> run.log
""" """
} }