Add multi-method input file resolution for WES

The PVC mount is not available in k8s pods, so try several fallback
methods, in order, to retrieve the input PDB: the direct PVC path, the
s3://-to-PVC path conversion, the AWS CLI against MinIO, and curl against
MinIO. Also add the nf-amazon plugin and switch the default paths to s3://.
This commit is contained in:
2026-03-23 15:41:13 +01:00
parent 14091fce87
commit 35dd4ab976
2 changed files with 63 additions and 15 deletions

62
main.nf
View File

@@ -3,8 +3,8 @@
nextflow.enable.dsl=2 nextflow.enable.dsl=2
// Pipeline parameters // Pipeline parameters
params.pdb = '/omic/eureka/Pocketminer/1HSG.pdb' params.pdb = 's3://omic/eureka/Pocketminer/1HSG.pdb'
params.outdir = '/omic/eureka/Pocketminer/output' params.outdir = 's3://omic/eureka/Pocketminer/output'
params.model_path = '/workspace/gvp/models/pocketminer' params.model_path = '/workspace/gvp/models/pocketminer'
params.debug = false params.debug = false
@@ -25,30 +25,66 @@ process POCKETMINER {
path "run.log", emit: log path "run.log", emit: log
script: script:
def pdb_basename = file(pdb_path).baseName
def debug_flag = params.debug ? '--debug' : '' def debug_flag = params.debug ? '--debug' : ''
""" """
touch run.log touch run.log
echo "=== Debugging PVC mount ===" | tee -a run.log echo "=== Environment Debug ===" | tee -a run.log
echo "Input path: ${pdb_path}" | tee -a run.log echo "Input path: ${pdb_path}" | tee -a run.log
ls -la /omic/eureka/ 2>&1 | head -20 | tee -a run.log echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" | tee -a run.log
ls -la /omic/eureka/Pocketminer/ 2>&1 | tee -a run.log || true echo "AWS_ACCESS_KEY_ID set: \${AWS_ACCESS_KEY_ID:+yes}" | tee -a run.log
echo "=== End Debug ===" | tee -a run.log
if [ ! -f "${pdb_path}" ]; then # Try multiple methods to get the PDB file
echo "ERROR: PDB file not found at ${pdb_path}" | tee -a run.log PDB_FILE=""
echo "Available files in /omic/eureka/Pocketminer/:" | tee -a run.log
ls /omic/eureka/Pocketminer/ 2>&1 | tee -a run.log || true # Method 1: Direct PVC path (if mounted)
if [ -f "${pdb_path}" ]; then
echo "Found file at PVC path: ${pdb_path}" | tee -a run.log
cp "${pdb_path}" input.pdb
PDB_FILE="input.pdb"
fi
# Method 2: Convert s3://omic/eureka/... to /omic/eureka/... (PVC mount)
if [ -z "\$PDB_FILE" ]; then
PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')"
echo "Trying PVC path: \$PVC_PATH" | tee -a run.log
if [ -f "\$PVC_PATH" ]; then
echo "Found file at PVC mount: \$PVC_PATH" | tee -a run.log
cp "\$PVC_PATH" input.pdb
PDB_FILE="input.pdb"
fi
fi
# Method 3: Download from MinIO via S3 API
if [ -z "\$PDB_FILE" ]; then
echo "PVC not available, downloading from MinIO S3..." | tee -a run.log
pip install awscli 2>/dev/null || conda install -n base -c conda-forge awscli -y 2>/dev/null || true
if command -v aws &> /dev/null; then
aws --endpoint-url \${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000} \\
s3 cp "${pdb_path}" input.pdb 2>&1 | tee -a run.log && PDB_FILE="input.pdb"
fi
fi
# Method 4: Download via curl from MinIO
if [ -z "\$PDB_FILE" ]; then
echo "Trying curl download from MinIO..." | tee -a run.log
S3_PATH="\$(echo '${pdb_path}' | sed 's|^s3://||')"
curl -sf "http://datalake-hl.datalake.svc.cluster.local:9000/\$S3_PATH" -o input.pdb 2>&1 | tee -a run.log && PDB_FILE="input.pdb"
fi
if [ -z "\$PDB_FILE" ] || [ ! -f input.pdb ]; then
echo "ERROR: Could not retrieve PDB file from any source" | tee -a run.log
exit 1 exit 1
fi fi
cp "${pdb_path}" input.pdb echo "PDB file ready (\$(wc -c < input.pdb) bytes)" | tee -a run.log
PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"
python /workspace/entrypoint.py \\ python /workspace/entrypoint.py \\
--pdb input.pdb \\ --pdb input.pdb \\
--output-folder . \\ --output-folder . \\
--output-name ${pdb_basename} \\ --output-name "\$PDB_BASENAME" \\
--model-path ${params.model_path} \\ --model-path ${params.model_path} \\
${debug_flag} 2>&1 | tee -a run.log ${debug_flag} 2>&1 | tee -a run.log

View File

@@ -8,10 +8,22 @@ manifest {
version = '1.0.0' version = '1.0.0'
} }
// S3/MinIO plugin for direct S3 access (bypasses PVC mount issues)
plugins {
id 'nf-amazon'
}
// AWS client settings: S3 requests are directed at the cluster-local
// service address below (port 9000) rather than a public AWS endpoint.
aws {
client {
endpoint = 'http://datalake-hl.datalake.svc.cluster.local:9000'
// Use path-style S3 addressing (bucket name in the URL path).
// NOTE(review): presumably required by the MinIO endpoint — confirm.
s3PathStyleAccess = true
}
}
// Global default parameters // Global default parameters
params { params {
pdb = "/omic/eureka/Pocketminer/1HSG.pdb" pdb = "s3://omic/eureka/Pocketminer/1HSG.pdb"
outdir = "/omic/eureka/Pocketminer/output" outdir = "s3://omic/eureka/Pocketminer/output"
debug = false debug = false
} }