PVC mount is not available in k8s pods. Try multiple fallback methods to retrieve input PDB: PVC path, s3-to-PVC conversion, AWS CLI from MinIO, and curl from MinIO. Also add nf-amazon plugin and s3:// default paths.
98 lines
3.5 KiB
Plaintext
98 lines
3.5 KiB
Plaintext
#!/usr/bin/env nextflow
|
|
|
|
nextflow.enable.dsl = 2

// Pipeline parameter defaults.
params.pdb        = 's3://omic/eureka/Pocketminer/1HSG.pdb'   // input structure passed to POCKETMINER
params.outdir     = 's3://omic/eureka/Pocketminer/output'     // publishDir target for POCKETMINER outputs
params.model_path = '/workspace/gvp/models/pocketminer'       // forwarded to the entrypoint as --model-path
params.debug      = false                                     // when true, --debug is appended to the entrypoint call
|
|
|
|
/*
 * POCKETMINER
 *
 * Stages a PDB file into the task directory as `input.pdb` and runs
 * /workspace/entrypoint.py on it inside the pocketminer container.
 *
 * Input:
 *   val pdb_path - location of the PDB file; either a local/PVC path or an
 *                  s3://bucket/key URI. It is a `val` (not a `path`) so the
 *                  script itself decides how to fetch it.
 *
 * Retrieval order (first success wins):
 *   1. pdb_path used verbatim as a local file
 *   2. s3://bucket/key rewritten to /bucket/key (PVC mount layout)
 *   3. `aws s3 cp` against the MinIO endpoint
 *   4. plain HTTP GET with curl against the same endpoint
 *
 * Output:
 *   *-preds.npy, *-predictions.txt, *-summary.json  (required)
 *   *_X.npy, *_S.npy, *_mask.npy                    (optional)
 *   run.log                                         (everything tee'd by the script)
 *
 * The task exits non-zero when no retrieval method yields a non-empty file
 * or when the entrypoint itself fails.
 */
process POCKETMINER {
    container 'harbor.cluster.omic.ai/omic/pocketminer:latest'
    publishDir params.outdir, mode: 'copy'

    input:
    val pdb_path

    output:
    path "*-preds.npy", emit: predictions_npy
    path "*-predictions.txt", emit: predictions_txt
    path "*-summary.json", emit: summary
    path "*_X.npy", optional: true, emit: features_debug
    path "*_S.npy", optional: true, emit: sequence_debug
    path "*_mask.npy", optional: true, emit: mask_debug
    path "run.log", emit: log

    script:
    def debug_flag = params.debug ? '--debug' : ''
    """
    # Every command below is piped through tee. Without pipefail the pipeline
    # status would be tee's (always 0), hiding download and inference failures.
    set -o pipefail

    touch run.log

    echo "=== Environment Debug ===" | tee -a run.log
    echo "Input path: ${pdb_path}" | tee -a run.log
    echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" | tee -a run.log
    echo "AWS_ACCESS_KEY_ID set: \${AWS_ACCESS_KEY_ID:+yes}" | tee -a run.log

    # Single source of truth for the object-store endpoint, shared by the
    # aws and curl methods so both honour AWS_ENDPOINT_URL when it is set.
    MINIO_ENDPOINT="\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}"

    # Try multiple methods to get the PDB file
    PDB_FILE=""

    # Method 1: Direct PVC path (if mounted)
    if [ -f "${pdb_path}" ]; then
        echo "Found file at PVC path: ${pdb_path}" | tee -a run.log
        cp "${pdb_path}" input.pdb
        PDB_FILE="input.pdb"
    fi

    # Method 2: Convert s3://omic/eureka/... to /omic/eureka/... (PVC mount)
    if [ -z "\$PDB_FILE" ]; then
        PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')"
        echo "Trying PVC path: \$PVC_PATH" | tee -a run.log
        if [ -f "\$PVC_PATH" ]; then
            echo "Found file at PVC mount: \$PVC_PATH" | tee -a run.log
            cp "\$PVC_PATH" input.pdb
            PDB_FILE="input.pdb"
        fi
    fi

    # Method 3: Download from MinIO via S3 API
    if [ -z "\$PDB_FILE" ]; then
        echo "PVC not available, downloading from MinIO S3..." | tee -a run.log

        # Best-effort install, only when the CLI is not already on PATH.
        if ! command -v aws > /dev/null 2>&1; then
            pip install awscli 2>/dev/null || conda install -n base -c conda-forge awscli -y 2>/dev/null || true
        fi

        if command -v aws > /dev/null 2>&1; then
            if aws --endpoint-url "\$MINIO_ENDPOINT" \\
                s3 cp "${pdb_path}" input.pdb 2>&1 | tee -a run.log; then
                PDB_FILE="input.pdb"
            else
                echo "aws s3 cp failed, falling back to the next method" | tee -a run.log
                # Drop any partial download so it cannot be mistaken for a result.
                rm -f input.pdb
            fi
        fi
    fi

    # Method 4: Download via curl from MinIO
    # NOTE(review): the request carries no credentials, so this presumably only
    # works when the object is anonymously readable - confirm bucket policy.
    if [ -z "\$PDB_FILE" ]; then
        echo "Trying curl download from MinIO..." | tee -a run.log
        S3_PATH="\$(echo '${pdb_path}' | sed 's|^s3://||')"
        if curl -sf "\${MINIO_ENDPOINT%/}/\$S3_PATH" -o input.pdb 2>&1 | tee -a run.log; then
            PDB_FILE="input.pdb"
        else
            echo "curl download failed" | tee -a run.log
            rm -f input.pdb
        fi
    fi

    # -s (not -f): an empty file is as useless as a missing one.
    if [ -z "\$PDB_FILE" ] || [ ! -s input.pdb ]; then
        echo "ERROR: Could not retrieve PDB file from any source" | tee -a run.log
        exit 1
    fi

    echo "PDB file ready (\$(wc -c < input.pdb) bytes)" | tee -a run.log
    PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"

    # Explicit status check so a failed run is reported even if the task shell
    # is configured without errexit.
    if ! python /workspace/entrypoint.py \\
        --pdb input.pdb \\
        --output-folder . \\
        --output-name "\$PDB_BASENAME" \\
        --model-path "${params.model_path}" \\
        ${debug_flag} 2>&1 | tee -a run.log; then
        echo "ERROR: entrypoint.py exited with a non-zero status" | tee -a run.log
        exit 1
    fi

    echo "Pipeline completed successfully" | tee -a run.log
    """
}
|
|
|
|
// Entry workflow: run POCKETMINER once on the PDB location given by params.pdb.
workflow {
    def pdb_input = params.pdb
    POCKETMINER(pdb_input)
}
|