diff --git a/main.nf b/main.nf
index d7d4057..ee7163b 100644
--- a/main.nf
+++ b/main.nf
@@ -3,8 +3,8 @@
 nextflow.enable.dsl=2
 
 // Pipeline parameters
-params.pdb = '/omic/eureka/Pocketminer/1HSG.pdb'
-params.outdir = '/omic/eureka/Pocketminer/output'
+params.pdb = 's3://omic/eureka/Pocketminer/1HSG.pdb'
+params.outdir = 's3://omic/eureka/Pocketminer/output'
 params.model_path = '/workspace/gvp/models/pocketminer'
 params.debug = false
 
@@ -25,30 +25,79 @@ process POCKETMINER {
     path "run.log", emit: log
 
     script:
-    def pdb_basename = file(pdb_path).baseName
     def debug_flag = params.debug ? '--debug' : ''
     """
     touch run.log
 
-    echo "=== Debugging PVC mount ===" | tee -a run.log
+    echo "=== Environment Debug ===" | tee -a run.log
     echo "Input path: ${pdb_path}" | tee -a run.log
-    ls -la /omic/eureka/ 2>&1 | head -20 | tee -a run.log
-    ls -la /omic/eureka/Pocketminer/ 2>&1 | tee -a run.log || true
-    echo "=== End Debug ===" | tee -a run.log
+    echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" | tee -a run.log
+    echo "AWS_ACCESS_KEY_ID set: \${AWS_ACCESS_KEY_ID:+yes}" | tee -a run.log
 
-    if [ ! -f "${pdb_path}" ]; then
-        echo "ERROR: PDB file not found at ${pdb_path}" | tee -a run.log
-        echo "Available files in /omic/eureka/Pocketminer/:" | tee -a run.log
-        ls /omic/eureka/Pocketminer/ 2>&1 | tee -a run.log || true
+    # Try multiple methods to get the PDB file
+    PDB_FILE=""
+
+    # Method 1: Direct PVC path (if mounted)
+    if [ -f "${pdb_path}" ]; then
+        echo "Found file at PVC path: ${pdb_path}" | tee -a run.log
+        cp "${pdb_path}" input.pdb
+        PDB_FILE="input.pdb"
+    fi
+
+    # Method 2: Convert s3://omic/eureka/... to /omic/eureka/... (PVC mount)
+    if [ -z "\$PDB_FILE" ]; then
+        PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')"
+        echo "Trying PVC path: \$PVC_PATH" | tee -a run.log
+        if [ -f "\$PVC_PATH" ]; then
+            echo "Found file at PVC mount: \$PVC_PATH" | tee -a run.log
+            cp "\$PVC_PATH" input.pdb
+            PDB_FILE="input.pdb"
+        fi
+    fi
+
+    # Method 3: Download from MinIO via S3 API
+    if [ -z "\$PDB_FILE" ]; then
+        echo "PVC not available, downloading from MinIO S3..." | tee -a run.log
+        pip install awscli 2>/dev/null || conda install -n base -c conda-forge awscli -y 2>/dev/null || true
+
+        if command -v aws &> /dev/null; then
+            # A pipeline's exit status is tee's, so test aws itself via PIPESTATUS[0];
+            # otherwise a failed copy would still set PDB_FILE and skip Method 4.
+            if aws --endpoint-url \${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000} \\
+                s3 cp "${pdb_path}" input.pdb 2>&1 | tee -a run.log;
+                [ "\${PIPESTATUS[0]}" -eq 0 ]; then
+                PDB_FILE="input.pdb"
+            else
+                rm -f input.pdb
+            fi
+        fi
+    fi
+
+    # Method 4: Download via curl from MinIO
+    if [ -z "\$PDB_FILE" ]; then
+        echo "Trying curl download from MinIO..." | tee -a run.log
+        S3_PATH="\$(echo '${pdb_path}' | sed 's|^s3://||')"
+        # Judge curl (not tee) via PIPESTATUS[0]; use the same endpoint default as the aws call.
+        if curl -sf "\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}/\$S3_PATH" -o input.pdb 2>&1 | tee -a run.log;
+            [ "\${PIPESTATUS[0]}" -eq 0 ]; then
+            PDB_FILE="input.pdb"
+        else
+            rm -f input.pdb
+        fi
+    fi
+
+    if [ -z "\$PDB_FILE" ] || [ ! -f input.pdb ]; then
+        echo "ERROR: Could not retrieve PDB file from any source" | tee -a run.log
         exit 1
     fi
 
-    cp "${pdb_path}" input.pdb
+    echo "PDB file ready (\$(wc -c < input.pdb) bytes)" | tee -a run.log
+    PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"
 
     python /workspace/entrypoint.py \\
         --pdb input.pdb \\
         --output-folder . \\
-        --output-name ${pdb_basename} \\
+        --output-name "\$PDB_BASENAME" \\
         --model-path ${params.model_path} \\
         ${debug_flag} 2>&1 | tee -a run.log
 
diff --git a/nextflow.config b/nextflow.config
index ad0ad98..90a4924 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -8,10 +8,22 @@ manifest {
     version = '1.0.0'
 }
 
+// S3/MinIO plugin for direct S3 access (bypasses PVC mount issues)
+plugins {
+    id 'nf-amazon'
+}
+
+aws {
+    client {
+        endpoint = 'http://datalake-hl.datalake.svc.cluster.local:9000'
+        s3PathStyleAccess = true
+    }
+}
+
 // Global default parameters
 params {
-    pdb = "/omic/eureka/Pocketminer/1HSG.pdb"
-    outdir = "/omic/eureka/Pocketminer/output"
+    pdb = "s3://omic/eureka/Pocketminer/1HSG.pdb"
+    outdir = "s3://omic/eureka/Pocketminer/output"
     debug = false
 }
 