#!/usr/bin/env nextflow

nextflow.enable.dsl=2

// Pipeline parameters
params.pdb = 's3://omic/eureka/Pocketminer/1HSG.pdb'
params.outdir = 's3://omic/eureka/Pocketminer/output'
params.model_path = '/workspace/gvp/models/pocketminer'
params.debug = false

/*
 * POCKETMINER
 *
 * Fetches one PDB file, runs /workspace/entrypoint.py on it inside the
 * pocketminer container, and uploads every regular file found in ./results
 * to params.outdir through boto3.
 *
 * Input:
 *   pdb_path  - location of the PDB file, passed as a plain value (not staged
 *               by Nextflow). It is tried, in order, as a direct filesystem
 *               path, as a mount path obtained by replacing a leading s3://
 *               with /, and finally as an s3://bucket/key object downloaded
 *               with boto3.
 *
 * Output:
 *   all_results - everything written under results/
 *   log         - run.log, the combined log of retrieval, entrypoint and upload
 *
 * Environment read by the task: AWS_ENDPOINT_URL (falls back to the in-cluster
 * datalake endpoint), AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY.
 */
process POCKETMINER {
    container 'harbor.cluster.omic.ai/omic/pocketminer:v2'

    input:
    val pdb_path

    output:
    path "results/*", emit: all_results
    path "run.log", emit: log

    script:
    def debug_flag = params.debug ? '--debug' : ''
    def outdir = params.outdir
    // The inline Python passed to `python -c` is deliberately kept at column 0 so
    // that it is valid Python no matter how the script text is (or is not)
    // de-indented before execution. The snippets must not contain double quotes:
    // they live inside a double-quoted Bash argument.
    """
    # The three retrieval strategies are best-effort, so errexit stays off
    # until one of them has produced input.pdb.
    set +e
    touch run.log
    mkdir -p results

    echo "=== Environment Debug ===" >> run.log
    echo "Input path: ${pdb_path}" >> run.log
    echo "Output path: ${outdir}" >> run.log

    PDB_FILE=""

    # Method 1: Direct path (if PVC mounted or local)
    if [ -f "${pdb_path}" ]; then
        echo "Found at direct path" >> run.log
        cp "${pdb_path}" input.pdb && PDB_FILE="input.pdb"
    fi

    # Method 2: Convert s3://omic/... to /omic/... (PVC mount path)
    if [ -z "\$PDB_FILE" ]; then
        PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')"
        if [ -f "\$PVC_PATH" ]; then
            echo "Found at PVC path: \$PVC_PATH" >> run.log
            cp "\$PVC_PATH" input.pdb && PDB_FILE="input.pdb"
        else
            echo "Not found at PVC path: \$PVC_PATH" >> run.log
        fi
    fi

    # Method 3: Download from MinIO using boto3
    if [ -z "\$PDB_FILE" ]; then
        ENDPOINT="\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}"
        echo "Downloading via boto3 from: \$ENDPOINT" >> run.log
        python -c "
import os, boto3
from botocore.client import Config
s3_path = '${pdb_path}'.replace('s3://', '')
bucket, key = s3_path.split('/', 1)
endpoint = os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000')
print(f'Bucket: {bucket}, Key: {key}')
s3 = boto3.client('s3', endpoint_url=endpoint,
    aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
    aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
    config=Config(signature_version='s3v4'), region_name='us-east-1')
s3.download_file(bucket, key, 'input.pdb')
# Size is computed outside the f-string so no nested quotes are needed.
size = os.path.getsize('input.pdb')
print(f'Downloaded {size} bytes')
" >> run.log 2>&1
        if [ -f input.pdb ] && [ -s input.pdb ]; then
            PDB_FILE="input.pdb"
        fi
    fi

    # From here on every failure is fatal. pipefail is required because the
    # entrypoint is piped through tee, whose exit status would otherwise hide
    # a failing entrypoint.
    set -e
    set -o pipefail

    if [ -z "\$PDB_FILE" ] || [ ! -f input.pdb ]; then
        echo "ERROR: Could not retrieve PDB file" >> run.log
        cat run.log
        exit 1
    fi

    PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"

    python /workspace/entrypoint.py \\
        --pdb input.pdb \\
        --output-folder results \\
        --output-name "\$PDB_BASENAME" \\
        --model-path "${params.model_path}" \\
        ${debug_flag} 2>&1 | tee -a run.log

    # Upload results to S3
    echo "Uploading results to ${outdir}..." >> run.log
    python -c "
import os, boto3
from botocore.client import Config
outdir = '${outdir}'
if outdir.startswith('s3://'):
    outdir = outdir[len('s3://'):]
# partition tolerates a bucket-only destination; strip avoids empty key segments.
bucket, _, prefix = outdir.partition('/')
prefix = prefix.strip('/')
endpoint = os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000')
s3 = boto3.client('s3', endpoint_url=endpoint,
    aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
    aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
    config=Config(signature_version='s3v4'), region_name='us-east-1')
for f in os.listdir('results'):
    filepath = os.path.join('results', f)
    if os.path.isfile(filepath):
        key = f'{prefix}/{f}' if prefix else f
        s3.upload_file(filepath, bucket, key)
        print(f'Uploaded {f} -> s3://{bucket}/{key}')
" >> run.log 2>&1

    echo "Pipeline completed successfully" >> run.log
    """
}

workflow {
    POCKETMINER(params.pdb)
}