#!/usr/bin/env nextflow

/*
 * PocketMiner cryptic-pocket prediction pipeline.
 *
 * Downloads a PDB structure from S3/MinIO, runs the PocketMiner model inside
 * its container, and uploads the prediction results back to S3/MinIO.
 */

nextflow.enable.dsl=2

// Pipeline parameters (override on the command line, e.g. --pdb s3://...).
params.pdb        = 's3://omic/eureka/Pocketminer/1HSG.pdb'  // input structure (s3:// URI)
params.outdir     = 's3://omic/eureka/Pocketminer/output'    // destination prefix for results
params.model_path = '/workspace/gvp/models/pocketminer'      // model weights inside the container
params.debug      = false                                    // pass --debug to entrypoint.py

/*
 * Run PocketMiner on a single PDB file.
 *
 * Input:  pdb_path  - s3:// URI of the input PDB (passed as a value, not
 *                     staged by Nextflow; the task downloads it itself).
 * Output: all_results - everything written under results/
 *         log         - combined run.log of all three stages
 */
process POCKETMINER {
    container 'harbor.cluster.omic.ai/omic/pocketminer:v2'

    input:
    val pdb_path

    output:
    path "results/*", emit: all_results
    path "run.log",   emit: log

    script:
    def debug_flag = params.debug ? '--debug' : ''
    def outdir = params.outdir
    """
    # Abort on any failure; pipefail makes `cmd | tee` propagate cmd's exit
    # status, so a failing download, prediction, or upload fails the task
    # (previously the tee pipes silently masked every non-zero status).
    set -euo pipefail

    mkdir -p results
    touch run.log

    # Shared S3/MinIO transfer helper, written once instead of two inlined
    # near-identical `python -c` snippets. Paths are passed as argv, and the
    # single-quoted heredoc keeps bash from performing quote removal on the
    # Python source (the old `\\"input.pdb\\"` escapes were stripped by bash,
    # handing Python the bare token input.pdb -> NameError).
    cat > s3xfer.py <<'PYEOF'
import os
import sys

import boto3
from botocore.client import Config


def get_s3_client():
    # Endpoint and credentials come from the environment, defaulting to the
    # in-cluster MinIO service.
    return boto3.client(
        's3',
        endpoint_url=os.environ.get(
            'AWS_ENDPOINT_URL',
            'http://datalake-hl.datalake.svc.cluster.local:9000'),
        aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
        aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
        config=Config(signature_version='s3v4'),
        region_name='us-east-1')


def split_uri(uri):
    # 's3://bucket/some/key' -> ('bucket', 'some/key')
    return uri.replace('s3://', '').split('/', 1)


cmd = sys.argv[1]
s3 = get_s3_client()
if cmd == 'download':
    # download <s3-uri> <local-path>
    bucket, key = split_uri(sys.argv[2])
    s3.download_file(bucket, key, sys.argv[3])
    size = os.path.getsize(sys.argv[3])
    print(f'Downloaded input ({size} bytes)')
elif cmd == 'upload-dir':
    # upload-dir <local-dir> <s3-uri-prefix>  (top-level files only)
    local_dir = sys.argv[2]
    bucket, prefix = split_uri(sys.argv[3])
    for name in os.listdir(local_dir):
        path = os.path.join(local_dir, name)
        if os.path.isfile(path):
            key = f'{prefix}/{name}'
            s3.upload_file(path, bucket, key)
            print(f'Uploaded {name} -> s3://{bucket}/{key}')
else:
    sys.exit(f'unknown command: {cmd}')
PYEOF

    # Download input PDB from S3/MinIO
    python s3xfer.py download '${pdb_path}' input.pdb 2>&1 | tee -a run.log

    # Guard against a zero-byte object (download errors already abort above).
    if [ ! -s input.pdb ]; then
        echo "ERROR: Failed to download PDB file" | tee -a run.log
        exit 1
    fi

    PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"

    # Run PocketMiner prediction
    python /workspace/entrypoint.py \\
        --pdb input.pdb \\
        --output-folder results \\
        --output-name "\$PDB_BASENAME" \\
        --model-path '${params.model_path}' \\
        ${debug_flag} 2>&1 | tee -a run.log

    # Upload results to S3/MinIO
    python s3xfer.py upload-dir results '${outdir}' 2>&1 | tee -a run.log
    """
}

workflow {
    POCKETMINER(params.pdb)
}