diff --git a/main.nf b/main.nf index 1e499e2..07c087a 100644 --- a/main.nf +++ b/main.nf @@ -2,7 +2,6 @@ nextflow.enable.dsl=2 -// Pipeline parameters params.pdb = 's3://omic/eureka/Pocketminer/1HSG.pdb' params.outdir = 's3://omic/eureka/Pocketminer/output' params.model_path = '/workspace/gvp/models/pocketminer' @@ -22,66 +21,36 @@ process POCKETMINER { def debug_flag = params.debug ? '--debug' : '' def outdir = params.outdir """ - set +e - touch run.log mkdir -p results + touch run.log - echo "=== Environment Debug ===" >> run.log - echo "Input path: ${pdb_path}" >> run.log - echo "Output path: ${outdir}" >> run.log - - PDB_FILE="" - - # Method 1: Direct path (if PVC mounted or local) - if [ -f "${pdb_path}" ]; then - echo "Found at direct path" >> run.log - cp "${pdb_path}" input.pdb && PDB_FILE="input.pdb" - fi - - # Method 2: Convert s3://omic/... to /omic/... (PVC mount path) - if [ -z "\$PDB_FILE" ]; then - PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')" - if [ -f "\$PVC_PATH" ]; then - echo "Found at PVC path: \$PVC_PATH" >> run.log - cp "\$PVC_PATH" input.pdb && PDB_FILE="input.pdb" - else - echo "Not found at PVC path: \$PVC_PATH" >> run.log - fi - fi - - # Method 3: Download from MinIO using boto3 - if [ -z "\$PDB_FILE" ]; then - ENDPOINT="\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}" - echo "Downloading via boto3 from: \$ENDPOINT" >> run.log - python -c " -import os, sys, boto3 + # Download input PDB from S3/MinIO + python -c " +import os, boto3 from botocore.client import Config -s3_path = '${pdb_path}'.replace('s3://', '') -bucket, key = s3_path.split('/', 1) -endpoint = os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000') -print(f'Bucket: {bucket}, Key: {key}') -s3 = boto3.client('s3', endpoint_url=endpoint, - aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''), - aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''), - config=Config(signature_version='s3v4'), region_name='us-east-1') + +def get_s3_client(): + return boto3.client('s3', + endpoint_url=os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000'), + aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''), + aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''), + config=Config(signature_version='s3v4'), + region_name='us-east-1') + +bucket, key = '${pdb_path}'.replace('s3://', '').split('/', 1) +s3 = get_s3_client() s3.download_file(bucket, key, 'input.pdb') -print(f'Downloaded {os.path.getsize(\"input.pdb\")} bytes') -" >> run.log 2>&1 - if [ -f input.pdb ] && [ -s input.pdb ]; then - PDB_FILE="input.pdb" - fi - fi +print(f'Downloaded input ({os.path.getsize(\"input.pdb\")} bytes)') +" 2>&1 | tee -a run.log - set -e - - if [ -z "\$PDB_FILE" ] || [ ! -f input.pdb ]; then - echo "ERROR: Could not retrieve PDB file" >> run.log - cat run.log + if [ ! -s input.pdb ]; then + echo "ERROR: Failed to download PDB file" | tee -a run.log exit 1 fi PDB_BASENAME="\$(basename '${pdb_path}' .pdb)" + # Run PocketMiner prediction python /workspace/entrypoint.py \\ --pdb input.pdb \\ --output-folder results \\ @@ -89,27 +58,28 @@ print(f'Downloaded {os.path.getsize(\"input.pdb\")} bytes') --model-path ${params.model_path} \\ ${debug_flag} 2>&1 | tee -a run.log - # Upload results to S3 - echo "Uploading results to ${outdir}..." >> run.log + # Upload results to S3/MinIO python -c " import os, boto3 from botocore.client import Config -outdir = '${outdir}'.replace('s3://', '') -bucket, prefix = outdir.split('/', 1) -endpoint = os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000') -s3 = boto3.client('s3', endpoint_url=endpoint, - aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''), - aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''), - config=Config(signature_version='s3v4'), region_name='us-east-1') + +def get_s3_client(): + return boto3.client('s3', + endpoint_url=os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000'), + aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''), + aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''), + config=Config(signature_version='s3v4'), + region_name='us-east-1') + +bucket, prefix = '${outdir}'.replace('s3://', '').split('/', 1) +s3 = get_s3_client() for f in os.listdir('results'): filepath = os.path.join('results', f) if os.path.isfile(filepath): key = f'{prefix}/{f}' s3.upload_file(filepath, bucket, key) print(f'Uploaded {f} -> s3://{bucket}/{key}') -" >> run.log 2>&1 - - echo "Pipeline completed successfully" >> run.log +" 2>&1 | tee -a run.log """ } diff --git a/nextflow.config b/nextflow.config index c8279b0..4355466 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,4 +1,3 @@ -// Manifest for Nextflow metadata manifest { name = 'pocketminer-Nextflow' author = 'Olamide' @@ -8,26 +7,12 @@ manifest { version = '1.0.0' } -// S3/MinIO plugin for direct S3 access (bypasses PVC mount issues) -plugins { - id 'nf-amazon' -} - -aws { - client { - endpoint = 'http://datalake-hl.datalake.svc.cluster.local:9000' - s3PathStyleAccess = true - } -} - -// Global default parameters params { pdb = "s3://omic/eureka/Pocketminer/1HSG.pdb" outdir = "s3://omic/eureka/Pocketminer/output" debug = false } -// Profiles for different execution environments profiles { standard { docker { @@ -44,14 +29,9 @@ profiles { docker { enabled = true } - k8s { - storageClaimName = 'eureka-pvc' - storageMountPath = '/omic/eureka' - } } } -// Process configurations process { cpus = 2 memory = '8 GB'