Clean up pipeline configuration

- Remove debug logging and dead PVC/direct-path fallbacks from main.nf
- Remove nf-amazon plugin and PVC storage config from nextflow.config
  (S3 access is handled by boto3 in the script)
- Simplify input download and output upload to one boto3 block each
This commit is contained in:
2026-03-23 18:27:50 +01:00
parent a430caaf96
commit 8e3b2170e9
2 changed files with 33 additions and 83 deletions

86
main.nf
View File

@@ -2,7 +2,6 @@
nextflow.enable.dsl=2 nextflow.enable.dsl=2
// Pipeline parameters
params.pdb = 's3://omic/eureka/Pocketminer/1HSG.pdb' params.pdb = 's3://omic/eureka/Pocketminer/1HSG.pdb'
params.outdir = 's3://omic/eureka/Pocketminer/output' params.outdir = 's3://omic/eureka/Pocketminer/output'
params.model_path = '/workspace/gvp/models/pocketminer' params.model_path = '/workspace/gvp/models/pocketminer'
@@ -22,66 +21,36 @@ process POCKETMINER {
def debug_flag = params.debug ? '--debug' : '' def debug_flag = params.debug ? '--debug' : ''
def outdir = params.outdir def outdir = params.outdir
""" """
set +e
touch run.log
mkdir -p results mkdir -p results
touch run.log
echo "=== Environment Debug ===" >> run.log # Download input PDB from S3/MinIO
echo "Input path: ${pdb_path}" >> run.log
echo "Output path: ${outdir}" >> run.log
PDB_FILE=""
# Method 1: Direct path (if PVC mounted or local)
if [ -f "${pdb_path}" ]; then
echo "Found at direct path" >> run.log
cp "${pdb_path}" input.pdb && PDB_FILE="input.pdb"
fi
# Method 2: Convert s3://omic/... to /omic/... (PVC mount path)
if [ -z "\$PDB_FILE" ]; then
PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')"
if [ -f "\$PVC_PATH" ]; then
echo "Found at PVC path: \$PVC_PATH" >> run.log
cp "\$PVC_PATH" input.pdb && PDB_FILE="input.pdb"
else
echo "Not found at PVC path: \$PVC_PATH" >> run.log
fi
fi
# Method 3: Download from MinIO using boto3
if [ -z "\$PDB_FILE" ]; then
ENDPOINT="\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}"
echo "Downloading via boto3 from: \$ENDPOINT" >> run.log
python -c " python -c "
import os, sys, boto3 import os, boto3
from botocore.client import Config from botocore.client import Config
s3_path = '${pdb_path}'.replace('s3://', '')
bucket, key = s3_path.split('/', 1) def get_s3_client():
endpoint = os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000') return boto3.client('s3',
print(f'Bucket: {bucket}, Key: {key}') endpoint_url=os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000'),
s3 = boto3.client('s3', endpoint_url=endpoint,
aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''), aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''), aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
config=Config(signature_version='s3v4'), region_name='us-east-1') config=Config(signature_version='s3v4'),
region_name='us-east-1')
bucket, key = '${pdb_path}'.replace('s3://', '').split('/', 1)
s3 = get_s3_client()
s3.download_file(bucket, key, 'input.pdb') s3.download_file(bucket, key, 'input.pdb')
print(f'Downloaded {os.path.getsize(\"input.pdb\")} bytes') print(f'Downloaded input ({os.path.getsize(\"input.pdb\")} bytes)')
" >> run.log 2>&1 " 2>&1 | tee -a run.log
if [ -f input.pdb ] && [ -s input.pdb ]; then
PDB_FILE="input.pdb"
fi
fi
set -e if [ ! -s input.pdb ]; then
echo "ERROR: Failed to download PDB file" | tee -a run.log
if [ -z "\$PDB_FILE" ] || [ ! -f input.pdb ]; then
echo "ERROR: Could not retrieve PDB file" >> run.log
cat run.log
exit 1 exit 1
fi fi
PDB_BASENAME="\$(basename '${pdb_path}' .pdb)" PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"
# Run PocketMiner prediction
python /workspace/entrypoint.py \\ python /workspace/entrypoint.py \\
--pdb input.pdb \\ --pdb input.pdb \\
--output-folder results \\ --output-folder results \\
@@ -89,27 +58,28 @@ print(f'Downloaded {os.path.getsize(\"input.pdb\")} bytes')
--model-path ${params.model_path} \\ --model-path ${params.model_path} \\
${debug_flag} 2>&1 | tee -a run.log ${debug_flag} 2>&1 | tee -a run.log
# Upload results to S3 # Upload results to S3/MinIO
echo "Uploading results to ${outdir}..." >> run.log
python -c " python -c "
import os, boto3 import os, boto3
from botocore.client import Config from botocore.client import Config
outdir = '${outdir}'.replace('s3://', '')
bucket, prefix = outdir.split('/', 1) def get_s3_client():
endpoint = os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000') return boto3.client('s3',
s3 = boto3.client('s3', endpoint_url=endpoint, endpoint_url=os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000'),
aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''), aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''), aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
config=Config(signature_version='s3v4'), region_name='us-east-1') config=Config(signature_version='s3v4'),
region_name='us-east-1')
bucket, prefix = '${outdir}'.replace('s3://', '').split('/', 1)
s3 = get_s3_client()
for f in os.listdir('results'): for f in os.listdir('results'):
filepath = os.path.join('results', f) filepath = os.path.join('results', f)
if os.path.isfile(filepath): if os.path.isfile(filepath):
key = f'{prefix}/{f}' key = f'{prefix}/{f}'
s3.upload_file(filepath, bucket, key) s3.upload_file(filepath, bucket, key)
print(f'Uploaded {f} -> s3://{bucket}/{key}') print(f'Uploaded {f} -> s3://{bucket}/{key}')
" >> run.log 2>&1 " 2>&1 | tee -a run.log
echo "Pipeline completed successfully" >> run.log
""" """
} }

View File

@@ -1,4 +1,3 @@
// Manifest for Nextflow metadata
manifest { manifest {
name = 'pocketminer-Nextflow' name = 'pocketminer-Nextflow'
author = 'Olamide' author = 'Olamide'
@@ -8,26 +7,12 @@ manifest {
version = '1.0.0' version = '1.0.0'
} }
// S3/MinIO plugin for direct S3 access (bypasses PVC mount issues)
plugins {
id 'nf-amazon'
}
aws {
client {
endpoint = 'http://datalake-hl.datalake.svc.cluster.local:9000'
s3PathStyleAccess = true
}
}
// Global default parameters
params { params {
pdb = "s3://omic/eureka/Pocketminer/1HSG.pdb" pdb = "s3://omic/eureka/Pocketminer/1HSG.pdb"
outdir = "s3://omic/eureka/Pocketminer/output" outdir = "s3://omic/eureka/Pocketminer/output"
debug = false debug = false
} }
// Profiles for different execution environments
profiles { profiles {
standard { standard {
docker { docker {
@@ -44,14 +29,9 @@ profiles {
docker { docker {
enabled = true enabled = true
} }
k8s {
storageClaimName = 'eureka-pvc'
storageMountPath = '/omic/eureka'
}
} }
} }
// Process configurations
process { process {
cpus = 2 cpus = 2
memory = '8 GB' memory = '8 GB'