- Remove debug logging and dead PVC/direct-path fallbacks from main.nf
- Remove nf-amazon plugin and PVC storage config from nextflow.config (S3 access is handled by boto3 in the script)
- Simplify input download and output upload to single boto3 blocks
89 lines
2.6 KiB
Plaintext
#!/usr/bin/env nextflow
|
|
|
|
nextflow.enable.dsl=2
|
|
|
|
params.pdb = 's3://omic/eureka/Pocketminer/1HSG.pdb'
|
|
params.outdir = 's3://omic/eureka/Pocketminer/output'
|
|
params.model_path = '/workspace/gvp/models/pocketminer'
|
|
params.debug = false
|
|
|
|
process POCKETMINER {
|
|
container 'harbor.cluster.omic.ai/omic/pocketminer:v2'
|
|
|
|
input:
|
|
val pdb_path
|
|
|
|
output:
|
|
path "results/*", emit: all_results
|
|
path "run.log", emit: log
|
|
|
|
script:
|
|
def debug_flag = params.debug ? '--debug' : ''
|
|
def outdir = params.outdir
|
|
"""
|
|
mkdir -p results
|
|
touch run.log
|
|
|
|
# Download input PDB from S3/MinIO
|
|
python -c "
|
|
import os, boto3
|
|
from botocore.client import Config
|
|
|
|
def get_s3_client():
|
|
return boto3.client('s3',
|
|
endpoint_url=os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000'),
|
|
aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
|
|
aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
|
|
config=Config(signature_version='s3v4'),
|
|
region_name='us-east-1')
|
|
|
|
bucket, key = '${pdb_path}'.replace('s3://', '').split('/', 1)
|
|
s3 = get_s3_client()
|
|
s3.download_file(bucket, key, 'input.pdb')
|
|
print(f'Downloaded input ({os.path.getsize(\"input.pdb\")} bytes)')
|
|
" 2>&1 | tee -a run.log
|
|
|
|
if [ ! -s input.pdb ]; then
|
|
echo "ERROR: Failed to download PDB file" | tee -a run.log
|
|
exit 1
|
|
fi
|
|
|
|
PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"
|
|
|
|
# Run PocketMiner prediction
|
|
python /workspace/entrypoint.py \\
|
|
--pdb input.pdb \\
|
|
--output-folder results \\
|
|
--output-name "\$PDB_BASENAME" \\
|
|
--model-path ${params.model_path} \\
|
|
${debug_flag} 2>&1 | tee -a run.log
|
|
|
|
# Upload results to S3/MinIO
|
|
python -c "
|
|
import os, boto3
|
|
from botocore.client import Config
|
|
|
|
def get_s3_client():
|
|
return boto3.client('s3',
|
|
endpoint_url=os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000'),
|
|
aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
|
|
aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
|
|
config=Config(signature_version='s3v4'),
|
|
region_name='us-east-1')
|
|
|
|
bucket, prefix = '${outdir}'.replace('s3://', '').split('/', 1)
|
|
s3 = get_s3_client()
|
|
for f in os.listdir('results'):
|
|
filepath = os.path.join('results', f)
|
|
if os.path.isfile(filepath):
|
|
key = f'{prefix}/{f}'
|
|
s3.upload_file(filepath, bucket, key)
|
|
print(f'Uploaded {f} -> s3://{bucket}/{key}')
|
|
" 2>&1 | tee -a run.log
|
|
"""
|
|
}
|
|
|
|
workflow {
|
|
POCKETMINER(params.pdb)
|
|
}
|