Clean up pipeline configuration
- Remove debug logging and dead PVC/direct-path fallbacks from main.nf.
- Remove the nf-amazon plugin and PVC storage config from nextflow.config (S3 access is handled by boto3 in the script).
- Simplify input download and output upload to single boto3 blocks.
This commit is contained in:
86
main.nf
86
main.nf
@@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
nextflow.enable.dsl=2
|
nextflow.enable.dsl=2
|
||||||
|
|
||||||
// Pipeline parameters
|
|
||||||
params.pdb = 's3://omic/eureka/Pocketminer/1HSG.pdb'
|
params.pdb = 's3://omic/eureka/Pocketminer/1HSG.pdb'
|
||||||
params.outdir = 's3://omic/eureka/Pocketminer/output'
|
params.outdir = 's3://omic/eureka/Pocketminer/output'
|
||||||
params.model_path = '/workspace/gvp/models/pocketminer'
|
params.model_path = '/workspace/gvp/models/pocketminer'
|
||||||
@@ -22,66 +21,36 @@ process POCKETMINER {
|
|||||||
def debug_flag = params.debug ? '--debug' : ''
|
def debug_flag = params.debug ? '--debug' : ''
|
||||||
def outdir = params.outdir
|
def outdir = params.outdir
|
||||||
"""
|
"""
|
||||||
set +e
|
|
||||||
touch run.log
|
|
||||||
mkdir -p results
|
mkdir -p results
|
||||||
|
touch run.log
|
||||||
|
|
||||||
echo "=== Environment Debug ===" >> run.log
|
# Download input PDB from S3/MinIO
|
||||||
echo "Input path: ${pdb_path}" >> run.log
|
|
||||||
echo "Output path: ${outdir}" >> run.log
|
|
||||||
|
|
||||||
PDB_FILE=""
|
|
||||||
|
|
||||||
# Method 1: Direct path (if PVC mounted or local)
|
|
||||||
if [ -f "${pdb_path}" ]; then
|
|
||||||
echo "Found at direct path" >> run.log
|
|
||||||
cp "${pdb_path}" input.pdb && PDB_FILE="input.pdb"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Method 2: Convert s3://omic/... to /omic/... (PVC mount path)
|
|
||||||
if [ -z "\$PDB_FILE" ]; then
|
|
||||||
PVC_PATH="\$(echo '${pdb_path}' | sed 's|^s3://|/|')"
|
|
||||||
if [ -f "\$PVC_PATH" ]; then
|
|
||||||
echo "Found at PVC path: \$PVC_PATH" >> run.log
|
|
||||||
cp "\$PVC_PATH" input.pdb && PDB_FILE="input.pdb"
|
|
||||||
else
|
|
||||||
echo "Not found at PVC path: \$PVC_PATH" >> run.log
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Method 3: Download from MinIO using boto3
|
|
||||||
if [ -z "\$PDB_FILE" ]; then
|
|
||||||
ENDPOINT="\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}"
|
|
||||||
echo "Downloading via boto3 from: \$ENDPOINT" >> run.log
|
|
||||||
python -c "
|
python -c "
|
||||||
import os, sys, boto3
|
import os, boto3
|
||||||
from botocore.client import Config
|
from botocore.client import Config
|
||||||
s3_path = '${pdb_path}'.replace('s3://', '')
|
|
||||||
bucket, key = s3_path.split('/', 1)
|
def get_s3_client():
|
||||||
endpoint = os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000')
|
return boto3.client('s3',
|
||||||
print(f'Bucket: {bucket}, Key: {key}')
|
endpoint_url=os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000'),
|
||||||
s3 = boto3.client('s3', endpoint_url=endpoint,
|
|
||||||
aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
|
aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
|
||||||
aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
|
aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
|
||||||
config=Config(signature_version='s3v4'), region_name='us-east-1')
|
config=Config(signature_version='s3v4'),
|
||||||
|
region_name='us-east-1')
|
||||||
|
|
||||||
|
bucket, key = '${pdb_path}'.replace('s3://', '').split('/', 1)
|
||||||
|
s3 = get_s3_client()
|
||||||
s3.download_file(bucket, key, 'input.pdb')
|
s3.download_file(bucket, key, 'input.pdb')
|
||||||
print(f'Downloaded {os.path.getsize(\"input.pdb\")} bytes')
|
print(f'Downloaded input ({os.path.getsize(\"input.pdb\")} bytes)')
|
||||||
" >> run.log 2>&1
|
" 2>&1 | tee -a run.log
|
||||||
if [ -f input.pdb ] && [ -s input.pdb ]; then
|
|
||||||
PDB_FILE="input.pdb"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
set -e
|
if [ ! -s input.pdb ]; then
|
||||||
|
echo "ERROR: Failed to download PDB file" | tee -a run.log
|
||||||
if [ -z "\$PDB_FILE" ] || [ ! -f input.pdb ]; then
|
|
||||||
echo "ERROR: Could not retrieve PDB file" >> run.log
|
|
||||||
cat run.log
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"
|
PDB_BASENAME="\$(basename '${pdb_path}' .pdb)"
|
||||||
|
|
||||||
|
# Run PocketMiner prediction
|
||||||
python /workspace/entrypoint.py \\
|
python /workspace/entrypoint.py \\
|
||||||
--pdb input.pdb \\
|
--pdb input.pdb \\
|
||||||
--output-folder results \\
|
--output-folder results \\
|
||||||
@@ -89,27 +58,28 @@ print(f'Downloaded {os.path.getsize(\"input.pdb\")} bytes')
|
|||||||
--model-path ${params.model_path} \\
|
--model-path ${params.model_path} \\
|
||||||
${debug_flag} 2>&1 | tee -a run.log
|
${debug_flag} 2>&1 | tee -a run.log
|
||||||
|
|
||||||
# Upload results to S3
|
# Upload results to S3/MinIO
|
||||||
echo "Uploading results to ${outdir}..." >> run.log
|
|
||||||
python -c "
|
python -c "
|
||||||
import os, boto3
|
import os, boto3
|
||||||
from botocore.client import Config
|
from botocore.client import Config
|
||||||
outdir = '${outdir}'.replace('s3://', '')
|
|
||||||
bucket, prefix = outdir.split('/', 1)
|
def get_s3_client():
|
||||||
endpoint = os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000')
|
return boto3.client('s3',
|
||||||
s3 = boto3.client('s3', endpoint_url=endpoint,
|
endpoint_url=os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000'),
|
||||||
aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
|
aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
|
||||||
aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
|
aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
|
||||||
config=Config(signature_version='s3v4'), region_name='us-east-1')
|
config=Config(signature_version='s3v4'),
|
||||||
|
region_name='us-east-1')
|
||||||
|
|
||||||
|
bucket, prefix = '${outdir}'.replace('s3://', '').split('/', 1)
|
||||||
|
s3 = get_s3_client()
|
||||||
for f in os.listdir('results'):
|
for f in os.listdir('results'):
|
||||||
filepath = os.path.join('results', f)
|
filepath = os.path.join('results', f)
|
||||||
if os.path.isfile(filepath):
|
if os.path.isfile(filepath):
|
||||||
key = f'{prefix}/{f}'
|
key = f'{prefix}/{f}'
|
||||||
s3.upload_file(filepath, bucket, key)
|
s3.upload_file(filepath, bucket, key)
|
||||||
print(f'Uploaded {f} -> s3://{bucket}/{key}')
|
print(f'Uploaded {f} -> s3://{bucket}/{key}')
|
||||||
" >> run.log 2>&1
|
" 2>&1 | tee -a run.log
|
||||||
|
|
||||||
echo "Pipeline completed successfully" >> run.log
|
|
||||||
"""
|
"""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
// Manifest for Nextflow metadata
|
|
||||||
manifest {
|
manifest {
|
||||||
name = 'pocketminer-Nextflow'
|
name = 'pocketminer-Nextflow'
|
||||||
author = 'Olamide'
|
author = 'Olamide'
|
||||||
@@ -8,26 +7,12 @@ manifest {
|
|||||||
version = '1.0.0'
|
version = '1.0.0'
|
||||||
}
|
}
|
||||||
|
|
||||||
// S3/MinIO plugin for direct S3 access (bypasses PVC mount issues)
|
|
||||||
plugins {
|
|
||||||
id 'nf-amazon'
|
|
||||||
}
|
|
||||||
|
|
||||||
aws {
|
|
||||||
client {
|
|
||||||
endpoint = 'http://datalake-hl.datalake.svc.cluster.local:9000'
|
|
||||||
s3PathStyleAccess = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Global default parameters
|
|
||||||
params {
|
params {
|
||||||
pdb = "s3://omic/eureka/Pocketminer/1HSG.pdb"
|
pdb = "s3://omic/eureka/Pocketminer/1HSG.pdb"
|
||||||
outdir = "s3://omic/eureka/Pocketminer/output"
|
outdir = "s3://omic/eureka/Pocketminer/output"
|
||||||
debug = false
|
debug = false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Profiles for different execution environments
|
|
||||||
profiles {
|
profiles {
|
||||||
standard {
|
standard {
|
||||||
docker {
|
docker {
|
||||||
@@ -44,14 +29,9 @@ profiles {
|
|||||||
docker {
|
docker {
|
||||||
enabled = true
|
enabled = true
|
||||||
}
|
}
|
||||||
k8s {
|
|
||||||
storageClaimName = 'eureka-pvc'
|
|
||||||
storageMountPath = '/omic/eureka'
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process configurations
|
|
||||||
process {
|
process {
|
||||||
cpus = 2
|
cpus = 2
|
||||||
memory = '8 GB'
|
memory = '8 GB'
|
||||||
|
|||||||
Reference in New Issue
Block a user