diff --git a/main.nf b/main.nf
index 3f60b17..c33b676 100644
--- a/main.nf
+++ b/main.nf
@@ -56,74 +56,50 @@ process POCKETMINER {
         fi
     fi
 
-    # Method 3: Download from MinIO using python boto3/S3
+    # Method 3: Download from MinIO using python
     if [ -z "\$PDB_FILE" ]; then
         S3_PATH="${pdb_path}"
         ENDPOINT="\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}"
-        echo "Downloading via python S3 client from: \$ENDPOINT" >> run.log
+        echo "Downloading via python S3 from: \$ENDPOINT" >> run.log
         echo "S3 path: \$S3_PATH" >> run.log
         echo "AWS_ACCESS_KEY_ID: \${AWS_ACCESS_KEY_ID:+set}" >> run.log
         echo "AWS_SECRET_ACCESS_KEY: \${AWS_SECRET_ACCESS_KEY:+set}" >> run.log
         echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" >> run.log
+
+        # Install boto3 if needed, then download
+        pip install -q boto3 2>> run.log || conda install -y -q boto3 2>> run.log || true
+        # Export so the inline python can see it: unexported shell variables
+        # never appear in os.environ.
+        export S3_INPUT="\$S3_PATH"
+
         python -c "
 import os, sys
-s3_path = '\$S3_PATH'
-endpoint = '\$ENDPOINT'
-
-# Parse s3://bucket/key
+s3_path = os.environ.get('S3_INPUT', '${pdb_path}')
+endpoint = os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000')
 path = s3_path.replace('s3://', '')
 parts = path.split('/', 1)
 bucket = parts[0]
 key = parts[1] if len(parts) > 1 else ''
-
-print(f'Bucket: {bucket}, Key: {key}')
-print(f'Endpoint: {endpoint}')
-
-try:
-    import boto3
-    from botocore.client import Config
-    s3 = boto3.client('s3',
-        endpoint_url=endpoint,
-        aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
-        aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
-        config=Config(signature_version='s3v4'),
-        region_name='us-east-1'
-    )
-    s3.download_file(bucket, key, 'input.pdb')
-    print(f'Downloaded via boto3 ({os.path.getsize(\"input.pdb\")} bytes)')
-except ImportError:
-    print('boto3 not available, trying urllib with signing...')
-    # Fallback: use subprocess to call python with hmac signing
-    import urllib.request, hmac, hashlib, datetime
-    access_key = os.environ.get('AWS_ACCESS_KEY_ID', '')
-    secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY', '')
-    if access_key and secret_key:
-        # Simple S3 GET with AWS Signature V2
-        date_str = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
-        string_to_sign = f'GET\n\n\n{date_str}\n/{bucket}/{key}'
-        signature = hmac.new(secret_key.encode(), string_to_sign.encode(), hashlib.sha1)
-        import base64
-        sig_b64 = base64.b64encode(signature.digest()).decode()
-        url = f'{endpoint}/{bucket}/{key}'
-        req = urllib.request.Request(url)
-        req.add_header('Date', date_str)
-        req.add_header('Authorization', f'AWS {access_key}:{sig_b64}')
-        with urllib.request.urlopen(req) as resp:
-            with open('input.pdb', 'wb') as f:
-                f.write(resp.read())
-        print(f'Downloaded via signed URL ({os.path.getsize(\"input.pdb\")} bytes)')
-    else:
-        print('No AWS credentials available')
-        sys.exit(1)
-except Exception as e:
-    print(f'Download failed: {e}')
-    sys.exit(1)
+print(f'Bucket: {bucket}, Key: {key}, Endpoint: {endpoint}')
+import boto3
+from botocore.client import Config
+s3 = boto3.client('s3',
+    endpoint_url=endpoint,
+    aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
+    aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
+    config=Config(signature_version='s3v4'),
+    region_name='us-east-1'
+)
+s3.download_file(bucket, key, 'input.pdb')
+size = os.path.getsize('input.pdb')
+print(f'Downloaded {size} bytes')
 " >> run.log 2>&1
         if [ -f input.pdb ] && [ -s input.pdb ]; then
             echo "S3 download successful (\$(wc -c < input.pdb) bytes)" >> run.log
             PDB_FILE="input.pdb"
         else
-            echo "S3 download failed" >> run.log
+            echo "S3 download failed - check run.log" >> run.log
+            cat run.log
             rm -f input.pdb
         fi
     fi