Install boto3 at runtime for authenticated S3 download (fix syntax error)
This commit is contained in:
76
main.nf
76
main.nf
@@ -56,74 +56,46 @@ process POCKETMINER {
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Method 3: Download from MinIO using python boto3/S3
|
# Method 3: Download from MinIO using python
|
||||||
if [ -z "\$PDB_FILE" ]; then
|
if [ -z "\$PDB_FILE" ]; then
|
||||||
S3_PATH="${pdb_path}"
|
S3_PATH="${pdb_path}"
|
||||||
ENDPOINT="\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}"
|
ENDPOINT="\${AWS_ENDPOINT_URL:-http://datalake-hl.datalake.svc.cluster.local:9000}"
|
||||||
echo "Downloading via python S3 client from: \$ENDPOINT" >> run.log
|
echo "Downloading via python S3 from: \$ENDPOINT" >> run.log
|
||||||
echo "S3 path: \$S3_PATH" >> run.log
|
echo "S3 path: \$S3_PATH" >> run.log
|
||||||
echo "AWS_ACCESS_KEY_ID: \${AWS_ACCESS_KEY_ID:+set}" >> run.log
|
echo "AWS_ACCESS_KEY_ID: \${AWS_ACCESS_KEY_ID:+set}" >> run.log
|
||||||
echo "AWS_SECRET_ACCESS_KEY: \${AWS_SECRET_ACCESS_KEY:+set}" >> run.log
|
echo "AWS_SECRET_ACCESS_KEY: \${AWS_SECRET_ACCESS_KEY:+set}" >> run.log
|
||||||
echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" >> run.log
|
echo "AWS_ENDPOINT_URL: \${AWS_ENDPOINT_URL:-not set}" >> run.log
|
||||||
|
|
||||||
|
# Install boto3 if needed, then download
|
||||||
|
pip install -q boto3 2>> run.log || conda install -y -q boto3 2>> run.log || true
|
||||||
|
|
||||||
python -c "
|
python -c "
|
||||||
import os, sys
|
import os, sys
|
||||||
s3_path = '\$S3_PATH'
|
s3_path = os.environ.get('S3_INPUT', '${pdb_path}')
|
||||||
endpoint = '\$ENDPOINT'
|
endpoint = os.environ.get('AWS_ENDPOINT_URL', 'http://datalake-hl.datalake.svc.cluster.local:9000')
|
||||||
|
|
||||||
# Parse s3://bucket/key
|
|
||||||
path = s3_path.replace('s3://', '')
|
path = s3_path.replace('s3://', '')
|
||||||
parts = path.split('/', 1)
|
parts = path.split('/', 1)
|
||||||
bucket = parts[0]
|
bucket, key = parts[0], parts[1]
|
||||||
key = parts[1] if len(parts) > 1 else ''
|
print(f'Bucket: {bucket}, Key: {key}, Endpoint: {endpoint}')
|
||||||
|
import boto3
|
||||||
print(f'Bucket: {bucket}, Key: {key}')
|
from botocore.client import Config
|
||||||
print(f'Endpoint: {endpoint}')
|
s3 = boto3.client('s3',
|
||||||
|
endpoint_url=endpoint,
|
||||||
try:
|
aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
|
||||||
import boto3
|
aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
|
||||||
from botocore.client import Config
|
config=Config(signature_version='s3v4'),
|
||||||
s3 = boto3.client('s3',
|
region_name='us-east-1'
|
||||||
endpoint_url=endpoint,
|
)
|
||||||
aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', ''),
|
s3.download_file(bucket, key, 'input.pdb')
|
||||||
aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', ''),
|
size = os.path.getsize('input.pdb')
|
||||||
config=Config(signature_version='s3v4'),
|
print(f'Downloaded {size} bytes')
|
||||||
region_name='us-east-1'
|
|
||||||
)
|
|
||||||
s3.download_file(bucket, key, 'input.pdb')
|
|
||||||
print(f'Downloaded via boto3 ({os.path.getsize(\"input.pdb\")} bytes)')
|
|
||||||
except ImportError:
|
|
||||||
print('boto3 not available, trying urllib with signing...')
|
|
||||||
# Fallback: use subprocess to call python with hmac signing
|
|
||||||
import urllib.request, hmac, hashlib, datetime
|
|
||||||
access_key = os.environ.get('AWS_ACCESS_KEY_ID', '')
|
|
||||||
secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY', '')
|
|
||||||
if access_key and secret_key:
|
|
||||||
# Simple S3 GET with AWS Signature V2
|
|
||||||
date_str = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
|
|
||||||
string_to_sign = f'GET\n\n\n{date_str}\n/{bucket}/{key}'
|
|
||||||
signature = hmac.new(secret_key.encode(), string_to_sign.encode(), hashlib.sha1)
|
|
||||||
import base64
|
|
||||||
sig_b64 = base64.b64encode(signature.digest()).decode()
|
|
||||||
url = f'{endpoint}/{bucket}/{key}'
|
|
||||||
req = urllib.request.Request(url)
|
|
||||||
req.add_header('Date', date_str)
|
|
||||||
req.add_header('Authorization', f'AWS {access_key}:{sig_b64}')
|
|
||||||
with urllib.request.urlopen(req) as resp:
|
|
||||||
with open('input.pdb', 'wb') as f:
|
|
||||||
f.write(resp.read())
|
|
||||||
print(f'Downloaded via signed URL ({os.path.getsize(\"input.pdb\")} bytes)')
|
|
||||||
else:
|
|
||||||
print('No AWS credentials available')
|
|
||||||
sys.exit(1)
|
|
||||||
except Exception as e:
|
|
||||||
print(f'Download failed: {e}')
|
|
||||||
sys.exit(1)
|
|
||||||
" >> run.log 2>&1
|
" >> run.log 2>&1
|
||||||
if [ -f input.pdb ] && [ -s input.pdb ]; then
|
if [ -f input.pdb ] && [ -s input.pdb ]; then
|
||||||
echo "S3 download successful (\$(wc -c < input.pdb) bytes)" >> run.log
|
echo "S3 download successful (\$(wc -c < input.pdb) bytes)" >> run.log
|
||||||
PDB_FILE="input.pdb"
|
PDB_FILE="input.pdb"
|
||||||
else
|
else
|
||||||
echo "S3 download failed" >> run.log
|
echo "S3 download failed - check run.log" >> run.log
|
||||||
|
cat run.log
|
||||||
rm -f input.pdb
|
rm -f input.pdb
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|||||||
Reference in New Issue
Block a user