122 lines
2.8 KiB
Plaintext
122 lines
2.8 KiB
Plaintext
#!/usr/bin/env nextflow
|
|
|
|
nextflow.enable.dsl = 2

/*
 * Pipeline parameter defaults. Any of these can be overridden on the command
 * line (--<name> <value>) or from a params file.
 */

// ---- Inputs ---------------------------------------------------------------
// Protein FASTA; staged into the task and referenced as chain A in the job config.
params.protein_fasta = 's3://omic/eureka/rosettafold-all-atom/input/protein.fasta'
// Optional nucleic-acid FASTA (chain B). An empty string omits the na_inputs section.
params.na_fasta      = ''
// Value written to input_type for the nucleic-acid chain.
params.na_type       = 'dna'
// Optional small-molecule file (chain C). An empty string omits the sm_inputs section.
params.sm_file       = ''
// Value written to input_type for the small-molecule chain.
params.sm_type       = 'sdf'
// Optional covalent-bond specification. An empty string omits the covale_inputs entry.
params.covale_inputs = ''

// ---- Outputs --------------------------------------------------------------
// Destination for published results.
params.outdir   = 's3://omic/eureka/rosettafold-all-atom/output'
// Used as the config name and as the prefix of the produced .pdb / _aux.pt files.
params.job_name = 'rfaa_prediction'

// ---- Model ----------------------------------------------------------------
// Written to loader_params.MAXCYCLE in the generated configs.
params.max_cycle = 4

// ---- Databases (paths as seen inside the container) -------------------------
params.db_uniref30 = '/mnt/databases/UniRef30_2020_06'
params.db_bfd      = '/mnt/databases/bfd'
params.db_pdb100   = '/mnt/databases/pdb100_2021Mar03'
params.weights     = '/mnt/databases/RFAA_paper_weights.pt'
|
|
|
|
/*
 * RFAA_PREDICT
 *
 * Generates a job config and a base config under ./config in the task
 * directory, exports the MSA database locations, and runs
 * `python -m rf2aa.run_inference` from /opt/RoseTTAFold-All-Atom.
 *
 * Input:
 *   protein_fasta  - protein FASTA file, staged (copied) into the task directory.
 *
 * Output (published to params.outdir):
 *   <job_name>.pdb, <job_name>_aux.pt, run.log
 *
 * Optional nucleic-acid, small-molecule and covalent-bond sections are added
 * to the job config only when the corresponding parameter is non-empty.
 * NOTE(review): params.na_fasta and params.sm_file are written into the config
 * as given and are not staged by Nextflow, so they must be paths readable
 * inside the container (e.g. under the /mnt bind mount) - confirm with callers.
 */
process RFAA_PREDICT {
    container 'harbor.cluster.omic.ai/omic/rosettafold-all-atom:latest'
    containerOptions '--rm --gpus all -v /mnt:/mnt'
    publishDir params.outdir, mode: 'copy'
    stageInMode 'copy'

    input:
    path protein_fasta

    output:
    path "${params.job_name}.pdb"
    path "${params.job_name}_aux.pt"
    path "run.log"

    script:
    // The YAML fragments and the script body below are kept at column 0 on
    // purpose: they are pasted verbatim into heredocs, so both the YAML nesting
    // and the EOF terminators must not depend on any indent stripping.
    def na_block = params.na_fasta ? """
na_inputs:
  B:
    fasta: ${params.na_fasta}
    input_type: "${params.na_type}"
""" : ""
    def sm_block = params.sm_file ? """
sm_inputs:
  C:
    input: ${params.sm_file}
    input_type: "${params.sm_type}"
""" : ""
    def covale_block = params.covale_inputs ? """
covale_inputs: "${params.covale_inputs}"
""" : ""
    """
# Without pipefail the exit status of the inference pipeline below would be
# that of tee, hiding a failed run.
set -o pipefail

# Remember the task directory before changing into the RFAA install directory.
TASK_DIR="\$PWD"

mkdir -p config

# Create hydra config file
cat > config/${params.job_name}.yaml << EOF
defaults:
  - base

job_name: "${params.job_name}"

protein_inputs:
  A:
    fasta_file: \$TASK_DIR/${protein_fasta}
${na_block}${sm_block}${covale_block}
loader_params:
  MAXCYCLE: ${params.max_cycle}
EOF

# Create base config with database paths.
# output_path is absolute so that results land in the task directory even
# though inference is started from /opt/RoseTTAFold-All-Atom.
# NOTE(review): the key set is reproduced as-is; confirm it matches the RFAA
# version shipped in the container.
cat > config/base.yaml << EOF
output_path: "\$TASK_DIR"

database_params:
  sequencedb: "${params.db_pdb100}/pdb100_2021Mar03"
  hhdb: "${params.db_pdb100}/pdb100_2021Mar03"
  command: "/opt/RoseTTAFold-All-Atom/make_msa.sh"

checkpoint_path: "${params.weights}"

loader_params:
  MAXCYCLE: ${params.max_cycle}

model_params:
  n_extra_block: 4
  n_main_block: 32
  n_ref_block: 4
  d_msa: 256
  d_msa_full: 64
  d_pair: 128
  d_templ: 64
  n_head_msa: 8
  n_head_pair: 4
  n_head_templ: 4
  d_hidden: 32
  d_hidden_templ: 32
  p_drop: 0.0
EOF

# Set environment variables
export DB_UR30="${params.db_uniref30}/UniRef30_2020_06"
export DB_BFD="${params.db_bfd}/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"
export BLASTMAT="/opt/RoseTTAFold-All-Atom/blast-2.2.26/data/"

# Run RFAA from its install directory; stdout and stderr are mirrored to run.log
cd /opt/RoseTTAFold-All-Atom
python -m rf2aa.run_inference \
    --config-path "\$TASK_DIR/config" \
    --config-name ${params.job_name} 2>&1 | tee "\$TASK_DIR/run.log"

cd "\$TASK_DIR"
"""
}
|
|
|
|
/*
 * Entry workflow: runs one RFAA_PREDICT task per file matched by
 * params.protein_fasta.
 */
workflow {
    // checkIfExists makes a wrong or missing input path fail at launch with a
    // clear error instead of surfacing later during task staging.
    protein_ch = Channel.fromPath(params.protein_fasta, checkIfExists: true)
    RFAA_PREDICT(protein_ch)
}
|