Initial commit: RoseTTAFold-All-Atom configured for Wes with Harbor images and s3:// paths
This commit is contained in:
121
main.nf
Normal file
121
main.nf
Normal file
@@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env nextflow

nextflow.enable.dsl=2

// ---------------------------------------------------------------------------
// Pipeline parameters (each can be overridden on the command line with --<name>)
// ---------------------------------------------------------------------------

// Inputs.
// protein_fasta is the only file handed to the process as a staged input.
// na_fasta / sm_file / covale_inputs are optional: an empty string leaves the
// corresponding section out of the generated inference config.
params.protein_fasta = 's3://omic/eureka/rosettafold-all-atom/input/protein.fasta'
params.na_fasta      = ''
params.na_type       = 'dna'   // written as input_type of the nucleic-acid entry
params.sm_file       = ''
params.sm_type       = 'sdf'   // written as input_type of the small-molecule entry
params.covale_inputs = ''

// Outputs.
// outdir is the publishDir target; job_name names the config file and the
// expected <job_name>.pdb / <job_name>_aux.pt result files.
params.outdir   = 's3://omic/eureka/rosettafold-all-atom/output'
params.job_name = 'rfaa_prediction'

// Model settings: written to the config as loader_params.MAXCYCLE.
params.max_cycle = 4

// Reference data locations, as seen from inside the container.
params.db_uniref30 = '/mnt/databases/UniRef30_2020_06'
params.db_bfd      = '/mnt/databases/bfd'
params.db_pdb100   = '/mnt/databases/pdb100_2021Mar03'
params.weights     = '/mnt/databases/RFAA_paper_weights.pt'
|
||||
|
||||
// RFAA_PREDICT: runs RoseTTAFold-All-Atom inference for one protein FASTA.
//
// Input:
//   protein_fasta  - staged FASTA file; referenced as chain A in the config.
// Outputs (collected from the task work directory and published to params.outdir):
//   <job_name>.pdb, <job_name>_aux.pt, run.log
//
// The script generates two Hydra config files in ./config (a job config that
// inherits from a generated base.yaml), exports the MSA database locations,
// then runs rf2aa.run_inference from the RoseTTAFold-All-Atom install dir.
//
// NOTE(review): params.na_fasta and params.sm_file are written into the config
// verbatim and are NOT staged as process inputs, so they must already be paths
// readable from inside the container.
// NOTE(review): the generated base.yaml is used in place of the one shipped with
// RoseTTAFold-All-Atom; confirm it defines every key run_inference reads.
process RFAA_PREDICT {
    container 'harbor.cluster.omic.ai/omic/rosettafold-all-atom:latest'
    containerOptions '--rm --gpus all -v /mnt:/mnt'
    publishDir params.outdir, mode: 'copy'
    stageInMode 'copy'

    input:
    path protein_fasta

    output:
    path "${params.job_name}.pdb"
    path "${params.job_name}_aux.pt"
    path "run.log"

    script:
    // Optional YAML sections. They are kept at column 0 (as are the heredoc
    // bodies below) so the YAML nesting and the EOF terminators are correct
    // whether or not leading indentation is stripped from the script.
    def na_block = params.na_fasta ? """
na_inputs:
  B:
    fasta: ${params.na_fasta}
    input_type: "${params.na_type}"
""" : ""
    def sm_block = params.sm_file ? """
sm_inputs:
  C:
    input: ${params.sm_file}
    input_type: "${params.sm_type}"
""" : ""
    def covale_block = params.covale_inputs ? """
covale_inputs: "${params.covale_inputs}"
""" : ""
    """
    # Without pipefail the exit status of 'python ... | tee' is tee's, which
    # would hide a failed inference run.
    set -o pipefail

    # Remember the task work directory before changing into the install dir.
    task_dir="\$PWD"

    mkdir -p config

    # Create hydra config file
    cat > config/${params.job_name}.yaml << EOF
defaults:
  - base

job_name: "${params.job_name}"

protein_inputs:
  A:
    fasta_file: \$PWD/${protein_fasta}
${na_block}${sm_block}${covale_block}
loader_params:
  MAXCYCLE: ${params.max_cycle}
EOF

    # Create base config with database paths.
    # output_path is the absolute task directory: inference is started from
    # /opt/RoseTTAFold-All-Atom, so a relative "." would not place the results
    # where the output declarations above look for them.
    cat > config/base.yaml << EOF
output_path: "\$PWD"

database_params:
  sequencedb: "${params.db_pdb100}/pdb100_2021Mar03"
  hhdb: "${params.db_pdb100}/pdb100_2021Mar03"
  command: "/opt/RoseTTAFold-All-Atom/make_msa.sh"

checkpoint_path: "${params.weights}"

loader_params:
  MAXCYCLE: ${params.max_cycle}

model_params:
  n_extra_block: 4
  n_main_block: 32
  n_ref_block: 4
  d_msa: 256
  d_msa_full: 64
  d_pair: 128
  d_templ: 64
  n_head_msa: 8
  n_head_pair: 4
  n_head_templ: 4
  d_hidden: 32
  d_hidden_templ: 32
  p_drop: 0.0
EOF

    # Set environment variables
    export DB_UR30="${params.db_uniref30}/UniRef30_2020_06"
    export DB_BFD="${params.db_bfd}/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"
    export BLASTMAT="/opt/RoseTTAFold-All-Atom/blast-2.2.26/data/"

    # Run RFAA
    cd /opt/RoseTTAFold-All-Atom
    python -m rf2aa.run_inference \
        --config-path "\$task_dir/config" \
        --config-name ${params.job_name} 2>&1 | tee "\$task_dir/run.log"

    cd "\$task_dir"
    """
}
|
||||
|
||||
// Entry workflow: runs one RFAA_PREDICT task per file matched by
// params.protein_fasta.
workflow {
    // checkIfExists makes a wrong or missing path fail the run immediately;
    // without it fromPath emits nothing and the pipeline ends without
    // executing any task.
    protein_ch = Channel.fromPath(params.protein_fasta, checkIfExists: true)
    RFAA_PREDICT(protein_ch)
}
|
||||
Reference in New Issue
Block a user