Initial commit: RoseTTAFold-All-Atom configured for Wes with Harbor images and s3:// paths

This commit is contained in:
2026-03-17 17:57:24 +01:00
commit 6eef3bb748
108 changed files with 28144 additions and 0 deletions

121
main.nf Normal file
View File

@@ -0,0 +1,121 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2

// ---------------------------------------------------------------------------
// Pipeline parameters (each can be overridden on the command line: --name value)
// ---------------------------------------------------------------------------

// Inputs. The protein FASTA is the only input staged by the workflow; the
// remaining entries are optional and an empty string leaves the matching
// section out of the generated inference config.
params.protein_fasta = 's3://omic/eureka/rosettafold-all-atom/input/protein.fasta'
params.na_fasta      = ''     // nucleic-acid FASTA, written to the na_inputs section
params.na_type       = 'dna'  // input_type recorded for the nucleic-acid entry
params.sm_file       = ''     // small-molecule file, written to the sm_inputs section
params.sm_type       = 'sdf'  // input_type recorded for the small-molecule entry
params.covale_inputs = ''     // value written verbatim as covale_inputs

// Outputs. job_name also determines the config file name and the names of
// the result files collected by the process.
params.outdir   = 's3://omic/eureka/rosettafold-all-atom/output'
params.job_name = 'rfaa_prediction'

// Model settings. Written as loader_params.MAXCYCLE in the generated configs.
params.max_cycle = 4

// Databases and weights, referenced by path from inside the container.
params.db_uniref30 = '/mnt/databases/UniRef30_2020_06'
params.db_bfd      = '/mnt/databases/bfd'
params.db_pdb100   = '/mnt/databases/pdb100_2021Mar03'
params.weights     = '/mnt/databases/RFAA_paper_weights.pt'
/*
 * RFAA_PREDICT
 *
 * Generates the Hydra configuration for one prediction job and runs
 * `python -m rf2aa.run_inference` from /opt/RoseTTAFold-All-Atom inside the
 * container.
 *
 * Input:
 *   protein_fasta - protein FASTA file, staged (copied) into the task work dir
 *                   and referenced as chain A.
 *
 * Output (published to params.outdir with mode 'copy'):
 *   <job_name>.pdb, <job_name>_aux.pt - result files expected in the work dir
 *   run.log                           - combined stdout/stderr of the inference run
 *
 * Parameters read directly from `params`: job_name, max_cycle, na_fasta,
 * na_type, sm_file, sm_type, covale_inputs, db_uniref30, db_bfd, db_pdb100,
 * weights, outdir.
 *
 * NOTE(review): params.na_fasta and params.sm_file are written into the config
 * as plain strings and are NOT staged by Nextflow, so they must already be
 * paths readable from inside the container (e.g. under the /mnt bind mount).
 * Remote URIs such as s3:// would need to become `path` inputs - confirm.
 *
 * NOTE(review): the YAML nesting below was reconstructed from the key order;
 * verify it against the config schema shipped with the container image.
 */
process RFAA_PREDICT {
    container 'harbor.cluster.omic.ai/omic/rosettafold-all-atom:latest'
    // Expose all GPUs and bind-mount /mnt, where the databases and weights live.
    containerOptions '--rm --gpus all -v /mnt:/mnt'
    publishDir params.outdir, mode: 'copy'
    stageInMode 'copy'

    input:
    path protein_fasta

    output:
    path "${params.job_name}.pdb"
    path "${params.job_name}_aux.pt"
    path "run.log"

    script:
    // Left margin shared by every line of the script body below. Nextflow
    // strips the common indentation before running the script, so the optional
    // YAML lines must carry the same margin to stay aligned inside the heredoc.
    def margin = ' ' * 4
    def optional_entries = []
    if (params.na_fasta) {
        optional_entries.addAll([
            'na_inputs:',
            '  B:',
            "    fasta: ${params.na_fasta}",
            "    input_type: \"${params.na_type}\""
        ])
    }
    if (params.sm_file) {
        optional_entries.addAll([
            'sm_inputs:',
            '  C:',
            "    input: ${params.sm_file}",
            "    input_type: \"${params.sm_type}\""
        ])
    }
    if (params.covale_inputs) {
        optional_entries.add("covale_inputs: \"${params.covale_inputs}\"")
    }
    // Empty when no optional input is set; the heredoc then just gets a blank line.
    def optional_yaml = optional_entries.collect { line -> margin + line }.join('\n')
    """
    # Without pipefail the exit status of the inference pipeline would be that
    # of 'tee', hiding a failed prediction.
    set -o pipefail

    mkdir -p config

    # Create hydra config file
    cat > config/${params.job_name}.yaml << EOF
    defaults:
      - base

    job_name: "${params.job_name}"
    protein_inputs:
      A:
        fasta_file: \$PWD/${protein_fasta}
${optional_yaml}
    loader_params:
      MAXCYCLE: ${params.max_cycle}
    EOF

    # Create base config with database paths.
    # output_path is the absolute task work dir (expanded now, before the cd
    # below) so results are written where Nextflow collects the outputs.
    cat > config/base.yaml << EOF
    output_path: "\$PWD"
    database_params:
      sequencedb: "${params.db_pdb100}/pdb100_2021Mar03"
      hhdb: "${params.db_pdb100}/pdb100_2021Mar03"
      command: "/opt/RoseTTAFold-All-Atom/make_msa.sh"
    checkpoint_path: "${params.weights}"
    loader_params:
      MAXCYCLE: ${params.max_cycle}
    model_params:
      n_extra_block: 4
      n_main_block: 32
      n_ref_block: 4
      d_msa: 256
      d_msa_full: 64
      d_pair: 128
      d_templ: 64
      n_head_msa: 8
      n_head_pair: 4
      n_head_templ: 4
      d_hidden: 32
      d_hidden_templ: 32
      p_drop: 0.0
    EOF

    # Set environment variables
    export DB_UR30="${params.db_uniref30}/UniRef30_2020_06"
    export DB_BFD="${params.db_bfd}/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"
    export BLASTMAT="/opt/RoseTTAFold-All-Atom/blast-2.2.26/data/"

    # Run RFAA from its install directory; \$OLDPWD is the task work dir.
    cd /opt/RoseTTAFold-All-Atom
    python -m rf2aa.run_inference \
        --config-path "\$OLDPWD/config" \
        --config-name ${params.job_name} 2>&1 | tee "\$OLDPWD/run.log"
    cd "\$OLDPWD"
    """
}
// Entry workflow: feed every file matching params.protein_fasta to RFAA_PREDICT.
workflow {
    Channel.fromPath(params.protein_fasta) | RFAA_PREDICT
}