#!/usr/bin/env nextflow
nextflow.enable.dsl=2

/*
 * RoseTTAFold All-Atom prediction pipeline.
 *
 * Stages a protein FASTA, writes a Hydra job config plus a base config into
 * the task directory, runs `rf2aa.run_inference` inside the RFAA container and
 * publishes the predicted structure, the auxiliary tensor file and the run log.
 */

// Input parameters
params.protein_fasta = 's3://omic/eureka/rosettafold-all-atom/input/protein.fasta'
// NOTE: na_fasta and sm_file are written into the job config verbatim and are
// NOT staged by Nextflow, so they must be paths readable from inside the
// container (e.g. under the /mnt bind mount). Leave empty to omit the section.
params.na_fasta = ''
params.na_type = 'dna'
params.sm_file = ''
params.sm_type = 'sdf'
params.covale_inputs = ''

// Output parameters
params.outdir = 's3://omic/eureka/rosettafold-all-atom/output'
params.job_name = 'rfaa_prediction'

// Model parameters
params.max_cycle = 4

// Database parameters
params.db_uniref30 = '/mnt/databases/UniRef30_2020_06'
params.db_bfd = '/mnt/databases/bfd'
params.db_pdb100 = '/mnt/databases/pdb100_2021Mar03'
params.weights = '/mnt/databases/RFAA_paper_weights.pt'

/*
 * Run one RoseTTAFold All-Atom inference job.
 *
 * Input : protein_fasta - protein sequence, staged (copied) into the task dir.
 * Output: <job_name>.pdb, <job_name>_aux.pt and run.log, published to params.outdir.
 *
 * Optional nucleic-acid, small-molecule and covalent-bond sections are added to
 * the job config only when the corresponding parameter is non-empty.
 */
process RFAA_PREDICT {
    container 'harbor.cluster.omic.ai/omic/rosettafold-all-atom:latest'
    containerOptions '--rm --gpus all -v /mnt:/mnt'
    publishDir params.outdir, mode: 'copy'
    stageInMode 'copy'

    input:
    path protein_fasta

    output:
    path "${params.job_name}.pdb"
    path "${params.job_name}_aux.pt"
    path "run.log"

    script:
    // Optional YAML fragments interpolated into the job-config heredoc below.
    // Keep them indented exactly like the script body: the rendered script is
    // de-indented as a whole, and a fragment line with less indentation would
    // leave the heredoc `EOF` terminators indented (and therefore unmatched).
    def na_block = params.na_fasta ? """
    na_inputs:
      B:
        fasta: ${params.na_fasta}
        input_type: "${params.na_type}"
    """ : ""

    def sm_block = params.sm_file ? """
    sm_inputs:
      C:
        input: ${params.sm_file}
        input_type: "${params.sm_type}"
    """ : ""

    def covale_block = params.covale_inputs ? """
    covale_inputs: "${params.covale_inputs}"
    """ : ""

    """
    # Fail the task when inference fails; without pipefail the exit status of
    # the `python | tee` pipeline would be that of tee.
    set -euo pipefail

    # Absolute task directory: inference runs from the RFAA install directory,
    # so every path handed to it must be absolute.
    TASK_DIR="\$PWD"

    mkdir -p config

    # Create hydra config file
    cat > config/${params.job_name}.yaml << EOF
    defaults:
      - base
    job_name: "${params.job_name}"
    protein_inputs:
      A:
        fasta_file: \$TASK_DIR/${protein_fasta}
    ${na_block}${sm_block}${covale_block}
    loader_params:
      MAXCYCLE: ${params.max_cycle}
    EOF

    # Create base config with database paths.
    # output_path points at the task directory so the declared outputs are
    # written where Nextflow collects them ("." would resolve against the
    # directory inference is launched from).
    cat > config/base.yaml << EOF
    output_path: "\$TASK_DIR"
    database_params:
      sequencedb: "${params.db_pdb100}/pdb100_2021Mar03"
      hhdb: "${params.db_pdb100}/pdb100_2021Mar03"
      command: "/opt/RoseTTAFold-All-Atom/make_msa.sh"
    checkpoint_path: "${params.weights}"
    loader_params:
      MAXCYCLE: ${params.max_cycle}
    model_params:
      n_extra_block: 4
      n_main_block: 32
      n_ref_block: 4
      d_msa: 256
      d_msa_full: 64
      d_pair: 128
      d_templ: 64
      n_head_msa: 8
      n_head_pair: 4
      n_head_templ: 4
      d_hidden: 32
      d_hidden_templ: 32
      p_drop: 0.0
    EOF

    # Set environment variables
    export DB_UR30="${params.db_uniref30}/UniRef30_2020_06"
    export DB_BFD="${params.db_bfd}/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"
    export BLASTMAT="/opt/RoseTTAFold-All-Atom/blast-2.2.26/data/"

    # Run RFAA
    cd /opt/RoseTTAFold-All-Atom
    python -m rf2aa.run_inference \
        --config-path "\$TASK_DIR/config" \
        --config-name ${params.job_name} 2>&1 | tee "\$TASK_DIR/run.log"
    cd "\$TASK_DIR"
    """
}

workflow {
    // checkIfExists reports a missing protein FASTA at launch instead of at staging time.
    RFAA_PREDICT(Channel.fromPath(params.protein_fasta, checkIfExists: true))
}