Initial commit: Chai-1 protein structure prediction pipeline for WES

- Nextflow pipeline using chai1 Docker image from Harbor
- S3-based input/output paths (s3://omic/eureka/chai-lab/)
- GPU-accelerated protein folding with MSA support

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-16 12:55:08 +01:00
commit f971fd0e21
26 changed files with 1289 additions and 0 deletions

45
main.nf Executable file
View File

@@ -0,0 +1,45 @@
#!/usr/bin/env nextflow

// Select the DSL2 syntax (process/workflow blocks) for this script.
nextflow.enable.dsl = 2

// ---------------------------------------------------------------------------
// Pipeline parameters. Each one is a default that can be overridden at launch.
// ---------------------------------------------------------------------------

// Directory that the workflow globs for '*.fasta' input files.
params.input_dir   = 's3://omic/eureka/chai-lab/input'

// Destination that the CHAI1 process publishes its result directories to.
params.outdir      = 's3://omic/eureka/chai-lab/output'

// When true, the MSA-server options are added to the `chai fold` command.
params.use_msa     = true

// URL handed to `--msa-server-url` when MSA usage is enabled.
params.msa_server  = 'https://api.colabfold.com'

// Value passed to `chai fold --num-diffn-samples`.
params.num_samples = 5
/*
 * CHAI1 - run `chai fold` on a single FASTA file.
 *
 * Input:
 *   fasta       - one FASTA file, staged into the task directory by copy.
 * Output:
 *   output_dir  - directory named after the FASTA file with its last
 *                 extension removed (e.g. `my.protein.fasta` -> `my.protein`),
 *                 published to `params.outdir`.
 *
 * Parameters read: params.outdir, params.use_msa, params.msa_server,
 * params.num_samples.
 */
process CHAI1 {
    container 'harbor.cluster.omic.ai/omic/chai1:latest'
    publishDir params.outdir, mode: 'copy'
    stageInMode 'copy'
    // Run at most one instance of this process at a time.
    maxForks 1

    input:
    path fasta

    output:
    // `baseName` drops only the final extension. The previous `simpleName`
    // cut at the FIRST dot, so a file such as `my.protein.fasta` declared the
    // output `my` while the script created `my.protein`, failing the task.
    path "${fasta.baseName}", emit: output_dir

    script:
    // Single source of truth for the directory name, shared with `output:`.
    def out_dir = fasta.baseName

    // Assemble the CLI options in Groovy rather than interpolating the
    // parameter into a shell `if`, which only worked for the literal
    // commands `true` and `false`.
    def fold_args = []
    if ("${params.use_msa}".toBoolean()) {
        fold_args << "--use-msa-server --msa-server-url \"${params.msa_server}\""
    }
    fold_args << "--num-diffn-samples ${params.num_samples}"
    """
    mkdir -p "${out_dir}"

    # Run CHAI1 (paths are quoted so names containing spaces survive)
    chai fold \\
        ${fold_args.join(' ')} \\
        "${fasta}" \\
        "${out_dir}"
    """
}
/*
 * Entry workflow: run CHAI1 once for every `*.fasta` file found directly
 * under `params.input_dir`.
 */
workflow {
    // Strip trailing slashes so an input_dir given as '.../input/' does not
    // yield a '//*.fasta' pattern, which would match nothing on object stores.
    def input_dir = params.input_dir.toString().replaceAll('/+$', '')

    fasta_ch = Channel
        .fromPath("${input_dir}/*.fasta")
        // Fail loudly instead of completing with zero tasks when no input
        // file matches the pattern.
        .ifEmpty { error "No .fasta files found in ${params.input_dir}" }

    CHAI1(fasta_ch)
}