Large reference/model files excluded from repo - to be staged to S3 or baked into Docker images.
66 lines
1.8 KiB
Plaintext
66 lines
1.8 KiB
Plaintext
// Opt in to the DSL2 syntax; the process definition in this file is written for it.
nextflow.enable.dsl=2
|
|
|
|
/*
 * VCF2PROT - turn a filtered variant set into a per-patient protein FASTA.
 *
 * Steps, all performed inside the task work directory:
 *   1. Copy the header lines (those starting with '#') of the original VCF
 *      into work1.vcf.
 *   2. Unpickle the filtered variants, flatten them into one table, drop
 *      duplicate rows, remove the text 'chr', sort by the first two columns
 *      and append the rows to work1.vcf as tab-separated lines.
 *   3. Remove the 'chr' prefix from the ##contig header lines and drop every
 *      line that matches the non-canonical contig patterns.
 *   4. Annotate consequences with `bcftools csq`, then run vcf2prot on the
 *      annotated file.
 *   5. Rename the produced FASTA to <patient>_transcript_id_mutations.fasta,
 *      where <patient> is the basename of `vcf` without the .vcf suffix.
 *
 * Inputs:
 *   vcf          - original VCF; supplies the header and the patient name.
 *   vcf_filtered - pickle file; it is iterated and element [1] of every item
 *                  is taken as a list of variant records.
 *                  (presumably one VCF data line per record - confirm with
 *                  the upstream process)
 * Output:
 *   every *.fasta file left in the work directory.
 *
 * The reference genome, GFF3 annotation and transcript FASTA are read from
 * fixed paths under /home/omic inside the container.
 */
process VCF2PROT {
    memory 2.GB

    container "${params.container_vcf2prot}"
    containerOptions "${params.containerOptions}"
    // echo true
    publishDir "${params.outdir}/vcf2prot", mode: 'copy'
    // Inputs are copied (not symlinked) into the work directory.
    stageInMode 'copy'
    // At most one instance of this process runs at a time.
    maxForks 1

    input:
    path vcf
    path vcf_filtered

    output:
    path "*.fasta"

    script:
    """
    #!/bin/bash
    # An explicit shebang replaces the default Nextflow shell (/bin/bash -ue),
    # so strict mode is switched on by hand: without it a failing step is
    # ignored and the later tools run on incomplete files.
    set -euo pipefail

    workdir=\$(pwd)
    patient_name=\$(basename "$vcf" .vcf)

    # Start the working VCF with the header of the original file.
    grep '^#' "$vcf" > work1.vcf

    /opt/conda/envs/vcf2prot/bin/python3 -c "
    import pickle
    import pandas as pd

    # NOTE(review): pickle.load can execute code embedded in the file; only
    # feed this step pickles produced by this pipeline.
    with open('$vcf_filtered', 'rb') as fp:
        groups = pickle.load(fp)

    # Element [1] of every item holds its variant records; flatten them.
    records = [record for group in groups for record in group[1]]
    variants = pd.DataFrame(records)

    # An empty table has no columns 0/1 to sort on; in that case only the
    # header is kept in work1.vcf.
    if not variants.empty:
        # Drop duplicates: repeated rows make the downstream tools fail; they
        # are a consequence of overlapping genes on the genome.
        variants = variants.drop_duplicates()
        # NOTE(review): this removes the text 'chr' from every string cell,
        # not only from the chromosome column - confirm that is intended.
        variants = variants.replace({'chr':''}, regex=True)
        variants = variants.sort_values(by=[0,1])

    # Write the fields joined by tabs. Formatting the numpy row and stripping
    # brackets, quotes and spaces would corrupt any field containing them.
    with open('work1.vcf', 'a') as out:
        for row in variants.itertuples(index=False, name=None):
            out.write('\\t'.join(str(field) for field in row) + '\\n')
    "

    # Make the ##contig header names match the records (no chr prefix).
    awk '{gsub(/^##contig=<ID=chr/,"##contig=<ID="); print}' work1.vcf > "\$workdir/work_2.vcf"

    # Remove non-canonical chromosomes (any line matching these patterns).
    grep -v '[0-9]_\\|Y_\\|X_\\|Un_' "\$workdir/work_2.vcf" > "\$workdir/work.vcf"

    bcftools csq -f /home/omic/Homo_sapiens.GRCh38.dna.toplevel.fa -g /home/omic/Homo_sapiens.GRCh38.112.gff3 "\$workdir/work.vcf" --phase a --ncsq 120 -O v -o "\$workdir/annotated.vcf"

    /home/omic/vcf2prot/bins/Linux/vcf2prot -f "\$workdir/annotated.vcf" -r /home/omic/vcf2prot/MANE_transcipts_reference.fasta -v -g st -o "\$workdir"

    # NOTE(review): this rename assumes vcf2prot leaves exactly one FASTA in
    # the work directory; mv fails if there are several.
    mv *.fasta "\${patient_name}_transcript_id_mutations.fasta"
    """
}
|