- Add main.nf as WES entry point with PVC-based paths - Update nextflow.config with k8s/k8s_gpu profiles (eureka-pvc) - Update params.json defaults to /omic/eureka/digital-patients/ paths - Remove stageInMode 'copy' from corto and vcf2prot for k8s compat - Add reassemble.nf for one-time chunk reassembly on PVC
65 lines
1.8 KiB
Plaintext
65 lines
1.8 KiB
Plaintext
// Select Nextflow DSL version 2 for this script.
nextflow.enable.dsl=2
|
|
|
|
/*
 * VCF2PROT
 *
 * Rebuilds a VCF from the header of `vcf` plus the filtered records stored in
 * the pickle `vcf_filtered`, annotates it with `bcftools csq`, and runs
 * `vcf2prot` on the annotated file.
 *
 * Inputs:
 *   vcf          - VCF file; only its '#' header lines are reused, and its
 *                  basename (without `.vcf`) names the output.
 *   vcf_filtered - pickle file; an iterable whose items carry a list of record
 *                  rows at index 1 (rows are concatenated into one table).
 *
 * Output:
 *   the FASTA file(s) left in the task directory; the script renames the
 *   vcf2prot result to `<patient_name>_transcript_id_mutations.fasta`.
 *
 * Reference genome, GFF3 annotation and vcf2prot binary are read from fixed
 * paths under /home/omic inside the container.
 */
process VCF2PROT {
    memory 2.GB

    container "${params.container_vcf2prot}"
    containerOptions "${params.containerOptions}"
    // echo true
    publishDir "${params.outdir}/vcf2prot", mode: 'copy'
    maxForks 1

    input:
    path vcf
    path vcf_filtered

    output:
    path "*.fasta"

    script:
    """
    #!/bin/bash
    # Fail fast: with an explicit shebang the script is run by plain bash, so
    # enable strict mode here to stop on the first failing step instead of
    # carrying on with partial files.
    set -euo pipefail

    workdir=\$(pwd)
    patient_name=\$(basename "$vcf" .vcf)

    # Start the working VCF with the header lines of the original file.
    grep '^#' "$vcf" > work1.vcf

    /opt/conda/envs/vcf2prot/bin/python3 -c "
    import pickle
    import pandas as pd

    # NOTE: pickle.load can execute arbitrary code embedded in the file.
    # Only feed this step pickles produced by trusted upstream processes.
    with open('$vcf_filtered', 'rb') as fp:
        vcf_file = pickle.load(fp)

    # Each item carries its record rows at index 1; flatten them into one list.
    records = []
    for item in vcf_file:
        records.extend(item[1])

    vcf_filtered = pd.DataFrame(records)

    # An empty table has no columns 0/1 to sort on; in that case leave
    # work1.vcf as a header-only file.
    if not vcf_filtered.empty:
        # drop duplicates, it returns error, consequence of overlapping genes on genome
        vcf_filtered = vcf_filtered.drop_duplicates()
        # Remove every 'chr' occurrence from string values.
        vcf_filtered = vcf_filtered.replace({'chr':''}, regex=True)
        vcf_filtered = vcf_filtered.sort_values(by=[0,1])

        # Append one tab-separated line per record. Joining the row values
        # directly keeps field contents intact (brackets, quotes, spaces) and
        # never abbreviates wide rows, unlike printing a numpy array.
        with open('work1.vcf', 'a') as f:
            for row in vcf_filtered.itertuples(index=False, name=None):
                f.write('\\t'.join(map(str, row)) + '\\n')
    "

    # Strip the 'chr' prefix from contig IDs in the header as well.
    awk '{gsub(/^##contig=<ID=chr/,"##contig=<ID="); print}' work1.vcf > "\$workdir/work_2.vcf"

    # Remove non-canonical chromosomes: drop every line matching one of the
    # patterns below (digit followed by '_', 'Y_', 'X_' or 'Un_').
    grep -v '[0-9]_\\|Y_\\|X_\\|Un_' "\$workdir/work_2.vcf" > "\$workdir/work.vcf"

    bcftools csq -f /home/omic/Homo_sapiens.GRCh38.dna.toplevel.fa -g /home/omic/Homo_sapiens.GRCh38.112.gff3 "\$workdir/work.vcf" --phase a --ncsq 120 -O v -o "\$workdir/annotated.vcf"
    /home/omic/vcf2prot/bins/Linux/vcf2prot -f "\$workdir/annotated.vcf" -r /home/omic/vcf2prot/MANE_transcipts_reference.fasta -v -g st -o "\$workdir"

    # Rename the generated FASTA after the patient (expects a single match).
    mv *.fasta "\${patient_name}_transcript_id_mutations.fasta"
    """
}
|