Merge branch 'single'
This commit is contained in:
0
Dockerfile
Normal file → Executable file
0
Dockerfile
Normal file → Executable file
113
main.nf
Normal file → Executable file
113
main.nf
Normal file → Executable file
@@ -1,105 +1,42 @@
|
|||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
nextflow.enable.dsl=2
|
nextflow.enable.dsl=2
|
||||||
|
|
||||||
|
// Parameters
|
||||||
|
params.TPM = '/omic/eureka/corto/20002_1289_female_patient_0_TPM.csv'
|
||||||
|
params.regulon = '/omic/eureka/corto/regulon.rda'
|
||||||
|
params.outdir = '/omic/eureka/corto/output'
|
||||||
|
|
||||||
process CORTO {
|
process CORTO {
|
||||||
container "${params.container}"
|
container 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
|
||||||
containerOptions "${params.containerOptions}"
|
publishDir params.outdir, mode: 'copy'
|
||||||
publishDir "${params.outdir}/${params.project_name}", mode: 'copy'
|
|
||||||
debug true
|
debug true
|
||||||
// maxForks 1
|
|
||||||
stageInMode 'copy'
|
|
||||||
|
|
||||||
input:
|
input:
|
||||||
path gene_expression_matrix
|
path TPM
|
||||||
path centroid_list
|
path regulon
|
||||||
path cnv_data // This could be optional
|
|
||||||
|
|
||||||
// Define output channels
|
|
||||||
output:
|
output:
|
||||||
path "regulon.rda", emit: regulon
|
path "*_metabolome.csv", emit: csv_metabol
|
||||||
path "*.csv", emit: csv_regulon
|
|
||||||
path "*.log", optional: true, emit: logs // if you have log files
|
|
||||||
|
|
||||||
script:
|
script:
|
||||||
"""
|
"""
|
||||||
#!/bin/bash
|
#!/usr/bin/Rscript
|
||||||
|
|
||||||
# Create an R script
|
|
||||||
cat <<EOF > corto_analysis.R
|
|
||||||
# Print the R version
|
|
||||||
print(R.version.string)
|
|
||||||
|
|
||||||
# Load the corto library
|
|
||||||
library(corto)
|
library(corto)
|
||||||
|
library(data.table)
|
||||||
|
|
||||||
# Function to load data
|
TPM <- as.matrix(fread("$TPM"),rownames=1)
|
||||||
loadData <- function(file_name, expected_var) {
|
|
||||||
load(file_name)
|
|
||||||
if (exists(expected_var)) {
|
|
||||||
data <- get(expected_var)
|
|
||||||
} else {
|
|
||||||
stop(paste("Object", expected_var, "not found in", file_name))
|
|
||||||
}
|
|
||||||
return(data)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Load the input matrix
|
load("$regulon")
|
||||||
inmat <- loadData("${gene_expression_matrix}", "inmat")
|
|
||||||
print("Dimensions of inmat before any operation:")
|
|
||||||
print(dim(inmat))
|
|
||||||
|
|
||||||
# Load the centroids
|
predicted<-mra(TPM, regulon=regulon)
|
||||||
centroids <- loadData("${centroid_list}", "centroids")
|
|
||||||
print("Length of centroids:")
|
|
||||||
print(length(centroids))
|
|
||||||
|
|
||||||
# Run corto with specified parameters
|
|
||||||
regulon <- corto(inmat, centroids=centroids, nbootstraps=10, p=1e-30, nthreads=2)
|
|
||||||
|
|
||||||
# Save the regulon object for later use
|
|
||||||
save(regulon, file="regulon.rda")
|
|
||||||
|
|
||||||
# Transform regulon into a data frame
|
|
||||||
regulon_to_df <- function(regulon) {
|
|
||||||
result_df <- data.frame(TF = character(),
|
|
||||||
Target = character(),
|
|
||||||
TFMode = numeric(),
|
|
||||||
Likelihood = numeric(),
|
|
||||||
stringsAsFactors = FALSE)
|
|
||||||
|
|
||||||
for (tf in names(regulon)) {
|
|
||||||
tf_data <- regulon[[tf]]
|
|
||||||
if (is.null(tf_data\$tfmode) || is.null(tf_data\$likelihood)) next
|
|
||||||
|
|
||||||
for (i in seq_along(tf_data\$tfmode)) {
|
|
||||||
tf_mode <- tf_data\$tfmode[[i]]
|
|
||||||
likelihood <- tf_data\$likelihood[[i]]
|
|
||||||
target_name <- names(tf_data\$tfmode)[i]
|
|
||||||
tf_df <- data.frame(TF = tf,
|
|
||||||
Target = target_name,
|
|
||||||
TFMode = tf_mode,
|
|
||||||
Likelihood = likelihood,
|
|
||||||
stringsAsFactors = FALSE)
|
|
||||||
result_df <- rbind(result_df, tf_df)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return(result_df)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Check if regulon is a list and transform it
|
|
||||||
if (is.list(regulon)) {
|
|
||||||
regulon_df <- regulon_to_df(regulon)
|
|
||||||
write.csv(regulon_df, file="regulon.csv", row.names=FALSE)
|
|
||||||
} else {
|
|
||||||
warning("Regulon object is not a list. Custom transformation needed.")
|
|
||||||
}
|
|
||||||
|
|
||||||
EOF
|
|
||||||
|
|
||||||
# Execute the R script
|
|
||||||
Rscript corto_analysis.R
|
|
||||||
|
|
||||||
|
name = strsplit(strsplit("$TPM", split = "/")[[1]][length(strsplit("$TPM", split = "/")[[1]])], split = "_TPM.csv")[[1]][1]
|
||||||
|
name = paste(name, "_metabolome.csv", sep="")
|
||||||
|
write.csv(predicted, name)
|
||||||
"""
|
"""
|
||||||
|
}
|
||||||
|
|
||||||
|
workflow {
|
||||||
|
CORTO(Channel.of(file(params.TPM)), Channel.of(file(params.regulon)))
|
||||||
}
|
}
|
||||||
|
|||||||
41
nextflow.config
Normal file → Executable file
41
nextflow.config
Normal file → Executable file
@@ -1,15 +1,34 @@
|
|||||||
manifest {
|
manifest {
|
||||||
name = 'corto'
|
name = 'corto'
|
||||||
author = 'omic'
|
author = 'omic'
|
||||||
recurseSubmodules = true
|
homePage = 'https://trs-gitea.cluster.omic.ai/omic/corto'
|
||||||
homePage = 'https://gitlab.com/omic/next/registry/tools/corto'
|
description = 'CORTO - Correlation Tool for gene regulatory network analysis'
|
||||||
description = ''
|
mainScript = 'main.nf'
|
||||||
mainScript = 'main.nf'
|
version = '1.0.0'
|
||||||
nextflowVersion = '!>=21.04.3'
|
defaultBranch = 'master'
|
||||||
defaultBranch = 'master'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
docker {
|
params {
|
||||||
enabled = true
|
TPM = null
|
||||||
temp = 'auto'
|
regulon = null
|
||||||
|
outdir = null
|
||||||
|
}
|
||||||
|
|
||||||
|
profiles {
|
||||||
|
standard {
|
||||||
|
docker {
|
||||||
|
enabled = true
|
||||||
|
temp = 'auto'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
k8s {
|
||||||
|
process {
|
||||||
|
container = 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
process {
|
||||||
|
cpus = 1
|
||||||
|
memory = '4 GB'
|
||||||
}
|
}
|
||||||
|
|||||||
49
params.json
Normal file
49
params.json
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
{
|
||||||
|
"params": {
|
||||||
|
"TPM": {
|
||||||
|
"type": "file",
|
||||||
|
"description": "Path to TPM (Transcripts Per Million) CSV file",
|
||||||
|
"default": "s3://omic/eureka/corto/20002_1289_female_patient_0_TPM.csv",
|
||||||
|
"required": true,
|
||||||
|
"pipeline_io": "input",
|
||||||
|
"var_name": "params.TPM",
|
||||||
|
"examples": [
|
||||||
|
"s3://omic/eureka/corto/20002_1289_female_patient_0_TPM.csv"
|
||||||
|
],
|
||||||
|
"pattern": ".*\\.csv$",
|
||||||
|
"enum": [],
|
||||||
|
"validation": {},
|
||||||
|
"notes": "A CSV file containing TPM values with ENSG IDs as rows and samples as columns"
|
||||||
|
},
|
||||||
|
"regulon": {
|
||||||
|
"type": "file",
|
||||||
|
"description": "Path to regulon RDA file",
|
||||||
|
"default": "s3://omic/eureka/corto/regulon.rda",
|
||||||
|
"required": true,
|
||||||
|
"pipeline_io": "input",
|
||||||
|
"var_name": "params.regulon",
|
||||||
|
"examples": [
|
||||||
|
"s3://omic/eureka/corto/regulon.rda"
|
||||||
|
],
|
||||||
|
"pattern": ".*\\.rda$",
|
||||||
|
"enum": [],
|
||||||
|
"validation": {},
|
||||||
|
"notes": "An R data file containing regulon information for the CORTO analysis"
|
||||||
|
},
|
||||||
|
"outdir": {
|
||||||
|
"type": "folder",
|
||||||
|
"description": "Directory for CORTO analysis results",
|
||||||
|
"default": "s3://omic/eureka/corto/output",
|
||||||
|
"required": true,
|
||||||
|
"pipeline_io": "output",
|
||||||
|
"var_name": "params.outdir",
|
||||||
|
"examples": [
|
||||||
|
"s3://omic/eureka/corto/output"
|
||||||
|
],
|
||||||
|
"pattern": ".*",
|
||||||
|
"enum": [],
|
||||||
|
"validation": {},
|
||||||
|
"notes": "Directory where metabolome prediction results will be stored"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
15
test.nf
Normal file → Executable file
15
test.nf
Normal file → Executable file
@@ -1,22 +1,17 @@
|
|||||||
nextflow.enable.dsl=2
|
nextflow.enable.dsl=2
|
||||||
|
|
||||||
// GLOBAL FPSIM2 PARAMS
|
// GLOBAL FPSIM2 PARAMS
|
||||||
params.container = 'corto:latest'
|
params.container_corto = 'corto:latest'
|
||||||
params.containerOptions = '--gpus all --rm -v /mnt:/mnt'
|
params.containerOptions = '--gpus all --rm -v /mnt:/mnt'
|
||||||
params.outdir = '/mnt/OmicNAS/private/old/gabe/corto/outputs'
|
params.outdir = '/mnt/OmicNAS/private/old/gabe/corto/outputs'
|
||||||
params.project_name = 'test'
|
params.project_name = 'test'
|
||||||
|
|
||||||
// // INPUTS
|
// // INPUTS
|
||||||
params.gene_expression_matrix = '/mnt/OmicNAS/private/old/gabe/corto/inputs/inmat.rda'
|
params.TPM = '/data/olamide/corto/corto_metabolite_prediction/20002_1289_female_patient_0_TPM.csv'
|
||||||
params.centroid_list = '/mnt/OmicNAS/private/old/gabe/corto/inputs/centroids.rda'
|
params.regulon = '/data/olamide/corto/corto_metabolite_prediction/regulon.rda'
|
||||||
params.cnv_data = '/mnt/OmicNAS/private/old/gabe/corto/inputs/cnvmat.rda'
|
//params.TPM_REFERENCE = '/data/olamide/corto/corto_metabolite_prediction/TPM_ENSG_NO_MUTATIONS.csv'
|
||||||
|
|
||||||
|
|
||||||
include {CORTO} from './main.nf'
|
include {CORTO} from './main.nf'
|
||||||
|
|
||||||
workflow {
|
workflow {
|
||||||
gene_expression_matrix = Channel.fromPath(params.gene_expression_matrix)
|
CORTO(params.TPM, params.regulon)// , params.TPM_REFERENCE)
|
||||||
centroid_list = Channel.fromPath(params.centroid_list)
|
|
||||||
cnv_data = Channel.fromPath(params.cnv_data)
|
|
||||||
CORTO(gene_expression_matrix, centroid_list, cnv_data)
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user