Update CORTO pipeline with params.json and workflow improvements

This commit is contained in:
2025-03-28 14:45:49 -07:00
parent bf380f2768
commit aaa43a4d54
6 changed files with 127 additions and 92 deletions

0
Dockerfile Normal file → Executable file
View File

0
README.md Normal file → Executable file
View File

101
main.nf Normal file → Executable file
View File

@@ -1,7 +1,7 @@
nextflow.enable.dsl=2 nextflow.enable.dsl=2
process CORTO { process CORTO {
container "${params.container}" container "${params.container_corto}"
containerOptions "${params.containerOptions}" containerOptions "${params.containerOptions}"
publishDir "${params.outdir}/${params.project_name}", mode: 'copy' publishDir "${params.outdir}/${params.project_name}", mode: 'copy'
debug true debug true
@@ -9,97 +9,36 @@ process CORTO {
stageInMode 'copy' stageInMode 'copy'
input: input:
path gene_expression_matrix path TPM
path centroid_list path regulon
path cnv_data // This could be optional // path TPM_ref
// Define output channels // Define output channels
output: output:
path "regulon.rda", emit: regulon path "*_metabolome.csv", emit: csv_metabol
path "*.csv", emit: csv_regulon
path "*.log", optional: true, emit: logs // if you have log files
script: script:
""" """
#!/bin/bash #!/usr/bin/Rscript
# Create an R script
cat <<EOF > corto_analysis.R
# Print the R version
print(R.version.string)
# Load the corto library
library(corto) library(corto)
library(data.table)
# Function to load data TPM <- as.matrix(fread("$TPM"),rownames=1)
loadData <- function(file_name, expected_var) { #TPM_ref <- as.matrix(fread("TPM_ref"),rownames=1) #dollar sign is missing
load(file_name)
if (exists(expected_var)) {
data <- get(expected_var)
} else {
stop(paste("Object", expected_var, "not found in", file_name))
}
return(data)
}
# Load the input matrix #normalization attempts (do not use)
inmat <- loadData("${gene_expression_matrix}", "inmat") #TPM <- scale(TPM)
print("Dimensions of inmat before any operation:") #TPM_log2 = log2(TPM + 1)
print(dim(inmat))
# Load the centroids load("$regulon")
centroids <- loadData("${centroid_list}", "centroids")
print("Length of centroids:")
print(length(centroids))
# Run corto with specified parameters predicted<-mra(TPM, regulon=regulon)
regulon <- corto(inmat, centroids=centroids, nbootstraps=10, p=1e-30, nthreads=2)
# Save the regulon object for later use #use this if we want to compare patients and reference TPM; the output data format is not csv -> work on data saving if needed
save(regulon, file="regulon.rda") #predicted<-mra(TPM, path TPM_ref, regulon=regulon)
# Transform regulon into a data frame
regulon_to_df <- function(regulon) {
result_df <- data.frame(TF = character(),
Target = character(),
TFMode = numeric(),
Likelihood = numeric(),
stringsAsFactors = FALSE)
for (tf in names(regulon)) {
tf_data <- regulon[[tf]]
if (is.null(tf_data\$tfmode) || is.null(tf_data\$likelihood)) next
for (i in seq_along(tf_data\$tfmode)) {
tf_mode <- tf_data\$tfmode[[i]]
likelihood <- tf_data\$likelihood[[i]]
target_name <- names(tf_data\$tfmode)[i]
tf_df <- data.frame(TF = tf,
Target = target_name,
TFMode = tf_mode,
Likelihood = likelihood,
stringsAsFactors = FALSE)
result_df <- rbind(result_df, tf_df)
}
}
return(result_df)
}
# Check if regulon is a list and transform it
if (is.list(regulon)) {
regulon_df <- regulon_to_df(regulon)
write.csv(regulon_df, file="regulon.csv", row.names=FALSE)
} else {
warning("Regulon object is not a list. Custom transformation needed.")
}
EOF
# Execute the R script
Rscript corto_analysis.R
name = strsplit(strsplit("$TPM", split = "/")[[1]][length(strsplit("$TPM", split = "/")[[1]])], split = "_TPM.csv")[[1]][1]
name = paste(name, "_metabolome.csv", sep="")
write.csv(predicted, name)
""" """
} }

0
nextflow.config Normal file → Executable file
View File

101
params.json Normal file
View File

@@ -0,0 +1,101 @@
{
"params": {
"TPM": {
"type": "file",
"description": "Path to TPM (Transcripts Per Million) CSV file",
"default": "/data/olamide/corto/corto_metabolite_prediction/20002_1289_female_patient_0_TPM.csv",
"required": true,
"pipeline_io": "input",
"var_name": "params.TPM",
"examples": [
"/data/olamide/corto/corto_metabolite_prediction/20002_1289_female_patient_0_TPM.csv",
"/path/to/your/sample_TPM.csv"
],
"pattern": ".*\\.csv$",
"enum": [],
"validation": {},
"notes": "A CSV file containing TPM values with ENSG IDs as rows and samples as columns"
},
"regulon": {
"type": "file",
"description": "Path to regulon RDA file",
"default": "/data/olamide/corto/corto_metabolite_prediction/regulon.rda",
"required": true,
"pipeline_io": "input",
"var_name": "params.regulon",
"examples": [
"/data/olamide/corto/corto_metabolite_prediction/regulon.rda",
"/path/to/your/custom_regulon.rda"
],
"pattern": ".*\\.rda$",
"enum": [],
"validation": {},
"notes": "An R data file containing regulon information for the CORTO analysis"
},
"outdir": {
"type": "folder",
"description": "Directory for CORTO analysis results",
"default": "/mnt/OmicNAS/private/old/gabe/corto/outputs",
"required": true,
"pipeline_io": "output",
"var_name": "params.outdir",
"examples": [
"/mnt/OmicNAS/private/old/gabe/corto/outputs",
"/path/to/custom/output"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Directory where metabolome prediction results will be stored"
},
"project_name": {
"type": "string",
"description": "Project name for output directory organization",
"default": "test",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.project_name",
"examples": [
"test",
"patient_analysis",
"metabolite_prediction_run1"
],
"pattern": "^[A-Za-z0-9_-]+$",
"enum": [],
"validation": {},
"notes": "Name used to create a subdirectory within the output directory"
},
"container_corto": {
"type": "string",
"description": "Docker container for CORTO",
"default": "corto:latest",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.container_corto",
"examples": [
"corto:latest",
"yourregistry/corto:v1.0"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Docker container image to use for the CORTO analysis"
},
"containerOptions": {
"type": "string",
"description": "Docker container options",
"default": "--gpus all --rm -v /mnt:/mnt",
"required": false,
"pipeline_io": "parameter",
"var_name": "params.containerOptions",
"examples": [
"--gpus all --rm -v /mnt:/mnt",
"--rm -v /custom/path:/mnt"
],
"pattern": ".*",
"enum": [],
"validation": {},
"notes": "Options to pass to the Docker container runtime"
}
}
}

15
test.nf Normal file → Executable file
View File

@@ -1,22 +1,17 @@
nextflow.enable.dsl=2 nextflow.enable.dsl=2
// GLOBAL CORTO PARAMS // GLOBAL CORTO PARAMS
params.container = 'corto:latest' params.container_corto = 'corto:latest'
params.containerOptions = '--gpus all --rm -v /mnt:/mnt' params.containerOptions = '--gpus all --rm -v /mnt:/mnt'
params.outdir = '/mnt/OmicNAS/private/old/gabe/corto/outputs' params.outdir = '/mnt/OmicNAS/private/old/gabe/corto/outputs'
params.project_name = 'test' params.project_name = 'test'
// // INPUTS // // INPUTS
params.gene_expression_matrix = '/mnt/OmicNAS/private/old/gabe/corto/inputs/inmat.rda' params.TPM = '/data/olamide/corto/corto_metabolite_prediction/20002_1289_female_patient_0_TPM.csv'
params.centroid_list = '/mnt/OmicNAS/private/old/gabe/corto/inputs/centroids.rda' params.regulon = '/data/olamide/corto/corto_metabolite_prediction/regulon.rda'
params.cnv_data = '/mnt/OmicNAS/private/old/gabe/corto/inputs/cnvmat.rda' //params.TPM_REFERENCE = '/data/olamide/corto/corto_metabolite_prediction/TPM_ENSG_NO_MUTATIONS.csv'
include {CORTO} from './main.nf' include {CORTO} from './main.nf'
workflow { workflow {
gene_expression_matrix = Channel.fromPath(params.gene_expression_matrix) CORTO(params.TPM, params.regulon)// , params.TPM_REFERENCE)
centroid_list = Channel.fromPath(params.centroid_list)
cnv_data = Channel.fromPath(params.cnv_data)
CORTO(gene_expression_matrix, centroid_list, cnv_data)
} }