diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/main.nf b/main.nf old mode 100644 new mode 100755 index 6fef889..b8c600e --- a/main.nf +++ b/main.nf @@ -1,7 +1,7 @@ nextflow.enable.dsl=2 process CORTO { - container "${params.container}" + container "${params.container_corto}" containerOptions "${params.containerOptions}" publishDir "${params.outdir}/${params.project_name}", mode: 'copy' debug true @@ -9,97 +9,36 @@ process CORTO { stageInMode 'copy' input: - path gene_expression_matrix - path centroid_list - path cnv_data // This could be optional + path TPM + path regulon + // path TPM_ref // Define output channels output: - path "regulon.rda", emit: regulon - path "*.csv", emit: csv_regulon - path "*.log", optional: true, emit: logs // if you have log files + path "*_metabolome.csv", emit: csv_metabol - script: + script: """ - #!/bin/bash - - # Create an R script - cat < corto_analysis.R - # Print the R version - print(R.version.string) - - # Load the corto library + #!/usr/bin/Rscript library(corto) + library(data.table) - # Function to load data - loadData <- function(file_name, expected_var) { - load(file_name) - if (exists(expected_var)) { - data <- get(expected_var) - } else { - stop(paste("Object", expected_var, "not found in", file_name)) - } - return(data) - } + TPM <- as.matrix(fread("$TPM"),rownames=1) + #TPM_ref <- as.matrix(fread("TPM_ref"),rownames=1) #dollar sign is missing - # Load the input matrix - inmat <- loadData("${gene_expression_matrix}", "inmat") - print("Dimensions of inmat before any operation:") - print(dim(inmat)) + #try normalization; dont use + #TPM <- scale(TPM) + #TPM_log2 = log2(TPM + 1) - # Load the centroids - centroids <- loadData("${centroid_list}", "centroids") - print("Length of centroids:") - print(length(centroids)) + load("$regulon") + + predicted<-mra(TPM, regulon=regulon) - # Run corto with specified parameters - regulon <- corto(inmat, centroids=centroids, nbootstraps=10, p=1e-30, nthreads=2) - - # Save the regulon object for later use - save(regulon, file="regulon.rda") - - # Transform regulon into a data frame - regulon_to_df <- function(regulon) { - result_df <- data.frame(TF = character(), - Target = character(), - TFMode = numeric(), - Likelihood = numeric(), - stringsAsFactors = FALSE) - - for (tf in names(regulon)) { - tf_data <- regulon[[tf]] - if (is.null(tf_data\$tfmode) || is.null(tf_data\$likelihood)) next - - for (i in seq_along(tf_data\$tfmode)) { - tf_mode <- tf_data\$tfmode[[i]] - likelihood <- tf_data\$likelihood[[i]] - target_name <- names(tf_data\$tfmode)[i] - tf_df <- data.frame(TF = tf, - Target = target_name, - TFMode = tf_mode, - Likelihood = likelihood, - stringsAsFactors = FALSE) - result_df <- rbind(result_df, tf_df) - } - } - return(result_df) - } - - # Check if regulon is a list and transform it - if (is.list(regulon)) { - regulon_df <- regulon_to_df(regulon) - write.csv(regulon_df, file="regulon.csv", row.names=FALSE) - } else { - warning("Regulon object is not a list. Custom transformation needed.") - } - - EOF - - # Execute the R script - Rscript corto_analysis.R + #use this if we want to compare patients and reference TPM, data formet output is not csv -> work on data saving if needed + #predicted<-mra(TPM, path TPM_ref, regulon=regulon) + name = strsplit(strsplit("$TPM", split = "/")[[1]][length(strsplit("$TPM", split = "/")[[1]])], split = "_TPM.csv")[[1]][1] + name = paste(name, "_metabolome.csv", sep="") + write.csv(predicted, name) """ - - - } diff --git a/nextflow.config b/nextflow.config old mode 100644 new mode 100755 diff --git a/params.json b/params.json new file mode 100644 index 0000000..14666c1 --- /dev/null +++ b/params.json @@ -0,0 +1,101 @@ +{ + "params": { + "TPM": { + "type": "file", + "description": "Path to TPM (Transcripts Per Million) CSV file", + "default": "/data/olamide/corto/corto_metabolite_prediction/20002_1289_female_patient_0_TPM.csv", + "required": true, + "pipeline_io": "input", + "var_name": "params.TPM", + "examples": [ + "/data/olamide/corto/corto_metabolite_prediction/20002_1289_female_patient_0_TPM.csv", + "/path/to/your/sample_TPM.csv" + ], + "pattern": ".*\\.csv$", + "enum": [], + "validation": {}, + "notes": "A CSV file containing TPM values with ENSG IDs as rows and samples as columns" + }, + "regulon": { + "type": "file", + "description": "Path to regulon RDA file", + "default": "/data/olamide/corto/corto_metabolite_prediction/regulon.rda", + "required": true, + "pipeline_io": "input", + "var_name": "params.regulon", + "examples": [ + "/data/olamide/corto/corto_metabolite_prediction/regulon.rda", + "/path/to/your/custom_regulon.rda" + ], + "pattern": ".*\\.rda$", + "enum": [], + "validation": {}, + "notes": "An R data file containing regulon information for the CORTO analysis" + }, + "outdir": { + "type": "folder", + "description": "Directory for CORTO analysis results", + "default": "/mnt/OmicNAS/private/old/gabe/corto/outputs", + "required": true, + "pipeline_io": "output", + "var_name": "params.outdir", + "examples": [ + "/mnt/OmicNAS/private/old/gabe/corto/outputs", + "/path/to/custom/output" + ], + "pattern": ".*", + "enum": [], + "validation": {}, + "notes": "Directory where metabolome prediction results will be stored" + }, + "project_name": { + "type": "string", + "description": "Project name for output directory organization", + "default": "test", + "required": false, + "pipeline_io": "parameter", + "var_name": "params.project_name", + "examples": [ + "test", + "patient_analysis", + "metabolite_prediction_run1" + ], + "pattern": "^[A-Za-z0-9_-]+$", + "enum": [], + "validation": {}, + "notes": "Name used to create a subdirectory within the output directory" + }, + "container_corto": { + "type": "string", + "description": "Docker container for CORTO", + "default": "corto:latest", + "required": false, + "pipeline_io": "parameter", + "var_name": "params.container_corto", + "examples": [ + "corto:latest", + "yourregistry/corto:v1.0" + ], + "pattern": ".*", + "enum": [], + "validation": {}, + "notes": "Docker container image to use for the CORTO analysis" + }, + "containerOptions": { + "type": "string", + "description": "Docker container options", + "default": "--gpus all --rm -v /mnt:/mnt", + "required": false, + "pipeline_io": "parameter", + "var_name": "params.containerOptions", + "examples": [ + "--gpus all --rm -v /mnt:/mnt", + "--rm -v /custom/path:/mnt" + ], + "pattern": ".*", + "enum": [], + "validation": {}, + "notes": "Options to pass to the Docker container runtime" + } + } +} diff --git a/test.nf b/test.nf old mode 100644 new mode 100755 index 0d72e6d..8f60f18 --- a/test.nf +++ b/test.nf @@ -1,22 +1,17 @@ nextflow.enable.dsl=2 // GLOBAL FPSIM2 PARAMS -params.container = 'corto:latest' +params.container_corto = 'corto:latest' params.containerOptions = '--gpus all --rm -v /mnt:/mnt' params.outdir = '/mnt/OmicNAS/private/old/gabe/corto/outputs' params.project_name = 'test' // // INPUTS -params.gene_expression_matrix = '/mnt/OmicNAS/private/old/gabe/corto/inputs/inmat.rda' -params.centroid_list = '/mnt/OmicNAS/private/old/gabe/corto/inputs/centroids.rda' -params.cnv_data = '/mnt/OmicNAS/private/old/gabe/corto/inputs/cnvmat.rda' - - +params.TPM = '/data/olamide/corto/corto_metabolite_prediction/20002_1289_female_patient_0_TPM.csv' +params.regulon = '/data/olamide/corto/corto_metabolite_prediction/regulon.rda' +//params.TPM_REFERENCE = '/data/olamide/corto/corto_metabolite_prediction/TPM_ENSG_NO_MUTATIONS.csv' include {CORTO} from './main.nf' workflow { - gene_expression_matrix = Channel.fromPath(params.gene_expression_matrix) - centroid_list = Channel.fromPath(params.centroid_list) - cnv_data = Channel.fromPath(params.cnv_data) - CORTO(gene_expression_matrix, centroid_list, cnv_data) + CORTO(params.TPM, params.regulon)// , params.TPM_REFERENCE) }