diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/main.nf b/main.nf old mode 100644 new mode 100755 index 6fef889..4b66ccc --- a/main.nf +++ b/main.nf @@ -1,105 +1,42 @@ +#!/usr/bin/env nextflow + nextflow.enable.dsl=2 +// Parameters +params.TPM = '/omic/eureka/corto/20002_1289_female_patient_0_TPM.csv' +params.regulon = '/omic/eureka/corto/regulon.rda' +params.outdir = '/omic/eureka/corto/output' + process CORTO { - container "${params.container}" - containerOptions "${params.containerOptions}" - publishDir "${params.outdir}/${params.project_name}", mode: 'copy' + container 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest' + publishDir params.outdir, mode: 'copy' debug true - // maxForks 1 - stageInMode 'copy' input: - path gene_expression_matrix - path centroid_list - path cnv_data // This could be optional + path TPM + path regulon - // Define output channels output: - path "regulon.rda", emit: regulon - path "*.csv", emit: csv_regulon - path "*.log", optional: true, emit: logs // if you have log files + path "*_metabolome.csv", emit: csv_metabol - script: + script: """ - #!/bin/bash - - # Create an R script - cat < corto_analysis.R - # Print the R version - print(R.version.string) - - # Load the corto library + #!/usr/bin/Rscript library(corto) + library(data.table) - # Function to load data - loadData <- function(file_name, expected_var) { - load(file_name) - if (exists(expected_var)) { - data <- get(expected_var) - } else { - stop(paste("Object", expected_var, "not found in", file_name)) - } - return(data) - } + TPM <- as.matrix(fread("$TPM"),rownames=1) - # Load the input matrix - inmat <- loadData("${gene_expression_matrix}", "inmat") - print("Dimensions of inmat before any operation:") - print(dim(inmat)) + load("$regulon") - # Load the centroids - centroids <- loadData("${centroid_list}", "centroids") - print("Length of centroids:") - print(length(centroids)) - - # Run corto with specified parameters - regulon <- corto(inmat, centroids=centroids, nbootstraps=10, p=1e-30, nthreads=2) - - # Save the regulon object for later use - save(regulon, file="regulon.rda") - - # Transform regulon into a data frame - regulon_to_df <- function(regulon) { - result_df <- data.frame(TF = character(), - Target = character(), - TFMode = numeric(), - Likelihood = numeric(), - stringsAsFactors = FALSE) - - for (tf in names(regulon)) { - tf_data <- regulon[[tf]] - if (is.null(tf_data\$tfmode) || is.null(tf_data\$likelihood)) next - - for (i in seq_along(tf_data\$tfmode)) { - tf_mode <- tf_data\$tfmode[[i]] - likelihood <- tf_data\$likelihood[[i]] - target_name <- names(tf_data\$tfmode)[i] - tf_df <- data.frame(TF = tf, - Target = target_name, - TFMode = tf_mode, - Likelihood = likelihood, - stringsAsFactors = FALSE) - result_df <- rbind(result_df, tf_df) - } - } - return(result_df) - } - - # Check if regulon is a list and transform it - if (is.list(regulon)) { - regulon_df <- regulon_to_df(regulon) - write.csv(regulon_df, file="regulon.csv", row.names=FALSE) - } else { - warning("Regulon object is not a list. Custom transformation needed.") - } - - EOF - - # Execute the R script - Rscript corto_analysis.R + predicted<-mra(TPM, regulon=regulon) + name = strsplit(strsplit("$TPM", split = "/")[[1]][length(strsplit("$TPM", split = "/")[[1]])], split = "_TPM.csv")[[1]][1] + name = paste(name, "_metabolome.csv", sep="") + write.csv(predicted, name) """ - - - +} + +workflow { + CORTO(Channel.of(file(params.TPM)), Channel.of(file(params.regulon))) } diff --git a/nextflow.config b/nextflow.config old mode 100644 new mode 100755 index 784b39b..14b00da --- a/nextflow.config +++ b/nextflow.config @@ -1,15 +1,34 @@ manifest { - name = 'corto' - author = 'omic' - recurseSubmodules = true - homePage = 'https://gitlab.com/omic/next/registry/tools/corto' - description = '' - mainScript = 'main.nf' - nextflowVersion = '!>=21.04.3' - defaultBranch = 'master' + name = 'corto' + author = 'omic' + homePage = 'https://trs-gitea.cluster.omic.ai/omic/corto' + description = 'CORTO - Correlation Tool for gene regulatory network analysis' + mainScript = 'main.nf' + version = '1.0.0' + defaultBranch = 'master' } -docker { - enabled = true - temp = 'auto' +params { + TPM = null + regulon = null + outdir = null +} + +profiles { + standard { + docker { + enabled = true + temp = 'auto' + } + } + k8s { + process { + container = 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest' + } + } +} + +process { + cpus = 1 + memory = '4 GB' } diff --git a/params.json b/params.json new file mode 100644 index 0000000..5fc62c3 --- /dev/null +++ b/params.json @@ -0,0 +1,49 @@ +{ + "params": { + "TPM": { + "type": "file", + "description": "Path to TPM (Transcripts Per Million) CSV file", + "default": "s3://omic/eureka/corto/20002_1289_female_patient_0_TPM.csv", + "required": true, + "pipeline_io": "input", + "var_name": "params.TPM", + "examples": [ + "s3://omic/eureka/corto/20002_1289_female_patient_0_TPM.csv" + ], + "pattern": ".*\\.csv$", + "enum": [], + "validation": {}, + "notes": "A CSV file containing TPM values with ENSG IDs as rows and samples as columns" + }, + "regulon": { + "type": "file", + "description": "Path to regulon RDA file", + "default": "s3://omic/eureka/corto/regulon.rda", + "required": true, + "pipeline_io": "input", + "var_name": "params.regulon", + "examples": [ + "s3://omic/eureka/corto/regulon.rda" + ], + "pattern": ".*\\.rda$", + "enum": [], + "validation": {}, + "notes": "An R data file containing regulon information for the CORTO analysis" + }, + "outdir": { + "type": "folder", + "description": "Directory for CORTO analysis results", + "default": "s3://omic/eureka/corto/output", + "required": true, + "pipeline_io": "output", + "var_name": "params.outdir", + "examples": [ + "s3://omic/eureka/corto/output" + ], + "pattern": ".*", + "enum": [], + "validation": {}, + "notes": "Directory where metabolome prediction results will be stored" + } + } +} diff --git a/test.nf b/test.nf old mode 100644 new mode 100755 index 0d72e6d..8f60f18 --- a/test.nf +++ b/test.nf @@ -1,22 +1,17 @@ nextflow.enable.dsl=2 // GLOBAL FPSIM2 PARAMS -params.container = 'corto:latest' +params.container_corto = 'corto:latest' params.containerOptions = '--gpus all --rm -v /mnt:/mnt' params.outdir = '/mnt/OmicNAS/private/old/gabe/corto/outputs' params.project_name = 'test' // // INPUTS -params.gene_expression_matrix = '/mnt/OmicNAS/private/old/gabe/corto/inputs/inmat.rda' -params.centroid_list = '/mnt/OmicNAS/private/old/gabe/corto/inputs/centroids.rda' -params.cnv_data = '/mnt/OmicNAS/private/old/gabe/corto/inputs/cnvmat.rda' - - +params.TPM = '/data/olamide/corto/corto_metabolite_prediction/20002_1289_female_patient_0_TPM.csv' +params.regulon = '/data/olamide/corto/corto_metabolite_prediction/regulon.rda' +//params.TPM_REFERENCE = '/data/olamide/corto/corto_metabolite_prediction/TPM_ENSG_NO_MUTATIONS.csv' include {CORTO} from './main.nf' workflow { - gene_expression_matrix = Channel.fromPath(params.gene_expression_matrix) - centroid_list = Channel.fromPath(params.centroid_list) - cnv_data = Channel.fromPath(params.cnv_data) - CORTO(gene_expression_matrix, centroid_list, cnv_data) + CORTO(params.TPM, params.regulon)// , params.TPM_REFERENCE) }