Merge branch 'single'

2026-03-25 15:13:17 +01:00
parent b408cfd4dd 4bbc9822af
commit b773e291d2
6 changed files with 109 additions and 109 deletions
--- a/0
+++ b/0
--- a/README.md
+++ b/README.md
--- a/main.nf
+++ b/main.nf
@@ -1,105 +1,42 @@
+#!/usr/bin/env nextflow
+
 nextflow.enable.dsl=2

+// Parameters
+params.TPM = '/omic/eureka/corto/20002_1289_female_patient_0_TPM.csv'
+params.regulon = '/omic/eureka/corto/regulon.rda'
+params.outdir = '/omic/eureka/corto/output'
+
 process CORTO {
-  container "${params.container}"
-  containerOptions "${params.containerOptions}"
-  publishDir "${params.outdir}/${params.project_name}", mode: 'copy'
+  container 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
+  publishDir params.outdir, mode: 'copy'
  debug true
-  // maxForks 1
-  stageInMode 'copy'

  input:
-    path gene_expression_matrix
-    path centroid_list
-    path cnv_data // This could be optional
+    path TPM
+    path regulon

-  // Define output channels
  output:
-    path "regulon.rda", emit: regulon
-    path "*.csv", emit: csv_regulon
-    path "*.log", optional: true, emit: logs // if you have log files
+    path "*_metabolome.csv", emit: csv_metabol

  script:
    """
-    #!/bin/bash
-
-    # Create an R script
-    cat <<EOF > corto_analysis.R
-    # Print the R version
-    print(R.version.string)
-
-    # Load the corto library
+    #!/usr/bin/Rscript
    library(corto)
+    library(data.table)

-    # Function to load data
-    loadData <- function(file_name, expected_var) {
-        load(file_name)
-        if (exists(expected_var)) {
-            data <- get(expected_var)
-        } else {
-            stop(paste("Object", expected_var, "not found in", file_name))
-        }
-        return(data)
-    }
+    TPM <- as.matrix(fread("$TPM"),rownames=1)

-    # Load the input matrix
-    inmat <- loadData("${gene_expression_matrix}", "inmat")
-    print("Dimensions of inmat before any operation:")
-    print(dim(inmat))
+    load("$regulon")

-    # Load the centroids
-    centroids <- loadData("${centroid_list}", "centroids")
-    print("Length of centroids:")
-    print(length(centroids))
-
-    # Run corto with specified parameters
-    regulon <- corto(inmat, centroids=centroids, nbootstraps=10, p=1e-30, nthreads=2)
-
-    # Save the regulon object for later use
-    save(regulon, file="regulon.rda")
-
-    # Transform regulon into a data frame
-    regulon_to_df <- function(regulon) {
-        result_df <- data.frame(TF = character(),
-                                Target = character(),
-                                TFMode = numeric(),
-                                Likelihood = numeric(),
-                                stringsAsFactors = FALSE)
-
-        for (tf in names(regulon)) {
-            tf_data <- regulon[[tf]]
-            if (is.null(tf_data\$tfmode) || is.null(tf_data\$likelihood)) next
-
-            for (i in seq_along(tf_data\$tfmode)) {
-                tf_mode <- tf_data\$tfmode[[i]]
-                likelihood <- tf_data\$likelihood[[i]]
-                target_name <- names(tf_data\$tfmode)[i]
-                tf_df <- data.frame(TF = tf,
-                                    Target = target_name,
-                                    TFMode = tf_mode,
-                                    Likelihood = likelihood,
-                                    stringsAsFactors = FALSE)
-                result_df <- rbind(result_df, tf_df)
-            }
-        }
-        return(result_df)
-    }
-
-    # Check if regulon is a list and transform it
-    if (is.list(regulon)) {
-        regulon_df <- regulon_to_df(regulon)
-        write.csv(regulon_df, file="regulon.csv", row.names=FALSE)
-    } else {
-        warning("Regulon object is not a list. Custom transformation needed.")
-    }
-
-    EOF
-
-    # Execute the R script
-    Rscript corto_analysis.R
+    predicted<-mra(TPM, regulon=regulon)

+    # Derive the output file name from the input file name: drop any directory
+    # part, keep the text before the "_TPM.csv" suffix, and append
+    # "_metabolome.csv" (matches the process output glob "*_metabolome.csv").
+    # fixed = TRUE makes the "." in "_TPM.csv" a literal dot, not a regex wildcard.
+    sample_id <- strsplit(basename("$TPM"), split = "_TPM.csv", fixed = TRUE)[[1]][1]
+    out_file <- paste0(sample_id, "_metabolome.csv")
+    write.csv(predicted, out_file)
    """
-
-
-
+}
+
+workflow {
+  // Wrap each input file in its own single-item channel, then run CORTO once.
+  tpm_ch = Channel.of(file(params.TPM))
+  regulon_ch = Channel.of(file(params.regulon))
+  CORTO(tpm_ch, regulon_ch)
 }
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,15 +1,34 @@
 manifest {
    name            = 'corto'
    author          = 'omic'
-    recurseSubmodules   = true
-    homePage            = 'https://gitlab.com/omic/next/registry/tools/corto'
-    description         = ''
+    homePage        = 'https://trs-gitea.cluster.omic.ai/omic/corto'
+    description     = 'CORTO - Correlation Tool for gene regulatory network analysis'
    mainScript      = 'main.nf'
-    nextflowVersion     = '!>=21.04.3'
+    version         = '1.0.0'
    defaultBranch   = 'master'
 }

-docker {
+params {
+    // Intentionally empty. Values assigned in nextflow.config take precedence
+    // over `params.X = ...` defaults in the pipeline scripts, so declaring
+    // TPM, regulon and outdir as null here would mask the defaults set in
+    // main.nf and test.nf and leave them null at run time.
+    // Override the script defaults with --TPM, --regulon and --outdir instead.
+}
+
+profiles {
+    standard {
+        docker {
            enabled = true
            temp = 'auto'
+        }
+    }
+    k8s {
+        process {
+            container = 'harbor.cluster.omic.ai/omic/digital-patients/corto:latest'
+        }
+    }
+}
+
+process {
+    cpus = 1
+    memory = '4 GB'
 }
--- a/params.json
+++ b/params.json
@@ -0,0 +1,49 @@
+{
+    "params": {
+        "TPM": {
+            "type": "file",
+            "description": "Path to TPM (Transcripts Per Million) CSV file",
+            "default": "s3://omic/eureka/corto/20002_1289_female_patient_0_TPM.csv",
+            "required": true,
+            "pipeline_io": "input",
+            "var_name": "params.TPM",
+            "examples": [
+                "s3://omic/eureka/corto/20002_1289_female_patient_0_TPM.csv"
+            ],
+            "pattern": ".*\\.csv$",
+            "enum": [],
+            "validation": {},
+            "notes": "A CSV file containing TPM values with ENSG IDs as rows and samples as columns"
+        },
+        "regulon": {
+            "type": "file",
+            "description": "Path to regulon RDA file",
+            "default": "s3://omic/eureka/corto/regulon.rda",
+            "required": true,
+            "pipeline_io": "input",
+            "var_name": "params.regulon",
+            "examples": [
+                "s3://omic/eureka/corto/regulon.rda"
+            ],
+            "pattern": ".*\\.rda$",
+            "enum": [],
+            "validation": {},
+            "notes": "An R data file containing regulon information for the CORTO analysis"
+        },
+        "outdir": {
+            "type": "folder",
+            "description": "Directory for CORTO analysis results",
+            "default": "s3://omic/eureka/corto/output",
+            "required": true,
+            "pipeline_io": "output",
+            "var_name": "params.outdir",
+            "examples": [
+                "s3://omic/eureka/corto/output"
+            ],
+            "pattern": ".*",
+            "enum": [],
+            "validation": {},
+            "notes": "Directory where metabolome prediction results will be stored"
+        }
+    }
+}
--- a/test.nf
+++ b/test.nf
@@ -1,22 +1,17 @@
 nextflow.enable.dsl=2

 // GLOBAL CORTO PARAMS
-params.container = 'corto:latest'
+params.container_corto = 'corto:latest'
 params.containerOptions = '--gpus all --rm -v /mnt:/mnt'
 params.outdir = '/mnt/OmicNAS/private/old/gabe/corto/outputs'
 params.project_name = 'test'

 // INPUTS
-params.gene_expression_matrix = '/mnt/OmicNAS/private/old/gabe/corto/inputs/inmat.rda'
-params.centroid_list = '/mnt/OmicNAS/private/old/gabe/corto/inputs/centroids.rda'
-params.cnv_data = '/mnt/OmicNAS/private/old/gabe/corto/inputs/cnvmat.rda'
-
-
+params.TPM = '/data/olamide/corto/corto_metabolite_prediction/20002_1289_female_patient_0_TPM.csv'
+params.regulon = '/data/olamide/corto/corto_metabolite_prediction/regulon.rda'
+//params.TPM_REFERENCE = '/data/olamide/corto/corto_metabolite_prediction/TPM_ENSG_NO_MUTATIONS.csv'
 include {CORTO} from './main.nf'

 workflow {
-  gene_expression_matrix = Channel.fromPath(params.gene_expression_matrix)
-  centroid_list = Channel.fromPath(params.centroid_list)
-  cnv_data = Channel.fromPath(params.cnv_data)
-  CORTO(gene_expression_matrix, centroid_list, cnv_data)
+  // Run the CORTO process included from ./main.nf on the TPM and regulon paths
+  // given in params; the third argument (params.TPM_REFERENCE) is commented out.
+  CORTO(params.TPM, params.regulon)// , params.TPM_REFERENCE)
 }