From bf380f2768f8d64d0f9a5cc6f892be4e07638f24 Mon Sep 17 00:00:00 2001
From: Gabe Richman
Date: Sun, 31 Dec 2023 14:00:43 -0800
Subject: [PATCH] corto up and running!

---
 Dockerfile      |   8 +++-
 README.md       |   4 +-
 main.nf         | 113 ++++++++++++++++++++++++++++++++++++++++++++++++
 nextflow.config |  15 +++++++
 test.nf         |  22 ++++++++++
 5 files changed, 158 insertions(+), 4 deletions(-)
 create mode 100644 main.nf
 create mode 100644 nextflow.config
 create mode 100644 test.nf

diff --git a/Dockerfile b/Dockerfile
index 28c5679..4b8ef2d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,13 +7,17 @@ RUN apt-get update && apt-get install -y \
     git \
     libcurl4-gnutls-dev \
     libssl-dev \
-    libxml2-dev
+    libxml2-dev \
+    procps # Adding procps for the 'ps' command

 # Install R packages
 RUN Rscript -e "install.packages('remotes', dependencies=TRUE)" \
     && Rscript -e "remotes::install_github('federicogiorgi/corto')"

 # Test commands to validate installation
-RUN Rscript -e "library(corto); data('inmat'); print(head(inmat))"
+RUN Rscript -e "library(corto)"
+
+# Clone the corto repository (optional, based on your needs)
+RUN git clone https://github.com/federicogiorgi/corto.git

 CMD ["/bin/bash"]
diff --git a/README.md b/README.md
index 613240c..c802660 100644
--- a/README.md
+++ b/README.md
@@ -15,14 +15,14 @@ Already a pro? Just edit this README.md and make it your own. Want to make it ea

 ```
 cd existing_repo
-git remote add origin https://gitlab.com/omic/next/registry/tools/corto.git
+git remote add origin https://gitlab.com/omic/next/registry/tools/clei2block.git
 git branch -M master
 git push -uf origin master
 ```

 ## Integrate with your tools

-- [ ] [Set up project integrations](https://gitlab.com/omic/next/registry/tools/corto/-/settings/integrations)
+- [ ] [Set up project integrations](https://gitlab.com/omic/next/registry/tools/clei2block/-/settings/integrations)

 ## Collaborate with your team

diff --git a/main.nf b/main.nf
new file mode 100644
index 0000000..6fef889
--- /dev/null
+++ b/main.nf
@@ -0,0 +1,113 @@
+nextflow.enable.dsl=2
+
+// CORTO: infers a regulon with the corto R package and publishes it as
+// regulon.rda plus a flat regulon.csv.
+//
+// Inputs (staged as files):
+//   gene_expression_matrix - .rda file defining an object named `inmat`
+//   centroid_list          - .rda file defining an object named `centroids`
+//   cnv_data               - staged, but not read by the script below
+// Outputs:
+//   regulon     - regulon.rda written with save()
+//   csv_regulon - *.csv (regulon.csv, one row per TF/target pair)
+//   logs        - *.log files, if any are produced
+process CORTO {
+    container "${params.container}"
+    containerOptions "${params.containerOptions}"
+    publishDir "${params.outdir}/${params.project_name}", mode: 'copy'
+    debug true
+    // maxForks 1
+    stageInMode 'copy'
+
+    input:
+    path gene_expression_matrix
+    path centroid_list
+    path cnv_data // This could be optional
+
+    // Define output channels
+    output:
+    path "regulon.rda", emit: regulon
+    path "*.csv", emit: csv_regulon
+    path "*.log", optional: true, emit: logs // if you have log files
+
+    script:
+    """
+    #!/bin/bash
+
+    # Create an R script. The heredoc delimiter is quoted so bash copies the
+    # R code verbatim instead of expanding R list accessors as shell variables.
+    cat <<'EOF' > corto_analysis.R
+    # Print the R version
+    print(R.version.string)
+
+    # Load the corto library
+    library(corto)
+
+    # Load an .rda file into a private environment and return the object
+    # named expected_var; stop if the file itself does not define it.
+    loadData <- function(file_name, expected_var) {
+        env <- new.env()
+        load(file_name, envir = env)
+        if (!exists(expected_var, envir = env, inherits = FALSE)) {
+            stop(paste("Object", expected_var, "not found in", file_name))
+        }
+        get(expected_var, envir = env, inherits = FALSE)
+    }
+
+    # Load the input matrix
+    inmat <- loadData("${gene_expression_matrix}", "inmat")
+    print("Dimensions of inmat before any operation:")
+    print(dim(inmat))
+
+    # Load the centroids
+    centroids <- loadData("${centroid_list}", "centroids")
+    print("Length of centroids:")
+    print(length(centroids))
+
+    # Run corto with specified parameters
+    regulon <- corto(inmat, centroids=centroids, nbootstraps=10, p=1e-30, nthreads=2)
+
+    # Save the regulon object for later use
+    save(regulon, file="regulon.rda")
+
+    # Flatten the regulon into one row per TF/target pair. Entries without
+    # tfmode or likelihood are skipped. Rows are built per TF and bound
+    # once rather than growing the data frame with rbind() per target.
+    regulon_to_df <- function(regulon) {
+        empty_df <- data.frame(TF = character(),
+                               Target = character(),
+                               TFMode = numeric(),
+                               Likelihood = numeric(),
+                               stringsAsFactors = FALSE)
+
+        per_tf <- lapply(names(regulon), function(tf) {
+            tf_data <- regulon[[tf]]
+            if (is.null(tf_data\$tfmode) || is.null(tf_data\$likelihood)) {
+                return(NULL)
+            }
+            data.frame(TF = rep(tf, length(tf_data\$tfmode)),
+                       Target = as.character(names(tf_data\$tfmode)),
+                       TFMode = as.numeric(tf_data\$tfmode),
+                       Likelihood = as.numeric(tf_data\$likelihood),
+                       stringsAsFactors = FALSE)
+        })
+
+        # rbind() drops the NULL entries left by skipped TFs.
+        do.call(rbind, c(list(empty_df), per_tf))
+    }
+
+    # Check if regulon is a list and transform it
+    if (is.list(regulon)) {
+        regulon_df <- regulon_to_df(regulon)
+        write.csv(regulon_df, file="regulon.csv", row.names=FALSE)
+    } else {
+        warning("Regulon object is not a list. Custom transformation needed.")
+    }
+
+    EOF
+
+    # Execute the R script
+    Rscript corto_analysis.R
+
+    """
+}
diff --git a/nextflow.config b/nextflow.config
new file mode 100644
index 0000000..784b39b
--- /dev/null
+++ b/nextflow.config
@@ -0,0 +1,15 @@
+manifest {
+    name = 'corto'
+    author = 'omic'
+    recurseSubmodules = true
+    homePage = 'https://gitlab.com/omic/next/registry/tools/corto'
+    description = ''
+    mainScript = 'main.nf'
+    nextflowVersion = '!>=21.04.3'
+    defaultBranch = 'master'
+}
+
+docker {
+    enabled = true
+    temp = 'auto'
+}
diff --git a/test.nf b/test.nf
new file mode 100644
index 0000000..0d72e6d
--- /dev/null
+++ b/test.nf
@@ -0,0 +1,22 @@
+nextflow.enable.dsl=2
+
+// GLOBAL CORTO PARAMS
+params.container = 'corto:latest'
+params.containerOptions = '--gpus all --rm -v /mnt:/mnt'
+params.outdir = '/mnt/OmicNAS/private/old/gabe/corto/outputs'
+params.project_name = 'test'
+
+// // INPUTS
+params.gene_expression_matrix = '/mnt/OmicNAS/private/old/gabe/corto/inputs/inmat.rda'
+params.centroid_list = '/mnt/OmicNAS/private/old/gabe/corto/inputs/centroids.rda'
+params.cnv_data = '/mnt/OmicNAS/private/old/gabe/corto/inputs/cnvmat.rda'
+
+
+include {CORTO} from './main.nf'
+
+workflow {
+    gene_expression_matrix = Channel.fromPath(params.gene_expression_matrix)
+    centroid_list = Channel.fromPath(params.centroid_list)
+    cnv_data = Channel.fromPath(params.cnv_data)
+    CORTO(gene_expression_matrix, centroid_list, cnv_data)
+}