corto up and running!

This commit is contained in:
2023-12-31 14:00:43 -08:00
parent 0018149d36
commit bf380f2768
5 changed files with 150 additions and 4 deletions

View File

@@ -7,13 +7,17 @@ RUN apt-get update && apt-get install -y \
git \ git \
libcurl4-gnutls-dev \ libcurl4-gnutls-dev \
libssl-dev \ libssl-dev \
libxml2-dev libxml2-dev \
procps # Adding procps for the 'ps' command
# Install R packages # Install R packages
RUN Rscript -e "install.packages('remotes', dependencies=TRUE)" \ RUN Rscript -e "install.packages('remotes', dependencies=TRUE)" \
&& Rscript -e "remotes::install_github('federicogiorgi/corto')" && Rscript -e "remotes::install_github('federicogiorgi/corto')"
# Test commands to validate installation # Test commands to validate installation
RUN Rscript -e "library(corto); data('inmat'); print(head(inmat))" RUN Rscript -e "library(corto)"
# Clone the corto repository (optional, based on your needs)
RUN git clone https://github.com/federicogiorgi/corto.git
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -15,14 +15,14 @@ Already a pro? Just edit this README.md and make it your own. Want to make it ea
``` ```
cd existing_repo cd existing_repo
git remote add origin https://gitlab.com/omic/next/registry/tools/corto.git git remote add origin https://gitlab.com/omic/next/registry/tools/clei2block.git
git branch -M master git branch -M master
git push -uf origin master git push -uf origin master
``` ```
## Integrate with your tools ## Integrate with your tools
- [ ] [Set up project integrations](https://gitlab.com/omic/next/registry/tools/corto/-/settings/integrations) - [ ] [Set up project integrations](https://gitlab.com/omic/next/registry/tools/clei2block/-/settings/integrations)
## Collaborate with your team ## Collaborate with your team

105
main.nf Normal file
View File

@@ -0,0 +1,105 @@
nextflow.enable.dsl=2
/*
 * CORTO
 *
 * Runs the corto R package inside the configured container on a gene
 * expression matrix and a centroid list, both supplied as .rda files.
 *
 * Inputs:
 *   gene_expression_matrix  .rda file that must contain an object named "inmat"
 *   centroid_list           .rda file that must contain an object named "centroids"
 *   cnv_data                staged into the work dir but NOT read by the script
 *                           below (kept so existing callers stay valid)
 *
 * Outputs:
 *   regulon      regulon.rda, the saved result of corto()
 *   csv_regulon  regulon.csv, one row per TF/target pair
 *   logs         any *.log files (optional)
 */
process CORTO {
    container "${params.container}"
    containerOptions "${params.containerOptions}"
    publishDir "${params.outdir}/${params.project_name}", mode: 'copy'
    debug true
    // maxForks 1
    stageInMode 'copy'

    input:
    path gene_expression_matrix
    path centroid_list
    path cnv_data // This could be optional; currently unused by the script

    // Define output channels
    output:
    path "regulon.rda", emit: regulon
    path "*.csv", emit: csv_regulon
    path "*.log", optional: true, emit: logs // if you have log files

    script:
    """
    #!/bin/bash

    # Create the R script. The heredoc delimiter is quoted so that bash copies
    # the body verbatim (no shell expansion of R code). Nextflow has already
    # substituted the staged input file names before bash sees this text.
    cat <<'EOF' > corto_analysis.R
    # Print the R version
    print(R.version.string)

    # Load the corto library
    library(corto)

    # Load a single named object from an .rda file.
    # The file is loaded into its own empty environment so that an object with
    # the same name elsewhere on the search path cannot hide a missing one.
    load_data <- function(file_name, expected_var) {
      env <- new.env(parent = emptyenv())
      load(file_name, envir = env)
      if (!exists(expected_var, envir = env, inherits = FALSE)) {
        stop("Object ", expected_var, " not found in ", file_name, call. = FALSE)
      }
      get(expected_var, envir = env, inherits = FALSE)
    }

    # Load the input matrix
    inmat <- load_data("${gene_expression_matrix}", "inmat")
    print("Dimensions of inmat before any operation:")
    print(dim(inmat))

    # Load the centroids
    centroids <- load_data("${centroid_list}", "centroids")
    print("Length of centroids:")
    print(length(centroids))

    # Run corto with specified parameters
    regulon <- corto(inmat, centroids = centroids, nbootstraps = 10, p = 1e-30, nthreads = 2)

    # Save the regulon object for later use
    save(regulon, file = "regulon.rda")

    # Flatten a regulon (named list; each entry holds tfmode and likelihood)
    # into a data frame with columns TF, Target, TFMode, Likelihood.
    # Entries lacking tfmode or likelihood are skipped. One data frame is built
    # per TF and all are bound once, instead of growing a frame row by row.
    regulon_to_df <- function(regulon) {
      empty_df <- data.frame(
        TF = character(),
        Target = character(),
        TFMode = numeric(),
        Likelihood = numeric(),
        stringsAsFactors = FALSE
      )

      per_tf <- lapply(names(regulon), function(tf) {
        tf_mode <- regulon[[tf]][["tfmode"]]
        likelihood <- regulon[[tf]][["likelihood"]]
        if (is.null(tf_mode) || is.null(likelihood)) {
          return(NULL)
        }
        if (length(tf_mode) != length(likelihood)) {
          stop("tfmode and likelihood differ in length for ", tf, call. = FALSE)
        }
        targets <- names(tf_mode)
        if (is.null(targets)) {
          targets <- rep(NA_character_, length(tf_mode))
        }
        data.frame(
          TF = rep(tf, length(tf_mode)),
          Target = targets,
          TFMode = unlist(tf_mode, use.names = FALSE),
          Likelihood = unlist(likelihood, use.names = FALSE),
          stringsAsFactors = FALSE
        )
      })

      per_tf <- Filter(Negate(is.null), per_tf)
      if (length(per_tf) == 0) {
        return(empty_df)
      }
      do.call(rbind, per_tf)
    }

    # Check if regulon is a list and transform it.
    # NOTE: if this branch is not taken no CSV is written, and the mandatory
    # "*.csv" output declared above will then be reported missing by Nextflow.
    if (is.list(regulon)) {
      regulon_df <- regulon_to_df(regulon)
      write.csv(regulon_df, file = "regulon.csv", row.names = FALSE)
    } else {
      warning("Regulon object is not a list. Custom transformation needed.")
    }
    EOF

    # Execute the R script
    Rscript corto_analysis.R
    """
}

15
nextflow.config Normal file
View File

@@ -0,0 +1,15 @@
// Pipeline metadata reported by Nextflow for this project.
manifest {
    name = 'corto'
    author = 'omic'
    // Also fetch git submodules when the project is pulled.
    recurseSubmodules = true
    homePage = 'https://gitlab.com/omic/next/registry/tools/corto'
    description = 'Runs the corto R package on a gene expression matrix and centroid list to produce a regulon (.rda and .csv)'
    mainScript = 'main.nf'
    // The leading "!" makes this a hard requirement rather than a warning.
    nextflowVersion = '!>=21.04.3'
    defaultBranch = 'master'
}
// Container runtime settings: run every process in Docker and let Nextflow
// create the temporary directory mounted into the container.
docker.enabled = true
docker.temp = 'auto'

22
test.nf Normal file
View File

@@ -0,0 +1,22 @@
nextflow.enable.dsl=2
// GLOBAL CORTO PARAMS
params.container = 'corto:latest'
// Only the host mount is needed: the job is a CPU-only R script, so requesting
// GPUs would just make the run fail on hosts without the NVIDIA runtime, and
// Nextflow already removes containers after each task by default.
params.containerOptions = '-v /mnt:/mnt'
params.outdir = '/mnt/OmicNAS/private/old/gabe/corto/outputs'
params.project_name = 'test'

// INPUTS
params.gene_expression_matrix = '/mnt/OmicNAS/private/old/gabe/corto/inputs/inmat.rda'
params.centroid_list = '/mnt/OmicNAS/private/old/gabe/corto/inputs/centroids.rda'
params.cnv_data = '/mnt/OmicNAS/private/old/gabe/corto/inputs/cnvmat.rda'

include {CORTO} from './main.nf'

// Smoke test: feed the three example .rda files to the CORTO process.
// checkIfExists makes a wrong or missing path fail at channel creation with a
// clear message instead of later, while the task inputs are being staged.
workflow {
    gene_expression_matrix = Channel.fromPath(params.gene_expression_matrix, checkIfExists: true)
    centroid_list = Channel.fromPath(params.centroid_list, checkIfExists: true)
    cnv_data = Channel.fromPath(params.cnv_data, checkIfExists: true)
    CORTO(gene_expression_matrix, centroid_list, cnv_data)
}