diff --git a/conf/incliva.config b/conf/incliva.config index 6c2a85a5a..0a0912af3 100644 --- a/conf/incliva.config +++ b/conf/incliva.config @@ -3,6 +3,13 @@ params { config_profile_description = 'INCLIVA Health Research Institute profile for nf-core.' config_profile_contact = 'Sheila Zúñiga Trejos - bioinfo@incliva.es' config_profile_url = 'https://www.incliva.es/en/services/platforms/bioinformatics-unit/' + hostname = 'hostname'.execute().text.trim() ?: 'unknown' + max_memory = params.hostname == 'vlinuxcervantes3srv' ? 60.GB + : params.hostname == 'vlinuxcervantes4srv' ? 120.GB + : System.err.println("\nERROR: unknown machine. Update incliva.config on nf-core/configs if you are working on another host.\n") + max_cpus = params.hostname == 'vlinuxcervantes3srv' ? 15 + : params.hostname == 'vlinuxcervantes4srv' ? 19 + : System.err.println("\nERROR: unknown machine. Update incliva.config on nf-core/configs if you are working on another host.\n") warning_message = { System.out.println("WARNING: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") @@ -19,53 +26,9 @@ params { }.call() } -// Function to get hostname -def getHostname() { - def process = 'hostname'.execute() - process.waitFor() - return process.text.trim() -} - -// Function to set singularity path according to which host nextflow is running on - -def hostname = { getHostname() } - -if (hostname == 'vlinuxcervantes3srv') { - System.out.println("\nINFO: working on ${hostname}\n") - - // Resources details - params.max_memory = 60.GB - params.max_cpus = 15 - singularity.cacheDir = "/nfs/home/software/singularity/nf_cacheDir" - - process { - resourceLimits = [ - memory: 60.GB, - cpus: 15, - ] - } - -} else if (hostname == 'vlinuxcervantes4srv') { - System.out.println("\nINFO: working on ${hostname}.\n") - - // Resources details - params.max_memory = 120.GB - params.max_cpus = 19 - singularity.cacheDir = "/nfs/home/software/singularity/nf_cacheDir" - - process { - resourceLimits = [ - memory: 120.GB, - cpus: 19, - ] - } - -} else { - System.err.println("\nERROR: unknown machine. Update incliva.config on nf-core/configs if you are working on another host.\n") -} - // Singularity details singularity { + cacheDir = "/nfs/home/software/singularity/nf_cacheDir" enabled = true autoMounts = true } @@ -74,4 +37,8 @@ singularity { process { executor = 'local' maxRetries = 2 + resourceLimits = params.max_memory && params.max_cpus ? [ + memory: params.max_memory, + cpus: params.max_cpus + ] : System.err.println("\nERROR: unknown machine. Update incliva.config on nf-core/configs if you are working on another host.\n") } diff --git a/conf/kaust.config b/conf/kaust.config index ba84b531c..bd0c39128 100755 --- a/conf/kaust.config +++ b/conf/kaust.config @@ -8,7 +8,7 @@ params { } // Load genome resources and assets hosted by the Bioinformatics team on IBEX cluster -includeConfig 'https://raw.githubusercontent.com/bioinfo-kaust/references/refs/heads/main/configs/genomes.config' +includeConfig (!System.getenv('NXF_OFFLINE') ? "https://raw.githubusercontent.com/bioinfo-kaust/references/refs/heads/main/configs/genomes.config" : "/dev/null") singularity { enabled = true @@ -17,7 +17,7 @@ singularity { // Use existing images from the centralized library, if available libraryDir = "/biocorelab/BIX/resources/singularity/images/" // Download images that are missing from the library to user space - cacheDir = "/ibex/user/$USER/.singularity/nf_images/" + cacheDir = "/ibex/user/${System.getenv('USER')}/.singularity/nf_images/" } process { diff --git a/conf/nci_gadi.config b/conf/nci_gadi.config index bf3e1b08d..1b09d241b 100644 --- a/conf/nci_gadi.config +++ b/conf/nci_gadi.config @@ -4,6 +4,7 @@ params { config_profile_contact = 'Georgie Samaha (@georgiesamaha), Matthew Downton (@mattdton)' config_profile_url = 'https://opus.nci.org.au/display/Help/Gadi+User+Guide' project = System.getenv("PROJECT") + trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') } // Enable use of Singularity to run containers @@ -34,10 +35,9 @@ process { } // Write custom trace file with outputs required for SU calculation -def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') trace { enabled = true overwrite = false - file = "./gadi-nf-core-trace-${trace_timestamp}.txt" + file = "./gadi-nf-core-trace-${params.trace_timestamp}.txt" fields = 'name,status,exit,duration,realtime,cpus,%cpu,memory,%mem,rss' } diff --git a/conf/pipeline/eager/eva.config b/conf/pipeline/eager/eva.config index d96b1306e..b9ca79489 100644 --- a/conf/pipeline/eager/eva.config +++ b/conf/pipeline/eager/eva.config @@ -1,5 +1,10 @@ // Profile config names for nf-core/configs +validation.ignoreParams = [ + 'eager_eva_check_max_func' +] + + params { // Specific nf-core/configs params config_profile_contact = 'James Fellows Yates (@jfy133)' @@ -21,44 +26,44 @@ process { clusterOptions = { "-S /bin/bash -V -j y -o output.log -l h_vmem=${task.memory.toGiga()}G" } withLabel: sc_tiny { - cpus = { check_max(1, 'cpus') } - memory = { check_max(1.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(1, 'cpus') } + memory = { params.eager_eva_check_max_func(1.GB * task.attempt, 'memory') } time = '365.d' } withLabel: sc_small { - cpus = { check_max(1, 'cpus') } - memory = { check_max(4.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(1, 'cpus') } + memory = { params.eager_eva_check_max_func(4.GB * task.attempt, 'memory') } time = '365.d' } withLabel: sc_medium { - cpus = { check_max(1, 'cpus') } - memory = { check_max(8.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(1, 'cpus') } + memory = { params.eager_eva_check_max_func(8.GB * task.attempt, 'memory') } time = '365.d' } withLabel: mc_small { - cpus = { check_max(2, 'cpus') } - memory = { check_max(4.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(2, 'cpus') } + memory = { params.eager_eva_check_max_func(4.GB * task.attempt, 'memory') } time = '365.d' } withLabel: mc_medium { - cpus = { check_max(4, 'cpus') } - memory = { check_max(8.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(4, 'cpus') } + memory = { params.eager_eva_check_max_func(8.GB * task.attempt, 'memory') } time = '365.d' } withLabel: mc_large { - cpus = { check_max(8, 'cpus') } - memory = { check_max(16.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(8, 'cpus') } + memory = { params.eager_eva_check_max_func(16.GB * task.attempt, 'memory') } time = '365.d' } withLabel: mc_huge { - cpus = { check_max(32, 'cpus') } - memory = { check_max(256.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(32, 'cpus') } + memory = { params.eager_eva_check_max_func(256.GB * task.attempt, 'memory') } time = '365.d' } @@ -115,7 +120,7 @@ process { withName: markduplicates { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" } - memory = { check_max(20.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(20.GB * task.attempt, 'memory') } errorStrategy = { task.exitStatus in [1, 143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' } } @@ -129,7 +134,7 @@ process { withName: additional_library_merge { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" } - memory = { check_max(4.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(4.GB * task.attempt, 'memory') } } withName: metagenomic_complexity_filter { @@ -138,8 +143,8 @@ process { withName: malt { clusterOptions = { "-S /bin/bash -V -l h_vmem=1000G" } - cpus = { check_max(32, 'cpus') } - memory = { check_max(955.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(32, 'cpus') } + memory = { params.eager_eva_check_max_func(955.GB * task.attempt, 'memory') } } withName: maltextract { @@ -169,7 +174,7 @@ process { withName: damageprofiler { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" } - memory = { check_max(8.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(8.GB * task.attempt, 'memory') } errorStrategy = { task.exitStatus in [1, 143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' } } @@ -237,38 +242,38 @@ profiles { clusterOptions = { "-S /bin/bash -V -j y -o output.log -l h_vmem=${task.memory.toGiga()}G" } withLabel: sc_tiny { - cpus = { check_max(1, 'cpus') } - memory = { check_max(1.5.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(1, 'cpus') } + memory = { params.eager_eva_check_max_func(1.5.GB * task.attempt, 'memory') } } withLabel: sc_small { - cpus = { check_max(1, 'cpus') } - memory = { check_max(6.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(1, 'cpus') } + memory = { params.eager_eva_check_max_func(6.GB * task.attempt, 'memory') } } withLabel: sc_medium { - cpus = { check_max(1, 'cpus') } - memory = { check_max(12.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(1, 'cpus') } + memory = { params.eager_eva_check_max_func(12.GB * task.attempt, 'memory') } } withLabel: mc_small { - cpus = { check_max(2, 'cpus') } - memory = { check_max(6.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(2, 'cpus') } + memory = { params.eager_eva_check_max_func(6.GB * task.attempt, 'memory') } } withLabel: mc_medium { - cpus = { check_max(4, 'cpus') } - memory = { check_max(12.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(4, 'cpus') } + memory = { params.eager_eva_check_max_func(12.GB * task.attempt, 'memory') } } withLabel: mc_large { - cpus = { check_max(8, 'cpus') } - memory = { check_max(24.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(8, 'cpus') } + memory = { params.eager_eva_check_max_func(24.GB * task.attempt, 'memory') } } withLabel: mc_huge { - cpus = { check_max(32, 'cpus') } - memory = { check_max(256.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(32, 'cpus') } + memory = { params.eager_eva_check_max_func(256.GB * task.attempt, 'memory') } } // Fixes for SGE and Java incompatibility due to (and also some samtools?!) using more memory than you tell it to use @@ -308,7 +313,7 @@ profiles { withName: markduplicates { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 3)}G" } - memory = { check_max(32.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(32.GB * task.attempt, 'memory') } errorStrategy = { task.exitStatus in [1, 143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' } } @@ -322,7 +327,7 @@ profiles { withName: additional_library_merge { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 3)}G" } - memory = { check_max(4.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(4.GB * task.attempt, 'memory') } } withName: metagenomic_complexity_filter { @@ -331,13 +336,13 @@ profiles { withName: malt { clusterOptions = { "-S /bin/bash -V -l h_vmem=1000G" } - cpus = { check_max(32, 'cpus') } - memory = { check_max(955.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(32, 'cpus') } + memory = { params.eager_eva_check_max_func(955.GB * task.attempt, 'memory') } errorStrategy = { task.exitStatus in [1, 143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' } } withName: hostremoval_input_fastq { - memory = { check_max(32.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(32.GB * task.attempt, 'memory') } } withName: maltextract { @@ -367,7 +372,7 @@ profiles { withName: damageprofiler { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 3)}G" } - memory = { check_max(16.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(16.GB * task.attempt, 'memory') } errorStrategy = { task.exitStatus in [1, 143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' } } @@ -418,38 +423,38 @@ profiles { clusterOptions = { "-S /bin/bash -V -j y -o output.log -l h_vmem=${task.memory.toGiga()}G" } withLabel: sc_tiny { - cpus = { check_max(1, 'cpus') } - memory = { check_max(2.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(1, 'cpus') } + memory = { params.eager_eva_check_max_func(2.GB * task.attempt, 'memory') } } withLabel: sc_small { - cpus = { check_max(1, 'cpus') } - memory = { check_max(8.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(1, 'cpus') } + memory = { params.eager_eva_check_max_func(8.GB * task.attempt, 'memory') } } withLabel: sc_medium { - cpus = { check_max(1, 'cpus') } - memory = { check_max(16.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(1, 'cpus') } + memory = { params.eager_eva_check_max_func(16.GB * task.attempt, 'memory') } } withLabel: mc_small { - cpus = { check_max(2, 'cpus') } - memory = { check_max(8.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(2, 'cpus') } + memory = { params.eager_eva_check_max_func(8.GB * task.attempt, 'memory') } } withLabel: mc_medium { - cpus = { check_max(4, 'cpus') } - memory = { check_max(16.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(4, 'cpus') } + memory = { params.eager_eva_check_max_func(16.GB * task.attempt, 'memory') } } withLabel: mc_large { - cpus = { check_max(8, 'cpus') } - memory = { check_max(32.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(8, 'cpus') } + memory = { params.eager_eva_check_max_func(32.GB * task.attempt, 'memory') } } withLabel: mc_huge { - cpus = { check_max(32, 'cpus') } - memory = { check_max(512.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(32, 'cpus') } + memory = { params.eager_eva_check_max_func(512.GB * task.attempt, 'memory') } } // Fixes for SGE and Java incompatibility due to Java using more memory than you tell it to use @@ -489,13 +494,13 @@ profiles { withName: markduplicates { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 6)}G" } - memory = { check_max(48.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(48.GB * task.attempt, 'memory') } errorStrategy = { task.exitStatus in [1, 143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' } } withName: library_merge { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 6)}G" } - memory = { check_max(6.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(6.GB * task.attempt, 'memory') } } withName: seqtype_merge { @@ -504,11 +509,11 @@ profiles { withName: additional_library_merge { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 6)}G" } - memory = { check_max(6.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(6.GB * task.attempt, 'memory') } } withName: hostremoval_input_fastq { - memory = { check_max(32.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(32.GB * task.attempt, 'memory') } } withName: metagenomic_complexity_filter { @@ -517,8 +522,8 @@ profiles { withName: malt { clusterOptions = { "-S /bin/bash -V -l h_vmem=1000G" } - cpus = { check_max(32, 'cpus') } - memory = { check_max(955.GB * task.attempt, 'memory') } + cpus = { params.eager_eva_check_max_func(32, 'cpus') } + memory = { params.eager_eva_check_max_func(955.GB * task.attempt, 'memory') } errorStrategy = { task.exitStatus in [1, 143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' } } @@ -549,7 +554,7 @@ profiles { withName: damageprofiler { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 6)}G" } - memory = { check_max(32.GB * task.attempt, 'memory') } + memory = { params.eager_eva_check_max_func(32.GB * task.attempt, 'memory') } errorStrategy = { task.exitStatus in [1, 143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' } } @@ -627,31 +632,31 @@ profiles { // Function to ensure that resource requirements don't go beyond // a maximum limit // FOR DSL1 PIPELINE ONLY! -def check_max(obj, type) { +params.eager_eva_check_max_func = { obj, type -> if (type == 'memory') { try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit + if (obj.compareTo(params.max_memory as MemoryUnit) == 1) + return params.max_memory as MemoryUnit else return obj - } catch (all) { + } catch (_) { println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" return obj } } else if (type == 'time') { try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration + if (obj.compareTo(params.max_time as Duration) == 1) + return params.max_time as Duration else return obj - } catch (all) { + } catch (_) { println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" return obj } } else if (type == 'cpus') { try { return Math.min( obj, params.max_cpus as int ) - } catch (all) { + } catch (_) { println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" return obj } diff --git a/conf/pipeline/eager/maestro.config b/conf/pipeline/eager/maestro.config index 3db94b099..8ffca42a6 100644 --- a/conf/pipeline/eager/maestro.config +++ b/conf/pipeline/eager/maestro.config @@ -5,6 +5,12 @@ * nextflow run nf-core/eager -profile maestro,,maestro, (where is long or normal and is nuclear, mitocondrial or unlimitedtime) */ +validation { + ignoreParams = [ + 'eager_maestro_check_max_func' + ] +} + params { config_profile_name = 'nf-core/eager nuclear/mitocondrial - human profiles' config_profile_description = "Simple profiles for assessing computational ressources that fit human nuclear dna, human mitogenomes processing. unlimitedtime is also available " @@ -32,41 +38,41 @@ profiles { maxRetries = 2 withName: makeBWAIndex { - cpus = { check_max(8 * task.attempt, 'cpus') } - memory = { check_max(8.GB * task.attempt, 'memory') } - time = { check_max(12.h * task.attempt, 'time') } + cpus = { params.eager_maestro_check_max_func(8 * task.attempt, 'cpus') } + memory = { params.eager_maestro_check_max_func(8.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(12.h * task.attempt, 'time') } } withName: adapter_removal { - cpus = { check_max(8 * task.attempt, 'cpus') } - memory = { check_max(16.GB * task.attempt, 'memory') } - time = { check_max(12.h * task.attempt, 'time') } + cpus = { params.eager_maestro_check_max_func(8 * task.attempt, 'cpus') } + memory = { params.eager_maestro_check_max_func(16.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(12.h * task.attempt, 'time') } } withName: bwa { - cpus = { check_max(40 * task.attempt, 'cpus') } - memory = { check_max(40.GB * task.attempt, 'memory') } + cpus = { params.eager_maestro_check_max_func(40 * task.attempt, 'cpus') } + memory = { params.eager_maestro_check_max_func(40.GB * task.attempt, 'memory') } time = 24.h cache = 'deep' } withName: markduplicates { errorStrategy = { task.exitStatus in [143, 137, 104, 134, 139] ? 'retry' : 'finish' } - cpus = { check_max(16 * task.attempt, 'cpus') } - memory = { check_max(16.GB * task.attempt, 'memory') } - time = { check_max(12.h * task.attempt, 'time') } + cpus = { params.eager_maestro_check_max_func(16 * task.attempt, 'cpus') } + memory = { params.eager_maestro_check_max_func(16.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(12.h * task.attempt, 'time') } } withName: damageprofiler { cpus = 1 - memory = { check_max(8.GB * task.attempt, 'memory') } - time = { check_max(6.h * task.attempt, 'time') } + memory = { params.eager_maestro_check_max_func(8.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(6.h * task.attempt, 'time') } } withName: fastp { cpus = 8 - memory = { check_max(8.GB * task.attempt, 'memory') } - time = { check_max(6.h * task.attempt, 'time') } + memory = { params.eager_maestro_check_max_func(8.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(6.h * task.attempt, 'time') } } withName: fastqc { cpus = 2 - memory = { check_max(8.GB * task.attempt, 'memory') } - time = { check_max(6.h * task.attempt, 'time') } + memory = { params.eager_maestro_check_max_func(8.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(6.h * task.attempt, 'time') } } } } @@ -77,40 +83,40 @@ profiles { maxRetries = 2 withName: makeBWAIndex { - cpus = { check_max(8 * task.attempt, 'cpus') } - memory = { check_max(8.GB * task.attempt, 'memory') } - time = { check_max(12.h * task.attempt, 'time') } + cpus = { params.eager_maestro_check_max_func(8 * task.attempt, 'cpus') } + memory = { params.eager_maestro_check_max_func(8.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(12.h * task.attempt, 'time') } } withName: adapter_removal { - cpus = { check_max(8 * task.attempt, 'cpus') } - memory = { check_max(16.GB * task.attempt, 'memory') } - time = { check_max(12.h * task.attempt, 'time') } + cpus = { params.eager_maestro_check_max_func(8 * task.attempt, 'cpus') } + memory = { params.eager_maestro_check_max_func(16.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(12.h * task.attempt, 'time') } } withName: bwa { - cpus = { check_max(5 * task.attempt, 'cpus') } - memory = { check_max(5.GB * task.attempt, 'memory') } + cpus = { params.eager_maestro_check_max_func(5 * task.attempt, 'cpus') } + memory = { params.eager_maestro_check_max_func(5.GB * task.attempt, 'memory') } time = 24.h } withName: markduplicates { errorStrategy = { task.exitStatus in [143, 137, 104, 134, 139] ? 'retry' : 'finish' } - cpus = { check_max(5 * task.attempt, 'cpus') } - memory = { check_max(5.GB * task.attempt, 'memory') } - time = { check_max(6.h * task.attempt, 'time') } + cpus = { params.eager_maestro_check_max_func(5 * task.attempt, 'cpus') } + memory = { params.eager_maestro_check_max_func(5.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(6.h * task.attempt, 'time') } } withName: damageprofiler { cpus = 1 - memory = { check_max(5.GB * task.attempt, 'memory') } - time = { check_max(3.h * task.attempt, 'time') } + memory = { params.eager_maestro_check_max_func(5.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(3.h * task.attempt, 'time') } } withName: fastp { cpus = 8 - memory = { check_max(5.GB * task.attempt, 'memory') } - time = { check_max(3.h * task.attempt, 'time') } + memory = { params.eager_maestro_check_max_func(5.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(3.h * task.attempt, 'time') } } withName: fastqc { cpus = 2 - memory = { check_max(8.GB * task.attempt, 'memory') } - time = { check_max(6.h * task.attempt, 'time') } + memory = { params.eager_maestro_check_max_func(8.GB * task.attempt, 'memory') } + time = { params.eager_maestro_check_max_func(6.h * task.attempt, 'time') } } } } @@ -128,31 +134,31 @@ profiles { // Function to ensure that resource requirements don't go beyond // a maximum limit // FOR DSL1 PIPELINE ONLY! -def check_max(obj, type) { +params.eager_maestro_check_max_func = { obj, type -> if (type == 'memory') { try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit + if (obj.compareTo(params.max_memory as MemoryUnit) == 1) + return params.max_memory as MemoryUnit else return obj - } catch (all) { + } catch (_) { println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" return obj } } else if (type == 'time') { try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration + if (obj.compareTo(params.max_time as Duration) == 1) + return params.max_time as Duration else return obj - } catch (all) { + } catch (_) { println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" return obj } } else if (type == 'cpus') { try { return Math.min( obj, params.max_cpus as int ) - } catch (all) { + } catch (_) { println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" return obj } diff --git a/conf/pipeline/methylseq/ku_sund_danhead.config b/conf/pipeline/methylseq/ku_sund_danhead.config index 872cab827..1e1ebd95e 100644 --- a/conf/pipeline/methylseq/ku_sund_danhead.config +++ b/conf/pipeline/methylseq/ku_sund_danhead.config @@ -11,6 +11,6 @@ process { stageInMode = 'copy' } withName: 'NFCORE_METHYLSEQ:METHYLSEQ:BISMARK:BISMARK_ALIGN' { - multicore = 1 + ext.args = '--multicore 1' } } diff --git a/conf/pipeline/proteinfold/nci_gadi.config b/conf/pipeline/proteinfold/nci_gadi.config index f07999c01..b82bb4620 100644 --- a/conf/pipeline/proteinfold/nci_gadi.config +++ b/conf/pipeline/proteinfold/nci_gadi.config @@ -12,8 +12,6 @@ profiles { // Define process resource limits process { executor = 'pbspro' - project = System.getenv("PROJECT") - storage = params.storage_account?.trim() ? params.storage_account : "scratch/${params.project}+gdata/${params.project}" module = 'singularity' cache = 'lenient' stageInMode = 'symlink' @@ -22,7 +20,6 @@ profiles { withName: 'RUN_ALPHAFOLD2|RUN_ALPHAFOLD2_PRED|RUN_ALPHAFOLD2_MSA' { queue = params.use_gpu ? 'gpuvolta' : 'normal' cpus = 48 - gpus = 4 time = '4h' memory = 380.GB } @@ -31,7 +28,6 @@ profiles { container = "nf-core/proteinfold_colabfold:1.1.1" queue = params.use_gpu ? 'gpuvolta' : 'normal' cpus = 48 - gpus = 4 time = '4h' memory = 380.GB } @@ -40,18 +36,16 @@ profiles { container = "nf-core/proteinfold_esmfold:1.1.1" queue = params.use_gpu ? 'gpuvolta' : 'normal' cpus = 48 - gpus = 4 time = '4h' memory = 380.GB } } // Write custom trace file with outputs required for SU calculation - def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') trace { enabled = true overwrite = false - file = "./gadi-nf-core-trace-${trace_timestamp}.txt" + file = "./gadi-nf-core-trace-${new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')}.txt" fields = 'name,status,exit,duration,realtime,cpus,%cpu,memory,%mem,rss' } } diff --git a/conf/pipeline/sarek/cfc.config b/conf/pipeline/sarek/cfc.config index b7db254a5..db1c39d4e 100644 --- a/conf/pipeline/sarek/cfc.config +++ b/conf/pipeline/sarek/cfc.config @@ -1,5 +1,7 @@ // Profile config names for nf-core/configs +validation.ignoreParams = ['sarek_cfc_check_resource_func'] + params { // Specific nf-core/configs params config_profile_contact = 'Friederike Hanssen (@FriederikeHanssen)' diff --git a/conf/pipeline/sarek/icr_davros.config b/conf/pipeline/sarek/icr_davros.config index 5e5607581..4caaff472 100644 --- a/conf/pipeline/sarek/icr_davros.config +++ b/conf/pipeline/sarek/icr_davros.config @@ -6,10 +6,6 @@ process { errorStrategy = { task.exitStatus in [104, 134, 137, 139, 141, 143, 255] ? 'retry' : 'finish' } maxRetries = 5 - withName: MapReads { - memory = { check_resource(12.GB) } - time = { check_resource(48.h * task.attempt) } - } } params { diff --git a/conf/sage.config b/conf/sage.config index 148b1c29f..5cc35c2ba 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -12,6 +12,9 @@ params { max_time = 240.h single_cpu_mem = 6.GB + // Define dynamic scaling factors + exit_status_scaling = [143,137,104,134,139,247] + warning_message = { System.out.println("WARNING: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") System.out.println("WARNING:") @@ -60,47 +63,38 @@ process { // Enable retries globally for certain exit codes errorStrategy = { task.attempt <= 5 ? 'retry' : 'finish' } - cpus = { 1 * factor(task, 2) } - memory = { 6.GB * factor(task, 1) } - time = { 24.h * factor(task, 1) } + cpus = { task.exitStatus in params.exit_status_scaling ? Math.ceil(task.attempt/2) : 1 } + memory = { 6.GB * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } + time = { 24.h * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } // Process-specific resource requirements withLabel: process_single { - cpus = { 1 * factor(task, 2) } - memory = { 6.GB * factor(task, 1) } - time = { 24.h * factor(task, 1) } + cpus = { task.exitStatus in params.exit_status_scaling ? Math.ceil(task.attempt/2) : 1 } + memory = { 6.GB * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } + time = { 24.h * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } } withLabel: process_low { - cpus = { 2 * factor(task, 2) } - memory = { 12.GB * factor(task, 1) } - time = { 24.h * factor(task, 1) } + cpus = { 2 * (task.exitStatus in params.exit_status_scaling ? Math.ceil(task.attempt/2) : 1) } + memory = { 12.GB * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } + time = { 24.h * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } } withLabel: process_medium { - cpus = { 8 * factor(task, 2) } - memory = { 32.GB * factor(task, 1) } - time = { 48.h * factor(task, 1) } + cpus = { 8 * (task.exitStatus in params.exit_status_scaling ? Math.ceil(task.attempt/2) : 1) } + memory = { 32.GB * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } + time = { 48.h * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } } withLabel: process_high { - cpus = { 16 * factor(task, 2) } - memory = { 64.GB * factor(task, 1) } - time = { 96.h * factor(task, 1) } + cpus = { 16 * (task.exitStatus in params.exit_status_scaling ? Math.ceil(task.attempt/2) : 1) } + memory = { 64.GB * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } + time = { 96.h * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } } withLabel: process_long { - time = { 96.h * factor(task, 1) } + time = { 96.h * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } } withLabel: 'process_high_memory|memory_max' { - memory = { 128.GB * factor(task, 1) } + memory = { 128.GB * (task.exitStatus in params.exit_status_scaling ? task.attempt : 1) } } withLabel: cpus_max { - cpus = { 32 * factor(task, 2) } - } -} - -// Function to finely control the increase of the resource allocation -def factor(task, slow_factor = 1) { - if ( task.exitStatus in [143,137,104,134,139,247] ) { - return Math.ceil( task.attempt / slow_factor) as int - } else { - return 1 as int + cpus = { 32 * (task.exitStatus in params.exit_status_scaling ? Math.ceil(task.attempt/2) : 1) } } } diff --git a/conf/seadragon.config b/conf/seadragon.config index 5f3843a07..946655393 100644 --- a/conf/seadragon.config +++ b/conf/seadragon.config @@ -1,10 +1,16 @@ -//Profile config names for nf-core/configs +// Profile config names for nf-core/configs params { config_profile_description = 'University of Texas at MD Anderson HPC cluster profile provided by nf-core/configs' config_profile_contact = 'Jiawei Gu' config_profile_contact_github = '@jiawku' config_profile_contact_email = 'jiawei.gu@mdanderson.org' config_profile_url = 'https://hpcweb.mdanderson.edu/' + + // Define dynamic scaling factors + max_memory = 3900.GB // Maximum memory based on evhighmem nodes + max_cpus = 80 // Maximum CPUs based on E80 nodes + max_time = 504.h // Maximum runtime for evlong queues + igenomes_base = '/rsrch3/scratch/reflib/REFLIB_data/AWS-iGenomes' } env { @@ -33,12 +39,11 @@ executor { jobName = { "${task.process.split(':').last()}" } } - process { resourceLimits = [ memory: 3900.GB, // Max memory based on vhighmem node - cpus: 80, // Max CPUs based on E80 node - time: 504.h // Max time for long queues + cpus: 80, // Max CPUs based on E80 node + time: 504.h // Max time for long queues ] executor = 'lsf' // Use LSF executor @@ -71,11 +76,4 @@ process { } } -params { - max_memory = 3900.GB // Maximum memory based on nodes - max_cpus = 80 // Maximum CPUs based on E80 nodes - max_time = 504.h // Maximum runtime for evlong queues - igenomes_base = '/rsrch3/scratch/reflib/REFLIB_data/AWS-iGenomes' -} - cleanup = true diff --git a/conf/utd_ganymede.config b/conf/utd_ganymede.config index 5869eba65..ad4a319bb 100644 --- a/conf/utd_ganymede.config +++ b/conf/utd_ganymede.config @@ -1,4 +1,4 @@ -//Profile config names for nf-core/configs +// Profile config names for nf-core/configs params { config_profile_description = 'University of Texas at Dallas HPC cluster profile provided by nf-core/configs' config_profile_contact = 'Edmund Miller' @@ -8,32 +8,14 @@ params { } env { - SINGULARITY_CACHEDIR="/home/$USER/scratch/singularity" + SINGULARITY_CACHEDIR="/home/${System.getenv('USER')}/scratch/singularity" } singularity { enabled = true envWhitelist='SINGULARITY_BINDPATH,LD_LIBRARY_PATH' autoMounts = true - cacheDir = "/home/$USER/scratch/singularity" -} - -def membership = "groups".execute().text -def genomics_queue_memory = 28.GB - -def select_queue = { memory, cpu -> - if (memory <= genomics_queue_memory && cpu <= 16 && membership.contains('genomics')) { - return 'genomics,normal' - } - if (memory > genomics_queue_memory && memory <= 125.GB && cpu <= 12 && membership.contains('kim')) { - return 'Kim,128s' - } - if (memory > genomics_queue_memory && memory <= 125.GB && cpu <= 16) { - return '128s' - } - if (memory <= 250.GB && cpu <= 28) { - return '256i,256h' - } + cacheDir = "/home/${System.getenv('USER')}/scratch/singularity" } // Submit up to 100 concurrent jobs @@ -54,7 +36,13 @@ process { ] beforeScript = 'module load singularity/3.2.1' executor = 'slurm' - queue = { select_queue(task.memory, task.cpu) } + queue = { + if (task.memory <= 28.GB && task.cpus <= 16 && "groups".execute().text.contains('genomics')) return 'genomics,normal' + if (task.memory > 28.GB && task.memory <= 125.GB && task.cpus <= 12 && "groups".execute().text.contains('kim')) return 'Kim,128s' + if (task.memory > 28.GB && task.memory <= params.queue_128s.memory && task.cpus <= 16) return '128s' + if (task.memory <= params.queue_256.memory && task.cpus <= 28) return '256i,256h' + throw new IllegalArgumentException("No matching queue for memory=${task.memory}, cpu=${task.cpus}") + } withLabel:process_medium { cpus = 16 diff --git a/conf/utd_juno.config b/conf/utd_juno.config index 373e5f6f1..d9588b6eb 100644 --- a/conf/utd_juno.config +++ b/conf/utd_juno.config @@ -1,3 +1,4 @@ +// Profile config names for nf-core/configs params { config_profile_description = 'University of Texas at Dallas HPC cluster profile provided by nf-core/configs' config_profile_contact = 'Anne Fu' @@ -7,54 +8,13 @@ params { } env { - SINGULARITY_CACHEDIR="/home/$USER/scratch/singularity" + SINGULARITY_CACHEDIR="/home/${System.getenv('USER')}/scratch/singularity" } - -def select_queue_and_flags = { cpus, memory, time, accelerator -> - var accelerator_count = 0; - var accelerator_ty = null; - if (accelerator != null) { - if (accelerator instanceof Number) { - accelerator_count = accelerator; - } else if (accelerator instanceof Map) { - accelerator_ty = accelerator.type; - accelerator_count = accelerator.limit; - } else { - throw new IllegalArgumentException("Invalid `accelerator` directive value: $accelerator [${accelerator.getClass().getName()}]") - } - } - - if (accelerator_count > 0) { - if (["h100", "nvidia_h100", "nvidia-h100", "nvidia_h100_80gb_hbm3"].contains(accelerator_ty)) { - // if this is a long task with a single accelarator, probably need the whole GPU - if (accelerator_count == 1 && time >= 2.h) { - return ['queue': 'h100', 'flags': "--gres=nvidia_h100_80gb_hbm3=${accelerator_count}"]; - } else { - return ['queue': 'h100-2.40gb', 'flags': "--gres=nvidia_h100_80gb_hbm3=${accelerator_count}"]; - } - } - - if (["a30", "nvidia_a30", "nvidia-a30", "nvidia_a30_2g", "nvidia_a30_4g"].contains(accelerator_ty)) { - if (accelerator_count >= 4) { - return ['queue': 'a30-4.6gb', 'flags': "--gres=nvidia_a30=${accelerator_count}"]; - } else if (accelerator_count >= 2) { - return ['queue': 'a30-2.12gb', 'flags': "--gres=nvidia_a30_2g=${accelerator_count}"]; - } else if (accelerator_count >= 1) { - return ['queue': 'a30', 'flags': "--gres=nvidia_a30_1g=${accelerator_count}"]; - } - } - } - - if (memory >= 384.GB && memory <= 512.GB) { - return ['queue': 'h100', 'flags': ""]; - } - - if (memory >= 512.GB) { - return ['queue': 'a30', 'flags': ""]; - } - - return ['queue': 'normal', 'flags': '']; +singularity { + enabled = true + autoMounts = true + cacheDir = "/home/${System.getenv('USER')}/scratch/singularity" } executor { @@ -64,19 +24,49 @@ executor { jobName = { "${task.process.split(':').last()}" } } -singularity { - enabled = true - autoMounts = true - cacheDir = "/home/$USER/scratch/singularity" -} - process { executor = 'slurm' queue = { - select_queue_and_flags(task.cpus, task.memory, task.time, task.get('accelerator')).queue + if (task.get('accelerator')?.type in ['h100', 'nvidia_h100', 'nvidia-h100', 'nvidia_h100_80gb_hbm3']) { + if (task.get('accelerator')?.limit == 1 && task.time >= 2.h) { + return 'h100' + } + return 'h100-2.40gb' + } + if (task.get('accelerator')?.type in ['a30', 'nvidia_a30', 'nvidia-a30', 'nvidia_a30_2g', 'nvidia_a30_4g']) { + if (task.get('accelerator')?.limit >= 4) { + return 'a30-4.6gb' + } + if (task.get('accelerator')?.limit >= 2) { + return 'a30-2.12gb' + } + return 'a30' + } + if (task.memory >= 384.GB && task.memory <= 512.GB) { + return 'h100' + } + if (task.memory >= 512.GB) { + return 'a30' + } + return 'normal' } clusterOptions = { - select_queue_and_flags(task.cpus, task.memory, task.time, task.get('accelerator')).flags + if (task.get('accelerator')?.type in ['h100', 'nvidia_h100', 'nvidia-h100', 'nvidia_h100_80gb_hbm3']) { + if (task.get('accelerator')?.limit == 1 && task.time >= 2.h) { + return "--gres=nvidia_h100_80gb_hbm3=${task.get('accelerator')?.limit}" + } + return "--gres=nvidia_h100_80gb_hbm3=${task.get('accelerator')?.limit}" + } + if (task.get('accelerator')?.type in ['a30', 'nvidia_a30', 'nvidia-a30', 'nvidia_a30_2g', 'nvidia_a30_4g']) { + if (task.get('accelerator')?.limit >= 4) { + return "--gres=nvidia_a30=${task.get('accelerator')?.limit}" + } + if (task.get('accelerator')?.limit >= 2) { + return "--gres=nvidia_a30_2g=${task.get('accelerator')?.limit}" + } + return "--gres=nvidia_a30_1g=${task.get('accelerator')?.limit}" + } + return '' } withLabel: gpu { containerOptions = '--nv' diff --git a/conf/vsc_kul_uhasselt.config b/conf/vsc_kul_uhasselt.config index 152e2028c..9ccd2b059 100644 --- a/conf/vsc_kul_uhasselt.config +++ b/conf/vsc_kul_uhasselt.config @@ -1,9 +1,21 @@ +// Ignore configuration parameters in validation +validation { + ignoreParams = [ + 'vsc_kul_hasselt_scratchdir', + 'vsc_kul_hasselt_tier2_project', + 'vsc_kul_hasselt_available_queues', + 'vsc_kul_hasselt_publish_location', + 'vsc_kul_hasselt_time_threshold', + 'vsc_kul_hasselt_memory_threshold_genius', + 'vsc_kul_hasselt_memory_threshold_wice', + ] +} + // Default to /tmp directory if $VSC_SCRATCH scratch env is not available, // see: https://github.com/nf-core/configs?tab=readme-ov-file#adding-a-new-config -def SCRATCH_DIR = System.getenv("VSC_SCRATCH") ?: "/tmp" -def TIER2_PROJECT = System.getenv("SLURM_ACCOUNT") ?: null -def DEDICATED_QUEUES = System.getenv("VSC_DEDICATED_QUEUES") ?: "" -def AVAILABLE_QUEUES = DEDICATED_QUEUES.toString().split(',') +params.vsc_kul_hasselt_scratchdir = System.getenv("VSC_SCRATCH") ?: "/tmp" +params.vsc_kul_hasselt_tier2_project = System.getenv("SLURM_ACCOUNT") ?: null +params.vsc_kul_hasselt_available_queues = (System.getenv("VSC_DEDICATED_QUEUES") ?: "").toString().split(',') // Perform work directory cleanup when the run has succesfully completed // cleanup = true @@ -29,7 +41,7 @@ process { singularity { enabled = true autoMounts = true - cacheDir = "$SCRATCH_DIR/.singularity" + cacheDir = "${params.vsc_kul_hasselt_scratchdir}/.singularity" pullTimeout = "30 min" } @@ -38,28 +50,28 @@ params { config_profile_url = 'https://docs.vscentrum.be/en/latest/index.html' } - -def publishLocation = params.get('outdir', "$launchDir") - -def co2_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') +params.vsc_kul_hasselt_publish_location = params.get('outdir', "$launchDir") +params.vsc_kul_hasselt_co2_timestamp = params.containsKey('trace_report_suffix') ? params.trace_report_suffix : new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') co2footprint { - traceFile = "${publishLocation}/pipeline_info/co2footprint_trace_${co2_timestamp}.txt" - summaryFile = "${publishLocation}/pipeline_info/co2footprint_summary_${co2_timestamp}.txt" - reportFile = "${publishLocation}/pipeline_info/co2footprint_report_${co2_timestamp}.html" + traceFile = "${params.vsc_kul_hasselt_publish_location}/pipeline_info/co2footprint_trace_${params.vsc_kul_hasselt_co2_timestamp}.txt" + summaryFile = "${params.vsc_kul_hasselt_publish_location}/pipeline_info/co2footprint_summary_${params.vsc_kul_hasselt_co2_timestamp}.txt" + reportFile = "${params.vsc_kul_hasselt_publish_location}/pipeline_info/co2footprint_report_${params.vsc_kul_hasselt_co2_timestamp}.html" location = 'BE' //pue = 1.33 // replace with PUE of your data center machineType = 'compute cluster' // set to 'compute cluster', 'local', or 'cloud' } env { - APPTAINER_TMPDIR="$SCRATCH_DIR/.apptainer/tmp" - APPTAINER_CACHEDIR="$SCRATCH_DIR/.apptainer/cache" + APPTAINER_TMPDIR="${params.vsc_kul_hasselt_scratchdir}/.apptainer/tmp" + APPTAINER_CACHEDIR="${params.vsc_kul_hasselt_scratchdir}/.apptainer/cache" } // AWS maximum retries for errors (This way the pipeline doesn't fail if the download fails one time) aws { - maxErrorRetry = 3 + client { + maxErrorRetry = 3 + } } /* @@ -72,127 +84,13 @@ aws { /* * Constants: * ---------- - * TIME_THRESHOLD: 72 hours - Threshold for determining long-running jobs - * MEMORY_THRESHOLD (GENIUS): 175GB - Memory threshold for bigmem queues - * MEMORY_THRESHOLD (WICE): 239GB - Memory threshold for high-memory queues -*/ -def TIME_THRESHOLD = 72.h -def MEMORY_THRESHOLD_GENIUS = 175.GB -def MEMORY_THRESHOLD_WICE = 239.GB - -/* - * --------- - * Functions: - * ---------- - * These functions are designed to select the appropriate HPC queues of - * VSC_KUL_UHASSELT based on task requirements. They handle both standard - * and GPU queues, considering memory requirements, execution time, and - * queue availability. -*/ - -/* - * limitTaskTime(time, maxTime) - * Ensures task time doesn't exceed the maximum allowed time - * @param time Current task time - * @param maxTime Maximum allowed time - * @return Limited task time -*/ -def limitTaskTime(time, maxTime) { - return time > maxTime ? maxTime : time -} - -/* - * determineGeniusQueue(task) - * Selects appropriate CPU queue for GENIUS cluster - * @param task Nextflow task object containing memory and time requirements - * @return Queue name based on task requirements -*/ -def determineGeniusQueue = { task -> - def isHighMemory = task.memory >= MEMORY_THRESHOLD_GENIUS - def isLongRunning = task.time >= TIME_THRESHOLD - def hasDedicatedBigmem = AVAILABLE_QUEUES.contains('dedicated_big_bigmem') - - if (isHighMemory) { - return isLongRunning ? - (hasDedicatedBigmem ? 'dedicated_big_bigmem' : 'bigmem_long') : - 'bigmem' - } - - return isLongRunning ? 'batch_long' : 'batch' -} - -/* - * determineGeniusGpuQueue(task) - * Selects appropriate GPU queue for GENIUS cluster - * @param task Nextflow task object containing memory and time requirements - * @return GPU queue name based on task requirements -*/ -def determineGeniusGpuQueue = { task -> - def isHighMemory = task.memory >= MEMORY_THRESHOLD_GENIUS - def isLongRunning = task.time >= TIME_THRESHOLD - def hasDedicatedGpu = AVAILABLE_QUEUES.contains('dedicated_rega_gpu') - def hasAmdGpu = AVAILABLE_QUEUES.contains('amd') - - if (isHighMemory) { - return isLongRunning ? 'gpu_v100_long' : 'gpu_v100' - } - - if (isLongRunning) { - if (hasDedicatedGpu) return 'dedicated_rega_gpu' - if (hasAmdGpu) return 'amd_long' - return 'gpu_p100_long' - } - - return hasAmdGpu ? 'amd' : 'gpu_p100' -} - -/* - * determineWiceQueue(task) - * Selects appropriate CPU queue for WICE cluster - * @param task Nextflow task object containing memory and time requirements - * @return Queue name based on task requirements and availability + * params.vsc_kul_hasselt_time_threshold: 72 hours - Threshold for determining long-running jobs + * params.vsc_kul_hasselt_memory_threshold_genius: 175GB - Memory threshold for bigmem queues + * params.vsc_kul_hasselt_memory_threshold_wice: 239GB - Memory threshold for high-memory queues */ -def determineWiceQueue = { task -> - def isHighMemory = task.memory >= MEMORY_THRESHOLD_WICE - def isLongRunning = task.time >= TIME_THRESHOLD - def hasDedicatedQueue = AVAILABLE_QUEUES.contains('dedicated_big_bigmem') - - if (isHighMemory) { - if (isLongRunning && hasDedicatedQueue) { - return 'dedicated_big_bigmem' - } - task.time = limitTaskTime(task.time, TIME_THRESHOLD) - return 'bigmem,hugemem' - } - - return isLongRunning ? - 'batch_long,batch_icelake_long,batch_sapphirerapids_long' : - 'batch,batch_sapphirerapids,batch_icelake' -} - -/* - * determineWiceGpuQueue(task) - * Selects appropriate GPU queue for WICE cluster - * @param task Nextflow task object containing memory and time requirements - * @return GPU queue name based on task requirements -*/ -def determineWiceGpuQueue = { task -> - def isHighMemory = task.memory >= MEMORY_THRESHOLD_WICE - def isLongRunning = task.time >= TIME_THRESHOLD - def hasDedicatedQueue = isHighMemory ? - AVAILABLE_QUEUES.contains('dedicated_big_gpu_h100') : - AVAILABLE_QUEUES.contains('dedicated_big_gpu') - - if (isLongRunning && !hasDedicatedQueue) { - task.time = limitTaskTime(task.time, TIME_THRESHOLD) - } - - if (isHighMemory) { - return (isLongRunning && hasDedicatedQueue) ? 'dedicated_big_gpu_h100' : 'gpu_h100' - } - - return (isLongRunning && hasDedicatedQueue) ? 'dedicated_big_gpu' : 'gpu_a100,gpu' -} +params.vsc_kul_hasselt_time_threshold = 72.h +params.vsc_kul_hasselt_memory_threshold_genius = 175.GB +params.vsc_kul_hasselt_memory_threshold_wice = 239.GB /* * ======== @@ -212,24 +110,54 @@ profiles { process { // 768 - 65 so 65GB for overhead, max is 720000MB + ext.vsc_kul_hasselt_queue = { + def isHighMemory = task.memory >= params.vsc_kul_hasselt_memory_threshold_genius + def isLongRunning = task.time >= params.vsc_kul_hasselt_time_threshold + def hasDedicatedBigmem = params.vsc_kul_hasselt_available_queues.contains('dedicated_big_bigmem') + + if (isHighMemory) { + return isLongRunning ? + (hasDedicatedBigmem ? 'dedicated_big_bigmem' : 'bigmem_long') : + 'bigmem' + } + + return isLongRunning ? 'batch_long' : 'batch' + } resourceLimits = [ memory: 703.GB, cpus: 36, time: 168.h ] - beforeScript = { 'module load cluster/genius/' + determineGeniusQueue(task).toString().split(',')[0] } - queue = { determineGeniusQueue(task) } + beforeScript = { 'module load cluster/genius/' + task.ext.vsc_kul_hasselt_queue.toString().split(',')[0] } + queue = { task.ext.vsc_kul_hasselt_queue } clusterOptions = { - determineGeniusQueue(task) =~ /dedicated/ ? + task.ext.vsc_kul_hasselt_queue =~ /dedicated/ ? "--clusters=genius --account=lp_big_genius_cpu" : - "--clusters=genius --account=$TIER2_PROJECT" + "--clusters=genius --account=${params.vsc_kul_hasselt_tier2_project}" } withLabel: '.*gpu.*'{ + ext.vsc_kul_hasselt_queue = { + def isHighMemory = task.memory >= params.vsc_kul_hasselt_memory_threshold_genius + def isLongRunning = task.time >= params.vsc_kul_hasselt_time_threshold + def hasDedicatedGpu = params.vsc_kul_hasselt_available_queues.contains('dedicated_rega_gpu') + def hasAmdGpu = params.vsc_kul_hasselt_available_queues.contains('amd') + + if (isHighMemory) { + return isLongRunning ? 'gpu_v100_long' : 'gpu_v100' + } + + if (isLongRunning) { + if (hasDedicatedGpu) return 'dedicated_rega_gpu' + if (hasAmdGpu) return 'amd_long' + return 'gpu_p100_long' + } + + return hasAmdGpu ? 'amd' : 'gpu_p100' + } resourceLimits = [ memory: 703.GB, cpus: 36 , time: 168.h ] - beforeScript = { 'module load cluster/genius/' + determineGeniusGpuQueue(task).toString().split(',')[0] } - apptainer.runOptions = '--containall --cleanenv --nv' - singularity.runOptions = '--containall --cleanenv --nv' - queue = { determineGeniusGpuQueue(task) } + beforeScript = { 'module load cluster/genius/' + task.ext.vsc_kul_hasselt_queue.toString().split(',')[0] } + containerOptions = '--containall --cleanenv --nv' + queue = { task.ext.vsc_kul_hasselt_queue } clusterOptions = { def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/9) as int) - "--gres=gpu:${gpus} --clusters=genius --account=$TIER2_PROJECT" + "--gres=gpu:${gpus} --clusters=genius --account=${params.vsc_kul_hasselt_tier2_project}" } } } @@ -242,12 +170,30 @@ profiles { process { // 768 - 65 so 65GB for overhead, max is 720000MB + ext.vsc_kul_hasselt_queue = { + def isHighMemory = task.memory >= params.vsc_kul_hasselt_memory_threshold_genius + def isLongRunning = task.time >= params.vsc_kul_hasselt_time_threshold + def hasDedicatedGpu = params.vsc_kul_hasselt_available_queues.contains('dedicated_rega_gpu') + def hasAmdGpu = params.vsc_kul_hasselt_available_queues.contains('amd') + + if (isHighMemory) { + return isLongRunning ? 'gpu_v100_long' : 'gpu_v100' + } + + if (isLongRunning) { + if (hasDedicatedGpu) return 'dedicated_rega_gpu' + if (hasAmdGpu) return 'amd_long' + return 'gpu_p100_long' + } + + return hasAmdGpu ? 'amd' : 'gpu_p100' + } resourceLimits = [ memory: 703.GB, cpus: 36, time: 168.h] - beforeScript = { 'module load cluster/genius/' + determineGeniusGpuQueue(task).toString().split(',')[0] } - queue = { determineGeniusGpuQueue(task) } + beforeScript = { 'module load cluster/genius/' + task.ext.vsc_kul_hasselt_queue.toString().split(',')[0] } + queue = { task.ext.vsc_kul_hasselt_queue } clusterOptions = { def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/9) as int) - "--gres=gpu:${gpus} --clusters=genius --account=$TIER2_PROJECT" + "--gres=gpu:${gpus} --clusters=genius --account=${params.vsc_kul_hasselt_tier2_project}" } } } @@ -257,27 +203,60 @@ profiles { process { // max is 2016000 + ext.vsc_kul_hasselt_queue = { + def isHighMemory = task.memory >= params.vsc_kul_hasselt_memory_threshold_wice + def isLongRunning = task.time >= params.vsc_kul_hasselt_time_threshold + def hasDedicatedQueue = params.vsc_kul_hasselt_available_queues.contains('dedicated_big_bigmem') + + if (isHighMemory) { + if (isLongRunning && hasDedicatedQueue) { + return 'dedicated_big_bigmem' + } + task.time = { task.time > params.vsc_kul_hasselt_time_threshold ? params.vsc_kul_hasselt_time_threshold : task.time } + return 'bigmem,hugemem' + } + + return isLongRunning ? + 'batch_long,batch_icelake_long,batch_sapphirerapids_long' : + 'batch,batch_sapphirerapids,batch_icelake' + } resourceLimits = [ memory: 1968.GB, cpus: 72, time: 168.h ] - beforeScript = { 'module load cluster/wice/' + determineWiceQueue(task).toString().split(',')[0] } - queue = { determineWiceQueue(task) } + beforeScript = { 'module load cluster/wice/' + task.ext.vsc_kul_hasselt_queue.toString().split(',')[0] } + queue = { task.ext.vsc_kul_hasselt_queue } clusterOptions = { - determineWiceQueue(task) =~ /dedicated/ ? + task.ext.vsc_kul_hasselt_queue =~ /dedicated/ ? "--clusters=wice --account=lp_big_wice_cpu" : - "--clusters=wice --account=$TIER2_PROJECT" + "--clusters=wice --account=${params.vsc_kul_hasselt_tier2_project}" } withLabel: '.*gpu.*' { + ext.vsc_kul_hasselt_queue = { + def isHighMemory = task.memory >= params.vsc_kul_hasselt_memory_threshold_wice + def isLongRunning = task.time >= params.vsc_kul_hasselt_time_threshold + def hasDedicatedQueue = isHighMemory ? + params.vsc_kul_hasselt_available_queues.contains('dedicated_big_gpu_h100') : + params.vsc_kul_hasselt_available_queues.contains('dedicated_big_gpu') + + if (isLongRunning && !hasDedicatedQueue) { + task.time = { task.time > params.vsc_kul_hasselt_time_threshold ? params.vsc_kul_hasselt_time_threshold : task.time } + } + + if (isHighMemory) { + return (isLongRunning && hasDedicatedQueue) ? 'dedicated_big_gpu_h100' : 'gpu_h100' + } + + return (isLongRunning && hasDedicatedQueue) ? 'dedicated_big_gpu' : 'gpu_a100,gpu' + } resourceLimits = [ memory: 703.GB, cpus: 64, time: 168.h ] - apptainer.runOptions = '--containall --cleanenv --nv' - singularity.runOptions = '--containall --cleanenv --nv' - beforeScript = { 'module load cluster/wice/' + determineWiceGpuQueue(task).toString().split(',')[0] } - queue = { determineWiceGpuQueue(task) } + containerOptions = '--containall --cleanenv --nv' + beforeScript = { 'module load cluster/wice/' + task.ext.vsc_kul_hasselt_queue.toString().split(',')[0] } + queue = { task.ext.vsc_kul_hasselt_queue } clusterOptions = { def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/16) as int) - def queueValue = determineWiceGpuQueue(task) + def queueValue = task.ext.vsc_kul_hasselt_queue queueValue =~ /dedicated_big_gpu_h100/ ? "--clusters=wice --account=lp_big_wice_gpu_h100 --gres=gpu:${gpus}" : queueValue =~ /dedicated_big_gpu/ ? "--clusters=wice --account=lp_big_wice_gpu --gres=gpu:${gpus}" : - "--clusters=wice --account=$TIER2_PROJECT --gres=gpu:${gpus}" + "--clusters=wice --account=${params.vsc_kul_hasselt_tier2_project} --gres=gpu:${gpus}" } } } @@ -290,15 +269,32 @@ profiles { process { // 768 - 65 so 65GB for overhead, max is 720000MB - beforeScript = { 'module load cluster/wice/' + determineWiceGpuQueue(task).toString().split(',')[0] } + ext.vsc_kul_hasselt_queue = { + def isHighMemory = task.memory >= params.vsc_kul_hasselt_memory_threshold_wice + def isLongRunning = task.time >= params.vsc_kul_hasselt_time_threshold + def hasDedicatedQueue = isHighMemory ? + params.vsc_kul_hasselt_available_queues.contains('dedicated_big_gpu_h100') : + params.vsc_kul_hasselt_available_queues.contains('dedicated_big_gpu') + + if (isLongRunning && !hasDedicatedQueue) { + task.time = { task.time > params.vsc_kul_hasselt_time_threshold ? params.vsc_kul_hasselt_time_threshold : task.time } + } + + if (isHighMemory) { + return (isLongRunning && hasDedicatedQueue) ? 'dedicated_big_gpu_h100' : 'gpu_h100' + } + + return (isLongRunning && hasDedicatedQueue) ? 'dedicated_big_gpu' : 'gpu_a100,gpu' + } + beforeScript = { 'module load cluster/wice/' + task.ext.vsc_kul_hasselt_queue.toString().split(',')[0] } resourceLimits = [ memory: 703.GB, cpus: 64, time: 168.h ] - queue = { determineWiceGpuQueue(task) } + queue = { task.ext.vsc_kul_hasselt_queue } clusterOptions = { def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/16) as int) - def queueValue = determineWiceGpuQueue(task) + def queueValue = task.ext.vsc_kul_hasselt_queue queueValue =~ /dedicated_big_gpu_h100/ ? "--clusters=wice --account=lp_big_wice_gpu_h100 --gres=gpu:${gpus}" : queueValue =~ /dedicated_big_gpu/ ? "--clusters=wice --account=lp_big_wice_gpu --gres=gpu:${gpus}" : - "--clusters=wice --account=$TIER2_PROJECT --gres=gpu:${gpus}" + "--clusters=wice --account=${params.vsc_kul_hasselt_tier2_project} --gres=gpu:${gpus}" } } } @@ -307,7 +303,7 @@ profiles { params.config_profile_description = 'superdome profile for use on the genius cluster of the VSC HPC.' process { - clusterOptions = {"--clusters=genius --account=$TIER2_PROJECT"} + clusterOptions = {"--clusters=genius --account=${params.vsc_kul_hasselt_tier2_project}"} beforeScript = 'module load cluster/genius/superdome' // 6000 - 228 so 228GB for overhead, max is 5910888MB resourceLimits = [ memory: 5772.GB, cpus: 14, time: 168.h]