Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/appendices/environment_vars.rst
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,14 @@ There are several environment variables that affect the way Toil runs.
+--------------------------------------+-----------------------------------------------------+
| TOIL_SLURM_PARTITION | Partition to send Slurm jobs to. |
+--------------------------------------+-----------------------------------------------------+
| TOIL_SLURM_QOS | Quality Of Service to request for Slurm jobs. |
+--------------------------------------+-----------------------------------------------------+
| TOIL_SLURM_GPU_PARTITION | Partition to send Slurm jobs to if they ask for |
| | GPUs. |
+--------------------------------------+-----------------------------------------------------+
| TOIL_SLURM_GPU_QOS | Quality Of Service to request for Slurm jobs if |
| | they ask for GPUs. |
+--------------------------------------+-----------------------------------------------------+
| TOIL_SLURM_PE | Name of the Slurm partition to use for parallel |
| | jobs. Useful for Slurm clusters that do not offer |
| | a partition accepting both single-core and |
Expand Down
4 changes: 4 additions & 0 deletions docs/running/cliOptions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,12 @@ levels in toil are based on priority from the logging module:
Slurm job time limit, in [DD-]HH:MM:SS format.
--slurmPartition SLURM_PARTITION
Partition to send Slurm jobs to.
--slurmQOS SLURM_QOS
Quality Of Service to request for Slurm jobs.
--slurmGPUPartition SLURM_GPU_PARTITION
Partition to send Slurm jobs to if they ask for GPUs.
--slurmGPUQOS SLURM_GPU_QOS
Quality Of Service to request for Slurm jobs if they ask for GPUs.
--slurmPE SLURM_PE Special partition to send Slurm jobs to if they ask
for more than 1 CPU. Useful for Slurm clusters that do
not offer a partition accepting both single-core and
Expand Down
80 changes: 53 additions & 27 deletions src/toil/batchSystems/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,23 @@ def is_match(option: str) -> bool:
return is_match


def parse_slurm_option_value(args: list[str], i: int) -> tuple[str, int]:
    """
    Extract the value of a Slurm option written as ``--opt=value`` or ``--opt value``.

    :param args: command-line argument list being scanned
    :param i: position of the option token within ``args``
    :returns: the option's value, and the index of the last argument consumed
    :raises ValueError: if the two-token form is used but no value follows
    """
    # partition() cleanly distinguishes the inline form: sep is "" when no "=" present.
    option, sep, inline_value = args[i].partition("=")
    if sep:
        # --opt=value form: value is everything after the first "=".
        return inline_value, i
    # --opt value form: the value is the next argument, if there is one.
    try:
        return args[i + 1], i + 1
    except IndexError:
        raise ValueError(f"No value supplied for Slurm {option} argument") from None


class SlurmBatchSystem(AbstractGridEngineBatchSystem):
class PartitionInfo(NamedTuple):
partition_name: str
Expand Down Expand Up @@ -883,6 +900,7 @@ def prepareSbatch(
is_export_file_option = option_detector("export-file")
is_time_option = option_detector("time", "t")
is_partition_option = option_detector("partition", "p")
is_qos_option = option_detector("qos")

# We will fill these in with stuff parsed from TOIL_SLURM_ARGS, or
# with our own determinations if they aren't there.
Expand All @@ -892,6 +910,7 @@ def prepareSbatch(
export_list = [] # Some items here may be multiple comma-separated values
time_limit: int | None = self.boss.config.slurm_time # type: ignore[attr-defined]
partition: str | None = None
qos: str | None = None

if nativeConfig is not None:
logger.debug(
Expand All @@ -913,43 +932,23 @@ def prepareSbatch(
elif is_export_option(arg):
# Capture the export argument value so we can modify it
export_all = False
if "=" not in arg:
if i + 1 >= len(args):
raise ValueError(
f"No value supplied for Slurm {arg} argument"
)
i += 1
export_list.append(args[i])
else:
export_list.append(arg.split("=", 1)[1])
export_value, i = parse_slurm_option_value(args, i)
export_list.append(export_value)
elif is_export_file_option(arg):
# Keep --export-file but turn off --export=ALL in that
# case.
export_all = False
sbatch_line.append(arg)
elif is_time_option(arg):
# Capture the time limit in seconds so we can use it for picking a partition
if "=" not in arg:
if i + 1 >= len(args):
raise ValueError(
f"No value supplied for Slurm {arg} argument"
)
i += 1
time_string = args[i]
else:
time_string = arg.split("=", 1)[1]
time_string, i = parse_slurm_option_value(args, i)
time_limit = parse_slurm_time(time_string)
elif is_partition_option(arg):
# Capture the partition so we can run checks on it and know not to assign one
if "=" not in arg:
if i + 1 >= len(args):
raise ValueError(
f"No value supplied for Slurm {arg} argument"
)
i += 1
partition = args[i]
else:
partition = arg.split("=", 1)[1]
partition, i = parse_slurm_option_value(args, i)
elif is_qos_option(arg):
# Capture the QOS so we can avoid assigning one on top of it
qos, i = parse_slurm_option_value(args, i)
else:
# Other arguments pass through.
sbatch_line.append(arg)
Expand Down Expand Up @@ -1013,12 +1012,23 @@ def prepareSbatch(
# Pick a partition based on time limit
partition = self.boss.partitions.get_partition(time_limit)

if qos is None:
# Apply a configured QOS if one wasn't already supplied.
gpu_qos_override: str | None = self.boss.config.slurm_gpu_qos # type: ignore[attr-defined]
qos_override: str | None = self.boss.config.slurm_qos # type: ignore[attr-defined]
if gpus and gpu_qos_override:
qos = gpu_qos_override
elif qos_override:
qos = qos_override

# Now generate all the arguments
if len(export_list) > 0:
# add --export to the sbatch
sbatch_line.append("--export=" + ",".join(export_list))
if partition is not None:
sbatch_line.append(f"--partition={partition}")
if qos is not None:
sbatch_line.append(f"--qos={qos}")
if gpus:
# Generate GPU assignment argument
sbatch_line.append(f"--gres=gpu:{gpus}")
Expand Down Expand Up @@ -1178,13 +1188,27 @@ def add_options(cls, parser: ArgumentParser | _ArgumentGroup) -> None:
env_var="TOIL_SLURM_PARTITION",
help="Partition to send Slurm jobs to.",
)
parser.add_argument(
"--slurmQOS",
dest="slurm_qos",
default=None,
env_var="TOIL_SLURM_QOS",
help="Quality Of Service to request for Slurm jobs.",
)
parser.add_argument(
"--slurmGPUPartition",
dest="slurm_gpu_partition",
default=None,
env_var="TOIL_SLURM_GPU_PARTITION",
help="Partition to send Slurm jobs to if they ask for GPUs.",
)
parser.add_argument(
"--slurmGPUQOS",
dest="slurm_gpu_qos",
default=None,
env_var="TOIL_SLURM_GPU_QOS",
help="Quality Of Service to request for Slurm jobs if they ask for GPUs.",
)
parser.add_argument(
"--slurmPE",
dest="slurm_pe",
Expand All @@ -1208,6 +1232,8 @@ def setOptions(cls, setOption: OptionSetter) -> None:
setOption("slurm_default_all_mem")
setOption("slurm_time")
setOption("slurm_partition")
setOption("slurm_qos")
setOption("slurm_gpu_partition")
setOption("slurm_gpu_qos")
setOption("slurm_pe")
setOption("slurm_args")
13 changes: 13 additions & 0 deletions src/toil/test/batchSystems/test_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,19 @@ def test_any_option_detector(self):
self.assertFalse(detector("--no-bazz"))
self.assertFalse(detector("--foo-bar=--bazz-only"))

def test_parse_slurm_option_value(self):
    """Values parse correctly from both ``--opt value`` and ``--opt=value`` forms."""
    arguments = ["--qos", "high", "--partition=gpu"]

    # Two-token form: value is the following argument and the index advances past it.
    parsed, last = toil.batchSystems.slurm.parse_slurm_option_value(arguments, 0)
    self.assertEqual((parsed, last), ("high", 1))

    # Inline form: value comes from after "=" and the index stays put.
    parsed, last = toil.batchSystems.slurm.parse_slurm_option_value(arguments, 2)
    self.assertEqual((parsed, last), ("gpu", 2))

    # A dangling option with no following value must be rejected.
    with self.assertRaises(ValueError):
        toil.batchSystems.slurm.parse_slurm_option_value(["--qos"], 0)

def test_sinfo_not_permitted(self):
"""Test when ``sinfo`` is not available.

Expand Down
Loading