diff --git a/docs/joining-tasks.md b/docs/joining-tasks.md new file mode 100644 index 00000000..8ff0d83a --- /dev/null +++ b/docs/joining-tasks.md @@ -0,0 +1,81 @@ +# Joining tasks + +Spider provides two methods for joining tasks together: + +1. Binding the output(s) of one task to the input(s) of another. +2. Starting a task from within another task. + +(1) is useful when you simply want to assemble a directed acyclic graph (DAG) of tasks. (2) is +useful when you want a task to act like a client itself, running, monitoring, and collecting the +output of Spider jobs. + +## Binding tasks together + +The `spider::Driver::bind` function allows you to bind two or more tasks together. For example, we +can compute the hypotenuse of a right-angle triangle using the DAG of tasks shown in Figure 1 below. + +```mermaid +flowchart TD + square1["square(value) -> int"] + square2["square(value) -> int"] + square_root["square_root(value) -> double"] + sum["sum(x, y) -> int"] + a((a)) + b((b)) + asquared(("a²")) + bsquared(("b²")) + asquaredplusbsquared(("a² + b²")) + h((hypotenuse)) + + a --> square1 --> asquared --> sum + b --> square2 --> bsquared --> sum + sum --> asquaredplusbsquared --> square_root --> h +``` + +*Figure 1: A DAG of tasks to calculate the hypotenuse of a right-angle triangle. Square blocks +represent tasks and circular blocks represent values.* + +This DAG of tasks is implemented in `examples/joining-tasks/src/`. + +> [!NOTE] +> To build and run the example, you can follow the steps from the quick-start guide, but from inside +> the `examples/joining-tasks` directory. + +`spider::Driver::bind` takes two or more parameters as input: + +- The first parameter is the *target* task or `TaskGraph` that will take (as inputs) the outputs of + any tasks or `TaskGraph`s bound to it. For brevity, we’ll collectively refer to tasks and + `TaskGraph`s as *runnables*. 
+- Each subsequent parameter is either a *source* runnable, or a value that conforms to the + `Serializable` or `Data` interfaces. +- If the parameter is a runnable, its outputs will be passed to the inputs of the target runnable + (which is why we call it a source runnable). + +In the example: + +- We first use `bind` to bind the outputs of two `square` tasks to the inputs of the `sum` task. + This invocation returns a `TaskGraph` that we store in `sum_of_squares_task_graph`. +- Next, we use `bind` again to bind the output of `sum_of_squares_task_graph` to the input of the + `square_root` task, storing the result in `hypotenuse_task_graph`. `hypotenuse_task_graph` + represents the DAG in Figure 1. +- Finally, we submit `hypotenuse_task_graph` for execution with the inputs `4` & `5`. + +### Ordering of bound inputs + +Notice that the values we pass to `spider::Driver::bind` and `spider::Driver::start` are distributed +to the inputs of the target runnable, from left to right. In the example, the output of the first +`square` task is passed to the first input of `sum`, and likewise for the second `square` task and +input. Similarly, in `spider::Driver::start`, `4` is passed to the left `square` task and `5` is +passed to the right `square` task. + +> [!NOTE] +> Unlike `std::bind`, `spider::Driver::bind` doesn’t support placeholder inputs. + +> +> + +## Nesting tasks + +To run a task from within another task, you can use the task’s `TaskContext` parameter (the first +parameter) similar to how we use `spider::Driver`. Specifically, both have `bind` and `start` +methods with equivalent parameters and return values. diff --git a/docs/quick-start.md b/docs/quick-start.md index 96cc07cc..e0d63b1c 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -202,8 +202,9 @@ storage backend URL in the command. # Next steps -In future guides, we'll explain how to write more complex tasks, as well as how to leverage Spider's -support for fault tolerance. 
+The following guides describe how to leverage Spider to implement more advanced functionality: + +* [Joining tasks](./joining-tasks.md) [Docker]: https://docs.docker.com/engine/install/ [docker-non-root]: https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user diff --git a/examples/joining-tasks/.clang-format b/examples/joining-tasks/.clang-format new file mode 100644 index 00000000..0cde5f4f --- /dev/null +++ b/examples/joining-tasks/.clang-format @@ -0,0 +1,17 @@ +BasedOnStyle: "InheritParentConfig" + +IncludeCategories: + # NOTE: A header is grouped by first matching regex + # Project headers + - Regex: "^\"" + Priority: 4 + # Library headers. Update when adding new libraries. + # NOTE: clang-format retains leading white-space on a line in violation of the YAML spec. + - Regex: "^<(spider)" + Priority: 3 + # C system headers + - Regex: "^<.+\\.h>" + Priority: 1 + # C++ standard libraries + - Regex: "^<.+>" + Priority: 2 diff --git a/examples/joining-tasks/CMakeLists.txt b/examples/joining-tasks/CMakeLists.txt new file mode 100644 index 00000000..9d6a5113 --- /dev/null +++ b/examples/joining-tasks/CMakeLists.txt @@ -0,0 +1,27 @@ +cmake_minimum_required(VERSION 3.22.1) +project(spider_joining_tasks) + +# Add the Spider library +add_subdirectory(../../ spider EXCLUDE_FROM_ALL) + +# Add the tasks library +add_library( + tasks + SHARED + src/tasks.cpp + src/tasks.hpp +) + +# Link the Spider library to the tasks library +target_link_libraries(tasks PRIVATE spider::spider) + +# Add the client +add_executable(client src/client.cpp) + +# Link the Spider and tasks library to the client +target_link_libraries( + client + PRIVATE + spider::spider + tasks +) diff --git a/examples/joining-tasks/src/client.cpp b/examples/joining-tasks/src/client.cpp new file mode 100644 index 00000000..26432f7e --- /dev/null +++ b/examples/joining-tasks/src/client.cpp @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include + +#include + +#include 
"tasks.hpp" + +namespace { +/** + * @tparam JobOutputType Type of the expected job result. + * @param job The job whose status and result are checked. + * @param expected The result the job is expected to produce. + * @return Whether the job succeeded and its result equals `expected`. + */ +template +auto validate_job_output(spider::Job& job, JobOutputType const& expected) -> bool { + switch (auto job_status = job.get_status()) { + case spider::JobStatus::Succeeded: { + auto result = job.get_result(); + if (expected == result) { + return true; + } + std::cerr << "job returned unexpected result. Expected: " << expected + << ". Actual: " << result << '\n'; + return false; + } + case spider::JobStatus::Failed: { + std::pair const error_and_fn_name = job.get_error(); + std::cerr << "Job failed in function " << error_and_fn_name.second << " - " + << error_and_fn_name.first << '\n'; + return false; + } + default: + std::cerr << "Job is in unexpected state - " + << static_cast>(job_status) + << '\n'; + return false; + } +} +} // namespace + +auto main(int argc, char const* argv[]) -> int { + // Parse the storage backend URL from the command line arguments + if (argc < 2) { + std::cerr << "Usage: ./client " << '\n'; + return 1; + } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + std::string const storage_url{argv[1]}; + if (storage_url.empty()) { + std::cerr << "storage-backend-url cannot be empty." 
<< '\n'; + return 1; + } + + // Create a driver that connects to the Spider cluster + spider::Driver driver{storage_url}; + + auto sum_of_squares_task_graph = driver.bind(&sum, &square, &square); + auto hypotenuse_task_graph = driver.bind(&square_root, &sum_of_squares_task_graph); + + // Submit the task graph for execution + int const a = 4; + int const b = 5; + auto job = driver.start(hypotenuse_task_graph, a, b); + + job.wait_complete(); + + if (false == validate_job_output(job, std::sqrt((a * a) + (b * b)))) { + return 1; + } + + return 0; +} diff --git a/examples/joining-tasks/src/tasks.cpp b/examples/joining-tasks/src/tasks.cpp new file mode 100644 index 00000000..9180b49c --- /dev/null +++ b/examples/joining-tasks/src/tasks.cpp @@ -0,0 +1,25 @@ +#include "tasks.hpp" + +#include + +#include + +auto square(spider::TaskContext&, int value) -> int { + return value * value; +} + +auto square_root(spider::TaskContext&, int value) -> double { + return std::sqrt(value); +} + +auto sum(spider::TaskContext&, int x, int y) -> int { + return x + y; +} + +// Register the tasks +// NOLINTBEGIN(cert-err58-cpp) +SPIDER_REGISTER_TASK(square); +SPIDER_REGISTER_TASK(square_root); +SPIDER_REGISTER_TASK(sum); + +// NOLINTEND(cert-err58-cpp) diff --git a/examples/joining-tasks/src/tasks.hpp b/examples/joining-tasks/src/tasks.hpp new file mode 100644 index 00000000..8881fdba --- /dev/null +++ b/examples/joining-tasks/src/tasks.hpp @@ -0,0 +1,28 @@ +#ifndef TASKS_HPP +#define TASKS_HPP + +#include + +/** + * @param context + * @param value The integer to square. + * @return The square of the given value. + */ +auto square(spider::TaskContext& context, int value) -> int; + +/** + * @param context + * @param value The integer whose square root is computed. + * @return The square root of the given value. + */ +auto square_root(spider::TaskContext& context, int value) -> double; + +/** + * @param context + * @param x The first addend. + * @param y The second addend. + * @return The sum of x and y. + */ +auto sum(spider::TaskContext& context, int x, int y) -> int; + +#endif // TASKS_HPP