From f959ef35ed1507a1c1f93441d50e1926a14309d0 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 8 May 2025 18:52:58 +0000 Subject: [PATCH 01/64] Add job cancel storage call --- src/spider/storage/MetadataStorage.hpp | 8 +++++++ src/spider/storage/mysql/MySqlStorage.cpp | 29 +++++++++++++++++++++++ src/spider/storage/mysql/MySqlStorage.hpp | 1 + 3 files changed, 38 insertions(+) diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index 91f733b7..69dd3a41 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -72,6 +72,14 @@ class MetadataStorage { std::vector* job_ids ) -> StorageErr = 0; + /** + * Cancel a job. This will set the job state to CANCELED and set all tasks that have not + * finished or started to CANCELED. + * @param conn + * @param id The job id. + * @return The error code. + */ + virtual auto cancel_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; virtual auto remove_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; virtual auto reset_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; virtual auto add_child(StorageConnection& conn, boost::uuids::uuid parent_id, Task const& child) diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index 79df41e9..626782e0 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1053,6 +1053,35 @@ auto MySqlMetadataStorage::get_jobs_by_client_id( return StorageErr{}; } +auto MySqlMetadataStorage::cancel_job(StorageConnection& conn, boost::uuids::uuid const id) + -> StorageErr { + try { + // Set all pending/ready/running tasks from the job to cancelled + std::unique_ptr task_statement( + static_cast(conn)->prepareStatement( + "UPDATE `tasks` SET `state` = 'cancel' WHERE `job_id` = ? AND " + "`state` IN ('pending', 'ready', 'running')" + ) + ); + sql::bytes id_bytes = uuid_get_bytes(id); + task_statement->setBytes(1, &id_bytes); + task_statement->executeUpdate(); + // Set job state to cancelled + std::unique_ptr job_statement( + static_cast(conn)->prepareStatement( + "UPDATE `jobs` SET `state` = 'cancel' WHERE `id` = ?" + ) + ); + job_statement->setBytes(1, &id_bytes); + job_statement->executeUpdate(); + } catch (sql::SQLException& e) { + static_cast(conn)->rollback(); + return StorageErr{StorageErrType::OtherErr, e.what()}; + } + static_cast(conn)->commit(); + return StorageErr{}; +} + auto MySqlMetadataStorage::remove_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr { try { diff --git a/src/spider/storage/mysql/MySqlStorage.hpp b/src/spider/storage/mysql/MySqlStorage.hpp index 2dcc30f4..4aa80453 100644 --- a/src/spider/storage/mysql/MySqlStorage.hpp +++ b/src/spider/storage/mysql/MySqlStorage.hpp @@ -71,6 +71,7 @@ class MySqlMetadataStorage : public MetadataStorage { boost::uuids::uuid client_id, std::vector* job_ids ) -> StorageErr override; + auto cancel_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto remove_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto reset_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto add_child(StorageConnection& conn, boost::uuids::uuid parent_id, Task const& child) From 8a3ef43b5cb1a4d5968931caed04664bf67ccc49 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 8 May 2025 19:11:02 +0000 Subject: [PATCH 02/64] Add tests for job cancel storage function --- tests/storage/test-MetadataStorage.cpp | 75 ++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index a685b16d..6bea4c2e 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -442,6 +442,81 @@ TEMPLATE_LIST_TEST_CASE("Task finish", "[storage]", spider::test::StorageFactory REQUIRE(storage->remove_job(*conn, job_id).success()); } +TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryTypeList) { + std::unique_ptr storage_factory + = spider::test::create_storage_factory(); + std::unique_ptr storage + = storage_factory->provide_metadata_storage(); + + std::variant, spider::core::StorageErr> + conn_result = storage_factory->provide_storage_connection(); + REQUIRE(std::holds_alternative>(conn_result)); + auto conn = std::move(std::get>(conn_result)); + + boost::uuids::random_generator gen; + boost::uuids::uuid const job_id = gen(); + + // Create a complicated task graph + spider::core::Task child_task{"child"}; + spider::core::Task parent_1{"p1"}; + spider::core::Task parent_2{"p2"}; + parent_1.add_input(spider::core::TaskInput{"1", "float"}); + parent_1.add_input(spider::core::TaskInput{"2", "float"}); + parent_2.add_input(spider::core::TaskInput{"3", "int"}); + parent_2.add_input(spider::core::TaskInput{"4", "int"}); + parent_1.add_output(spider::core::TaskOutput{"float"}); + parent_2.add_output(spider::core::TaskOutput{"int"}); + child_task.add_input(spider::core::TaskInput{parent_1.get_id(), 0, "float"}); + child_task.add_input(spider::core::TaskInput{parent_2.get_id(), 0, "int"}); + child_task.add_output(spider::core::TaskOutput{"float"}); + parent_1.set_max_retries(1); + parent_2.set_max_retries(1); + child_task.set_max_retries(1); + spider::core::TaskGraph graph; + // Add task and dependencies to task graph in wrong order + graph.add_task(child_task); + graph.add_task(parent_1); + graph.add_task(parent_2); + graph.add_dependency(parent_2.get_id(), child_task.get_id()); + graph.add_dependency(parent_1.get_id(), child_task.get_id()); + graph.add_input_task(parent_1.get_id()); + graph.add_input_task(parent_2.get_id()); + graph.add_output_task(child_task.get_id()); + // Submit job should success + REQUIRE(storage->add_job(*conn, job_id, gen(), graph).success()); + + // Task finish for parent 1 should succeed + spider::core::TaskInstance const parent_1_instance{gen(), parent_1.get_id()}; + REQUIRE(storage->set_task_state(*conn, parent_1.get_id(), spider::core::TaskState::Running) + .success()); + REQUIRE(storage->task_finish( + *conn, + parent_1_instance, + {spider::core::TaskOutput{"1.1", "float"}} + ) + .success()); + + // Cancel job should succeed + REQUIRE(storage->cancel_job(*conn, job_id).success()); + // Job status should be cancelled + spider::core::JobStatus job_status = spider::core::JobStatus::Running; + REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); + REQUIRE(spider::core::JobStatus::Cancelled == job_status); + + // Parent 1 state should be success + spider::core::Task task_res{""}; + REQUIRE(storage->get_task(*conn, parent_1.get_id(), &task_res).success()); + REQUIRE(spider::core::TaskState::Succeed == task_res.get_state()); + // Parent 2 and child states should be cancelled + REQUIRE(storage->get_task(*conn, parent_2.get_id(), &task_res).success()); + REQUIRE(spider::core::TaskState::Canceled == task_res.get_state()); + REQUIRE(storage->get_task(*conn, child_task.get_id(), &task_res).success()); + REQUIRE(spider::core::TaskState::Canceled == task_res.get_state()); + + // Clean up + REQUIRE(storage->remove_job(*conn, job_id).success()); +} + TEMPLATE_LIST_TEST_CASE("Job reset", "[storage]", spider::test::StorageFactoryTypeList) { std::unique_ptr storage_factory = spider::test::create_storage_factory(); From a79dff146a52b833ad4ec54cc3c0d3ad9c283d25 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 8 May 2025 19:25:15 +0000 Subject: [PATCH 03/64] Add get_task_state --- src/spider/storage/MetadataStorage.hpp | 3 +++ src/spider/storage/mysql/MySqlStorage.cpp | 33 +++++++++++++++++++++++ src/spider/storage/mysql/MySqlStorage.hpp | 2 ++ tests/storage/test-MetadataStorage.cpp | 14 +++++----- 4 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index 69dd3a41..609d59dd 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -100,6 +100,9 @@ class MetadataStorage { virtual auto set_task_state(StorageConnection& conn, boost::uuids::uuid id, TaskState state) -> StorageErr = 0; + virtual auto get_task_state(StorageConnection& conn, boost::uuids::uuid id, TaskState* state) + -> StorageErr + = 0; virtual auto set_task_running(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; virtual auto add_task_instance(StorageConnection& conn, TaskInstance const& instance) -> StorageErr diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index 626782e0..f584fd11 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1428,6 +1428,39 @@ auto MySqlMetadataStorage::set_task_state( return StorageErr{}; } +auto MySqlMetadataStorage::get_task_state( + StorageConnection& conn, + boost::uuids::uuid const id, + TaskState* state +) -> StorageErr { + try { + // Get the state of the task + std::unique_ptr statement( + static_cast(conn)->prepareStatement( + "SELECT `state` FROM `tasks` WHERE `id` = ?" + ) + ); + sql::bytes id_bytes = uuid_get_bytes(id); + statement->setBytes(1, &id_bytes); + std::unique_ptr const res(statement->executeQuery()); + if (res->rowsCount() == 0) { + static_cast(conn)->commit(); + return StorageErr{ + StorageErrType::KeyNotFoundErr, + fmt::format("No task with id {} ", boost::uuids::to_string(id)) + }; + } + res->next(); + std::string const state_str = get_sql_string(res->getString("state")); + *state = string_to_task_state(state_str); + } catch (sql::SQLException& e) { + static_cast(conn)->rollback(); + return StorageErr{StorageErrType::OtherErr, e.what()}; + } + static_cast(conn)->commit(); + return StorageErr{}; +} + auto MySqlMetadataStorage::set_task_running(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr { try { diff --git a/src/spider/storage/mysql/MySqlStorage.hpp b/src/spider/storage/mysql/MySqlStorage.hpp index 4aa80453..d595b38b 100644 --- a/src/spider/storage/mysql/MySqlStorage.hpp +++ b/src/spider/storage/mysql/MySqlStorage.hpp @@ -87,6 +87,8 @@ class MySqlMetadataStorage : public MetadataStorage { ) -> StorageErr override; auto set_task_state(StorageConnection& conn, boost::uuids::uuid id, TaskState state) -> StorageErr override; + auto get_task_state(StorageConnection& conn, boost::uuids::uuid id, TaskState* state) + -> StorageErr override; auto set_task_running(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto add_task_instance(StorageConnection& conn, TaskInstance const& instance) -> StorageErr override; diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index 6bea4c2e..eb3c469d 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -504,14 +504,14 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT REQUIRE(spider::core::JobStatus::Cancelled == job_status); // Parent 1 state should be success - spider::core::Task task_res{""}; - REQUIRE(storage->get_task(*conn, parent_1.get_id(), &task_res).success()); - REQUIRE(spider::core::TaskState::Succeed == task_res.get_state()); + spider::core::TaskState task_state = spider::core::TaskState::Running; + REQUIRE(storage->get_task_state(*conn, parent_1.get_id(), &task_state).success()); + REQUIRE(spider::core::TaskState::Succeed == task_state); // Parent 2 and child states should be cancelled - REQUIRE(storage->get_task(*conn, parent_2.get_id(), &task_res).success()); - REQUIRE(spider::core::TaskState::Canceled == task_res.get_state()); - REQUIRE(storage->get_task(*conn, child_task.get_id(), &task_res).success()); - REQUIRE(spider::core::TaskState::Canceled == task_res.get_state()); + REQUIRE(storage->get_task_state(*conn, parent_2.get_id(), &task_state).success()); + REQUIRE(spider::core::TaskState::Canceled == task_state); + REQUIRE(storage->get_task_state(*conn, child_task.get_id(), &task_state).success()); + REQUIRE(spider::core::TaskState::Canceled == task_state); // Clean up REQUIRE(storage->remove_job(*conn, job_id).success()); From 80e27cb62c95b31970b6cae40ddb7523c6bb2811 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 8 May 2025 20:06:31 +0000 Subject: [PATCH 04/64] Add cancel messages in storage --- src/spider/storage/MetadataStorage.hpp | 10 +++++++--- src/spider/storage/mysql/MySqlStorage.cpp | 16 ++++++++++++++-- src/spider/storage/mysql/MySqlStorage.hpp | 3 ++- src/spider/storage/mysql/mysql_stmt.hpp | 12 ++++++++++-- tests/storage/test-MetadataStorage.cpp | 3 ++- tools/scripts/storage/init_db.sql | 6 ++++++ 6 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index 609d59dd..d2a15fcd 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -73,13 +73,17 @@ class MetadataStorage { ) -> StorageErr = 0; /** - * Cancel a job. This will set the job state to CANCELED and set all tasks that have not - * finished or started to CANCELED. + * Cancel a job. This will set the job state to CANCEL and set all tasks that have not + * finished or started to CANCEL. * @param conn * @param id The job id. + * @param message The error message of the cancellation. * @return The error code. */ - virtual auto cancel_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; + virtual auto + cancel_job(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) + -> StorageErr + = 0; virtual auto remove_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; virtual auto reset_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; virtual auto add_child(StorageConnection& conn, boost::uuids::uuid parent_id, Task const& child) diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index f584fd11..b52838fd 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1053,8 +1053,11 @@ auto MySqlMetadataStorage::get_jobs_by_client_id( return StorageErr{}; } -auto MySqlMetadataStorage::cancel_job(StorageConnection& conn, boost::uuids::uuid const id) - -> StorageErr { +auto MySqlMetadataStorage::cancel_job( + StorageConnection& conn, + boost::uuids::uuid const id, + std::string const& message +) -> StorageErr { try { // Set all pending/ready/running tasks from the job to cancelled std::unique_ptr task_statement( @@ -1074,6 +1077,15 @@ auto MySqlMetadataStorage::cancel_job(StorageConnection& conn, boost::uuids::uui ); job_statement->setBytes(1, &id_bytes); job_statement->executeUpdate(); + // Set the cancel message + std::unique_ptr msg_statemement( + static_cast(conn)->prepareStatement( + "UPDATE `jobs` SET `message` = ? WHERE `id` = ?" + ) + ); + msg_statemement->setString(1, message); + msg_statemement->setBytes(2, &id_bytes); + msg_statemement->executeUpdate(); } catch (sql::SQLException& e) { static_cast(conn)->rollback(); return StorageErr{StorageErrType::OtherErr, e.what()}; diff --git a/src/spider/storage/mysql/MySqlStorage.hpp b/src/spider/storage/mysql/MySqlStorage.hpp index d595b38b..50b64300 100644 --- a/src/spider/storage/mysql/MySqlStorage.hpp +++ b/src/spider/storage/mysql/MySqlStorage.hpp @@ -71,7 +71,8 @@ class MySqlMetadataStorage : public MetadataStorage { boost::uuids::uuid client_id, std::vector* job_ids ) -> StorageErr override; - auto cancel_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; + auto cancel_job(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) + -> StorageErr override; auto remove_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto reset_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto add_child(StorageConnection& conn, boost::uuids::uuid parent_id, Task const& child) diff --git a/src/spider/storage/mysql/mysql_stmt.hpp b/src/spider/storage/mysql/mysql_stmt.hpp index f699bf51..136826cf 100644 --- a/src/spider/storage/mysql/mysql_stmt.hpp +++ b/src/spider/storage/mysql/mysql_stmt.hpp @@ -32,6 +32,13 @@ std::string const cCreateJobTable = R"(CREATE TABLE IF NOT EXISTS jobs ( PRIMARY KEY (`id`) ))"; +std::string const cCreateJobErrorTable = R"(CREATE TABLE IF NOT EXISTS `job_errors` ( + `job_id` BINARY(16) NOT NULL, + `message` VARCHAR(999) NOT NULL, + CONSTRAINT `job_error_job_id` FOREIGN KEY (`job_id`) REFERENCES `jobs` (`id`) ON UPDATE NO ACTION ON DELETE CASCADE, + PRIMARY KEY (`job_id`) +))"; + std::string const cCreateTaskTable = R"(CREATE TABLE IF NOT EXISTS tasks ( `id` BINARY(16) NOT NULL, `job_id` BINARY(16) NOT NULL, @@ -167,10 +174,11 @@ std::string const cCreateTaskKVDataTable = R"(CREATE TABLE IF NOT EXISTS `task_k CONSTRAINT `kv_data_task_id` FOREIGN KEY (`task_id`) REFERENCES `tasks` (`id`) ON UPDATE NO ACTION ON DELETE CASCADE ))"; -std::array const cCreateStorage = { +std::array const cCreateStorage = { cCreateDriverTable, // drivers table must be created before data_ref_driver cCreateSchedulerTable, - cCreateJobTable, // jobs table must be created before task + cCreateJobTable, // jobs table must be created before task and job_error + cCreateJobErrorTable, cCreateTaskTable, // tasks table must be created before data_ref_task cCreateDataTable, // data table must be created before task_outputs cCreateDataLocalityTable, diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index eb3c469d..ce676eea 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -497,7 +497,8 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT .success()); // Cancel job should succeed - REQUIRE(storage->cancel_job(*conn, job_id).success()); + std::string const error_message = "test error message"; + REQUIRE(storage->cancel_job(*conn, job_id, error_message).success()); // Job status should be cancelled spider::core::JobStatus job_status = spider::core::JobStatus::Running; REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); diff --git a/tools/scripts/storage/init_db.sql b/tools/scripts/storage/init_db.sql index d56f49c9..61afbd96 100644 --- a/tools/scripts/storage/init_db.sql +++ b/tools/scripts/storage/init_db.sql @@ -23,6 +23,12 @@ CREATE TABLE IF NOT EXISTS jobs INDEX idx_jobs_state (`state`), PRIMARY KEY (`id`) ); +CREATE TABLE IF NOT EXISTS `job_errors` ( + `job_id` BINARY(16) NOT NULL, + `message` VARCHAR(999) NOT NULL, + CONSTRAINT `job_error_job_id` FOREIGN KEY (`job_id`) REFERENCES `jobs` (`id`) ON UPDATE NO ACTION ON DELETE CASCADE, + PRIMARY KEY (`job_id`) +); CREATE TABLE IF NOT EXISTS tasks ( `id` BINARY(16) NOT NULL, From f7fa1d70691807c9abb369ad147db8d5ba153c7d Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 8 May 2025 20:14:13 +0000 Subject: [PATCH 05/64] Add get message function in storage --- src/spider/storage/MetadataStorage.hpp | 11 +++++++ src/spider/storage/mysql/MySqlStorage.cpp | 36 ++++++++++++++++++++--- src/spider/storage/mysql/MySqlStorage.hpp | 2 ++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index d2a15fcd..7eb99140 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -84,6 +84,17 @@ class MetadataStorage { cancel_job(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) -> StorageErr = 0; + /** + * Get the error message of a cancelled job. + * @param conn + * @param id The job id. + * @param message The error message of the cancellation. + * @return The error code. + */ + virtual auto + get_job_message(StorageConnection& conn, boost::uuids::uuid id, std::string* message) + -> StorageErr + = 0; virtual auto remove_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; virtual auto reset_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; virtual auto add_child(StorageConnection& conn, boost::uuids::uuid parent_id, Task const& child) diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index b52838fd..0e592d5d 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1078,14 +1078,42 @@ auto MySqlMetadataStorage::cancel_job( job_statement->setBytes(1, &id_bytes); job_statement->executeUpdate(); // Set the cancel message - std::unique_ptr msg_statemement( + std::unique_ptr message_statement( static_cast(conn)->prepareStatement( "UPDATE `jobs` SET `message` = ? WHERE `id` = ?" ) ); - msg_statemement->setString(1, message); - msg_statemement->setBytes(2, &id_bytes); - msg_statemement->executeUpdate(); + message_statement->setString(1, message); + message_statement->setBytes(2, &id_bytes); + message_statement->executeUpdate(); + } catch (sql::SQLException& e) { + static_cast(conn)->rollback(); + return StorageErr{StorageErrType::OtherErr, e.what()}; + } + static_cast(conn)->commit(); + return StorageErr{}; +} + +auto MySqlMetadataStorage::get_job_message( + StorageConnection& conn, + boost::uuids::uuid const id, + std::string* message +) -> StorageErr { + try { + std::unique_ptr statement{ + static_cast(conn)->prepareStatement( + "SELECT `message` FROM `job_errors` WHERE `job_id` = ?" + ) + }; + sql::bytes id_bytes = uuid_get_bytes(id); + statement->setBytes(1, &id_bytes); + std::unique_ptr const res{statement->executeQuery()}; + if (res->rowsCount() == 0) { + static_cast(conn)->commit(); + return StorageErr{StorageErrType::KeyNotFoundErr, "No messages found"}; + } + res->next(); + *message = get_sql_string(res->getString("message")); } catch (sql::SQLException& e) { static_cast(conn)->rollback(); return StorageErr{StorageErrType::OtherErr, e.what()}; diff --git a/src/spider/storage/mysql/MySqlStorage.hpp b/src/spider/storage/mysql/MySqlStorage.hpp index 50b64300..7ade6feb 100644 --- a/src/spider/storage/mysql/MySqlStorage.hpp +++ b/src/spider/storage/mysql/MySqlStorage.hpp @@ -73,6 +73,8 @@ class MySqlMetadataStorage : public MetadataStorage { ) -> StorageErr override; auto cancel_job(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) -> StorageErr override; + auto get_job_message(StorageConnection& conn, boost::uuids::uuid id, std::string* message) + -> StorageErr override; auto remove_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto reset_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto add_child(StorageConnection& conn, boost::uuids::uuid parent_id, Task const& child) From 9aced177c0133a4e9840bf24bb996f611a5c69a0 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 8 May 2025 20:20:48 +0000 Subject: [PATCH 06/64] Fix job error message --- src/spider/storage/mysql/MySqlStorage.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index 0e592d5d..b9c4dd7a 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1080,11 +1080,11 @@ auto MySqlMetadataStorage::cancel_job( // Set the cancel message std::unique_ptr message_statement( static_cast(conn)->prepareStatement( - "UPDATE `jobs` SET `message` = ? WHERE `id` = ?" + "INSERT INTO `job_errors` (`job_id`, `message`) VALUES (?, ?) " ) ); - message_statement->setString(1, message); - message_statement->setBytes(2, &id_bytes); + message_statement->setBytes(1, &id_bytes); + message_statement->setString(2, message); message_statement->executeUpdate(); } catch (sql::SQLException& e) { static_cast(conn)->rollback(); From 2198185ced27646e6dc8adee48b46692a613f773 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 8 May 2025 20:21:07 +0000 Subject: [PATCH 07/64] Add error message into tests --- tests/storage/test-MetadataStorage.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index ce676eea..755f1b4f 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -503,6 +503,10 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT spider::core::JobStatus job_status = spider::core::JobStatus::Running; REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); REQUIRE(spider::core::JobStatus::Cancelled == job_status); + // Job error message should be set + std::string error_message_res; + REQUIRE(storage->get_job_message(*conn, job_id, &error_message_res).success()); + REQUIRE(error_message == error_message_res); // Parent 1 state should be success spider::core::TaskState task_state = spider::core::TaskState::Running; From 25e9b3f2a5cd0f94a9f4575996b47d6c648bdfc9 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 8 May 2025 20:36:17 +0000 Subject: [PATCH 08/64] Add cancel_job_by_task --- src/spider/storage/MetadataStorage.hpp | 12 +++++ src/spider/storage/mysql/MySqlStorage.cpp | 56 +++++++++++++++++++++++ src/spider/storage/mysql/MySqlStorage.hpp | 3 ++ 3 files changed, 71 insertions(+) diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index 7eb99140..b85279a7 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -84,6 +84,18 @@ class MetadataStorage { cancel_job(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) -> StorageErr = 0; + /** + * Cancel a job that owns the task. This will set the job state to CANCEL and set all tasks + * that have not finished or started to CANCEL. + * @param conn + * @param id The task id. + * @param message The error message of the cancellation. + * @return The error code. + */ + virtual auto + cancel_job_by_task(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) + -> StorageErr + = 0; /** * Get the error message of a cancelled job. * @param conn diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index b9c4dd7a..4f66e898 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1094,6 +1094,62 @@ auto MySqlMetadataStorage::cancel_job( return StorageErr{}; } +auto MySqlMetadataStorage::cancel_job_by_task( + StorageConnection& conn, + boost::uuids::uuid id, + std::string const& message +) -> StorageErr { + try { + // Get job id + sql::bytes task_id_bytes = uuid_get_bytes(id); + std::unique_ptr statement( + static_cast(conn)->prepareStatement( + "SELECT `job_id` FROM `tasks` WHERE `id` = ?" + ) + ); + statement->setBytes(1, &task_id_bytes); + std::unique_ptr const res{statement->executeQuery()}; + if (res->rowsCount() == 0) { + static_cast(conn)->rollback(); + return StorageErr{StorageErrType::KeyNotFoundErr, "No task with id"}; + } + res->next(); + boost::uuids::uuid const job_id = read_id(res->getBinaryStream("job_id")); + sql::bytes job_id_bytes = uuid_get_bytes(job_id); + // Set all pending/ready/running tasks from the job to cancelled + std::unique_ptr task_statement( + static_cast(conn)->prepareStatement( + "UPDATE `tasks` SET `state` = 'cancel' WHERE `job_id` = ? AND " + "`state` IN ('pending', 'ready', 'running')" + ) + ); + task_statement->setBytes(1, &job_id_bytes); + task_statement->executeUpdate(); + // Set job state to cancelled + std::unique_ptr job_statement( + static_cast(conn)->prepareStatement( + "UPDATE `jobs` SET `state` = 'cancel' WHERE `id` = ?" + ) + ); + job_statement->setBytes(1, &job_id_bytes); + job_statement->executeUpdate(); + // Set the cancel message + std::unique_ptr message_statement( + static_cast(conn)->prepareStatement( + "INSERT INTO `job_errors` (`job_id`, `message`) VALUES (?, ?) " + ) + ); + message_statement->setBytes(1, &job_id_bytes); + message_statement->setString(2, message); + message_statement->executeUpdate(); + } catch (sql::SQLException& e) { + static_cast(conn)->rollback(); + return StorageErr{StorageErrType::OtherErr, e.what()}; + } + static_cast(conn)->commit(); + return StorageErr{}; +} + auto MySqlMetadataStorage::get_job_message( StorageConnection& conn, boost::uuids::uuid const id, diff --git a/src/spider/storage/mysql/MySqlStorage.hpp b/src/spider/storage/mysql/MySqlStorage.hpp index 7ade6feb..d6e0c95c 100644 --- a/src/spider/storage/mysql/MySqlStorage.hpp +++ b/src/spider/storage/mysql/MySqlStorage.hpp @@ -73,6 +73,9 @@ class MySqlMetadataStorage : public MetadataStorage { ) -> StorageErr override; auto cancel_job(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) -> StorageErr override; + auto + cancel_job_by_task(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) + -> StorageErr override; auto get_job_message(StorageConnection& conn, boost::uuids::uuid id, std::string* message) -> StorageErr override; auto remove_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; From 4292d461419c9483abe9ac932452585801ca5042 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 8 May 2025 20:40:26 +0000 Subject: [PATCH 09/64] Add tests for cancel_job_by_task --- tests/storage/test-MetadataStorage.cpp | 80 ++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index 755f1b4f..e6787513 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -522,6 +522,86 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT REQUIRE(storage->remove_job(*conn, job_id).success()); } +TEMPLATE_LIST_TEST_CASE("Job cancel by task", "[storage]", spider::test::StorageFactoryTypeList) { + std::unique_ptr storage_factory + = spider::test::create_storage_factory(); + std::unique_ptr storage + = storage_factory->provide_metadata_storage(); + + std::variant, spider::core::StorageErr> + conn_result = storage_factory->provide_storage_connection(); + REQUIRE(std::holds_alternative>(conn_result)); + auto conn = std::move(std::get>(conn_result)); + + boost::uuids::random_generator gen; + boost::uuids::uuid const job_id = gen(); + + // Create a complicated task graph + spider::core::Task child_task{"child"}; + spider::core::Task parent_1{"p1"}; + spider::core::Task parent_2{"p2"}; + parent_1.add_input(spider::core::TaskInput{"1", "float"}); + parent_1.add_input(spider::core::TaskInput{"2", "float"}); + parent_2.add_input(spider::core::TaskInput{"3", "int"}); + parent_2.add_input(spider::core::TaskInput{"4", "int"}); + parent_1.add_output(spider::core::TaskOutput{"float"}); + parent_2.add_output(spider::core::TaskOutput{"int"}); + child_task.add_input(spider::core::TaskInput{parent_1.get_id(), 0, "float"}); + child_task.add_input(spider::core::TaskInput{parent_2.get_id(), 0, "int"}); + child_task.add_output(spider::core::TaskOutput{"float"}); + parent_1.set_max_retries(1); + parent_2.set_max_retries(1); + child_task.set_max_retries(1); + spider::core::TaskGraph graph; + // Add task and dependencies to task graph in wrong order + graph.add_task(child_task); + graph.add_task(parent_1); + graph.add_task(parent_2); + graph.add_dependency(parent_2.get_id(), child_task.get_id()); + graph.add_dependency(parent_1.get_id(), child_task.get_id()); + graph.add_input_task(parent_1.get_id()); + graph.add_input_task(parent_2.get_id()); + graph.add_output_task(child_task.get_id()); + // Submit job should success + REQUIRE(storage->add_job(*conn, job_id, gen(), graph).success()); + + // Task finish for parent 1 should succeed + spider::core::TaskInstance const parent_1_instance{gen(), parent_1.get_id()}; + REQUIRE(storage->set_task_state(*conn, parent_1.get_id(), spider::core::TaskState::Running) + .success()); + REQUIRE(storage->task_finish( + *conn, + parent_1_instance, + {spider::core::TaskOutput{"1.1", "float"}} + ) + .success()); + + // Cancel job by task should succeed + std::string const error_message = "test error message"; + REQUIRE(storage->cancel_job_by_task(*conn, parent_2.get_id(), error_message).success()); + // Job status should be cancelled + spider::core::JobStatus job_status = spider::core::JobStatus::Running; + REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); + REQUIRE(spider::core::JobStatus::Cancelled == job_status); + // Job error message should be set + std::string error_message_res; + REQUIRE(storage->get_job_message(*conn, job_id, &error_message_res).success()); + REQUIRE(error_message == error_message_res); + + // Parent 1 state should be success + spider::core::TaskState task_state = spider::core::TaskState::Running; + REQUIRE(storage->get_task_state(*conn, parent_1.get_id(), &task_state).success()); + REQUIRE(spider::core::TaskState::Succeed == task_state); + // Parent 2 and child states should be cancelled + REQUIRE(storage->get_task_state(*conn, parent_2.get_id(), &task_state).success()); + REQUIRE(spider::core::TaskState::Canceled == task_state); + REQUIRE(storage->get_task_state(*conn, child_task.get_id(), &task_state).success()); + REQUIRE(spider::core::TaskState::Canceled == task_state); + + // Clean up + REQUIRE(storage->remove_job(*conn, job_id).success()); +} + TEMPLATE_LIST_TEST_CASE("Job reset", "[storage]", spider::test::StorageFactoryTypeList) { std::unique_ptr storage_factory = spider::test::create_storage_factory(); From 6a1f9e013ad4df15562178efaf40bfa3e7b2b6dc Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Fri, 9 May 2025 15:46:30 +0000 Subject: [PATCH 10/64] Add job cancel in client --- src/spider/client/Job.hpp | 15 ++++++++++++++- src/spider/client/TaskContext.cpp | 15 +++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/spider/client/Job.hpp b/src/spider/client/Job.hpp index c79b6d66..0f8d9fe4 100644 --- a/src/spider/client/Job.hpp +++ b/src/spider/client/Job.hpp @@ -82,7 +82,20 @@ class Job { * * @throw spider::ConnectionException */ - auto cancel(); + auto cancel() -> void { + std::variant, core::StorageErr> conn_result + = m_storage_factory->provide_storage_connection(); + if (std::holds_alternative(conn_result)) { + throw ConnectionException(std::get(conn_result).description); + } + auto conn = std::move(std::get>(conn_result)); + + core::StorageErr const err + = m_metadata_storage->cancel_job(*conn, m_id, "Job cancelled by user"); + if (!err.success()) { + throw ConnectionException(err.description); + } + } /** * @return Status of the job. diff --git a/src/spider/client/TaskContext.cpp b/src/spider/client/TaskContext.cpp index 75dc6474..900ae936 100644 --- a/src/spider/client/TaskContext.cpp +++ b/src/spider/client/TaskContext.cpp @@ -69,4 +69,19 @@ auto TaskContext::get_jobs() -> std::vector { } return job_ids; } + +auto TaskContext::abort(std::string const& message) -> void { + std::variant, core::StorageErr> conn_result + = m_storage_factory->provide_storage_connection(); + if (std::holds_alternative(conn_result)) { + throw ConnectionException(std::get(conn_result).description); + } + auto conn = std::move(std::get>(conn_result)); + + core::StorageErr const err = m_metadata_store->cancel_job_by_task(*conn, m_task_id, message); + if (!err.success()) { + throw ConnectionException(err.description); + } + std::quick_exit(1); +} } // namespace spider From e4c8f99544e8d5137730c2c5e0581a6f4caae73f Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 10:25:10 -0400 Subject: [PATCH 11/64] Do not fail the task if it is not running --- src/spider/storage/mysql/MySqlStorage.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index 4f66e898..ecbb420f 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1840,7 +1840,8 @@ auto MySqlMetadataStorage::task_fail( // Set the task fail if the last task instance fails std::unique_ptr const task_statement( static_cast(conn)->prepareStatement( - "UPDATE `tasks` SET `state` = 'fail' WHERE `id` = ?" + "UPDATE `tasks` SET `state` = 'fail' WHERE `id` = ? AND `state` = " + "'running'" ) ); task_statement->setBytes(1, &task_id_bytes); @@ -1849,7 +1850,7 @@ auto MySqlMetadataStorage::task_fail( std::unique_ptr const job_statement( static_cast(conn)->prepareStatement( "UPDATE `jobs` SET `state` = 'fail' WHERE `id` = (SELECT `job_id` FROM " - "`tasks` WHERE `id` = ?)" + "`tasks` WHERE `id` = ?) AND `state` = 'running'" ) ); job_statement->setBytes(1, &task_id_bytes); From 127cd7458c41e559c01ae0125ed479ecb0cb9aa1 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 11:01:44 -0400 Subject: [PATCH 12/64] Fix clang-tidy --- src/spider/client/TaskContext.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spider/client/TaskContext.cpp b/src/spider/client/TaskContext.cpp index 900ae936..d0ea5d08 100644 --- a/src/spider/client/TaskContext.cpp +++ b/src/spider/client/TaskContext.cpp @@ -1,5 +1,6 @@ #include "TaskContext.hpp" +#include #include #include #include From d964d67106203256882f9f3b7f401525d19edc6a Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 11:07:30 -0400 Subject: [PATCH 13/64] Add task executor cancel state check --- src/spider/worker/TaskExecutor.cpp | 13 +++++++++---- src/spider/worker/TaskExecutor.hpp | 9 +++++---- src/spider/worker/worker.cpp | 11 ++++++++++- tests/worker/test-TaskExecutor.cpp | 10 +++++----- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/spider/worker/TaskExecutor.cpp b/src/spider/worker/TaskExecutor.cpp index b4a9f7ff..2dec351e 100644 --- a/src/spider/worker/TaskExecutor.cpp +++ b/src/spider/worker/TaskExecutor.cpp @@ -21,27 +21,32 @@ auto TaskExecutor::get_pid() const -> pid_t { return m_process->get_pid(); } -auto TaskExecutor::completed() -> bool { +auto TaskExecutor::is_completed() -> bool { std::lock_guard const lock(m_state_mutex); return TaskExecutorState::Succeed == m_state || TaskExecutorState::Error == m_state || TaskExecutorState::Cancelled == m_state; } -auto TaskExecutor::waiting() -> bool { +auto TaskExecutor::is_waiting() -> bool { std::lock_guard const lock(m_state_mutex); return TaskExecutorState::Waiting == m_state; } -auto TaskExecutor::succeed() -> bool { +auto TaskExecutor::is_succeeded() -> bool { std::lock_guard const lock(m_state_mutex); return TaskExecutorState::Succeed == m_state; } -auto TaskExecutor::error() -> bool { +auto TaskExecutor::is_error() -> bool { std::lock_guard const lock(m_state_mutex); return TaskExecutorState::Error == m_state; } +auto TaskExecutor::is_cancelled() -> bool { + std::lock_guard const lock(m_state_mutex); + return TaskExecutorState::Cancelled == m_state; +} + void TaskExecutor::wait() { int const exit_code = m_process->wait(); if (exit_code != 0) { diff --git a/src/spider/worker/TaskExecutor.hpp b/src/spider/worker/TaskExecutor.hpp index e417adc7..31de05db 100644 --- a/src/spider/worker/TaskExecutor.hpp +++ b/src/spider/worker/TaskExecutor.hpp @@ -155,10 +155,11 @@ class TaskExecutor { */ [[nodiscard]] auto get_pid() const -> pid_t; - auto completed() -> bool; - auto waiting() -> bool; - auto succeed() -> bool; - auto error() -> bool; + auto is_completed() -> bool; + auto is_waiting() -> bool; + auto is_succeeded() -> bool; + auto is_error() -> bool; + auto is_cancelled() -> bool; void wait(); diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index d494827a..1b53977d 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -283,7 +283,7 @@ auto handle_executor_result( } auto conn = std::move(std::get>(conn_result)); - if (!executor.succeed()) { + if (!executor.is_succeeded()) { spdlog::warn("Task {} failed", task.get_function_name()); metadata_store->task_fail( *conn, @@ -399,6 +399,15 @@ auto task_loop( spider::core::ChildPid::set_pid(0); + if (executor.is_cancelled()) { + // If task is cancelled by user or other tasks, the states have been updated in the + // storage, no need to do anything. + // If task is cancelled by calling `TaskContext::abort`, the storage has also been + // updated, so we also don't need to do anything. + spdlog::debug("Task {} was cancelled", task.get_function_name()); + continue; + } + if (handle_executor_result(storage_factory, metadata_store, instance, task, executor)) { fail_task_id = std::nullopt; } else { diff --git a/tests/worker/test-TaskExecutor.cpp b/tests/worker/test-TaskExecutor.cpp index 2df25cf2..d47e9839 100644 --- a/tests/worker/test-TaskExecutor.cpp +++ b/tests/worker/test-TaskExecutor.cpp @@ -86,7 +86,7 @@ TEMPLATE_LIST_TEST_CASE( }; context.run(); executor.wait(); - REQUIRE(executor.succeed()); + REQUIRE(executor.is_succeeded()); std::optional const result_option = executor.get_result(); REQUIRE(result_option.has_value()); REQUIRE(5 == result_option.value_or(0)); @@ -117,7 +117,7 @@ TEMPLATE_LIST_TEST_CASE( }; context.run(); executor.wait(); - REQUIRE(executor.error()); + REQUIRE(executor.is_error()); std::tuple error = executor.get_error(); REQUIRE(spider::core::FunctionInvokeError::WrongNumberOfArguments == std::get<0>(error)); } @@ -147,7 +147,7 @@ TEMPLATE_LIST_TEST_CASE( }; context.run(); executor.wait(); - REQUIRE(executor.error()); + REQUIRE(executor.is_error()); std::tuple error = executor.get_error(); REQUIRE(spider::core::FunctionInvokeError::FunctionExecutionError == std::get<0>(error)); } @@ -197,7 +197,7 @@ TEMPLATE_LIST_TEST_CASE( }; context.run(); executor.wait(); - REQUIRE(executor.succeed()); + REQUIRE(executor.is_succeeded()); std::optional const optional_result = executor.get_result(); REQUIRE(optional_result.has_value()); if (optional_result.has_value()) { @@ -239,7 +239,7 @@ TEMPLATE_LIST_TEST_CASE( }; context.run(); executor.wait(); - REQUIRE(executor.succeed()); + REQUIRE(executor.is_succeeded()); std::optional const result_option = executor.get_result(); REQUIRE(result_option.has_value()); REQUIRE(input_1 + input_2 == result_option.value_or("")); From 2e2ddf8ee8840ca873ded9b95760e661f2ef99ca Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 12:17:07 -0400 Subject: [PATCH 14/64] Add ExecutorHandle class --- src/spider/CMakeLists.txt | 2 ++ src/spider/worker/ExecutorHandle.cpp | 30 ++++++++++++++++++++++++++++ src/spider/worker/ExecutorHandle.hpp | 30 ++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 src/spider/worker/ExecutorHandle.cpp create mode 100644 src/spider/worker/ExecutorHandle.hpp diff --git a/src/spider/CMakeLists.txt b/src/spider/CMakeLists.txt index b3f92f4a..12849937 100644 --- a/src/spider/CMakeLists.txt +++ b/src/spider/CMakeLists.txt @@ -61,6 +61,8 @@ set(SPIDER_WORKER_SOURCES worker/ChildPid.cpp worker/DllLoader.hpp worker/DllLoader.cpp + worker/ExecutorHandle.hpp + worker/ExecutorHandle.cpp worker/Process.hpp worker/Process.cpp worker/TaskExecutor.hpp diff --git a/src/spider/worker/ExecutorHandle.cpp b/src/spider/worker/ExecutorHandle.cpp new file mode 100644 index 00000000..4a3b4281 --- /dev/null +++ b/src/spider/worker/ExecutorHandle.cpp @@ -0,0 +1,30 @@ +#include "ExecutorHandle.hpp" + +#include + +namespace spider::worker { +auto ExecutorHandle::get_task_id() const -> std::optional { + std::lock_guard const lock_guard{m_mutex}; + if (nullptr != m_executor) { + return m_task_id; + } + return std::nullopt; +} + +auto ExecutorHandle::get_executor() const -> TaskExecutor* { + std::lock_guard const lock_guard{m_mutex}; + return m_executor; +} + +auto ExecutorHandle::set(boost::uuids::uuid const task_id, TaskExecutor* executor) -> void { + std::lock_guard const lock_guard{m_mutex}; + m_task_id = task_id; + m_executor = executor; +} + +auto ExecutorHandle::clear() -> void { + std::lock_guard const lock_guard{m_mutex}; + m_task_id = boost::uuids::uuid{}; + m_executor = nullptr; +} +} // namespace spider::worker diff --git a/src/spider/worker/ExecutorHandle.hpp b/src/spider/worker/ExecutorHandle.hpp new file mode 100644 index 00000000..c6cc501f --- /dev/null +++ b/src/spider/worker/ExecutorHandle.hpp @@ -0,0 +1,30 @@ +#ifndef SPIDER_WORKER_EXECUTORHANDLE_HPP +#define SPIDER_WORKER_EXECUTORHANDLE_HPP + +#include + +#include + +#include "TaskExecutor.hpp" + +namespace spider::worker { +/** + * Provides a thread-safe access to the task executor and other task related variables across + * threads. + */ +class ExecutorHandle { +public: + [[nodiscard]] auto get_task_id() const -> std::optional; + [[nodiscard]] auto get_executor() const -> TaskExecutor*; + auto set(boost::uuids::uuid task_id, TaskExecutor* executor) -> void; + auto clear() -> void; + +private: + boost::uuids::uuid m_task_id; // The task id is only valid if there is an executor. + TaskExecutor* m_executor + = nullptr; // Do not use std::shared_ptr to avoid calling destructor twice. + std::mutex m_mutex; +}; +} // namespace spider::worker + +#endif From 1a724ce84042b9e7b22a10972e2fdbca019ad9d5 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 12:56:17 -0400 Subject: [PATCH 15/64] Add job cancel check in worker --- src/spider/worker/worker.cpp | 55 +++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 1b53977d..22fd8f0b 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -47,6 +47,7 @@ #include "../storage/StorageFactory.hpp" #include "../utils/StopFlag.hpp" #include "ChildPid.hpp" +#include "ExecutorHandle.hpp" #include "TaskExecutor.hpp" #include "WorkerClient.hpp" @@ -123,14 +124,62 @@ auto get_environment_variable() -> absl::flat_hash_map< return environment_variables; } +/** + * Checks if the task is cancelled. If the task state is set to cancelled, cancels the running task. + * @param storage_factory + * @param metadata_store + * @param executor_handle Handle to the task id and executor. + */ +auto check_task_cancel( + std::shared_ptr storage_factory, + std::shared_ptr metadata_store, + spider::worker::ExecutorHandle const& executor_handle +) -> void { + std::optional const optional_task_id = executor_handle.get_task_id(); + if (!optional_task_id.has_value()) { + return; + } + boost::uuids::uuid const task_id = optional_task_id.value(); + + // Check if the task is cancelled + std::variant, spider::core::StorageErr> + conn_result = storage_factory->provide_storage_connection(); + if (std::holds_alternative(conn_result)) { + spdlog::error( + "Failed to connect to storage: {}", + std::get(conn_result).description + ); + return; + } + auto conn = std::move(std::get>(conn_result)); + spider::core::TaskState task_state = spider::core::TaskState::Running; + spider::core::StorageErr err = metadata_store->get_task_state(*conn, task_id, &task_state); + if (false == err.success()) { + spdlog::error("Failed to get task state: {}", err.description); + return; + } + + if (spider::core::TaskState::Canceled != task_state) { + return; + } + + // Cancel thet task. + spider::worker::TaskExecutor* executor = executor_handle.get_executor(); + executor->cancel(); +} + auto heartbeat_loop( std::shared_ptr const& storage_factory, std::shared_ptr const& metadata_store, - spider::core::Driver const& driver + spider::core::Driver const& driver, + spider::worker::ExecutorHandle const& executor_handle ) -> void { int fail_count = 0; while (!spider::core::StopFlag::is_stop_requested()) { std::this_thread::sleep_for(std::chrono::seconds(1)); + + check_task_cancel(storage_factory, metadata_store, executor_handle); + spdlog::debug("Updating heartbeat"); std::variant, spider::core::StorageErr> conn_result = storage_factory->provide_storage_connection(); @@ -345,6 +394,7 @@ auto handle_executor_result( auto task_loop( std::shared_ptr const& storage_factory, std::shared_ptr const& metadata_store, + spider::worker::ExecutorHandle& executor_handle, spider::worker::WorkerClient& client, std::string const& storage_url, std::vector const& libs, @@ -386,6 +436,8 @@ auto task_loop( arg_buffers }; + executor_handle.set(task_id, &executor); + pid_t const pid = executor.get_pid(); spider::core::ChildPid::set_pid(pid); // Double check if stop token is set to avoid any missing signal @@ -397,6 +449,7 @@ auto task_loop( context.run(); executor.wait(); + executor_handle.clear(); spider::core::ChildPid::set_pid(0); if (executor.is_cancelled()) { From 4b59d39ac263202de6e8e91658e880554d8357f0 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 13:09:47 -0400 Subject: [PATCH 16/64] Fix missing argument for worker threads --- src/spider/worker/worker.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 22fd8f0b..79002266 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -564,12 +564,15 @@ auto main(int argc, char** argv) -> int { boost::process::v2::environment::value> const environment_variables = get_environment_variable(); + spider::worker::ExecutorHandle executor_handle; + // Start a thread that periodically updates the scheduler's heartbeat std::thread heartbeat_thread{ heartbeat_loop, std::cref(storage_factory), std::cref(metadata_store), std::ref(driver), + std::cref(executor_handle), }; // Start a thread that processes tasks @@ -577,6 +580,7 @@ auto main(int argc, char** argv) -> int { task_loop, std::cref(storage_factory), std::cref(metadata_store), + std::ref(executor_handle), std::ref(client), std::cref(storage_url), std::cref(libs), From c79eefc4193cae4e3aaba180bad9f386cf02875d Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 13:19:08 -0400 Subject: [PATCH 17/64] Fixed lock guard const function --- src/spider/worker/ExecutorHandle.cpp | 4 ++-- src/spider/worker/ExecutorHandle.hpp | 4 ++-- src/spider/worker/worker.cpp | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/spider/worker/ExecutorHandle.cpp b/src/spider/worker/ExecutorHandle.cpp index 4a3b4281..8344f60e 100644 --- a/src/spider/worker/ExecutorHandle.cpp +++ b/src/spider/worker/ExecutorHandle.cpp @@ -3,7 +3,7 @@ #include namespace spider::worker { -auto ExecutorHandle::get_task_id() const -> std::optional { +auto ExecutorHandle::get_task_id() -> std::optional { std::lock_guard const lock_guard{m_mutex}; if (nullptr != m_executor) { return m_task_id; @@ -11,7 +11,7 @@ auto ExecutorHandle::get_task_id() const -> std::optional { return std::nullopt; } -auto ExecutorHandle::get_executor() const -> TaskExecutor* { +auto ExecutorHandle::get_executor() -> TaskExecutor* { std::lock_guard const lock_guard{m_mutex}; return m_executor; } diff --git a/src/spider/worker/ExecutorHandle.hpp b/src/spider/worker/ExecutorHandle.hpp index c6cc501f..e67405f0 100644 --- a/src/spider/worker/ExecutorHandle.hpp +++ b/src/spider/worker/ExecutorHandle.hpp @@ -14,8 +14,8 @@ namespace spider::worker { */ class ExecutorHandle { public: - [[nodiscard]] auto get_task_id() const -> std::optional; - [[nodiscard]] auto get_executor() const -> TaskExecutor*; + [[nodiscard]] auto get_task_id() -> std::optional; + [[nodiscard]] auto get_executor() -> TaskExecutor*; auto set(boost::uuids::uuid task_id, TaskExecutor* executor) -> void; auto clear() -> void; diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 79002266..e31d25f9 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -133,7 +133,7 @@ auto get_environment_variable() -> absl::flat_hash_map< auto check_task_cancel( std::shared_ptr storage_factory, std::shared_ptr metadata_store, - spider::worker::ExecutorHandle const& executor_handle + spider::worker::ExecutorHandle& executor_handle ) -> void { std::optional const optional_task_id = executor_handle.get_task_id(); if (!optional_task_id.has_value()) { @@ -172,7 +172,7 @@ auto heartbeat_loop( std::shared_ptr const& storage_factory, std::shared_ptr const& metadata_store, spider::core::Driver const& driver, - spider::worker::ExecutorHandle const& executor_handle + spider::worker::ExecutorHandle& executor_handle ) -> void { int fail_count = 0; while (!spider::core::StopFlag::is_stop_requested()) { @@ -572,7 +572,7 @@ auto main(int argc, char** argv) -> int { std::cref(storage_factory), std::cref(metadata_store), std::ref(driver), - std::cref(executor_handle), + std::ref(executor_handle), }; // Start a thread that processes tasks From f3cb222841c26102289f8a7f61b16840af437d6f Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 13:34:26 -0400 Subject: [PATCH 18/64] Fix clang-tidy --- src/spider/worker/ExecutorHandle.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/spider/worker/ExecutorHandle.cpp b/src/spider/worker/ExecutorHandle.cpp index 8344f60e..cfa5da67 100644 --- a/src/spider/worker/ExecutorHandle.cpp +++ b/src/spider/worker/ExecutorHandle.cpp @@ -1,6 +1,11 @@ #include "ExecutorHandle.hpp" #include +#include + +#include + +#include "TaskExecutor.hpp" namespace spider::worker { auto ExecutorHandle::get_task_id() -> std::optional { From 734292538957390a0c0a6574e86bcef2eb431b31 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 14:05:59 -0400 Subject: [PATCH 19/64] Add function name in job_errors table --- src/spider/storage/mysql/MySqlStorage.cpp | 12 ++++++++---- src/spider/storage/mysql/mysql_stmt.hpp | 1 + tools/scripts/storage/init_db.sql | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index ecbb420f..55ece918 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1080,7 +1080,8 @@ auto MySqlMetadataStorage::cancel_job( // Set the cancel message std::unique_ptr message_statement( static_cast(conn)->prepareStatement( - "INSERT INTO `job_errors` (`job_id`, `message`) VALUES (?, ?) " + "INSERT INTO `job_errors` (`job_id`, `func_name`, `message`) VALUES (?, " + "'', ?) " ) ); message_statement->setBytes(1, &id_bytes); @@ -1104,7 +1105,7 @@ auto MySqlMetadataStorage::cancel_job_by_task( sql::bytes task_id_bytes = uuid_get_bytes(id); std::unique_ptr statement( static_cast(conn)->prepareStatement( - "SELECT `job_id` FROM `tasks` WHERE `id` = ?" + "SELECT `job_id`, `func_name` FROM `tasks` WHERE `id` = ?" ) ); statement->setBytes(1, &task_id_bytes); @@ -1116,6 +1117,7 @@ auto MySqlMetadataStorage::cancel_job_by_task( res->next(); boost::uuids::uuid const job_id = read_id(res->getBinaryStream("job_id")); sql::bytes job_id_bytes = uuid_get_bytes(job_id); + std::string const function_name = get_sql_string(res->getString("func_name")); // Set all pending/ready/running tasks from the job to cancelled std::unique_ptr task_statement( static_cast(conn)->prepareStatement( @@ -1136,11 +1138,13 @@ auto MySqlMetadataStorage::cancel_job_by_task( // Set the cancel message std::unique_ptr message_statement( static_cast(conn)->prepareStatement( - "INSERT INTO `job_errors` (`job_id`, `message`) VALUES (?, ?) " + "INSERT INTO `job_errors` (`job_id`, `func_name`, `message`) VALUES (?, ?, " + "?) " ) ); message_statement->setBytes(1, &job_id_bytes); - message_statement->setString(2, message); + message_statement->setString(2, function_name); + message_statement->setString(3, message); message_statement->executeUpdate(); } catch (sql::SQLException& e) { static_cast(conn)->rollback(); diff --git a/src/spider/storage/mysql/mysql_stmt.hpp b/src/spider/storage/mysql/mysql_stmt.hpp index 136826cf..ffac77d1 100644 --- a/src/spider/storage/mysql/mysql_stmt.hpp +++ b/src/spider/storage/mysql/mysql_stmt.hpp @@ -34,6 +34,7 @@ std::string const cCreateJobTable = R"(CREATE TABLE IF NOT EXISTS jobs ( std::string const cCreateJobErrorTable = R"(CREATE TABLE IF NOT EXISTS `job_errors` ( `job_id` BINARY(16) NOT NULL, + `func_name` VARCHAR(64) NOT NULL, `message` VARCHAR(999) NOT NULL, CONSTRAINT `job_error_job_id` FOREIGN KEY (`job_id`) REFERENCES `jobs` (`id`) ON UPDATE NO ACTION ON DELETE CASCADE, PRIMARY KEY (`job_id`) diff --git a/tools/scripts/storage/init_db.sql b/tools/scripts/storage/init_db.sql index 61afbd96..4a97791b 100644 --- a/tools/scripts/storage/init_db.sql +++ b/tools/scripts/storage/init_db.sql @@ -25,6 +25,7 @@ CREATE TABLE IF NOT EXISTS jobs ); CREATE TABLE IF NOT EXISTS `job_errors` ( `job_id` BINARY(16) NOT NULL, + `func_name` VARCHAR(64) NOT NULL, `message` VARCHAR(999) NOT NULL, CONSTRAINT `job_error_job_id` FOREIGN KEY (`job_id`) REFERENCES `jobs` (`id`) ON UPDATE NO ACTION ON DELETE CASCADE, PRIMARY KEY (`job_id`) From 5e841f807923f2dc15061d6021d89bd6d0870aa9 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 14:11:48 -0400 Subject: [PATCH 20/64] Add client and tasks for cancel tests --- tests/CMakeLists.txt | 12 ++++ tests/client/cancel-test.cpp | 100 ++++++++++++++++++++++++++++ tests/integration/client.py | 2 +- tests/storage/StorageTestHelper.hpp | 2 +- tests/worker/worker-test.cpp | 14 ++++ tests/worker/worker-test.hpp | 4 ++ 6 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 tests/client/cancel-test.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3e9b6646..e69cb2d9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -83,6 +83,18 @@ target_link_libraries( spider_client ) +add_executable(cancel_test) +target_sources(cancel_test PRIVATE client/cancel-test.cpp) +target_link_libraries( + cancel_test + PRIVATE + spider_core + spider_client + worker_test + Boost::program_options + spider_client +) + add_custom_target(integrationTest ALL) add_custom_command( TARGET integrationTest diff --git a/tests/client/cancel-test.cpp b/tests/client/cancel-test.cpp new file mode 100644 index 00000000..4fe0225c --- /dev/null +++ b/tests/client/cancel-test.cpp @@ -0,0 +1,100 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include // IWYU pragma: keep +#include + +#include "../../src/spider/client/Data.hpp" +#include "../../src/spider/client/Driver.hpp" +#include "../../src/spider/client/Job.hpp" +#include "../../src/spider/client/TaskGraph.hpp" +#include "../worker/worker-test.hpp" + +namespace { +auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map { + boost::program_options::options_description desc; + desc.add_options()("help", "spider client test"); + desc.add_options()( + "storage_url", + boost::program_options::value(), + "storage server url" + ); + + boost::program_options::variables_map variables; + boost::program_options::store( + // NOLINTNEXTLINE(misc-include-cleaner) + boost::program_options::parse_command_line(argc, argv, desc), + variables + ); + boost::program_options::notify(variables); + return variables; +} + +constexpr int cCmdArgParseErr = 1; +constexpr int cJobNotCancelled = 2; +constexpr int cWrongErrorMessage = 3; +} // namespace + +auto main(int argc, char** argv) -> int { + // NOLINTNEXTLINE(misc-include-cleaner) + spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] [spider.client] %v"); +#ifndef NDEBUG + spdlog::set_level(spdlog::level::trace); +#endif + + boost::program_options::variables_map const args = parse_args(argc, argv); + + std::string storage_url; + try { + if (!args.contains("storage_url")) { + spdlog::error("storage_url is required"); + return cCmdArgParseErr; + } + storage_url = args["storage_url"].as(); + } catch (boost::bad_any_cast& e) { + return cCmdArgParseErr; + } catch (boost::program_options::error& e) { + return cCmdArgParseErr; + } + + // Create driver + spider::Driver driver{storage_url}; + spdlog::debug("Driver created"); + + // Cancel task from user + spider::Job sleep_job = driver.start(&sleep_test, 3); + sleep_job.cancel(); + // Check for job status + sleep_job.wait_complete(); + if (spider::JobStatus::Cancelled != sleep_job.get_status()) { + spdlog::error("Job status is not cancelled."); + return cJobNotCancelled; + } + + // Cancel task from task + spider::Job abort_job = driver.start(&abort_test); + abort_job.wait_complete(); + if (spider::JobStatus::Cancelled != abort_job.get_status()) { + spdlog::error("Job status is not cancelled."); + return cJobNotCancelled; + } + std::pair const job_errors = abort_job.get_error(); + if ("abort_test" != job_errors.first) { + spdlog::error("Cancelled task wrong function name"); + return cWrongErrorMessage; + } + if ("Abort test" != job_errors.second) { + spdlog::error("Cancelled task wrong error message"); + return cWrongErrorMessage; + } + + return 0; +} diff --git a/tests/integration/client.py b/tests/integration/client.py index 1b658c5a..eda411a8 100644 --- a/tests/integration/client.py +++ b/tests/integration/client.py @@ -72,7 +72,7 @@ def is_head_task(task_id: uuid.UUID, dependencies: List[Tuple[uuid.UUID, uuid.UU return not any(dependency[1] == task_id for dependency in dependencies) -g_storage_url = "jdbc:mariadb://localhost:3306/spider_test?user=root&password=password" +g_storage_url = "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" @pytest.fixture(scope="session") diff --git a/tests/storage/StorageTestHelper.hpp b/tests/storage/StorageTestHelper.hpp index 0f9406ae..cd433b4e 100644 --- a/tests/storage/StorageTestHelper.hpp +++ b/tests/storage/StorageTestHelper.hpp @@ -12,7 +12,7 @@ namespace spider::test { std::string const cMySqlStorageUrl - = "jdbc:mariadb://localhost:3306/spider_test?user=root&password=password"; + = "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password"; using StorageFactoryTypeList = std::tuple; diff --git a/tests/worker/worker-test.cpp b/tests/worker/worker-test.cpp index 1daf725f..7e358519 100644 --- a/tests/worker/worker-test.cpp +++ b/tests/worker/worker-test.cpp @@ -1,9 +1,11 @@ #include "worker-test.hpp" +#include #include #include #include #include +#include #include #include @@ -70,6 +72,16 @@ auto join_string_test( return input_1 + input_2; } +auto sleep_test(spider::TaskContext& /*context*/, int milliseconds) -> int { + std::this_thread::sleep_for(std::chrono::milliseconds{milliseconds}); + return milliseconds; +} + +auto abort_test(spider::TaskContext& context) -> int { + context.abort("Abort test"); + return 0; +} + // NOLINTBEGIN(cert-err58-cpp) SPIDER_REGISTER_TASK(sum_test); SPIDER_REGISTER_TASK(swap_test); @@ -79,4 +91,6 @@ SPIDER_REGISTER_TASK(random_fail_test); SPIDER_REGISTER_TASK(create_data_test); SPIDER_REGISTER_TASK(create_task_test); SPIDER_REGISTER_TASK(join_string_test); +SPIDER_REGISTER_TASK(sleep_test); +SPIDER_REGISTER_TASK(abort_test); // NOLINTEND(cert-err58-cpp) diff --git a/tests/worker/worker-test.hpp b/tests/worker/worker-test.hpp index f1624586..4734eeb2 100644 --- a/tests/worker/worker-test.hpp +++ b/tests/worker/worker-test.hpp @@ -27,4 +27,8 @@ auto join_string_test( std::string const& input_2 ) -> std::string; +auto sleep_test(spider::TaskContext& context, int milliseconds) -> int; + +auto abort_test(spider::TaskContext& context) -> int; + #endif From a9887517fef7cc0c4c1fc2c7890ff8f0697d4368 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 14:14:31 -0400 Subject: [PATCH 21/64] Add get func_name column in storage --- src/spider/storage/MetadataStorage.hpp | 10 +++++++--- src/spider/storage/mysql/MySqlStorage.cpp | 4 +++- src/spider/storage/mysql/MySqlStorage.hpp | 8 ++++++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index b85279a7..7390c715 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -100,12 +100,16 @@ class MetadataStorage { * Get the error message of a cancelled job. * @param conn * @param id The job id. + * @page function_name The function name of the cancelled task. * @param message The error message of the cancellation. * @return The error code. */ - virtual auto - get_job_message(StorageConnection& conn, boost::uuids::uuid id, std::string* message) - -> StorageErr + virtual auto get_job_message( + StorageConnection& conn, + boost::uuids::uuid id, + std::string* function_name, + std::string* message + ) -> StorageErr = 0; virtual auto remove_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; virtual auto reset_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index 55ece918..a4b83d2e 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1157,12 +1157,13 @@ auto MySqlMetadataStorage::cancel_job_by_task( auto MySqlMetadataStorage::get_job_message( StorageConnection& conn, boost::uuids::uuid const id, + std::string* function_name, std::string* message ) -> StorageErr { try { std::unique_ptr statement{ static_cast(conn)->prepareStatement( - "SELECT `message` FROM `job_errors` WHERE `job_id` = ?" + "SELECT `func_name`, `message` FROM `job_errors` WHERE `job_id` = ?" ) }; sql::bytes id_bytes = uuid_get_bytes(id); @@ -1173,6 +1174,7 @@ auto MySqlMetadataStorage::get_job_message( return StorageErr{StorageErrType::KeyNotFoundErr, "No messages found"}; } res->next(); + *function_name = get_sql_string(res->getString("func_name")); *message = get_sql_string(res->getString("message")); } catch (sql::SQLException& e) { static_cast(conn)->rollback(); diff --git a/src/spider/storage/mysql/MySqlStorage.hpp b/src/spider/storage/mysql/MySqlStorage.hpp index d6e0c95c..abe7b898 100644 --- a/src/spider/storage/mysql/MySqlStorage.hpp +++ b/src/spider/storage/mysql/MySqlStorage.hpp @@ -76,8 +76,12 @@ class MySqlMetadataStorage : public MetadataStorage { auto cancel_job_by_task(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) -> StorageErr override; - auto get_job_message(StorageConnection& conn, boost::uuids::uuid id, std::string* message) - -> StorageErr override; + auto get_job_message( + StorageConnection& conn, + boost::uuids::uuid id, + std::string* function_name, + std::string* message + ) -> StorageErr override; auto remove_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto reset_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto add_child(StorageConnection& conn, boost::uuids::uuid parent_id, Task const& child) From a58f58eb0b8575633b99be5638d622c2d33bc2f2 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 14:19:25 -0400 Subject: [PATCH 22/64] Add get_error in job --- src/spider/client/Job.hpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/spider/client/Job.hpp b/src/spider/client/Job.hpp index 0f8d9fe4..be6e0c0c 100644 --- a/src/spider/client/Job.hpp +++ b/src/spider/client/Job.hpp @@ -166,7 +166,30 @@ class Job { * @throw spider::ConnectionException */ auto get_error() -> std::pair { - throw ConnectionException{"Not implemented"}; + if (nullptr == m_conn) { + std::variant, core::StorageErr> conn_result + = m_storage_factory->provide_storage_connection(); + if (std::holds_alternative(conn_result)) { + throw ConnectionException(std::get(conn_result).description); + } + auto conn = std::move(std::get>(conn_result)); + + std::pair res; + core::StorageErr const err + = m_metadata_storage->get_job_message(*conn, m_id, &res.first, &res.second); + if (false == err.success()) { + throw ConnectionException{err.description}; + } + return res; + } + + std::pair res; + core::StorageErr const err + = m_metadata_storage->get_job_message(*m_conn, m_id, &res.first, &res.second); + if (false == err.success()) { + throw ConnectionException{err.description}; + } + return res; } private: From 23d94ff0855deb9e6836570660707a4f648af020 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 14:31:39 -0400 Subject: [PATCH 23/64] Fix clang tidy --- src/spider/worker/worker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index e31d25f9..b8ba4a49 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -131,8 +131,8 @@ auto get_environment_variable() -> absl::flat_hash_map< * @param executor_handle Handle to the task id and executor. */ auto check_task_cancel( - std::shared_ptr storage_factory, - std::shared_ptr metadata_store, + std::shared_ptr const& storage_factory, + std::shared_ptr const& metadata_store, spider::worker::ExecutorHandle& executor_handle ) -> void { std::optional const optional_task_id = executor_handle.get_task_id(); From d9e837f057c7d80ebdb7530efae163ca2b3a0752 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 14:35:44 -0400 Subject: [PATCH 24/64] Update unit test with new get_error --- tests/storage/test-MetadataStorage.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index e6787513..5cd77c03 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -504,8 +504,10 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); REQUIRE(spider::core::JobStatus::Cancelled == job_status); // Job error message should be set + std::string error_task_res{"random"}; std::string error_message_res; - REQUIRE(storage->get_job_message(*conn, job_id, &error_message_res).success()); + REQUIRE(storage->get_job_message(*conn, job_id, &error_task_res, &error_message_res).success()); + REQUIRE(error_task_res.empty()); REQUIRE(error_message == error_message_res); // Parent 1 state should be success @@ -584,8 +586,10 @@ TEMPLATE_LIST_TEST_CASE("Job cancel by task", "[storage]", spider::test::Storage REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); REQUIRE(spider::core::JobStatus::Cancelled == job_status); // Job error message should be set + std::string error_task_res; std::string error_message_res; - REQUIRE(storage->get_job_message(*conn, job_id, &error_message_res).success()); + REQUIRE(storage->get_job_message(*conn, job_id, &error_task_res, &error_message_res).success()); + REQUIRE("p2" == error_task_res); REQUIRE(error_message == error_message_res); // Parent 1 state should be success From 24563cefb33e9e4ddc1a04e42b06d8607f09d062 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 15:43:41 -0400 Subject: [PATCH 25/64] Check for running status before parsing result --- src/spider/worker/TaskExecutor.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/spider/worker/TaskExecutor.cpp b/src/spider/worker/TaskExecutor.cpp index 2dec351e..bda9e7f5 100644 --- a/src/spider/worker/TaskExecutor.cpp +++ b/src/spider/worker/TaskExecutor.cpp @@ -82,6 +82,12 @@ auto TaskExecutor::process_output_handler() -> boost::asio::awaitable { while (true) { std::optional const response_option = co_await receive_message_async(m_read_pipe); + { + std::lock_guard const lock(m_state_mutex); + if (m_state != TaskExecutorState::Waiting && m_state != TaskExecutorState::Running) { + co_return; + } + } if (!response_option.has_value()) { std::lock_guard const lock(m_state_mutex); m_state = TaskExecutorState::Error; From f1afa622529fca8e4c8de8ffb8ec5515ef9b3f4d Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 16:05:03 -0400 Subject: [PATCH 26/64] Improve storage task_fail state check --- src/spider/storage/mysql/MySqlStorage.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index a4b83d2e..be220bf9 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1851,12 +1851,16 @@ auto MySqlMetadataStorage::task_fail( ) ); task_statement->setBytes(1, &task_id_bytes); - task_statement->executeUpdate(); + int32_t const task_count = task_statement->executeUpdate(); + if (task_count == 0) { + static_cast(conn)->commit(); + return StorageErr{}; + } // Set the job fails std::unique_ptr const job_statement( static_cast(conn)->prepareStatement( "UPDATE `jobs` SET `state` = 'fail' WHERE `id` = (SELECT `job_id` FROM " - "`tasks` WHERE `id` = ?) AND `state` = 'running'" + "`tasks` WHERE `id` = ?)" ) ); job_statement->setBytes(1, &task_id_bytes); From afbf6b9c0b427cf8401a16d3055e66062e691736 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 16:07:29 -0400 Subject: [PATCH 27/64] Add integration tests for cancel and abort --- tests/CMakeLists.txt | 1 + tests/client/cancel-test.cpp | 8 +-- tests/integration/test_cancel.py | 84 ++++++++++++++++++++++++++++++++ tests/worker/worker-test.cpp | 2 +- tests/worker/worker-test.hpp | 2 +- 5 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_cancel.py diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e69cb2d9..bc491fe7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -108,4 +108,5 @@ add_dependencies( worker_test client_test signal_test + cancel_test ) diff --git a/tests/client/cancel-test.cpp b/tests/client/cancel-test.cpp index 4fe0225c..4814268a 100644 --- a/tests/client/cancel-test.cpp +++ b/tests/client/cancel-test.cpp @@ -71,19 +71,21 @@ auto main(int argc, char** argv) -> int { // Cancel task from user spider::Job sleep_job = driver.start(&sleep_test, 3); + // Wait for the task to run + std::this_thread::sleep_for(std::chrono::seconds(1)); sleep_job.cancel(); // Check for job status sleep_job.wait_complete(); if (spider::JobStatus::Cancelled != sleep_job.get_status()) { - spdlog::error("Job status is not cancelled."); + spdlog::error("Sleep job status is not cancelled"); return cJobNotCancelled; } // Cancel task from task - spider::Job abort_job = driver.start(&abort_test); + spider::Job abort_job = driver.start(&abort_test, 0); abort_job.wait_complete(); if (spider::JobStatus::Cancelled != abort_job.get_status()) { - spdlog::error("Job status is not cancelled."); + spdlog::error("Abort job status is not cancelled"); return cJobNotCancelled; } std::pair const job_errors = abort_job.get_error(); diff --git a/tests/integration/test_cancel.py b/tests/integration/test_cancel.py new file mode 100644 index 00000000..41cc5139 --- /dev/null +++ b/tests/integration/test_cancel.py @@ -0,0 +1,84 @@ +import subprocess +import time +import uuid +from pathlib import Path +from typing import Tuple + +import msgpack +import pytest + +from .client import ( + add_driver, + add_driver_data, + Data, + Driver, + g_storage_url, + get_task_outputs, + get_task_state, + remove_data, + remove_job, + storage, + submit_job, + Task, + TaskGraph, + TaskInput, + TaskOutput, +) +from .utils import g_scheduler_port + + +def start_scheduler_worker( + storage_url: str, scheduler_port: int +) -> Tuple[subprocess.Popen, subprocess.Popen]: + # Start the scheduler + dir_path = Path(__file__).resolve().parent + dir_path = dir_path / ".." / ".." / "src" / "spider" + scheduler_cmds = [ + str(dir_path / "spider_scheduler"), + "--host", + "127.0.0.1", + "--port", + str(scheduler_port), + "--storage_url", + storage_url, + ] + scheduler_process = subprocess.Popen(scheduler_cmds) + worker_cmds = [ + str(dir_path / "spider_worker"), + "--host", + "127.0.0.1", + "--storage_url", + storage_url, + "--libs", + "tests/libworker_test.so", + ] + worker_process = subprocess.Popen(worker_cmds) + return scheduler_process, worker_process + + +@pytest.fixture(scope="class") +def scheduler_worker(storage): + scheduler_process, worker_process = start_scheduler_worker( + storage_url=g_storage_url, scheduler_port=g_scheduler_port + ) + # Wait for 5 second to make sure the scheduler and worker are started + time.sleep(5) + yield + scheduler_process.kill() + worker_process.kill() + + +class TestCancel: + + # Test that the task can be cancelled by user and from the task. + # Execute the cancel_test client. + def test_task_cancel(self, scheduler_worker): + dir_path = Path(__file__).resolve().parent + dir_path = dir_path / ".." + client_cmds = [ + str(dir_path / "cancel_test"), + "--storage_url", + g_storage_url, + ] + p = subprocess.run(client_cmds, timeout=20) + assert p.returncode == 0 diff --git a/tests/worker/worker-test.cpp b/tests/worker/worker-test.cpp index 7e358519..191aefb4 100644 --- a/tests/worker/worker-test.cpp +++ b/tests/worker/worker-test.cpp @@ -77,7 +77,7 @@ auto sleep_test(spider::TaskContext& /*context*/, int milliseconds) -> int { return milliseconds; } -auto abort_test(spider::TaskContext& context) -> int { +auto abort_test(spider::TaskContext& context, int /*x*/) -> int { context.abort("Abort test"); return 0; } diff --git a/tests/worker/worker-test.hpp b/tests/worker/worker-test.hpp index 4734eeb2..9408ac6f 100644 --- a/tests/worker/worker-test.hpp +++ b/tests/worker/worker-test.hpp @@ -29,6 +29,6 @@ auto join_string_test( auto sleep_test(spider::TaskContext& context, int milliseconds) -> int; -auto abort_test(spider::TaskContext& context) -> int; +auto abort_test(spider::TaskContext& context, int /*x*/) -> int; #endif From e2cad8f84716991e35b1385d7c6264326eea404f Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 16:11:05 -0400 Subject: [PATCH 28/64] Revert test storage url change --- tests/integration/client.py | 2 +- tests/storage/StorageTestHelper.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/client.py b/tests/integration/client.py index eda411a8..1b658c5a 100644 --- a/tests/integration/client.py +++ b/tests/integration/client.py @@ -72,7 +72,7 @@ def is_head_task(task_id: uuid.UUID, dependencies: List[Tuple[uuid.UUID, uuid.UU return not any(dependency[1] == task_id for dependency in dependencies) -g_storage_url = "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" +g_storage_url = "jdbc:mariadb://localhost:3306/spider_test?user=root&password=password" @pytest.fixture(scope="session") diff --git a/tests/storage/StorageTestHelper.hpp b/tests/storage/StorageTestHelper.hpp index cd433b4e..0f9406ae 100644 --- a/tests/storage/StorageTestHelper.hpp +++ b/tests/storage/StorageTestHelper.hpp @@ -12,7 +12,7 @@ namespace spider::test { std::string const cMySqlStorageUrl - = "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password"; + = "jdbc:mariadb://localhost:3306/spider_test?user=root&password=password"; using StorageFactoryTypeList = std::tuple; From a55ffac0e7cb650ec68433cec0ee7e84e0ec06ff Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 16:30:32 -0400 Subject: [PATCH 29/64] Fix test cmake --- tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bc491fe7..aafdb22c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -92,7 +92,7 @@ target_link_libraries( spider_client worker_test Boost::program_options - spider_client + spdlog::spdlog ) add_custom_target(integrationTest ALL) From 709422d79f4bc0bae44b22cb93ee42ee90ba5fe1 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 16:32:49 -0400 Subject: [PATCH 30/64] Fix clang-tidy --- src/spider/worker/ExecutorHandle.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spider/worker/ExecutorHandle.hpp b/src/spider/worker/ExecutorHandle.hpp index e67405f0..df83de8d 100644 --- a/src/spider/worker/ExecutorHandle.hpp +++ b/src/spider/worker/ExecutorHandle.hpp @@ -2,6 +2,7 @@ #define SPIDER_WORKER_EXECUTORHANDLE_HPP #include +#include #include From 228d32aeca7e66fe50592efcfe9dda872b24211b Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 16:33:19 -0400 Subject: [PATCH 31/64] Fix storage docstring --- src/spider/storage/MetadataStorage.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index 7390c715..570dcb26 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -100,7 +100,7 @@ class MetadataStorage { * Get the error message of a cancelled job. * @param conn * @param id The job id. - * @page function_name The function name of the cancelled task. + * @param function_name The function name of the cancelled task. * @param message The error message of the cancellation. * @return The error code. */ From b351b112ffe36771c3b493cb3809481c86c2ee0c Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 16:43:23 -0400 Subject: [PATCH 32/64] Reuse connection in worker heartbeat thread --- src/spider/worker/worker.cpp | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index b8ba4a49..d2347919 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -126,12 +126,12 @@ auto get_environment_variable() -> absl::flat_hash_map< /** * Checks if the task is cancelled. If the task state is set to cancelled, cancels the running task. - * @param storage_factory + * @param conn The storage connection to use. * @param metadata_store * @param executor_handle Handle to the task id and executor. */ auto check_task_cancel( - std::shared_ptr const& storage_factory, + std::shared_ptr const& conn, std::shared_ptr const& metadata_store, spider::worker::ExecutorHandle& executor_handle ) -> void { @@ -142,16 +142,6 @@ auto check_task_cancel( boost::uuids::uuid const task_id = optional_task_id.value(); // Check if the task is cancelled - std::variant, spider::core::StorageErr> - conn_result = storage_factory->provide_storage_connection(); - if (std::holds_alternative(conn_result)) { - spdlog::error( - "Failed to connect to storage: {}", - std::get(conn_result).description - ); - return; - } - auto conn = std::move(std::get>(conn_result)); spider::core::TaskState task_state = spider::core::TaskState::Running; spider::core::StorageErr err = metadata_store->get_task_state(*conn, task_id, &task_state); if (false == err.success()) { @@ -178,9 +168,7 @@ auto heartbeat_loop( while (!spider::core::StopFlag::is_stop_requested()) { std::this_thread::sleep_for(std::chrono::seconds(1)); - check_task_cancel(storage_factory, metadata_store, executor_handle); - - spdlog::debug("Updating heartbeat"); + // Getting a storage connection std::variant, spider::core::StorageErr> conn_result = storage_factory->provide_storage_connection(); if (std::holds_alternative(conn_result)) { @@ -191,10 +179,14 @@ auto heartbeat_loop( fail_count++; continue; } - auto conn = std::move( + std::shared_ptr const conn = std::move( std::get>(conn_result) ); + check_task_cancel(conn, metadata_store, executor_handle); + + spdlog::debug("Updating heartbeat"); + spider::core::StorageErr const err = metadata_store->update_heartbeat(*conn, driver.get_id()); if (!err.success()) { From 6711918fd2cb40e918962e82adb6133a205a15a8 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 16:44:04 -0400 Subject: [PATCH 33/64] Fix typo --- src/spider/worker/worker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index d2347919..0c1f2227 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -153,7 +153,7 @@ auto check_task_cancel( return; } - // Cancel thet task. + // Cancel the task. spider::worker::TaskExecutor* executor = executor_handle.get_executor(); executor->cancel(); } From 134c8113d56d03a182bfcee51574a60fafa2e3fd Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 16:54:58 -0400 Subject: [PATCH 34/64] Fix clang-tidy --- tests/client/cancel-test.cpp | 73 +++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/tests/client/cancel-test.cpp b/tests/client/cancel-test.cpp index 4814268a..dada8113 100644 --- a/tests/client/cancel-test.cpp +++ b/tests/client/cancel-test.cpp @@ -1,7 +1,8 @@ -#include +#include +#include #include -#include -#include +#include +#include #include #include @@ -12,10 +13,8 @@ #include // IWYU pragma: keep #include -#include "../../src/spider/client/Data.hpp" #include "../../src/spider/client/Driver.hpp" #include "../../src/spider/client/Job.hpp" -#include "../../src/spider/client/TaskGraph.hpp" #include "../worker/worker-test.hpp" namespace { @@ -41,6 +40,7 @@ auto parse_args(int const argc, char** argv) -> boost::program_options::variable constexpr int cCmdArgParseErr = 1; constexpr int cJobNotCancelled = 2; constexpr int cWrongErrorMessage = 3; +constexpr int cException = 4; } // namespace auto main(int argc, char** argv) -> int { @@ -65,37 +65,42 @@ auto main(int argc, char** argv) -> int { return cCmdArgParseErr; } - // Create driver - spider::Driver driver{storage_url}; - spdlog::debug("Driver created"); + try { + // Create driver + spider::Driver driver{storage_url}; + spdlog::debug("Driver created"); - // Cancel task from user - spider::Job sleep_job = driver.start(&sleep_test, 3); - // Wait for the task to run - std::this_thread::sleep_for(std::chrono::seconds(1)); - sleep_job.cancel(); - // Check for job status - sleep_job.wait_complete(); - if (spider::JobStatus::Cancelled != sleep_job.get_status()) { - spdlog::error("Sleep job status is not cancelled"); - return cJobNotCancelled; - } + // Cancel task from user + spider::Job sleep_job = driver.start(&sleep_test, 3); + // Wait for the task to run + std::this_thread::sleep_for(std::chrono::seconds(1)); + sleep_job.cancel(); + // Check for job status + sleep_job.wait_complete(); + if (spider::JobStatus::Cancelled != sleep_job.get_status()) { + spdlog::error("Sleep job status is not cancelled"); + return cJobNotCancelled; + } - // Cancel task from task - spider::Job abort_job = driver.start(&abort_test, 0); - abort_job.wait_complete(); - if (spider::JobStatus::Cancelled != abort_job.get_status()) { - spdlog::error("Abort job status is not cancelled"); - return cJobNotCancelled; - } - std::pair const job_errors = abort_job.get_error(); - if ("abort_test" != job_errors.first) { - spdlog::error("Cancelled task wrong function name"); - return cWrongErrorMessage; - } - if ("Abort test" != job_errors.second) { - spdlog::error("Cancelled task wrong error message"); - return cWrongErrorMessage; + // Cancel task from task + spider::Job abort_job = driver.start(&abort_test, 0); + abort_job.wait_complete(); + if (spider::JobStatus::Cancelled != abort_job.get_status()) { + spdlog::error("Abort job status is not cancelled"); + return cJobNotCancelled; + } + std::pair const job_errors = abort_job.get_error(); + if ("abort_test" != job_errors.first) { + spdlog::error("Cancelled task wrong function name"); + return cWrongErrorMessage; + } + if ("Abort test" != job_errors.second) { + spdlog::error("Cancelled task wrong error message"); + return cWrongErrorMessage; + } + } catch (std::exception& e) { + spdlog::error("Exception: {}", e.what()); + return cException; } return 0; From 9a33bbdece8c7dda9e7d889df036a99d8a0cb20e Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 17:02:34 -0400 Subject: [PATCH 35/64] Do not add error message when cancel job from user --- src/spider/client/Job.hpp | 3 +-- src/spider/storage/MetadataStorage.hpp | 6 +----- src/spider/storage/mysql/MySqlStorage.cpp | 17 ++--------------- src/spider/storage/mysql/MySqlStorage.hpp | 3 +-- tests/storage/test-MetadataStorage.cpp | 9 +-------- 5 files changed, 6 insertions(+), 32 deletions(-) diff --git a/src/spider/client/Job.hpp b/src/spider/client/Job.hpp index be6e0c0c..35497ad1 100644 --- a/src/spider/client/Job.hpp +++ b/src/spider/client/Job.hpp @@ -90,8 +90,7 @@ class Job { } auto conn = std::move(std::get>(conn_result)); - core::StorageErr const err - = m_metadata_storage->cancel_job(*conn, m_id, "Job cancelled by user"); + core::StorageErr const err = m_metadata_storage->cancel_job(*conn, m_id); if (!err.success()) { throw ConnectionException(err.description); } diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index 570dcb26..5ec2317e 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -77,13 +77,9 @@ class MetadataStorage { * finished or started to CANCEL. * @param conn * @param id The job id. - * @param message The error message of the cancellation. * @return The error code. */ - virtual auto - cancel_job(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) - -> StorageErr - = 0; + virtual auto cancel_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; /** * Cancel a job that owns the task. This will set the job state to CANCEL and set all tasks * that have not finished or started to CANCEL. diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index be220bf9..912749cb 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1053,11 +1053,8 @@ auto MySqlMetadataStorage::get_jobs_by_client_id( return StorageErr{}; } -auto MySqlMetadataStorage::cancel_job( - StorageConnection& conn, - boost::uuids::uuid const id, - std::string const& message -) -> StorageErr { +auto MySqlMetadataStorage::cancel_job(StorageConnection& conn, boost::uuids::uuid const id) + -> StorageErr { try { // Set all pending/ready/running tasks from the job to cancelled std::unique_ptr task_statement( @@ -1077,16 +1074,6 @@ auto MySqlMetadataStorage::cancel_job( ); job_statement->setBytes(1, &id_bytes); job_statement->executeUpdate(); - // Set the cancel message - std::unique_ptr message_statement( - static_cast(conn)->prepareStatement( - "INSERT INTO `job_errors` (`job_id`, `func_name`, `message`) VALUES (?, " - "'', ?) " - ) - ); - message_statement->setBytes(1, &id_bytes); - message_statement->setString(2, message); - message_statement->executeUpdate(); } catch (sql::SQLException& e) { static_cast(conn)->rollback(); return StorageErr{StorageErrType::OtherErr, e.what()}; diff --git a/src/spider/storage/mysql/MySqlStorage.hpp b/src/spider/storage/mysql/MySqlStorage.hpp index abe7b898..81b154a2 100644 --- a/src/spider/storage/mysql/MySqlStorage.hpp +++ b/src/spider/storage/mysql/MySqlStorage.hpp @@ -71,8 +71,7 @@ class MySqlMetadataStorage : public MetadataStorage { boost::uuids::uuid client_id, std::vector* job_ids ) -> StorageErr override; - auto cancel_job(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) - -> StorageErr override; + auto cancel_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; auto cancel_job_by_task(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) -> StorageErr override; diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index 5cd77c03..d3aa537f 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -497,18 +497,11 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT .success()); // Cancel job should succeed - std::string const error_message = "test error message"; - REQUIRE(storage->cancel_job(*conn, job_id, error_message).success()); + REQUIRE(storage->cancel_job(*conn, job_id).success()); // Job status should be cancelled spider::core::JobStatus job_status = spider::core::JobStatus::Running; REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); REQUIRE(spider::core::JobStatus::Cancelled == job_status); - // Job error message should be set - std::string error_task_res{"random"}; - std::string error_message_res; - REQUIRE(storage->get_job_message(*conn, job_id, &error_task_res, &error_message_res).success()); - REQUIRE(error_task_res.empty()); - REQUIRE(error_message == error_message_res); // Parent 1 state should be success spider::core::TaskState task_state = spider::core::TaskState::Running; From 17dd4df977887f23c2ddf5ece8751983fc0c91ec Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 12 May 2025 19:54:21 -0400 Subject: [PATCH 36/64] Add cancel inside ExecutorHandle --- src/spider/worker/ExecutorHandle.cpp | 7 +++++++ src/spider/worker/ExecutorHandle.hpp | 1 + src/spider/worker/worker.cpp | 3 +-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/spider/worker/ExecutorHandle.cpp b/src/spider/worker/ExecutorHandle.cpp index cfa5da67..ab98db8a 100644 --- a/src/spider/worker/ExecutorHandle.cpp +++ b/src/spider/worker/ExecutorHandle.cpp @@ -21,6 +21,13 @@ auto ExecutorHandle::get_executor() -> TaskExecutor* { return m_executor; } +auto ExecutorHandle::executor_cancel() -> void { + std::lock_guard const lock_guard{m_mutex}; + if (nullptr != m_executor) { + m_executor->cancel(); + } +} + auto ExecutorHandle::set(boost::uuids::uuid const task_id, TaskExecutor* executor) -> void { std::lock_guard const lock_guard{m_mutex}; m_task_id = task_id; diff --git a/src/spider/worker/ExecutorHandle.hpp b/src/spider/worker/ExecutorHandle.hpp index df83de8d..e0dc2d18 100644 --- a/src/spider/worker/ExecutorHandle.hpp +++ b/src/spider/worker/ExecutorHandle.hpp @@ -17,6 +17,7 @@ class ExecutorHandle { public: [[nodiscard]] auto get_task_id() -> std::optional; [[nodiscard]] auto get_executor() -> TaskExecutor*; + auto executor_cancel() -> void; auto set(boost::uuids::uuid task_id, TaskExecutor* executor) -> void; auto clear() -> void; diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 0c1f2227..e1db2718 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -154,8 +154,7 @@ auto check_task_cancel( } // Cancel the task. - spider::worker::TaskExecutor* executor = executor_handle.get_executor(); - executor->cancel(); + executor_handle.executor_cancel(); } auto heartbeat_loop( From 670864cc3c5fd50e253e7a36d0c9d047ba369c56 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 15:24:41 -0400 Subject: [PATCH 37/64] Rename get_job_message to get_error_message --- src/spider/client/Job.hpp | 4 ++-- src/spider/storage/MetadataStorage.hpp | 2 +- src/spider/storage/mysql/MySqlStorage.cpp | 2 +- src/spider/storage/mysql/MySqlStorage.hpp | 2 +- tests/storage/test-MetadataStorage.cpp | 4 +++- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/spider/client/Job.hpp b/src/spider/client/Job.hpp index 1a149552..50a86cae 100644 --- a/src/spider/client/Job.hpp +++ b/src/spider/client/Job.hpp @@ -177,7 +177,7 @@ class Job { std::pair res; core::StorageErr const err - = m_metadata_storage->get_job_message(*conn, m_id, &res.first, &res.second); + = m_metadata_storage->get_error_message(*conn, m_id, &res.first, &res.second); if (false == err.success()) { throw ConnectionException{err.description}; } @@ -186,7 +186,7 @@ class Job { std::pair res; core::StorageErr const err - = m_metadata_storage->get_job_message(*m_conn, m_id, &res.first, &res.second); + = m_metadata_storage->get_error_message(*m_conn, m_id, &res.first, &res.second); if (false == err.success()) { throw ConnectionException{err.description}; } diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index 5d1053d4..1b9cd818 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -103,7 +103,7 @@ class MetadataStorage { * @param message The error message of the cancellation. * @return The error code. */ - virtual auto get_job_message( + virtual auto get_error_message( StorageConnection& conn, boost::uuids::uuid id, std::string* function_name, diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index a61ee82c..6d8d626f 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1164,7 +1164,7 @@ auto MySqlMetadataStorage::cancel_job_by_task( return StorageErr{}; } -auto MySqlMetadataStorage::get_job_message( +auto MySqlMetadataStorage::get_error_message( StorageConnection& conn, boost::uuids::uuid const id, std::string* function_name, diff --git a/src/spider/storage/mysql/MySqlStorage.hpp b/src/spider/storage/mysql/MySqlStorage.hpp index c5903b0c..f5eb6426 100644 --- a/src/spider/storage/mysql/MySqlStorage.hpp +++ b/src/spider/storage/mysql/MySqlStorage.hpp @@ -77,7 +77,7 @@ class MySqlMetadataStorage : public MetadataStorage { auto cancel_job_by_task(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) -> StorageErr override; - auto get_job_message( + auto get_error_message( StorageConnection& conn, boost::uuids::uuid id, std::string* function_name, diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index 9c5f2d78..fec43519 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -587,7 +587,9 @@ TEMPLATE_LIST_TEST_CASE("Job cancel by task", "[storage]", spider::test::Storage // Job error message should be set std::string error_task_res; std::string error_message_res; - REQUIRE(storage->get_job_message(*conn, job_id, &error_task_res, &error_message_res).success()); + REQUIRE( + storage->get_error_message(*conn, job_id, &error_task_res, &error_message_res).success() + ); REQUIRE("p2" == error_task_res); REQUIRE(error_message == error_message_res); From f500c1ada4948f10ccab90c451c5f58129d94f84 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 15:39:00 -0400 Subject: [PATCH 38/64] Rename TaskExecutor is_* functions --- src/spider/worker/TaskExecutor.cpp | 10 +++++----- src/spider/worker/TaskExecutor.hpp | 10 +++++----- src/spider/worker/worker.cpp | 4 ++-- tests/worker/test-TaskExecutor.cpp | 10 +++++----- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/spider/worker/TaskExecutor.cpp b/src/spider/worker/TaskExecutor.cpp index 24010e8b..c05350af 100644 --- a/src/spider/worker/TaskExecutor.cpp +++ b/src/spider/worker/TaskExecutor.cpp @@ -21,28 +21,28 @@ auto TaskExecutor::get_pid() const -> pid_t { return m_process->get_pid(); } -auto TaskExecutor::is_completed() -> bool { +auto TaskExecutor::completed() -> bool { std::lock_guard const lock(m_state_mutex); return TaskExecutorState::Succeed == m_state || TaskExecutorState::Error == m_state || TaskExecutorState::Cancelled == m_state; } -auto TaskExecutor::is_waiting() -> bool { +auto TaskExecutor::waiting() -> bool { std::lock_guard const lock(m_state_mutex); return TaskExecutorState::Waiting == m_state; } -auto TaskExecutor::is_succeeded() -> bool { +auto TaskExecutor::succeeded() -> bool { std::lock_guard const lock(m_state_mutex); return TaskExecutorState::Succeed == m_state; } -auto TaskExecutor::is_error() -> bool { +auto TaskExecutor::errored() -> bool { std::lock_guard const lock(m_state_mutex); return TaskExecutorState::Error == m_state; } -auto TaskExecutor::is_cancelled() -> bool { +auto TaskExecutor::cancelled() -> bool { std::lock_guard const lock(m_state_mutex); return TaskExecutorState::Cancelled == m_state; } diff --git a/src/spider/worker/TaskExecutor.hpp b/src/spider/worker/TaskExecutor.hpp index 7cd402d2..7c60627d 100644 --- a/src/spider/worker/TaskExecutor.hpp +++ b/src/spider/worker/TaskExecutor.hpp @@ -155,11 +155,11 @@ class TaskExecutor { */ [[nodiscard]] auto get_pid() const -> pid_t; - auto is_completed() -> bool; - auto is_waiting() -> bool; - auto is_succeeded() -> bool; - auto is_error() -> bool; - auto is_cancelled() -> bool; + auto completed() -> bool; + auto waiting() -> bool; + auto succeeded() -> bool; + auto errored() -> bool; + auto cancelled() -> bool; void wait(); diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 6b6627d7..bcdade89 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -323,7 +323,7 @@ auto handle_executor_result( } auto conn = std::move(std::get>(conn_result)); - if (!executor.is_succeeded()) { + if (!executor.succeeded()) { spdlog::warn("Task {} failed", task.get_function_name()); metadata_store->task_fail( *conn, @@ -443,7 +443,7 @@ auto task_loop( executor_handle.clear(); spider::core::ChildPid::set_pid(0); - if (executor.is_cancelled()) { + if (executor.cancelled()) { // If task is cancelled by user or other tasks, the states have been updated in the // storage, no need to do anything. // If task is cancelled by calling `TaskContext::abort`, the storage has also been diff --git a/tests/worker/test-TaskExecutor.cpp b/tests/worker/test-TaskExecutor.cpp index 0acda983..4245a544 100644 --- a/tests/worker/test-TaskExecutor.cpp +++ b/tests/worker/test-TaskExecutor.cpp @@ -88,7 +88,7 @@ TEMPLATE_LIST_TEST_CASE( }; context.run(); executor.wait(); - REQUIRE(executor.is_succeeded()); + REQUIRE(executor.succeeded()); std::optional const result_option = executor.get_result(); REQUIRE(result_option.has_value()); REQUIRE(5 == result_option.value_or(0)); @@ -119,7 +119,7 @@ TEMPLATE_LIST_TEST_CASE( }; context.run(); executor.wait(); - REQUIRE(executor.is_error()); + REQUIRE(executor.errored()); std::tuple error = executor.get_error(); REQUIRE(spider::core::FunctionInvokeError::WrongNumberOfArguments == std::get<0>(error)); } @@ -149,7 +149,7 @@ TEMPLATE_LIST_TEST_CASE( }; context.run(); executor.wait(); - REQUIRE(executor.is_error()); + REQUIRE(executor.errored()); std::tuple error = executor.get_error(); REQUIRE(spider::core::FunctionInvokeError::FunctionExecutionError == std::get<0>(error)); } @@ -211,7 +211,7 @@ TEMPLATE_LIST_TEST_CASE( }; context.run(); executor.wait(); - REQUIRE(executor.is_succeeded()); + REQUIRE(executor.succeeded()); std::optional const optional_result = executor.get_result(); REQUIRE(optional_result.has_value()); if (optional_result.has_value()) { @@ -255,7 +255,7 @@ TEMPLATE_LIST_TEST_CASE( }; context.run(); executor.wait(); - REQUIRE(executor.is_succeeded()); + REQUIRE(executor.succeeded()); std::optional const result_option = executor.get_result(); REQUIRE(result_option.has_value()); REQUIRE(input_1 + input_2 == result_option.value_or("")); From bef31da88ff8c872ec5ca0f4aa413eae3b846fbc Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 15:45:32 -0400 Subject: [PATCH 39/64] Fix comment grammer --- src/spider/worker/worker.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index bcdade89..e436dbec 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -444,10 +444,10 @@ auto task_loop( spider::core::ChildPid::set_pid(0); if (executor.cancelled()) { - // If task is cancelled by user or other tasks, the states have been updated in the - // storage, no need to do anything. - // If task is cancelled by calling `TaskContext::abort`, the storage has also been - // updated, so we also don't need to do anything. + // If the task is cancelled by the user or other tasks, the states have already been + // updated in the storage, so there's no need to do anything. + // If the task is cancelled by calling `TaskContext::abort`, the storage has also been + // updated, so again, no further action is needed. spdlog::debug("Task {} was cancelled", task.get_function_name()); continue; } From 216913e80de633152a029faec65f82bd85b1d6cc Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 15:46:37 -0400 Subject: [PATCH 40/64] Remove unnecessary comment --- src/spider/worker/worker.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index e436dbec..9ccfca8b 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -141,7 +141,6 @@ auto check_task_cancel( } boost::uuids::uuid const task_id = optional_task_id.value(); - // Check if the task is cancelled spider::core::TaskState task_state = spider::core::TaskState::Running; spider::core::StorageErr err = metadata_store->get_task_state(*conn, task_id, &task_state); if (false == err.success()) { From 594f32ef8129500495cb90ac9761ab801807d3b2 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 15:52:05 -0400 Subject: [PATCH 41/64] Improve ExeuctorHandle docstring --- src/spider/worker/ExecutorHandle.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/spider/worker/ExecutorHandle.hpp b/src/spider/worker/ExecutorHandle.hpp index e0dc2d18..fb86b185 100644 --- a/src/spider/worker/ExecutorHandle.hpp +++ b/src/spider/worker/ExecutorHandle.hpp @@ -10,8 +10,9 @@ namespace spider::worker { /** - * Provides a thread-safe access to the task executor and other task related variables across - * threads. + * This class acts as a handle for thread-safe access to the task executor and task id. + * It maintains a weak reference to the task executor to prevent multiple destructor calls and + * ensures that access remains valid only while the executor itself is valid. */ class ExecutorHandle { public: @@ -22,7 +23,7 @@ class ExecutorHandle { auto clear() -> void; private: - boost::uuids::uuid m_task_id; // The task id is only valid if there is an executor. + boost::uuids::uuid m_task_id; TaskExecutor* m_executor = nullptr; // Do not use std::shared_ptr to avoid calling destructor twice. std::mutex m_mutex; From 623ee0e5c61fb3a5316b587505f6211012ba8a5d Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 15:54:05 -0400 Subject: [PATCH 42/64] Fix relative include header --- tests/client/cancel-test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/client/cancel-test.cpp b/tests/client/cancel-test.cpp index dada8113..ba12458f 100644 --- a/tests/client/cancel-test.cpp +++ b/tests/client/cancel-test.cpp @@ -13,9 +13,9 @@ #include // IWYU pragma: keep #include -#include "../../src/spider/client/Driver.hpp" -#include "../../src/spider/client/Job.hpp" -#include "../worker/worker-test.hpp" +#include +#include +#include namespace { auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map { From 5b8ab0133cb5b184b31094a544b72a6383b6fc99 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 15:55:40 -0400 Subject: [PATCH 43/64] Fix library include --- tests/client/cancel-test.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/client/cancel-test.cpp b/tests/client/cancel-test.cpp index ba12458f..cebaf84f 100644 --- a/tests/client/cancel-test.cpp +++ b/tests/client/cancel-test.cpp @@ -4,13 +4,8 @@ #include #include -#include -#include -#include -#include -#include -#include -#include // IWYU pragma: keep +#include +#include #include #include @@ -29,7 +24,6 @@ auto parse_args(int const argc, char** argv) -> boost::program_options::variable boost::program_options::variables_map variables; boost::program_options::store( - // NOLINTNEXTLINE(misc-include-cleaner) boost::program_options::parse_command_line(argc, argv, desc), variables ); From cd55a1e623fa48d4f714028578eee9cfcd81a024 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 15:58:03 -0400 Subject: [PATCH 44/64] Improve integration test docstring --- tests/integration/test_cancel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_cancel.py b/tests/integration/test_cancel.py index 41cc5139..01d428b5 100644 --- a/tests/integration/test_cancel.py +++ b/tests/integration/test_cancel.py @@ -71,7 +71,8 @@ def scheduler_worker(storage): class TestCancel: # Test that the task can be cancelled by user and from the task. - # Execute the cancel_test client. + # Execute the cancel_test client, which includes cancelling a running task + # and executing a task that cancels itself. def test_task_cancel(self, scheduler_worker): dir_path = Path(__file__).resolve().parent dir_path = dir_path / ".." From 62976a24fd8a90ebaa46e7890e8466855b30b968 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 15:59:39 -0400 Subject: [PATCH 45/64] Remove unnecessary comment --- tests/client/cancel-test.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/client/cancel-test.cpp b/tests/client/cancel-test.cpp index cebaf84f..bb845aa4 100644 --- a/tests/client/cancel-test.cpp +++ b/tests/client/cancel-test.cpp @@ -64,12 +64,11 @@ auto main(int argc, char** argv) -> int { spider::Driver driver{storage_url}; spdlog::debug("Driver created"); - // Cancel task from user spider::Job sleep_job = driver.start(&sleep_test, 3); - // Wait for the task to run + std::this_thread::sleep_for(std::chrono::seconds(1)); sleep_job.cancel(); - // Check for job status + sleep_job.wait_complete(); if (spider::JobStatus::Cancelled != sleep_job.get_status()) { spdlog::error("Sleep job status is not cancelled"); From 89b2813c42d386a048b5ad28efd5ca96fd616304 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 16:00:37 -0400 Subject: [PATCH 46/64] Remove library header nolint --- tests/client/cancel-test.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/client/cancel-test.cpp b/tests/client/cancel-test.cpp index bb845aa4..1de3d631 100644 --- a/tests/client/cancel-test.cpp +++ b/tests/client/cancel-test.cpp @@ -38,7 +38,6 @@ constexpr int cException = 4; } // namespace auto main(int argc, char** argv) -> int { - // NOLINTNEXTLINE(misc-include-cleaner) spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] [spider.client] %v"); #ifndef NDEBUG spdlog::set_level(spdlog::level::trace); From c0f533f67178756e14de4c526af70bd943bfd1f5 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 16:03:04 -0400 Subject: [PATCH 47/64] Remove unncecessary comments --- tests/storage/test-MetadataStorage.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index fec43519..49eea8ec 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -462,7 +462,6 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT boost::uuids::random_generator gen; boost::uuids::uuid const job_id = gen(); - // Create a complicated task graph spider::core::Task child_task{"child"}; spider::core::Task parent_1{"p1"}; spider::core::Task parent_2{"p2"}; @@ -479,7 +478,6 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT parent_2.set_max_retries(1); child_task.set_max_retries(1); spider::core::TaskGraph graph; - // Add task and dependencies to task graph in wrong order graph.add_task(child_task); graph.add_task(parent_1); graph.add_task(parent_2); @@ -488,10 +486,8 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT graph.add_input_task(parent_1.get_id()); graph.add_input_task(parent_2.get_id()); graph.add_output_task(child_task.get_id()); - // Submit job should success REQUIRE(storage->add_job(*conn, job_id, gen(), graph).success()); - // Task finish for parent 1 should succeed spider::core::TaskInstance const parent_1_instance{gen(), parent_1.get_id()}; REQUIRE(storage->set_task_state(*conn, parent_1.get_id(), spider::core::TaskState::Running) .success()); @@ -502,24 +498,19 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT ) .success()); - // Cancel job should succeed REQUIRE(storage->cancel_job(*conn, job_id).success()); - // Job status should be cancelled spider::core::JobStatus job_status = spider::core::JobStatus::Running; REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); REQUIRE(spider::core::JobStatus::Cancelled == job_status); - // Parent 1 state should be success spider::core::TaskState task_state = spider::core::TaskState::Running; REQUIRE(storage->get_task_state(*conn, parent_1.get_id(), &task_state).success()); REQUIRE(spider::core::TaskState::Succeed == task_state); - // Parent 2 and child states should be cancelled REQUIRE(storage->get_task_state(*conn, parent_2.get_id(), &task_state).success()); REQUIRE(spider::core::TaskState::Canceled == task_state); REQUIRE(storage->get_task_state(*conn, child_task.get_id(), &task_state).success()); REQUIRE(spider::core::TaskState::Canceled == task_state); - // Clean up REQUIRE(storage->remove_job(*conn, job_id).success()); } @@ -537,7 +528,6 @@ TEMPLATE_LIST_TEST_CASE("Job cancel by task", "[storage]", spider::test::Storage boost::uuids::random_generator gen; boost::uuids::uuid const job_id = gen(); - // Create a complicated task graph spider::core::Task child_task{"child"}; spider::core::Task parent_1{"p1"}; spider::core::Task parent_2{"p2"}; @@ -554,7 +544,6 @@ TEMPLATE_LIST_TEST_CASE("Job cancel by task", "[storage]", spider::test::Storage parent_2.set_max_retries(1); child_task.set_max_retries(1); spider::core::TaskGraph graph; - // Add task and dependencies to task graph in wrong order graph.add_task(child_task); graph.add_task(parent_1); graph.add_task(parent_2); @@ -563,10 +552,8 @@ TEMPLATE_LIST_TEST_CASE("Job cancel by task", "[storage]", spider::test::Storage graph.add_input_task(parent_1.get_id()); graph.add_input_task(parent_2.get_id()); graph.add_output_task(child_task.get_id()); - // Submit job should success REQUIRE(storage->add_job(*conn, job_id, gen(), graph).success()); - // Task finish for parent 1 should succeed spider::core::TaskInstance const parent_1_instance{gen(), parent_1.get_id()}; REQUIRE(storage->set_task_state(*conn, parent_1.get_id(), spider::core::TaskState::Running) .success()); @@ -577,14 +564,11 @@ TEMPLATE_LIST_TEST_CASE("Job cancel by task", "[storage]", spider::test::Storage ) .success()); - // Cancel job by task should succeed std::string const error_message = "test error message"; REQUIRE(storage->cancel_job_by_task(*conn, parent_2.get_id(), error_message).success()); - // Job status should be cancelled spider::core::JobStatus job_status = spider::core::JobStatus::Running; REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); REQUIRE(spider::core::JobStatus::Cancelled == job_status); - // Job error message should be set std::string error_task_res; std::string error_message_res; REQUIRE( @@ -593,17 +577,14 @@ TEMPLATE_LIST_TEST_CASE("Job cancel by task", "[storage]", spider::test::Storage REQUIRE("p2" == error_task_res); REQUIRE(error_message == error_message_res); - // Parent 1 state should be success spider::core::TaskState task_state = spider::core::TaskState::Running; REQUIRE(storage->get_task_state(*conn, parent_1.get_id(), &task_state).success()); REQUIRE(spider::core::TaskState::Succeed == task_state); - // Parent 2 and child states should be cancelled REQUIRE(storage->get_task_state(*conn, parent_2.get_id(), &task_state).success()); REQUIRE(spider::core::TaskState::Canceled == task_state); REQUIRE(storage->get_task_state(*conn, child_task.get_id(), &task_state).success()); REQUIRE(spider::core::TaskState::Canceled == task_state); - // Clean up REQUIRE(storage->remove_job(*conn, job_id).success()); } From e2a2ea69ca357706f7452bfe5370e2b0ee971bd8 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 16:20:14 -0400 Subject: [PATCH 48/64] Refactor job cancel unit test setup into a separate function --- tests/storage/test-MetadataStorage.cpp | 91 ++++++++++---------------- 1 file changed, 35 insertions(+), 56 deletions(-) diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index 49eea8ec..904aa5d8 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -448,17 +448,17 @@ TEMPLATE_LIST_TEST_CASE("Task finish", "[storage]", spider::test::StorageFactory REQUIRE(storage->remove_job(*conn, job_id).success()); } -TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryTypeList) { - std::unique_ptr storage_factory - = spider::test::create_storage_factory(); - std::unique_ptr storage - = storage_factory->provide_metadata_storage(); - - std::variant, spider::core::StorageErr> - conn_result = storage_factory->provide_storage_connection(); - REQUIRE(std::holds_alternative>(conn_result)); - auto conn = std::move(std::get>(conn_result)); - +/** + * Create a common job cancel test setup. Create a job with a task graph and set parent_1 to + * succeed. + * @param storage + * @param conn + * @return A tuple containing the job_id, parent_1_id, parent_2_id, and child_task_id. + */ +auto job_cancel_setup( + std::unique_ptr& storage, + std::unique_ptr& conn +) -> std::tuple { boost::uuids::random_generator gen; boost::uuids::uuid const job_id = gen(); @@ -498,17 +498,33 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT ) .success()); + return std::make_tuple(job_id, parent_1.get_id(), parent_2.get_id(), child_task.get_id()); +} + +TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryTypeList) { + std::unique_ptr storage_factory + = spider::test::create_storage_factory(); + std::unique_ptr storage + = storage_factory->provide_metadata_storage(); + + std::variant, spider::core::StorageErr> + conn_result = storage_factory->provide_storage_connection(); + REQUIRE(std::holds_alternative>(conn_result)); + auto conn = std::move(std::get>(conn_result)); + + auto [job_id, parent_1_id, parent_2_id, child_id] = job_cancel_setup(storage, conn); + REQUIRE(storage->cancel_job(*conn, job_id).success()); spider::core::JobStatus job_status = spider::core::JobStatus::Running; REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); REQUIRE(spider::core::JobStatus::Cancelled == job_status); spider::core::TaskState task_state = spider::core::TaskState::Running; - REQUIRE(storage->get_task_state(*conn, parent_1.get_id(), &task_state).success()); + REQUIRE(storage->get_task_state(*conn, parent_1_id, &task_state).success()); REQUIRE(spider::core::TaskState::Succeed == task_state); - REQUIRE(storage->get_task_state(*conn, parent_2.get_id(), &task_state).success()); + REQUIRE(storage->get_task_state(*conn, parent_2_id, &task_state).success()); REQUIRE(spider::core::TaskState::Canceled == task_state); - REQUIRE(storage->get_task_state(*conn, child_task.get_id(), &task_state).success()); + REQUIRE(storage->get_task_state(*conn, child_id, &task_state).success()); REQUIRE(spider::core::TaskState::Canceled == task_state); REQUIRE(storage->remove_job(*conn, job_id).success()); @@ -525,47 +541,10 @@ TEMPLATE_LIST_TEST_CASE("Job cancel by task", "[storage]", spider::test::Storage REQUIRE(std::holds_alternative>(conn_result)); auto conn = std::move(std::get>(conn_result)); - boost::uuids::random_generator gen; - boost::uuids::uuid const job_id = gen(); - - spider::core::Task child_task{"child"}; - spider::core::Task parent_1{"p1"}; - spider::core::Task parent_2{"p2"}; - parent_1.add_input(spider::core::TaskInput{"1", "float"}); - parent_1.add_input(spider::core::TaskInput{"2", "float"}); - parent_2.add_input(spider::core::TaskInput{"3", "int"}); - parent_2.add_input(spider::core::TaskInput{"4", "int"}); - parent_1.add_output(spider::core::TaskOutput{"float"}); - parent_2.add_output(spider::core::TaskOutput{"int"}); - child_task.add_input(spider::core::TaskInput{parent_1.get_id(), 0, "float"}); - child_task.add_input(spider::core::TaskInput{parent_2.get_id(), 0, "int"}); - child_task.add_output(spider::core::TaskOutput{"float"}); - parent_1.set_max_retries(1); - parent_2.set_max_retries(1); - child_task.set_max_retries(1); - spider::core::TaskGraph graph; - graph.add_task(child_task); - graph.add_task(parent_1); - graph.add_task(parent_2); - graph.add_dependency(parent_2.get_id(), child_task.get_id()); - graph.add_dependency(parent_1.get_id(), child_task.get_id()); - graph.add_input_task(parent_1.get_id()); - graph.add_input_task(parent_2.get_id()); - graph.add_output_task(child_task.get_id()); - REQUIRE(storage->add_job(*conn, job_id, gen(), graph).success()); - - spider::core::TaskInstance const parent_1_instance{gen(), parent_1.get_id()}; - REQUIRE(storage->set_task_state(*conn, parent_1.get_id(), spider::core::TaskState::Running) - .success()); - REQUIRE(storage->task_finish( - *conn, - parent_1_instance, - {spider::core::TaskOutput{"1.1", "float"}} - ) - .success()); + auto [job_id, parent_1_id, parent_2_id, child_id] = job_cancel_setup(storage, conn); std::string const error_message = "test error message"; - REQUIRE(storage->cancel_job_by_task(*conn, parent_2.get_id(), error_message).success()); + REQUIRE(storage->cancel_job_by_task(*conn, parent_2_id, error_message).success()); spider::core::JobStatus job_status = spider::core::JobStatus::Running; REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); REQUIRE(spider::core::JobStatus::Cancelled == job_status); @@ -578,11 +557,11 @@ TEMPLATE_LIST_TEST_CASE("Job cancel by task", "[storage]", spider::test::Storage REQUIRE(error_message == error_message_res); spider::core::TaskState task_state = spider::core::TaskState::Running; - REQUIRE(storage->get_task_state(*conn, parent_1.get_id(), &task_state).success()); + REQUIRE(storage->get_task_state(*conn, parent_1_id, &task_state).success()); REQUIRE(spider::core::TaskState::Succeed == task_state); - REQUIRE(storage->get_task_state(*conn, parent_2.get_id(), &task_state).success()); + REQUIRE(storage->get_task_state(*conn, parent_2_id, &task_state).success()); REQUIRE(spider::core::TaskState::Canceled == task_state); - REQUIRE(storage->get_task_state(*conn, child_task.get_id(), &task_state).success()); + REQUIRE(storage->get_task_state(*conn, child_id, &task_state).success()); REQUIRE(spider::core::TaskState::Canceled == task_state); REQUIRE(storage->remove_job(*conn, job_id).success()); From f7b36bb17538d05bc6d2e262316b679cf1b87d45 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 16:36:11 -0400 Subject: [PATCH 49/64] Fix clang tidy --- tests/storage/test-MetadataStorage.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index 904aa5d8..ff38a233 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include From 1d20f50d3c1a3e554d99c060a0ee19cfde3f05a1 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 29 May 2025 16:54:28 -0400 Subject: [PATCH 50/64] Fix clang-tidy --- tests/client/cancel-test.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/client/cancel-test.cpp b/tests/client/cancel-test.cpp index 1de3d631..e84df235 100644 --- a/tests/client/cancel-test.cpp +++ b/tests/client/cancel-test.cpp @@ -4,8 +4,13 @@ #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include // IWYU pragma: keep #include #include From 736890078d862085caf383fb08f5955e166151c1 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 2 Jun 2025 10:56:41 -0400 Subject: [PATCH 51/64] Fix clang tidy --- tests/client/cancel-test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/client/cancel-test.cpp b/tests/client/cancel-test.cpp index e84df235..72361e4f 100644 --- a/tests/client/cancel-test.cpp +++ b/tests/client/cancel-test.cpp @@ -29,6 +29,7 @@ auto parse_args(int const argc, char** argv) -> boost::program_options::variable boost::program_options::variables_map variables; boost::program_options::store( + // NOLINTNEXTLINE(misc-include-cleaner) boost::program_options::parse_command_line(argc, argv, desc), variables ); From 2f9d378ab546a258ec726eb66ff2e831c47b207e Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 00:19:51 -0400 Subject: [PATCH 52/64] Apply suggestions from code review Co-authored-by: davidlion --- src/spider/worker/ExecutorHandle.hpp | 6 ++++-- src/spider/worker/worker.cpp | 1 - tests/worker/worker-test.hpp | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/spider/worker/ExecutorHandle.hpp b/src/spider/worker/ExecutorHandle.hpp index fb86b185..ce2af5de 100644 --- a/src/spider/worker/ExecutorHandle.hpp +++ b/src/spider/worker/ExecutorHandle.hpp @@ -24,8 +24,10 @@ class ExecutorHandle { private: boost::uuids::uuid m_task_id; - TaskExecutor* m_executor - = nullptr; // Do not use std::shared_ptr to avoid calling destructor twice. + + // Do not use std::shared_ptr to avoid calling destructor twice. + TaskExecutor* m_executor = nullptr; + std::mutex m_mutex; }; } // namespace spider::worker diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 9ccfca8b..f4b24b1a 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -184,7 +184,6 @@ auto heartbeat_loop( check_task_cancel(conn, metadata_store, executor_handle); spdlog::debug("Updating heartbeat"); - spider::core::StorageErr const err = metadata_store->update_heartbeat(*conn, driver.get_id()); if (!err.success()) { diff --git a/tests/worker/worker-test.hpp b/tests/worker/worker-test.hpp index 9408ac6f..d50f03f6 100644 --- a/tests/worker/worker-test.hpp +++ b/tests/worker/worker-test.hpp @@ -29,6 +29,6 @@ auto join_string_test( auto sleep_test(spider::TaskContext& context, int milliseconds) -> int; -auto abort_test(spider::TaskContext& context, int /*x*/) -> int; +auto abort_test(spider::TaskContext& context, int x) -> int; #endif From 8ebdfd1d3981b1e934d4e8bc2db59eeb18994b3b Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 00:27:42 -0400 Subject: [PATCH 53/64] Remove unused function --- src/spider/worker/ExecutorHandle.cpp | 5 ----- src/spider/worker/ExecutorHandle.hpp | 1 - 2 files changed, 6 deletions(-) diff --git a/src/spider/worker/ExecutorHandle.cpp b/src/spider/worker/ExecutorHandle.cpp index ab98db8a..6041a49c 100644 --- a/src/spider/worker/ExecutorHandle.cpp +++ b/src/spider/worker/ExecutorHandle.cpp @@ -16,11 +16,6 @@ auto ExecutorHandle::get_task_id() -> std::optional { return std::nullopt; } -auto ExecutorHandle::get_executor() -> TaskExecutor* { - std::lock_guard const lock_guard{m_mutex}; - return m_executor; -} - auto ExecutorHandle::executor_cancel() -> void { std::lock_guard const lock_guard{m_mutex}; if (nullptr != m_executor) { diff --git a/src/spider/worker/ExecutorHandle.hpp b/src/spider/worker/ExecutorHandle.hpp index ce2af5de..826136bd 100644 --- a/src/spider/worker/ExecutorHandle.hpp +++ b/src/spider/worker/ExecutorHandle.hpp @@ -17,7 +17,6 @@ namespace spider::worker { class ExecutorHandle { public: [[nodiscard]] auto get_task_id() -> std::optional; - [[nodiscard]] auto get_executor() -> TaskExecutor*; auto executor_cancel() -> void; auto set(boost::uuids::uuid task_id, TaskExecutor* executor) -> void; auto clear() -> void; From 10bd8ff6c107fb345aac229dc6fdd4bf219ede10 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 15:55:42 -0400 Subject: [PATCH 54/64] Add error message for cancel test by user --- src/spider/client/Job.hpp | 11 +++++++---- src/spider/storage/MetadataStorage.hpp | 14 +++++++++----- src/spider/storage/mysql/MySqlStorage.cpp | 18 ++++++++++++++++-- src/spider/storage/mysql/MySqlStorage.hpp | 4 +++- tests/storage/test-MetadataStorage.cpp | 12 ++++++++++-- 5 files changed, 45 insertions(+), 14 deletions(-) diff --git a/src/spider/client/Job.hpp b/src/spider/client/Job.hpp index 50a86cae..a3bf4526 100644 --- a/src/spider/client/Job.hpp +++ b/src/spider/client/Job.hpp @@ -92,7 +92,8 @@ class Job { } auto conn = std::move(std::get>(conn_result)); - core::StorageErr const err = m_metadata_storage->cancel_job(*conn, m_id); + core::StorageErr const err + = m_metadata_storage->cancel_job_by_user(*conn, m_id, "Job cancelled by user."); if (!err.success()) { throw ConnectionException(err.description); } @@ -158,12 +159,14 @@ class Job { } /** - * NOTE: It is undefined behavior to call this method for a job that is not in the `Failed` + * NOTE: It is undefined behavior to call this method for a job that is not in the `Cancelled` * state. * * @return A pair: - * - the name of the task function that failed. - * - the error message sent from the task through `TaskContext::abort` or from Spider. + * - the name of the task function that called `TaskContext::abort`, or "user" if job is + * cancelled by calling `Job::cancel`. + * - the error message sent from the task through `TaskContext::abort` or "Job cancelled by + * user." if job is cancelled through `Job::cancel`. * @throw spider::ConnectionException */ auto get_error() -> std::pair { diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index 1b9cd818..86c8bf48 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -76,16 +76,20 @@ class MetadataStorage { ) -> StorageErr = 0; /** - * Cancel a job. This will set the job state to CANCEL and set all tasks that have not - * finished or started to CANCEL. + * Cancel a job by user. Set the job state to CANCEL and set all tasks that have not finished + * or started to CANCEL. * @param conn * @param id The job id. + * @param message The error message of the cancellation. * @return The error code. */ - virtual auto cancel_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr = 0; + virtual auto + cancel_job_by_user(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) + -> StorageErr + = 0; /** - * Cancel a job that owns the task. This will set the job state to CANCEL and set all tasks - * that have not finished or started to CANCEL. + * Cancel a job that owns the task. Set the job state to CANCEL and set all tasks that have not + * finished or started to CANCEL. * @param conn * @param id The task id. * @param message The error message of the cancellation. diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index 6d8d626f..f56378a2 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1076,8 +1076,11 @@ auto MySqlMetadataStorage::get_jobs_by_client_id( return StorageErr{}; } -auto MySqlMetadataStorage::cancel_job(StorageConnection& conn, boost::uuids::uuid const id) - -> StorageErr { +auto MySqlMetadataStorage::cancel_job_by_user( + StorageConnection& conn, + boost::uuids::uuid const id, + std::string const& message +) -> StorageErr { try { // Set all pending/ready/running tasks from the job to cancelled std::unique_ptr task_statement( @@ -1097,6 +1100,17 @@ auto MySqlMetadataStorage::cancel_job(StorageConnection& conn, boost::uuids::uui ); job_statement->setBytes(1, &id_bytes); job_statement->executeUpdate(); + // Set the cancel message + std::unique_ptr message_statement( + static_cast(conn)->prepareStatement( + "INSERT INTO `job_errors` (`job_id`, `func_name`, `message`) VALUES (?, ?, " + "?) " + ) + ); + message_statement->setBytes(1, &id_bytes); + message_statement->setString(2, "user"); + message_statement->setString(3, message); + message_statement->executeUpdate(); } catch (sql::SQLException& e) { static_cast(conn)->rollback(); return StorageErr{StorageErrType::OtherErr, e.what()}; diff --git a/src/spider/storage/mysql/MySqlStorage.hpp b/src/spider/storage/mysql/MySqlStorage.hpp index f5eb6426..bae2fffb 100644 --- a/src/spider/storage/mysql/MySqlStorage.hpp +++ b/src/spider/storage/mysql/MySqlStorage.hpp @@ -73,7 +73,9 @@ class MySqlMetadataStorage : public MetadataStorage { boost::uuids::uuid client_id, std::vector* job_ids ) -> StorageErr override; - auto cancel_job(StorageConnection& conn, boost::uuids::uuid id) -> StorageErr override; + auto + cancel_job_by_user(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) + -> StorageErr override; auto cancel_job_by_task(StorageConnection& conn, boost::uuids::uuid id, std::string const& message) -> StorageErr override; diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index ff38a233..e4459674 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -502,7 +502,7 @@ auto job_cancel_setup( return std::make_tuple(job_id, parent_1.get_id(), parent_2.get_id(), child_task.get_id()); } -TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryTypeList) { +TEMPLATE_LIST_TEST_CASE("Job cancel by user", "[storage]", spider::test::StorageFactoryTypeList) { std::unique_ptr storage_factory = spider::test::create_storage_factory(); std::unique_ptr storage @@ -515,10 +515,18 @@ TEMPLATE_LIST_TEST_CASE("Job cancel", "[storage]", spider::test::StorageFactoryT auto [job_id, parent_1_id, parent_2_id, child_id] = job_cancel_setup(storage, conn); - REQUIRE(storage->cancel_job(*conn, job_id).success()); + std::string const error_message = "Job cancelled by user."; + REQUIRE(storage->cancel_job_by_user(*conn, job_id, error_message).success()); spider::core::JobStatus job_status = spider::core::JobStatus::Running; REQUIRE(storage->get_job_status(*conn, job_id, &job_status).success()); REQUIRE(spider::core::JobStatus::Cancelled == job_status); + std::string error_task_res; + std::string error_message_res; + REQUIRE( + storage->get_error_message(*conn, job_id, &error_task_res, &error_message_res).success() + ); + REQUIRE("user" == error_task_res); + REQUIRE(error_message == error_message_res); spider::core::TaskState task_state = spider::core::TaskState::Running; REQUIRE(storage->get_task_state(*conn, parent_1_id, &task_state).success()); From a344f885df001752e33b77ed3447483c837b98d0 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 17:13:17 -0400 Subject: [PATCH 55/64] Refactor cancel integration test --- tests/client/cancel-test.cpp | 77 ++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/tests/client/cancel-test.cpp b/tests/client/cancel-test.cpp index 72361e4f..7207cbe1 100644 --- a/tests/client/cancel-test.cpp +++ b/tests/client/cancel-test.cpp @@ -41,6 +41,51 @@ constexpr int cCmdArgParseErr = 1; constexpr int cJobNotCancelled = 2; constexpr int cWrongErrorMessage = 3; constexpr int cException = 4; + +auto test_user_cancel(spider::Driver& driver) -> int { + spider::Job sleep_job = driver.start(&sleep_test, 3); + + std::this_thread::sleep_for(std::chrono::seconds(1)); + sleep_job.cancel(); + + sleep_job.wait_complete(); + if (spider::JobStatus::Cancelled != sleep_job.get_status()) { + spdlog::error("Sleep job status is not cancelled"); + return cJobNotCancelled; + } + + std::pair const job_errors = sleep_job.get_error(); + if ("user" != job_errors.first) { + spdlog::error("User job cancel wrong name"); + return cWrongErrorMessage; + } + if ("Job cancelled by user." != job_errors.second) { + spdlog::error("User job cancel wrong error message"); + return cWrongErrorMessage; + } + + return 0; +} + +auto test_task_cancel(spider::Driver& driver) -> int { + spider::Job abort_job = driver.start(&abort_test, 0); + abort_job.wait_complete(); + if (spider::JobStatus::Cancelled != abort_job.get_status()) { + spdlog::error("Abort job status is not cancelled"); + return cJobNotCancelled; + } + std::pair const job_errors = abort_job.get_error(); + if ("abort_test" != job_errors.first) { + spdlog::error("Cancelled task wrong function name"); + return cWrongErrorMessage; + } + if ("Abort test" != job_errors.second) { + spdlog::error("Cancelled task wrong error message"); + return cWrongErrorMessage; + } + + return 0; +} } // namespace auto main(int argc, char** argv) -> int { @@ -65,41 +110,21 @@ auto main(int argc, char** argv) -> int { } try { - // Create driver spider::Driver driver{storage_url}; spdlog::debug("Driver created"); - spider::Job sleep_job = driver.start(&sleep_test, 3); - - std::this_thread::sleep_for(std::chrono::seconds(1)); - sleep_job.cancel(); - - sleep_job.wait_complete(); - if (spider::JobStatus::Cancelled != sleep_job.get_status()) { - spdlog::error("Sleep job status is not cancelled"); - return cJobNotCancelled; + int result = test_user_cancel(driver); + if (0 != result) { + return result; } - // Cancel task from task - spider::Job abort_job = driver.start(&abort_test, 0); - abort_job.wait_complete(); - if (spider::JobStatus::Cancelled != abort_job.get_status()) { - spdlog::error("Abort job status is not cancelled"); - return cJobNotCancelled; - } - std::pair const job_errors = abort_job.get_error(); - if ("abort_test" != job_errors.first) { - spdlog::error("Cancelled task wrong function name"); - return cWrongErrorMessage; - } - if ("Abort test" != job_errors.second) { - spdlog::error("Cancelled task wrong error message"); - return cWrongErrorMessage; + result = test_task_cancel(driver); + if (0 != result) { + return result; } } catch (std::exception& e) { spdlog::error("Exception: {}", e.what()); return cException; } - return 0; } From 8672fd87b125e26caa3aa30f9b4654d75e278933 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 20:57:44 -0400 Subject: [PATCH 56/64] Use maybe_unused --- tests/worker/worker-test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/worker/worker-test.cpp b/tests/worker/worker-test.cpp index 191aefb4..21a96291 100644 --- a/tests/worker/worker-test.cpp +++ b/tests/worker/worker-test.cpp @@ -72,12 +72,12 @@ auto join_string_test( return input_1 + input_2; } -auto sleep_test(spider::TaskContext& /*context*/, int milliseconds) -> int { +auto sleep_test([[maybe_unused]] spider::TaskContext& context, int milliseconds) -> int { std::this_thread::sleep_for(std::chrono::milliseconds{milliseconds}); return milliseconds; } -auto abort_test(spider::TaskContext& context, int /*x*/) -> int { +auto abort_test(spider::TaskContext& context, [[maybe_unused]] int x) -> int { context.abort("Abort test"); return 0; } From 9fa08844a5ef12e40a9ba4765d5f38c3f981c507 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 23:06:51 -0400 Subject: [PATCH 57/64] Rename job_errors' func_name column to offender; Improvce docstring --- src/spider/storage/MetadataStorage.hpp | 12 +++++++----- src/spider/storage/mysql/MySqlStorage.cpp | 10 +++++----- src/spider/storage/mysql/MySqlStorage.hpp | 2 +- src/spider/storage/mysql/mysql_stmt.hpp | 2 +- tools/scripts/storage/init_db.sql | 2 +- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/spider/storage/MetadataStorage.hpp b/src/spider/storage/MetadataStorage.hpp index 86c8bf48..4fa33fa0 100644 --- a/src/spider/storage/MetadataStorage.hpp +++ b/src/spider/storage/MetadataStorage.hpp @@ -77,7 +77,7 @@ class MetadataStorage { = 0; /** * Cancel a job by user. Set the job state to CANCEL and set all tasks that have not finished - * or started to CANCEL. + * or started to CANCEL. Set the error message of the job and offender to "user". * @param conn * @param id The job id. * @param message The error message of the cancellation. @@ -88,8 +88,9 @@ class MetadataStorage { -> StorageErr = 0; /** - * Cancel a job that owns the task. Set the job state to CANCEL and set all tasks that have not - * finished or started to CANCEL. + * Cancel the job from the task. Set the job state to CANCEL and set all tasks that have not + * finished or started to CANCEL. Se the error message of the job and offender to the function + * name of the task. * @param conn * @param id The task id. * @param message The error message of the cancellation. @@ -103,14 +104,15 @@ class MetadataStorage { * Get the error message of a cancelled job. * @param conn * @param id The job id. - * @param function_name The function name of the cancelled task. + * @param offender The function name of the cancelling task if job cancelled by task, "user" if + * the job is cancelled by user. * @param message The error message of the cancellation. * @return The error code. */ virtual auto get_error_message( StorageConnection& conn, boost::uuids::uuid id, - std::string* function_name, + std::string* offender, std::string* message ) -> StorageErr = 0; diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index f56378a2..b1c84108 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1103,7 +1103,7 @@ auto MySqlMetadataStorage::cancel_job_by_user( // Set the cancel message std::unique_ptr message_statement( static_cast(conn)->prepareStatement( - "INSERT INTO `job_errors` (`job_id`, `func_name`, `message`) VALUES (?, ?, " + "INSERT INTO `job_errors` (`job_id`, `offender`, `message`) VALUES (?, ?, " "?) " ) ); @@ -1162,7 +1162,7 @@ auto MySqlMetadataStorage::cancel_job_by_task( // Set the cancel message std::unique_ptr message_statement( static_cast(conn)->prepareStatement( - "INSERT INTO `job_errors` (`job_id`, `func_name`, `message`) VALUES (?, ?, " + "INSERT INTO `job_errors` (`job_id`, `offender`, `message`) VALUES (?, ?, " "?) " ) ); @@ -1181,13 +1181,13 @@ auto MySqlMetadataStorage::cancel_job_by_task( auto MySqlMetadataStorage::get_error_message( StorageConnection& conn, boost::uuids::uuid const id, - std::string* function_name, + std::string* offender, std::string* message ) -> StorageErr { try { std::unique_ptr statement{ static_cast(conn)->prepareStatement( - "SELECT `func_name`, `message` FROM `job_errors` WHERE `job_id` = ?" + "SELECT `offender`, `message` FROM `job_errors` WHERE `job_id` = ?" ) }; sql::bytes id_bytes = uuid_get_bytes(id); @@ -1198,7 +1198,7 @@ auto MySqlMetadataStorage::get_error_message( return StorageErr{StorageErrType::KeyNotFoundErr, "No messages found"}; } res->next(); - *function_name = get_sql_string(res->getString("func_name")); + *offender = get_sql_string(res->getString("func_name")); *message = get_sql_string(res->getString("message")); } catch (sql::SQLException& e) { static_cast(conn)->rollback(); diff --git a/src/spider/storage/mysql/MySqlStorage.hpp b/src/spider/storage/mysql/MySqlStorage.hpp index bae2fffb..0987b6b5 100644 --- a/src/spider/storage/mysql/MySqlStorage.hpp +++ b/src/spider/storage/mysql/MySqlStorage.hpp @@ -82,7 +82,7 @@ class MySqlMetadataStorage : public MetadataStorage { auto get_error_message( StorageConnection& conn, boost::uuids::uuid id, - std::string* function_name, + std::string* offender, std::string* message ) -> StorageErr override; auto remove_job(StorageConnection& conn, boost::uuids::uuid id) noexcept -> StorageErr override; diff --git a/src/spider/storage/mysql/mysql_stmt.hpp b/src/spider/storage/mysql/mysql_stmt.hpp index ffac77d1..5ab528bc 100644 --- a/src/spider/storage/mysql/mysql_stmt.hpp +++ b/src/spider/storage/mysql/mysql_stmt.hpp @@ -34,7 +34,7 @@ std::string const cCreateJobTable = R"(CREATE TABLE IF NOT EXISTS jobs ( std::string const cCreateJobErrorTable = R"(CREATE TABLE IF NOT EXISTS `job_errors` ( `job_id` BINARY(16) NOT NULL, - `func_name` VARCHAR(64) NOT NULL, + `offender` VARCHAR(64) NOT NULL, `message` VARCHAR(999) NOT NULL, CONSTRAINT `job_error_job_id` FOREIGN KEY (`job_id`) REFERENCES `jobs` (`id`) ON UPDATE NO ACTION ON DELETE CASCADE, PRIMARY KEY (`job_id`) diff --git a/tools/scripts/storage/init_db.sql b/tools/scripts/storage/init_db.sql index 4a97791b..3575f56d 100644 --- a/tools/scripts/storage/init_db.sql +++ b/tools/scripts/storage/init_db.sql @@ -25,7 +25,7 @@ CREATE TABLE IF NOT EXISTS jobs ); CREATE TABLE IF NOT EXISTS `job_errors` ( `job_id` BINARY(16) NOT NULL, - `func_name` VARCHAR(64) NOT NULL, + `offender` VARCHAR(64) NOT NULL, `message` VARCHAR(999) NOT NULL, CONSTRAINT `job_error_job_id` FOREIGN KEY (`job_id`) REFERENCES `jobs` (`id`) ON UPDATE NO ACTION ON DELETE CASCADE, PRIMARY KEY (`job_id`) From 573a74bc923b0d43c7fd1a71646ad2dd0e78b992 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 23:13:31 -0400 Subject: [PATCH 58/64] Improve docstring of job_cancel_setup in test --- tests/storage/test-MetadataStorage.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index e4459674..5b6e8360 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -450,8 +450,9 @@ TEMPLATE_LIST_TEST_CASE("Task finish", "[storage]", spider::test::StorageFactory } /** - * Create a common job cancel test setup. Create a job with a task graph and set parent_1 to - * succeed. + * Create a common job cancel test setup. Create a job with a task graph that consists of two + * parent tasks and one child task. Set the state of parent 1 to succeed. Parent 2 state remains + * ready and child state remains pending. * @param storage * @param conn * @return A tuple containing the job_id, parent_1_id, parent_2_id, and child_task_id. From 2afd0e21d7b1dafc3f7a15edbe2762aee92730d8 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 23:13:40 -0400 Subject: [PATCH 59/64] Bug fix --- src/spider/storage/mysql/MySqlStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spider/storage/mysql/MySqlStorage.cpp b/src/spider/storage/mysql/MySqlStorage.cpp index b1c84108..c8aaf227 100644 --- a/src/spider/storage/mysql/MySqlStorage.cpp +++ b/src/spider/storage/mysql/MySqlStorage.cpp @@ -1198,7 +1198,7 @@ auto MySqlMetadataStorage::get_error_message( return StorageErr{StorageErrType::KeyNotFoundErr, "No messages found"}; } res->next(); - *offender = get_sql_string(res->getString("func_name")); + *offender = get_sql_string(res->getString("offender")); *message = get_sql_string(res->getString("message")); } catch (sql::SQLException& e) { static_cast(conn)->rollback(); From 40e5fb52b9689af02aeb15ce7821ade0a1520d93 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 23:15:13 -0400 Subject: [PATCH 60/64] Rename function --- src/spider/worker/ExecutorHandle.cpp | 2 +- src/spider/worker/ExecutorHandle.hpp | 2 +- src/spider/worker/worker.cpp | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/spider/worker/ExecutorHandle.cpp b/src/spider/worker/ExecutorHandle.cpp index 6041a49c..25e995a0 100644 --- a/src/spider/worker/ExecutorHandle.cpp +++ b/src/spider/worker/ExecutorHandle.cpp @@ -16,7 +16,7 @@ auto ExecutorHandle::get_task_id() -> std::optional { return std::nullopt; } -auto ExecutorHandle::executor_cancel() -> void { +auto ExecutorHandle::cancel_executor() -> void { std::lock_guard const lock_guard{m_mutex}; if (nullptr != m_executor) { m_executor->cancel(); diff --git a/src/spider/worker/ExecutorHandle.hpp b/src/spider/worker/ExecutorHandle.hpp index 826136bd..b73e9dcf 100644 --- a/src/spider/worker/ExecutorHandle.hpp +++ b/src/spider/worker/ExecutorHandle.hpp @@ -17,7 +17,7 @@ namespace spider::worker { class ExecutorHandle { public: [[nodiscard]] auto get_task_id() -> std::optional; - auto executor_cancel() -> void; + auto cancel_executor() -> void; auto set(boost::uuids::uuid task_id, TaskExecutor* executor) -> void; auto clear() -> void; diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index f4b24b1a..8b956616 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -152,8 +152,7 @@ auto check_task_cancel( return; } - // Cancel the task. - executor_handle.executor_cancel(); + executor_handle.cancel_executor(); } auto heartbeat_loop( From b65d18751d30e88306e1b458d97643b119927168 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 23:37:16 -0400 Subject: [PATCH 61/64] Store task id inside TaskExecutor --- src/spider/worker/ExecutorHandle.cpp | 6 ++---- src/spider/worker/ExecutorHandle.hpp | 4 +--- src/spider/worker/TaskExecutor.cpp | 5 +++++ src/spider/worker/TaskExecutor.hpp | 10 ++++++++-- src/spider/worker/worker.cpp | 2 +- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/spider/worker/ExecutorHandle.cpp b/src/spider/worker/ExecutorHandle.cpp index 25e995a0..4d4594f0 100644 --- a/src/spider/worker/ExecutorHandle.cpp +++ b/src/spider/worker/ExecutorHandle.cpp @@ -11,7 +11,7 @@ namespace spider::worker { auto ExecutorHandle::get_task_id() -> std::optional { std::lock_guard const lock_guard{m_mutex}; if (nullptr != m_executor) { - return m_task_id; + return m_executor->get_task_id(); } return std::nullopt; } @@ -23,15 +23,13 @@ auto ExecutorHandle::cancel_executor() -> void { } } -auto ExecutorHandle::set(boost::uuids::uuid const task_id, TaskExecutor* executor) -> void { +auto ExecutorHandle::set(TaskExecutor* executor) -> void { std::lock_guard const lock_guard{m_mutex}; - m_task_id = task_id; m_executor = executor; } auto ExecutorHandle::clear() -> void { std::lock_guard const lock_guard{m_mutex}; - m_task_id = boost::uuids::uuid{}; m_executor = nullptr; } } // namespace spider::worker diff --git a/src/spider/worker/ExecutorHandle.hpp b/src/spider/worker/ExecutorHandle.hpp index b73e9dcf..70dddfe2 100644 --- a/src/spider/worker/ExecutorHandle.hpp +++ b/src/spider/worker/ExecutorHandle.hpp @@ -18,12 +18,10 @@ class ExecutorHandle { public: [[nodiscard]] auto get_task_id() -> std::optional; auto cancel_executor() -> void; - auto set(boost::uuids::uuid task_id, TaskExecutor* executor) -> void; + auto set(TaskExecutor* executor) -> void; auto clear() -> void; private: - boost::uuids::uuid m_task_id; - // Do not use std::shared_ptr to avoid calling destructor twice. TaskExecutor* m_executor = nullptr; diff --git a/src/spider/worker/TaskExecutor.cpp b/src/spider/worker/TaskExecutor.cpp index c05350af..0d422270 100644 --- a/src/spider/worker/TaskExecutor.cpp +++ b/src/spider/worker/TaskExecutor.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include // IWYU pragma: keep @@ -17,6 +18,10 @@ #include namespace spider::worker { +auto TaskExecutor::get_task_id() const -> boost::uuids::uuid { + return m_task_id; +} + auto TaskExecutor::get_pid() const -> pid_t { return m_process->get_pid(); } diff --git a/src/spider/worker/TaskExecutor.hpp b/src/spider/worker/TaskExecutor.hpp index 7c60627d..395cf1c1 100644 --- a/src/spider/worker/TaskExecutor.hpp +++ b/src/spider/worker/TaskExecutor.hpp @@ -50,7 +50,8 @@ class TaskExecutor { boost::process::v2::environment::value> const& environment, Args&&... args ) - : m_read_pipe(context), + : m_task_id(task_id), + m_read_pipe(context), m_write_pipe(context) { std::vector process_args{ "--func", @@ -103,7 +104,8 @@ class TaskExecutor { boost::process::v2::environment::value> const& environment, std::vector const& args_buffers ) - : m_read_pipe(context), + : m_task_id(task_id), + m_read_pipe(context), m_write_pipe(context) { std::vector process_args{ "--func", @@ -165,6 +167,8 @@ class TaskExecutor { void cancel(); + auto get_task_id() const -> boost::uuids::uuid; + template auto get_result() const -> std::optional { return core::response_get_result(m_result_buffer); @@ -177,6 +181,8 @@ class TaskExecutor { private: auto process_output_handler() -> boost::asio::awaitable; + boost::uuids::uuid m_task_id; + std::mutex m_state_mutex; std::condition_variable m_complete_cv; TaskExecutorState m_state = TaskExecutorState::Running; diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 8b956616..3b0377df 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -424,7 +424,7 @@ auto task_loop( arg_buffers }; - executor_handle.set(task_id, &executor); + executor_handle.set(&executor); pid_t const pid = executor.get_pid(); spider::core::ChildPid::set_pid(pid); From e4da187ece49f0d20c783b8dee2b885a35f62b18 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Wed, 25 Jun 2025 23:49:10 -0400 Subject: [PATCH 62/64] Make ExecutorHandle singleton --- src/spider/worker/ExecutorHandle.cpp | 3 +++ src/spider/worker/ExecutorHandle.hpp | 25 ++++++++++++++++++------- src/spider/worker/worker.cpp | 23 ++++++++--------------- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/spider/worker/ExecutorHandle.cpp b/src/spider/worker/ExecutorHandle.cpp index 4d4594f0..62dbbe80 100644 --- a/src/spider/worker/ExecutorHandle.cpp +++ b/src/spider/worker/ExecutorHandle.cpp @@ -32,4 +32,7 @@ auto ExecutorHandle::clear() -> void { std::lock_guard const lock_guard{m_mutex}; m_executor = nullptr; } + +TaskExecutor* ExecutorHandle::m_executor = nullptr; +std::mutex ExecutorHandle::m_mutex; } // namespace spider::worker diff --git a/src/spider/worker/ExecutorHandle.hpp b/src/spider/worker/ExecutorHandle.hpp index 70dddfe2..279f124c 100644 --- a/src/spider/worker/ExecutorHandle.hpp +++ b/src/spider/worker/ExecutorHandle.hpp @@ -10,22 +10,33 @@ namespace spider::worker { /** - * This class acts as a handle for thread-safe access to the task executor and task id. + * This singleton class acts as a handle for thread-safe access to the task executor and task id. * It maintains a weak reference to the task executor to prevent multiple destructor calls and * ensures that access remains valid only while the executor itself is valid. */ class ExecutorHandle { public: - [[nodiscard]] auto get_task_id() -> std::optional; - auto cancel_executor() -> void; - auto set(TaskExecutor* executor) -> void; - auto clear() -> void; + [[nodiscard]] static auto get_task_id() -> std::optional; + static auto cancel_executor() -> void; + static auto set(TaskExecutor* executor) -> void; + static auto clear() -> void; + + // Delete default constructor + ExecutorHandle() = delete; + // Delete copy constructor and assignment operator + ExecutorHandle(ExecutorHandle const&) = delete; + auto operator=(ExecutorHandle const&) -> ExecutorHandle& = delete; + // Delete move constructor and assignment operator + ExecutorHandle(ExecutorHandle&&) = delete; + auto operator=(ExecutorHandle&&) -> ExecutorHandle& = delete; + // Default destructor + ~ExecutorHandle() = default; private: // Do not use std::shared_ptr to avoid calling destructor twice. - TaskExecutor* m_executor = nullptr; + static TaskExecutor* m_executor; - std::mutex m_mutex; + static std::mutex m_mutex; }; } // namespace spider::worker diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 3b0377df..7245e0d2 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -128,14 +128,13 @@ auto get_environment_variable() -> absl::flat_hash_map< * Checks if the task is cancelled. If the task state is set to cancelled, cancels the running task. * @param conn The storage connection to use. * @param metadata_store - * @param executor_handle Handle to the task id and executor. */ auto check_task_cancel( std::shared_ptr const& conn, - std::shared_ptr const& metadata_store, - spider::worker::ExecutorHandle& executor_handle + std::shared_ptr const& metadata_store ) -> void { - std::optional const optional_task_id = executor_handle.get_task_id(); + std::optional const optional_task_id + = spider::worker::ExecutorHandle::get_task_id(); if (!optional_task_id.has_value()) { return; } @@ -152,14 +151,13 @@ auto check_task_cancel( return; } - executor_handle.cancel_executor(); + spider::worker::ExecutorHandle::cancel_executor(); } auto heartbeat_loop( std::shared_ptr const& storage_factory, std::shared_ptr const& metadata_store, - spider::core::Driver const& driver, - spider::worker::ExecutorHandle& executor_handle + spider::core::Driver const& driver ) -> void { int fail_count = 0; while (!spider::core::StopFlag::is_stop_requested()) { @@ -180,7 +178,7 @@ auto heartbeat_loop( std::get>(conn_result) ); - check_task_cancel(conn, metadata_store, executor_handle); + check_task_cancel(conn, metadata_store); spdlog::debug("Updating heartbeat"); spider::core::StorageErr const err @@ -382,7 +380,6 @@ auto handle_executor_result( auto task_loop( std::shared_ptr const& storage_factory, std::shared_ptr const& metadata_store, - spider::worker::ExecutorHandle& executor_handle, spider::worker::WorkerClient& client, std::string const& storage_url, std::vector const& libs, @@ -424,7 +421,7 @@ auto task_loop( arg_buffers }; - executor_handle.set(&executor); + spider::worker::ExecutorHandle::set(&executor); pid_t const pid = executor.get_pid(); spider::core::ChildPid::set_pid(pid); @@ -437,7 +434,7 @@ auto task_loop( context.run(); executor.wait(); - executor_handle.clear(); + spider::worker::ExecutorHandle::clear(); spider::core::ChildPid::set_pid(0); if (executor.cancelled()) { @@ -552,15 +549,12 @@ auto main(int argc, char** argv) -> int { boost::process::v2::environment::value> const environment_variables = get_environment_variable(); - spider::worker::ExecutorHandle executor_handle; - // Start a thread that periodically updates the scheduler's heartbeat std::thread heartbeat_thread{ heartbeat_loop, std::cref(storage_factory), std::cref(metadata_store), std::ref(driver), - std::ref(executor_handle), }; // Start a thread that processes tasks @@ -568,7 +562,6 @@ auto main(int argc, char** argv) -> int { task_loop, std::cref(storage_factory), std::cref(metadata_store), - std::ref(executor_handle), std::ref(client), std::cref(storage_url), std::cref(libs), From 18ce4e94f0d1975f71629c5d137630c5d89b1fa8 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 26 Jun 2025 00:02:14 -0400 Subject: [PATCH 63/64] Improve docstring for cancel test job setup --- tests/storage/test-MetadataStorage.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/storage/test-MetadataStorage.cpp b/tests/storage/test-MetadataStorage.cpp index 5b6e8360..1da37e61 100644 --- a/tests/storage/test-MetadataStorage.cpp +++ b/tests/storage/test-MetadataStorage.cpp @@ -450,12 +450,21 @@ TEMPLATE_LIST_TEST_CASE("Task finish", "[storage]", spider::test::StorageFactory } /** - * Create a common job cancel test setup. Create a job with a task graph that consists of two - * parent tasks and one child task. Set the state of parent 1 to succeed. Parent 2 state remains - * ready and child state remains pending. + * Creates a test job with a task dependency graph for cancellation tests. + * + * Task graph structure: + * parent_1 (p1) ──┐ + * ├──> child_task + * parent_2 (p2) ──┘ + * + * Task states after setup: + * - parent_1: Succeeded (with output "1.1") + * - parent_2: Ready + * - child_task: Pending (waiting for parent_2 to complete) + * * @param storage * @param conn - * @return A tuple containing the job_id, parent_1_id, parent_2_id, and child_task_id. + * @return A tuple containing (job_id, parent_1_id, parent_2_id, and child_task_id). */ auto job_cancel_setup( std::unique_ptr& storage, From 8015388a22453dda8ca5e0417c9a83ab585c480d Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 26 Jun 2025 00:23:38 -0400 Subject: [PATCH 64/64] Fix clang tidy --- src/spider/worker/TaskExecutor.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spider/worker/TaskExecutor.hpp b/src/spider/worker/TaskExecutor.hpp index 395cf1c1..36a3d9e9 100644 --- a/src/spider/worker/TaskExecutor.hpp +++ b/src/spider/worker/TaskExecutor.hpp @@ -167,7 +167,7 @@ class TaskExecutor { void cancel(); - auto get_task_id() const -> boost::uuids::uuid; + [[nodiscard]] auto get_task_id() const -> boost::uuids::uuid; template auto get_result() const -> std::optional {