From ba2977a6ae23c563b21b3c6c25dc164a5ecc59f8 Mon Sep 17 00:00:00 2001 From: sitao Date: Tue, 14 Jan 2025 23:53:54 -0500 Subject: [PATCH 01/17] Improve command line argument handling --- src/spider/scheduler/scheduler.cpp | 92 +++++++++++++++++------------- src/spider/worker/worker.cpp | 88 ++++++++++++++-------------- 2 files changed, 97 insertions(+), 83 deletions(-) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index 8b778112..d676bdf4 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -38,33 +39,62 @@ constexpr int cCleanupInterval = 5; constexpr int cRetryCount = 5; namespace { -auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map { + +char const* const cUsage + = "Usage: spider_scheduler --host --port --storage_url "; + +auto parse_args( + int const argc, + char** argv, + std::string& host, + unsigned short& port, + std::string& storage_url +) -> bool { boost::program_options::options_description desc; - desc.add_options()("help", "spider scheduler"); - desc.add_options()( + // clang-format off + desc.add_options() + ("help", "spider scheduler") + ( "host", - boost::program_options::value(), + boost::program_options::value(&host)->required(), "scheduler host address" - ); - desc.add_options()( + ) + ( "port", - boost::program_options::value(), + boost::program_options::value(&port)->required(), "port to listen on" - ); - desc.add_options()( + ) + ( "storage_url", - boost::program_options::value(), + boost::program_options::value(&storage_url)->required(), "storage server url" - ); - - boost::program_options::variables_map variables; - boost::program_options::store( - // NOLINTNEXTLINE(misc-include-cleaner) - boost::program_options::parse_command_line(argc, argv, desc), - variables - ); - boost::program_options::notify(variables); - return variables; + ); + // clang-format on + + try { + boost::program_options::variables_map variables; + boost::program_options::store( + // NOLINTNEXTLINE(misc-include-cleaner) + boost::program_options::parse_command_line(argc, argv, desc), + variables + ); + + if (!variables.contains("host") && !variables.contains("port") + && !variables.contains("storage_url")) + { + std::cout << cUsage << "\n"; + std::cout << desc << "\n"; + return false; + } + + boost::program_options::notify(variables); + return true; + } catch (boost::program_options::error& e) { + std::cerr << "spider_scheduler: " << e.what() << "\n"; + std::cerr << cUsage << "\n"; + std::cerr << "Try 'spider_scheduler --help' for more information.\n"; + return false; + } } auto heartbeat_loop( @@ -137,30 +167,10 @@ auto main(int argc, char** argv) -> int { spdlog::set_level(spdlog::level::trace); #endif - boost::program_options::variables_map const args = parse_args(argc, argv); - unsigned short port = 0; std::string scheduler_addr; std::string storage_url; - try { - if (!args.contains("port")) { - spdlog::error("port is required"); - return cCmdArgParseErr; - } - port = args["port"].as(); - if (!args.contains("host")) { - spdlog::error("host is required"); - return cCmdArgParseErr; - } - scheduler_addr = args["host"].as(); - if (!args.contains("storage_url")) { - spdlog::error("storage_url is required"); - return cCmdArgParseErr; - } - storage_url = args["storage_url"].as(); - } catch (boost::bad_any_cast& e) { - return cCmdArgParseErr; - } catch (boost::program_options::error& e) { + if (!parse_args(argc, argv, scheduler_addr, port, storage_url)) { return cCmdArgParseErr; } diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 64fa339e..7446b193 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -50,29 +51,54 @@ constexpr int cTaskErr = 5; constexpr int cRetryCount = 5; namespace { -auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map { + +char const* const cUsage = "Usage: spider_worker --host --storage_url --libs "; + +auto parse_args(int const argc, char** argv, std::string& host, std::string& storage_url, std::vector& libs) -> bool { boost::program_options::options_description desc; - desc.add_options()("help", "spider scheduler"); - desc.add_options()( - "storage_url", - boost::program_options::value(), + // clang-format off + desc.add_options() + ("help", "spider scheduler") + ( + "host", + boost::program_options::value(&host)->required(), + "worker host address" + ) + ( + "storage_url", + boost::program_options::value(&storage_url)->required(), "storage server url" - ); - desc.add_options()( + ) + ( "libs", - boost::program_options::value>(), + boost::program_options::value>(&libs), "dynamic libraries that include the spider tasks" - ); - desc.add_options()("host", boost::program_options::value(), "worker host address"); - - boost::program_options::variables_map variables; - boost::program_options::store( - // NOLINTNEXTLINE(misc-include-cleaner) - boost::program_options::parse_command_line(argc, argv, desc), - variables - ); - boost::program_options::notify(variables); - return variables; + ); + // clang-format on + + try { + boost::program_options::variables_map variables; + boost::program_options::store( + // NOLINTNEXTLINE(misc-include-cleaner) + boost::program_options::parse_command_line(argc, argv, desc), + variables + ); + + if (!variables.contains("host") && !variables.contains("storage_url") && !variables.contains("libs")) + { + std::cout << cUsage << "\n"; + std::cout << desc << "\n"; + return false; + } + + boost::program_options::notify(variables); + return true; + } catch (boost::program_options::error& e) { + std::cerr << "spider_worker: " << e.what() << "\n"; + std::cerr << cUsage << "\n"; + std::cerr << "Try 'spider_worker --help' for more information.\n"; + return false; + } } auto get_environment_variable() -> absl::flat_hash_map< @@ -329,32 +355,10 @@ auto main(int argc, char** argv) -> int { spdlog::set_level(spdlog::level::trace); #endif - boost::program_options::variables_map const args = parse_args(argc, argv); - std::string storage_url; std::vector libs; std::string worker_addr; - try { - if (!args.contains("storage_url")) { - spdlog::error("Missing storage_url"); - return cCmdArgParseErr; - } - storage_url = args["storage_url"].as(); - if (!args.contains("host")) { - spdlog::error("Missing host"); - return cCmdArgParseErr; - } - worker_addr = args["host"].as(); - if (!args.contains("libs") || args["libs"].empty()) { - spdlog::error("Missing libs"); - return cCmdArgParseErr; - } - libs = args["libs"].as>(); - } catch (boost::bad_any_cast const& e) { - spdlog::error("Error: {}", e.what()); - return cCmdArgParseErr; - } catch (boost::program_options::error const& e) { - spdlog::error("Error: {}", e.what()); + if (!parse_args(argc, argv, worker_addr, storage_url, libs)) { return cCmdArgParseErr; } From 3c5e18b1ff5ddeafc1cab516304de6926dde4b65 Mon Sep 17 00:00:00 2001 From: sitao Date: Tue, 14 Jan 2025 23:57:28 -0500 Subject: [PATCH 02/17] Fix clang tidy --- src/spider/scheduler/scheduler.cpp | 1 - src/spider/worker/worker.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index d676bdf4..23471f55 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include #include diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 7446b193..b2b7565e 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -12,7 +12,6 @@ #include #include -#include #include #include #include From ff98aafbc35096e6af3847bc4860b3bb52cf0539 Mon Sep 17 00:00:00 2001 From: sitao Date: Wed, 15 Jan 2025 00:08:42 -0500 Subject: [PATCH 03/17] Fix clang format --- src/spider/worker/worker.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index b2b7565e..615b1731 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -3,9 +3,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -51,9 +51,16 @@ constexpr int cRetryCount = 5; namespace { -char const* const cUsage = "Usage: spider_worker --host --storage_url --libs "; +char const* const cUsage + = "Usage: spider_worker --host --storage_url --libs "; -auto parse_args(int const argc, char** argv, std::string& host, std::string& storage_url, std::vector& libs) -> bool { +auto parse_args( + int const argc, + char** argv, + std::string& host, + std::string& storage_url, + std::vector& libs +) -> bool { boost::program_options::options_description desc; // clang-format off desc.add_options() @@ -83,7 +90,8 @@ auto parse_args(int const argc, char** argv, std::string& host, std::string& sto variables ); - if (!variables.contains("host") && !variables.contains("storage_url") && !variables.contains("libs")) + if (!variables.contains("host") && !variables.contains("storage_url") + && !variables.contains("libs")) { std::cout << cUsage << "\n"; std::cout << desc << "\n"; From 3611eaaa487cce5eb3b706aeeb6b86d691f1a5c0 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Tue, 4 Feb 2025 15:14:39 -0500 Subject: [PATCH 04/17] Use string_view for static string literal Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- src/spider/scheduler/scheduler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index 23471f55..491d9c25 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -39,7 +39,9 @@ constexpr int cRetryCount = 5; namespace { -char const* const cUsage +constexpr std::string_view cUsage{ + "Usage: spider_scheduler --host --port --storage_url " +}; = "Usage: spider_scheduler --host --port --storage_url "; auto parse_args( From a68618861a9bf15b9cf971a0b0a29149b6069f2d Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Tue, 4 Feb 2025 15:15:11 -0500 Subject: [PATCH 05/17] Improve help message printing Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- src/spider/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index 491d9c25..fd46c819 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -54,7 +54,7 @@ auto parse_args( boost::program_options::options_description desc; // clang-format off desc.add_options() - ("help", "spider scheduler") + ("help", "Prints this help text.") ( "host", boost::program_options::value(&host)->required(), From e80ffa84fa6693772dd843155438e92dd7b4cde2 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Tue, 4 Feb 2025 15:15:55 -0500 Subject: [PATCH 06/17] Improve address argument help message Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- src/spider/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index fd46c819..c364f717 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -58,7 +58,7 @@ auto parse_args( ( "host", boost::program_options::value(&host)->required(), - "scheduler host address" + "The host address to bind to" ) ( "port", From 8bddb5e7e915c2c18921a6d3dfbabfae2ddb61fc Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Tue, 4 Feb 2025 15:16:21 -0500 Subject: [PATCH 07/17] Improve port argument help message Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- src/spider/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index c364f717..a07dac58 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -63,7 +63,7 @@ auto parse_args( ( "port", boost::program_options::value(&port)->required(), - "port to listen on" + "The port to listen on" ) ( "storage_url", From bc894a85f909e807d34f11e2919f1bd334f117b9 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Tue, 4 Feb 2025 15:16:48 -0500 Subject: [PATCH 08/17] Improve storage argument help message Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- src/spider/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index a07dac58..acd1e859 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -68,7 +68,7 @@ auto parse_args( ( "storage_url", boost::program_options::value(&storage_url)->required(), - "storage server url" + "The storage server's URL" ); // clang-format on From 1d6ee5c185751b5463afd35bd15603c0f4796841 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Tue, 4 Feb 2025 15:24:31 -0500 Subject: [PATCH 09/17] Improve argument error message Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- src/spider/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index acd1e859..55aed5b9 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -93,7 +93,7 @@ auto parse_args( } catch (boost::program_options::error& e) { std::cerr << "spider_scheduler: " << e.what() << "\n"; std::cerr << cUsage << "\n"; - std::cerr << "Try 'spider_scheduler --help' for more information.\n"; + std::cerr << "Try 'spider_scheduler --help' for detailed usage instructions.\n"; return false; } } From 52d0503d44ea9e1241a990179eb265c2fe756012 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Tue, 4 Feb 2025 15:25:00 -0500 Subject: [PATCH 10/17] Use `false ==` instead of `!` Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- src/spider/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index 55aed5b9..d559f20b 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -171,7 +171,7 @@ auto main(int argc, char** argv) -> int { unsigned short port = 0; std::string scheduler_addr; std::string storage_url; - if (!parse_args(argc, argv, scheduler_addr, port, storage_url)) { + if (false == parse_args(argc, argv, scheduler_addr, port, storage_url)) { return cCmdArgParseErr; } From 6b759743d3a40d6e000fe1bef67c1503452540a9 Mon Sep 17 00:00:00 2001 From: sitao Date: Tue, 4 Feb 2025 17:09:38 -0500 Subject: [PATCH 11/17] Move all program options string into one header --- src/spider/CMakeLists.txt | 5 ++- src/spider/scheduler/scheduler.cpp | 31 +++++++--------- src/spider/utils/ProgramOptions.hpp | 57 +++++++++++++++++++++++++++++ src/spider/worker/TaskExecutor.hpp | 18 +++++---- src/spider/worker/task_executor.cpp | 38 +++++++++++-------- src/spider/worker/worker.cpp | 29 +++++++-------- 6 files changed, 121 insertions(+), 57 deletions(-) create mode 100644 src/spider/utils/ProgramOptions.hpp diff --git a/src/spider/CMakeLists.txt b/src/spider/CMakeLists.txt index 62d8e11a..313ac355 100644 --- a/src/spider/CMakeLists.txt +++ b/src/spider/CMakeLists.txt @@ -20,7 +20,6 @@ set(SPIDER_CORE_HEADERS io/MsgPack.hpp io/msgpack_message.hpp io/Serializer.hpp - utils/TimedCache.hpp storage/MetadataStorage.hpp storage/DataStorage.hpp storage/MysqlStorage.hpp @@ -53,6 +52,7 @@ set(SPIDER_WORKER_SOURCES worker/message_pipe.hpp worker/WorkerClient.hpp worker/WorkerClient.cpp + utils/ProgramOptions.hpp utils/StopToken.hpp CACHE INTERNAL "spider worker source files" @@ -64,6 +64,7 @@ set(SPIDER_TASK_EXECUTOR_SOURCES worker/message_pipe.cpp worker/message_pipe.hpp worker/task_executor.cpp + utils/ProgramOptions.hpp CACHE INTERNAL "spider task executor source files" ) @@ -108,7 +109,9 @@ set(SPIDER_SCHEDULER_SOURCES scheduler/SchedulerMessage.hpp scheduler/SchedulerServer.cpp scheduler/SchedulerServer.hpp + utils/ProgramOptions.hpp utils/StopToken.hpp + utils/TimedCache.hpp CACHE INTERNAL "spider scheduler source files" ) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index d559f20b..2ecd65fd 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -24,6 +24,7 @@ #include "../storage/DataStorage.hpp" #include "../storage/MetadataStorage.hpp" #include "../storage/MysqlStorage.hpp" +#include "../utils/ProgramOptions.hpp" #include "../utils/StopToken.hpp" #include "FifoPolicy.hpp" #include "SchedulerPolicy.hpp" @@ -39,11 +40,6 @@ constexpr int cRetryCount = 5; namespace { -constexpr std::string_view cUsage{ - "Usage: spider_scheduler --host --port --storage_url " -}; - = "Usage: spider_scheduler --host --port --storage_url "; - auto parse_args( int const argc, char** argv, @@ -54,21 +50,21 @@ auto parse_args( boost::program_options::options_description desc; // clang-format off desc.add_options() - ("help", "Prints this help text.") + (spider::core::cHelpOption.data(), spider::core::cHelpMessage.data()) ( - "host", + spider::core::cHostOption.data(), boost::program_options::value(&host)->required(), - "The host address to bind to" + spider::core::cHostMessage.data() ) ( - "port", + spider::core::cPortOption.data(), boost::program_options::value(&port)->required(), - "The port to listen on" + spider::core::cPortMessage.data() ) ( - "storage_url", + spider::core::cStorageUrlOption.data(), boost::program_options::value(&storage_url)->required(), - "The storage server's URL" + spider::core::cStorageUrlMessage.data() ); // clang-format on @@ -80,10 +76,11 @@ auto parse_args( variables ); - if (!variables.contains("host") && !variables.contains("port") - && !variables.contains("storage_url")) + if (false == variables.contains(spider::core::cHostOption.data()) + && false == variables.contains(spider::core::cPortOption.data()) + && false == variables.contains(spider::core::cStorageUrlOption.data())) { - std::cout << cUsage << "\n"; + std::cout << spider::core::cSchedulerUsage << "\n"; std::cout << desc << "\n"; return false; } @@ -92,8 +89,8 @@ auto parse_args( return true; } catch (boost::program_options::error& e) { std::cerr << "spider_scheduler: " << e.what() << "\n"; - std::cerr << cUsage << "\n"; - std::cerr << "Try 'spider_scheduler --help' for detailed usage instructions.\n"; + std::cerr << spider::core::cSchedulerUsage << "\n"; + std::cerr << spider::core::cSchedulerHelpMessage; return false; } } diff --git a/src/spider/utils/ProgramOptions.hpp b/src/spider/utils/ProgramOptions.hpp new file mode 100644 index 00000000..9765d991 --- /dev/null +++ b/src/spider/utils/ProgramOptions.hpp @@ -0,0 +1,57 @@ +#ifndef SPIDER_UTILS_PROGRAMOPTIONS_HPP +#define SPIDER_UTILS_PROGRAMOPTIONS_HPP + +#include + +namespace spider::core { + +constexpr std::string_view cSchedulerUsage + = {"Usage: spider_scheduler --host --port --storage_url "}; + +constexpr std::string_view cSchedulerHelpMessage + = {"Try 'spider_scheduler --help' for detailed usage instructions.\n"}; + +constexpr std::string_view cWorkerUsage + = {"Usage: spider_worker --host --storage_url --libs "}; + +constexpr std::string_view cWorkerHelpMessage + = {"Try 'spider_worker --help' for detailed usage instructions.\n"}; + +constexpr std::string_view cTaskExecutorUsage + = {"Usage: spider_task_executor --func --task_id --storage_url " + " --libs "}; + +constexpr std::string_view cTaskExecutorHelpMessage + = {"Try 'spider_task_executor --help' for detailed usage instructions.\n"}; + +constexpr std::string_view cHelpOption = {"help"}; + +constexpr std::string_view cHelpMessage = {"Print this help text."}; + +constexpr std::string_view cHostOption = {"host"}; + +constexpr std::string_view cHostMessage = {"The host address to bind to"}; + +constexpr std::string_view cPortOption = {"port"}; + +constexpr std::string_view cPortMessage = {"The port to listen on"}; + +constexpr std::string_view cStorageUrlOption = {"storage_url"}; + +constexpr std::string_view cStorageUrlMessage = {"The storage server's URL"}; + +constexpr std::string_view cLibsOption = {"libs"}; + +constexpr std::string_view cLibsMessage = {"The tasks libraries to load"}; + +constexpr std::string_view cFunctionOption = {"func"}; + +constexpr std::string_view cFunctionMessage = {"The function to execute"}; + +constexpr std::string_view cTaskIdOption = {"task_id"}; + +constexpr std::string_view cTaskIdMessage = {"The id of the task to execute"}; + +} // namespace spider::core + +#endif diff --git a/src/spider/worker/TaskExecutor.hpp b/src/spider/worker/TaskExecutor.hpp index f9c910e3..0b49892d 100644 --- a/src/spider/worker/TaskExecutor.hpp +++ b/src/spider/worker/TaskExecutor.hpp @@ -18,9 +18,11 @@ #include #include #include +#include #include "../io/BoostAsio.hpp" // IWYU pragma: keep #include "../io/MsgPack.hpp" // IWYU pragma: keep +#include "../utils/ProgramOptions.hpp" #include "FunctionManager.hpp" #include "message_pipe.hpp" @@ -51,13 +53,13 @@ class TaskExecutor { : m_read_pipe(context), m_write_pipe(context) { std::vector process_args{ - "--func", + fmt::format("--{}", core::cFunctionOption), func_name, - "--task_id", + fmt::format("--{}", core::cTaskIdOption), to_string(task_id), - "--storage_url", + fmt::format("--{}", core::cStorageUrlOption), storage_url, - "--libs" + fmt::format("--{}", core::cLibsOption), }; process_args.insert(process_args.end(), libs.begin(), libs.end()); boost::filesystem::path const exe = boost::process::v2::environment::find_executable( @@ -99,13 +101,13 @@ class TaskExecutor { : m_read_pipe(context), m_write_pipe(context) { std::vector process_args{ - "--func", + fmt::format("--{}", core::cFunctionOption), func_name, - "--task_id", + fmt::format("--{}", core::cTaskIdOption), to_string(task_id), - "--storage_url", + fmt::format("--{}", core::cStorageUrlOption), storage_url, - "--libs" + fmt::format("--{}", core::cLibsOption), }; process_args.insert(process_args.end(), libs.begin(), libs.end()); boost::filesystem::path const exe = boost::process::v2::environment::find_executable( diff --git a/src/spider/worker/task_executor.cpp b/src/spider/worker/task_executor.cpp index d536f85e..ec5dd80c 100644 --- a/src/spider/worker/task_executor.cpp +++ b/src/spider/worker/task_executor.cpp @@ -26,6 +26,7 @@ #include "../storage/DataStorage.hpp" #include "../storage/MetadataStorage.hpp" #include "../storage/MysqlStorage.hpp" +#include "../utils/ProgramOptions.hpp" #include "DllLoader.hpp" #include "FunctionManager.hpp" #include "message_pipe.hpp" @@ -35,22 +36,26 @@ namespace { auto parse_arg(int const argc, char** const& argv) -> boost::program_options::variables_map { boost::program_options::options_description desc; - desc.add_options()("help", "spider task executor"); - desc.add_options()("func", boost::program_options::value(), "function to run"); + desc.add_options()(spider::core::cHelpOption.data(), spider::core::cHelpMessage.data()); desc.add_options()( - "task_id", + spider::core::cFunctionOption.data(), boost::program_options::value(), - "task id of the function" + spider::core::cFunctionMessage.data() ); desc.add_options()( - "libs", + spider::core::cTaskIdOption.data(), + boost::program_options::value(), + spider::core::cTaskIdMessage.data() + ); + desc.add_options()( + spider::core::cLibsOption.data(), boost::program_options::value>(), - "dynamic libraries that include the spider tasks" + spider::core::cLibsMessage.data() ); desc.add_options()( - "storage_url", + spider::core::cStorageUrlOption.data(), boost::program_options::value(), - "storage server url" + spider::core::cStorageUrlMessage.data() ); boost::program_options::variables_map variables; @@ -87,22 +92,23 @@ auto main(int const argc, char** argv) -> int { std::string storage_url; std::string task_id_string; try { - if (!args.contains("func")) { + if (!args.contains(spider::core::cFunctionOption.data())) { return cCmdArgParseErr; } - func_name = args["func"].as(); - if (!args.contains("task_id")) { + func_name = args[spider::core::cFunctionOption.data()].as(); + if (!args.contains(spider::core::cTaskIdOption.data())) { return cCmdArgParseErr; } - task_id_string = args["task_id"].as(); - if (!args.contains("storage_url")) { + task_id_string = args[spider::core::cTaskIdOption.data()].as(); + if (!args.contains(spider::core::cStorageUrlOption.data())) { return cCmdArgParseErr; } - storage_url = args["storage_url"].as(); - if (!args.contains("libs")) { + storage_url = args[spider::core::cStorageUrlOption.data()].as(); + if (!args.contains(spider::core::cLibsOption.data())) { return cCmdArgParseErr; } - std::vector const libs = args["libs"].as>(); + std::vector const libs + = args[spider::core::cLibsOption.data()].as>(); spider::worker::DllLoader& dll_loader = spider::worker::DllLoader::get_instance(); for (std::string const& lib : libs) { if (false == dll_loader.load_dll(lib)) { diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 615b1731..23c60840 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -37,6 +37,7 @@ #include "../storage/DataStorage.hpp" #include "../storage/MetadataStorage.hpp" #include "../storage/MysqlStorage.hpp" +#include "../utils/ProgramOptions.hpp" #include "../utils/StopToken.hpp" #include "TaskExecutor.hpp" #include "WorkerClient.hpp" @@ -51,9 +52,6 @@ constexpr int cRetryCount = 5; namespace { -char const* const cUsage - = "Usage: spider_worker --host --storage_url --libs "; - auto parse_args( int const argc, char** argv, @@ -64,21 +62,21 @@ auto parse_args( boost::program_options::options_description desc; // clang-format off desc.add_options() - ("help", "spider scheduler") + (spider::core::cHelpOption.data(), spider::core::cHelpMessage.data()) ( - "host", + spider::core::cHostOption.data(), boost::program_options::value(&host)->required(), - "worker host address" + spider::core::cHostMessage.data() ) ( - "storage_url", + spider::core::cStorageUrlOption.data(), boost::program_options::value(&storage_url)->required(), - "storage server url" + spider::core::cStorageUrlMessage.data() ) ( - "libs", + spider::core::cLibsOption.data(), boost::program_options::value>(&libs), - "dynamic libraries that include the spider tasks" + spider::core::cLibsMessage.data() ); // clang-format on @@ -90,10 +88,11 @@ auto parse_args( variables ); - if (!variables.contains("host") && !variables.contains("storage_url") - && !variables.contains("libs")) + if (!variables.contains(spider::core::cHostOption.data()) + && !variables.contains(spider::core::cStorageUrlOption.data()) + && !variables.contains(spider::core::cLibsOption.data())) { - std::cout << cUsage << "\n"; + std::cout << spider::core::cWorkerUsage << "\n"; std::cout << desc << "\n"; return false; } @@ -102,8 +101,8 @@ auto parse_args( return true; } catch (boost::program_options::error& e) { std::cerr << "spider_worker: " << e.what() << "\n"; - std::cerr << cUsage << "\n"; - std::cerr << "Try 'spider_worker --help' for more information.\n"; + std::cerr << spider::core::cWorkerUsage << "\n"; + std::cerr << spider::core::cWorkerHelpMessage; return false; } } From 9d3ec380f1b34ca1c8dbc3c76dfe16b19f76fa71 Mon Sep 17 00:00:00 2001 From: sitao Date: Tue, 4 Feb 2025 17:19:21 -0500 Subject: [PATCH 12/17] Fix missing source file in cmake --- src/spider/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spider/CMakeLists.txt b/src/spider/CMakeLists.txt index 416a7e0c..e9aded9e 100644 --- a/src/spider/CMakeLists.txt +++ b/src/spider/CMakeLists.txt @@ -111,6 +111,7 @@ set(SPIDER_SCHEDULER_SOURCES scheduler/SchedulerMessage.hpp scheduler/SchedulerServer.cpp scheduler/SchedulerServer.hpp + scheduler/SchedulerTaskCache.cpp scheduler/SchedulerTaskCache.hpp utils/ProgramOptions.hpp utils/StopToken.hpp From a365d7b3dcbaf5709eb1d0aa90cb3c39c2454342 Mon Sep 17 00:00:00 2001 From: sitao Date: Tue, 4 Feb 2025 17:45:36 -0500 Subject: [PATCH 13/17] Use hyphen instead of underscore for cmd line arguments --- docs/src/dev-docs/testing.md | 2 +- docs/src/user-docs/guides-quick-start.md | 8 ++++---- src/spider/utils/ProgramOptions.hpp | 10 +++++----- tests/client/client-test.cpp | 13 +++++++------ tests/integration/test_client.py | 6 +++--- tests/integration/test_scheduler_worker.py | 4 ++-- 6 files changed, 22 insertions(+), 21 deletions(-) diff --git a/docs/src/dev-docs/testing.md b/docs/src/dev-docs/testing.md index 873ef7b6..fc45df70 100644 --- a/docs/src/dev-docs/testing.md +++ b/docs/src/dev-docs/testing.md @@ -23,7 +23,7 @@ require this storage backend. 4. Set the `cStorageUrl` in `tests/storage/StorageTestHelper.hpp` to `jdbc:mariadb://localhost:3306/?user=&password=`. -5. Set the `storage_url` in `tests/integration/client.py` to +5. Set the `storage-url` in `tests/integration/client.py` to `jdbc:mariadb://localhost:3306/?user=&password=`. ## Running unit tests diff --git a/docs/src/user-docs/guides-quick-start.md b/docs/src/user-docs/guides-quick-start.md index 68486611..33e92784 100644 --- a/docs/src/user-docs/guides-quick-start.md +++ b/docs/src/user-docs/guides-quick-start.md @@ -165,7 +165,7 @@ To start the scheduler, run: ```shell build/spider/src/spider/spider_scheduler \ - --storage_url \ + --storage-url \ "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ --host "127.0.0.1" \ --port 6000 @@ -174,7 +174,7 @@ build/spider/src/spider/spider_scheduler \ NOTE: * If you used a different set of arguments to set up the storage backend, ensure you update the - `storage_url` argument in the command. + `storage-url` argument in the command. * In production, change the host to the real IP address of the machine running the scheduler. * If the scheduler fails to bind to port `6000`, change the port in the command and try again. @@ -190,7 +190,7 @@ To start a worker, run: ```shell build/spider/src/spider/spider_worker \ - --storage_url \ + --storage-url \ "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ --host "127.0.0.1" \ --libs "build/libtasks.so" @@ -199,7 +199,7 @@ build/spider/src/spider/spider_worker \ NOTE: * If you used a different set of arguments to set up the storage backend, ensure you update the - `storage_url` argument in the command. + `storage-url` argument in the command. * In production, change the host to the real IP address of the machine running the worker. * You can specify multiple task libraries to load. The task libraries must be built with linkage to the Spider client library. diff --git a/src/spider/utils/ProgramOptions.hpp b/src/spider/utils/ProgramOptions.hpp index 9765d991..64030b2d 100644 --- a/src/spider/utils/ProgramOptions.hpp +++ b/src/spider/utils/ProgramOptions.hpp @@ -6,19 +6,19 @@ namespace spider::core { constexpr std::string_view cSchedulerUsage - = {"Usage: spider_scheduler --host --port --storage_url "}; + = {"Usage: spider_scheduler --host --port --storage-url "}; constexpr std::string_view cSchedulerHelpMessage = {"Try 'spider_scheduler --help' for detailed usage instructions.\n"}; constexpr std::string_view cWorkerUsage - = {"Usage: spider_worker --host --storage_url --libs "}; + = {"Usage: spider_worker --host --storage-url --libs "}; constexpr std::string_view cWorkerHelpMessage = {"Try 'spider_worker --help' for detailed usage instructions.\n"}; constexpr std::string_view cTaskExecutorUsage - = {"Usage: spider_task_executor --func --task_id --storage_url " + = {"Usage: spider_task_executor --func --task-id --storage-url " " --libs "}; constexpr std::string_view cTaskExecutorHelpMessage @@ -36,7 +36,7 @@ constexpr std::string_view cPortOption = {"port"}; constexpr std::string_view cPortMessage = {"The port to listen on"}; -constexpr std::string_view cStorageUrlOption = {"storage_url"}; +constexpr std::string_view cStorageUrlOption = {"storage-url"}; constexpr std::string_view cStorageUrlMessage = {"The storage server's URL"}; @@ -48,7 +48,7 @@ constexpr std::string_view cFunctionOption = {"func"}; constexpr std::string_view cFunctionMessage = {"The function to execute"}; -constexpr std::string_view cTaskIdOption = {"task_id"}; +constexpr std::string_view cTaskIdOption = {"task-id"}; constexpr std::string_view cTaskIdMessage = {"The id of the task to execute"}; diff --git a/tests/client/client-test.cpp b/tests/client/client-test.cpp index 77fbcb3c..ab3f776b 100644 --- a/tests/client/client-test.cpp +++ b/tests/client/client-test.cpp @@ -14,16 +14,17 @@ #include "../../src/spider/client/Driver.hpp" #include "../../src/spider/client/Job.hpp" #include "../../src/spider/client/TaskGraph.hpp" +#include "../../src/spider/utils/ProgramOptions.hpp" #include "../worker/worker-test.hpp" namespace { auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map { boost::program_options::options_description desc; - desc.add_options()("help", "spider client test"); + desc.add_options()(spider::core::cHelpOption.data(), spider::core::cHelpMessage.data()); desc.add_options()( - "storage_url", + spider::core::cStorageUrlOption.data(), boost::program_options::value(), - "storage server url" + spider::core::cStorageUrlMessage.data() ); boost::program_options::variables_map variables; @@ -53,11 +54,11 @@ auto main(int argc, char** argv) -> int { std::string storage_url; try { - if (!args.contains("storage_url")) { - spdlog::error("storage_url is required"); + if (!args.contains(spider::core::cStorageUrlOption.data())) { + spdlog::error("storage-url is required"); return cCmdArgParseErr; } - storage_url = args["storage_url"].as(); + storage_url = args[spider::core::cStorageUrlOption.data()].as(); } catch (boost::bad_any_cast& e) { return cCmdArgParseErr; } catch (boost::program_options::error& e) { diff --git a/tests/integration/test_client.py b/tests/integration/test_client.py index b6ebde74..17ae64eb 100644 --- a/tests/integration/test_client.py +++ b/tests/integration/test_client.py @@ -23,7 +23,7 @@ def start_scheduler_workers( "127.0.0.1", "--port", str(scheduler_port), - "--storage_url", + "--storage-url", storage_url, ] scheduler_process = subprocess.Popen(scheduler_cmds) @@ -31,7 +31,7 @@ def start_scheduler_workers( str(dir_path / "spider_worker"), "--host", "127.0.0.1", - "--storage_url", + "--storage-url", storage_url, "--libs", "tests/libworker_test.so", @@ -63,7 +63,7 @@ def test_client(self, scheduler_worker): dir_path = dir_path / ".." client_cmds = [ str(dir_path / "client_test"), - "--storage_url", + "--storage-url", storage_url, ] p = subprocess.run(client_cmds, timeout=20) diff --git a/tests/integration/test_scheduler_worker.py b/tests/integration/test_scheduler_worker.py index 7f41f2c7..21ce2f73 100644 --- a/tests/integration/test_scheduler_worker.py +++ b/tests/integration/test_scheduler_worker.py @@ -38,7 +38,7 @@ def start_scheduler_worker( "127.0.0.1", "--port", str(scheduler_port), - "--storage_url", + "--storage-url", storage_url, ] scheduler_process = subprocess.Popen(scheduler_cmds) @@ -46,7 +46,7 @@ def start_scheduler_worker( str(dir_path / "spider_worker"), "--host", "127.0.0.1", - "--storage_url", + "--storage-url", storage_url, "--libs", "tests/libworker_test.so", From df31fe2389cace11e5ef520bba9c81edda80a038 Mon Sep 17 00:00:00 2001 From: sitao Date: Tue, 4 Feb 2025 17:56:30 -0500 Subject: [PATCH 14/17] Add check for program argument empty value --- src/spider/scheduler/scheduler.cpp | 11 +++++++++++ src/spider/utils/ProgramOptions.hpp | 7 +++++++ src/spider/worker/worker.cpp | 16 ++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index f4aa8653..9f91ec6a 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -86,6 +86,17 @@ auto parse_args( } boost::program_options::notify(variables); + + if (host.empty()) { + std::cerr << spider::core::cHostEmptyMessage << "\n"; + return false; + } + + if (storage_url.empty()) { + std::cerr << spider::core::cStorageUrlEmptyMessage << "\n"; + return false; + } + return true; } catch (boost::program_options::error& e) { std::cerr << "spider_scheduler: " << e.what() << "\n"; diff --git a/src/spider/utils/ProgramOptions.hpp b/src/spider/utils/ProgramOptions.hpp index 64030b2d..f6fd644d 100644 --- a/src/spider/utils/ProgramOptions.hpp +++ b/src/spider/utils/ProgramOptions.hpp @@ -32,6 +32,8 @@ constexpr std::string_view cHostOption = {"host"}; constexpr std::string_view cHostMessage = {"The host address to bind to"}; +constexpr std::string_view cHostEmptyMessage = {"The host address should not be empty"}; + constexpr std::string_view cPortOption = {"port"}; constexpr std::string_view cPortMessage = {"The port to listen on"}; @@ -40,10 +42,15 @@ constexpr std::string_view cStorageUrlOption = {"storage-url"}; constexpr std::string_view cStorageUrlMessage = {"The storage server's URL"}; +constexpr std::string_view cStorageUrlEmptyMessage + = {"The storage server's URL should not be empty"}; + constexpr std::string_view cLibsOption = {"libs"}; constexpr std::string_view cLibsMessage = {"The tasks libraries to load"}; +constexpr std::string_view cLibsEmptyMessage = {"The tasks libraries should not be empty"}; + constexpr std::string_view cFunctionOption = {"func"}; constexpr std::string_view cFunctionMessage = {"The function to execute"}; diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index b7002a07..370df561 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -99,6 +99,22 @@ auto parse_args( } boost::program_options::notify(variables); + + if (host.empty()) { + std::cerr << spider::core::cHostEmptyMessage << "\n"; + return false; + } + + if (storage_url.empty()) { + std::cerr << spider::core::cStorageUrlEmptyMessage << "\n"; + return false; + } + + if (libs.empty()) { + std::cerr << spider::core::cLibsEmptyMessage << "\n"; + return false; + } + return true; } catch (boost::program_options::error& e) { std::cerr << "spider_worker: " << e.what() << "\n"; From f113c2980a65e1616004720ac2129f557bacb6c0 Mon Sep 17 00:00:00 2001 From: sitao Date: Tue, 4 Feb 2025 19:53:25 -0500 Subject: [PATCH 15/17] Fix clang tidy --- src/spider/scheduler/scheduler.cpp | 6 +++--- src/spider/worker/TaskExecutor.hpp | 2 +- src/spider/worker/task_executor.cpp | 16 ++++++++-------- src/spider/worker/worker.cpp | 6 +++--- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index 9f91ec6a..9ea44248 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -76,9 +76,9 @@ auto parse_args( variables ); - if (false == variables.contains(spider::core::cHostOption.data()) - && false == variables.contains(spider::core::cPortOption.data()) - && false == variables.contains(spider::core::cStorageUrlOption.data())) + if (false == variables.contains(std::string(spider::core::cHostOption)) + && false == variables.contains(std::string(spider::core::cPortOption)) + && false == variables.contains(std::string(spider::core::cStorageUrlOption))) { std::cout << spider::core::cSchedulerUsage << "\n"; std::cout << desc << "\n"; diff --git a/src/spider/worker/TaskExecutor.hpp b/src/spider/worker/TaskExecutor.hpp index 0b49892d..1789cb82 100644 --- a/src/spider/worker/TaskExecutor.hpp +++ b/src/spider/worker/TaskExecutor.hpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "../io/BoostAsio.hpp" // IWYU pragma: keep #include "../io/MsgPack.hpp" // IWYU pragma: keep diff --git a/src/spider/worker/task_executor.cpp b/src/spider/worker/task_executor.cpp index b2f9be72..6770a268 100644 --- a/src/spider/worker/task_executor.cpp +++ b/src/spider/worker/task_executor.cpp @@ -91,23 +91,23 @@ auto main(int const argc, char** argv) -> int { std::string storage_url; std::string task_id_string; try { - if (!args.contains(spider::core::cFunctionOption.data())) { + if (!args.contains(std::string(spider::core::cFunctionOption))) { return cCmdArgParseErr; } - func_name = args[spider::core::cFunctionOption.data()].as(); - if (!args.contains(spider::core::cTaskIdOption.data())) { + func_name = args[std::string(spider::core::cFunctionOption)].as(); + if (!args.contains(std::string(spider::core::cTaskIdOption))) { return cCmdArgParseErr; } - task_id_string = args[spider::core::cTaskIdOption.data()].as(); - if (!args.contains(spider::core::cStorageUrlOption.data())) { + task_id_string = args[std::string(spider::core::cTaskIdOption)].as(); + if (!args.contains(std::string(spider::core::cStorageUrlOption))) { return cCmdArgParseErr; } - storage_url = args[spider::core::cStorageUrlOption.data()].as(); - if (!args.contains(spider::core::cLibsOption.data())) { + storage_url = args[std::string(spider::core::cStorageUrlOption)].as(); + if (!args.contains(std::string(spider::core::cLibsOption))) { return cCmdArgParseErr; } std::vector const libs - = args[spider::core::cLibsOption.data()].as>(); + = args[std::string(spider::core::cLibsOption)].as>(); spider::worker::DllLoader& dll_loader = spider::worker::DllLoader::get_instance(); for (std::string const& lib : libs) { if (false == dll_loader.load_dll(lib)) { diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index 370df561..4f17776d 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -89,9 +89,9 @@ auto parse_args( variables ); - if (!variables.contains(spider::core::cHostOption.data()) - && !variables.contains(spider::core::cStorageUrlOption.data()) - && !variables.contains(spider::core::cLibsOption.data())) + if (!variables.contains(std::string(spider::core::cHostOption)) + && !variables.contains(std::string(spider::core::cStorageUrlOption)) + && !variables.contains(std::string(spider::core::cLibsOption))) { std::cout << spider::core::cWorkerUsage << "\n"; std::cout << desc << "\n"; From fd2e90d1f95f001f46406e2283e6b6ee451662de Mon Sep 17 00:00:00 2001 From: sitao Date: Tue, 4 Feb 2025 21:18:52 -0500 Subject: [PATCH 16/17] Fix clang tidy --- tests/client/client-test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/client/client-test.cpp b/tests/client/client-test.cpp index ab3f776b..4f0f79f1 100644 --- a/tests/client/client-test.cpp +++ b/tests/client/client-test.cpp @@ -54,11 +54,11 @@ auto main(int argc, char** argv) -> int { std::string storage_url; try { - if (!args.contains(spider::core::cStorageUrlOption.data())) { + if (!args.contains(std::string(spider::core::cStorageUrlOption))) { spdlog::error("storage-url is required"); return cCmdArgParseErr; } - storage_url = args[spider::core::cStorageUrlOption.data()].as(); + storage_url = args[std::string(spider::core::cStorageUrlOption)].as(); } catch (boost::bad_any_cast& e) { return cCmdArgParseErr; } catch (boost::program_options::error& e) { From 2788febc6377703a503549b36e34c2e3abacad55 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 2 Jun 2025 11:56:48 -0400 Subject: [PATCH 17/17] Update storage-url command for signal integration test --- tests/integration/test_signal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_signal.py b/tests/integration/test_signal.py index a32f1c3d..7766616e 100644 --- a/tests/integration/test_signal.py +++ b/tests/integration/test_signal.py @@ -33,7 +33,7 @@ def start_scheduler_worker(storage_url: str, scheduler_port: int, lib: str): "127.0.0.1", "--port", str(scheduler_port), - "--storage_url", + "--storage-url", storage_url, ] scheduler_process = subprocess.Popen(scheduler_cmds, **popen_opts) @@ -41,7 +41,7 @@ def start_scheduler_worker(storage_url: str, scheduler_port: int, lib: str): str(bin_dir / "spider_worker"), "--host", "127.0.0.1", - "--storage_url", + "--storage-url", storage_url, "--libs", lib,