diff --git a/docs/src/dev-docs/testing.md b/docs/src/dev-docs/testing.md index 873ef7b6..fc45df70 100644 --- a/docs/src/dev-docs/testing.md +++ b/docs/src/dev-docs/testing.md @@ -23,7 +23,7 @@ require this storage backend. 4. Set the `cStorageUrl` in `tests/storage/StorageTestHelper.hpp` to `jdbc:mariadb://localhost:3306/?user=&password=`. -5. Set the `storage_url` in `tests/integration/client.py` to +5. Set the `storage-url` in `tests/integration/client.py` to `jdbc:mariadb://localhost:3306/?user=&password=`. ## Running unit tests diff --git a/docs/src/user-docs/guides-quick-start.md b/docs/src/user-docs/guides-quick-start.md index ca4c5411..4b527c59 100644 --- a/docs/src/user-docs/guides-quick-start.md +++ b/docs/src/user-docs/guides-quick-start.md @@ -180,7 +180,7 @@ To start the scheduler, run: ```shell build/spider/spider/src/spider/spider_scheduler \ - --storage_url \ + --storage-url \ "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ --host "127.0.0.1" \ --port 6000 @@ -189,7 +189,7 @@ build/spider/spider/src/spider/spider_scheduler \ NOTE: * If you used a different set of arguments to set up the storage backend, ensure you update the - `storage_url` argument in the command. + `storage-url` argument in the command. * In production, change the host to the real IP address of the machine running the scheduler. * If the scheduler fails to bind to port `6000`, change the port in the command and try again. @@ -205,7 +205,7 @@ To start a worker, run: ```shell build/spider/spider/src/spider/spider_worker \ - --storage_url \ + --storage-url \ "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ --host "127.0.0.1" \ --libs "build/spider/libtasks.so" @@ -214,7 +214,7 @@ build/spider/spider/src/spider/spider_worker \ NOTE: * If you used a different set of arguments to set up the storage backend, ensure you update the - `storage_url` argument in the command. + `storage-url` argument in the command. * In production, change the host to the real IP address of the machine running the worker. * You can specify multiple task libraries to load. The task libraries must be built with linkage to the Spider client library. diff --git a/src/spider/CMakeLists.txt b/src/spider/CMakeLists.txt index da1e3f23..a80dff48 100644 --- a/src/spider/CMakeLists.txt +++ b/src/spider/CMakeLists.txt @@ -77,8 +77,9 @@ set(SPIDER_WORKER_SOURCES worker/message_pipe.hpp worker/WorkerClient.hpp worker/WorkerClient.cpp - utils/StopFlag.hpp + utils/ProgramOptions.hpp utils/StopFlag.cpp + utils/StopFlag.hpp CACHE INTERNAL "spider worker source files" ) @@ -89,6 +90,7 @@ set(SPIDER_TASK_EXECUTOR_SOURCES worker/message_pipe.cpp worker/message_pipe.hpp worker/task_executor.cpp + utils/ProgramOptions.hpp CACHE INTERNAL "spider task executor source files" ) @@ -137,8 +139,9 @@ set(SPIDER_SCHEDULER_SOURCES scheduler/SchedulerMessage.hpp scheduler/SchedulerServer.cpp scheduler/SchedulerServer.hpp - utils/StopFlag.hpp + utils/ProgramOptions.hpp utils/StopFlag.cpp + utils/StopFlag.hpp CACHE INTERNAL "spider scheduler source files" ) diff --git a/src/spider/scheduler/scheduler.cpp b/src/spider/scheduler/scheduler.cpp index 8629f3b2..674ae058 100644 --- a/src/spider/scheduler/scheduler.cpp +++ b/src/spider/scheduler/scheduler.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -9,7 +10,6 @@ #include #include -#include #include #include #include @@ -31,6 +31,7 @@ #include #include #include +#include #include constexpr int cCmdArgParseErr = 1; @@ -53,33 +54,70 @@ auto stop_scheduler_handler(int signal) -> void { } } -auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map { +auto parse_args( + int const argc, + char** argv, + std::string& host, + unsigned short& port, + std::string& storage_url +) -> bool { boost::program_options::options_description desc; - desc.add_options()("help", "spider scheduler"); - desc.add_options()( - "host", - boost::program_options::value(), - "scheduler host address" - ); - desc.add_options()( - "port", - boost::program_options::value(), - "port to listen on" - ); - desc.add_options()( - "storage_url", - boost::program_options::value(), - "storage server url" - ); - - boost::program_options::variables_map variables; - boost::program_options::store( - // NOLINTNEXTLINE(misc-include-cleaner) - boost::program_options::parse_command_line(argc, argv, desc), - variables - ); - boost::program_options::notify(variables); - return variables; + // clang-format off + desc.add_options() + (spider::core::cHelpOption.data(), spider::core::cHelpMessage.data()) + ( + spider::core::cHostOption.data(), + boost::program_options::value(&host)->required(), + spider::core::cHostMessage.data() + ) + ( + spider::core::cPortOption.data(), + boost::program_options::value(&port)->required(), + spider::core::cPortMessage.data() + ) + ( + spider::core::cStorageUrlOption.data(), + boost::program_options::value(&storage_url)->required(), + spider::core::cStorageUrlMessage.data() + ); + // clang-format on + + try { + boost::program_options::variables_map variables; + boost::program_options::store( + // NOLINTNEXTLINE(misc-include-cleaner) + boost::program_options::parse_command_line(argc, argv, desc), + variables + ); + + if (false == variables.contains(std::string(spider::core::cHostOption)) + && false == variables.contains(std::string(spider::core::cPortOption)) + && false == variables.contains(std::string(spider::core::cStorageUrlOption))) + { + std::cout << spider::core::cSchedulerUsage << "\n"; + std::cout << desc << "\n"; + return false; + } + + boost::program_options::notify(variables); + + if (host.empty()) { + std::cerr << spider::core::cHostEmptyMessage << "\n"; + return false; + } + + if (storage_url.empty()) { + std::cerr << spider::core::cStorageUrlEmptyMessage << "\n"; + return false; + } + + return true; + } catch (boost::program_options::error& e) { + std::cerr << "spider_scheduler: " << e.what() << "\n"; + std::cerr << spider::core::cSchedulerUsage << "\n"; + std::cerr << spider::core::cSchedulerHelpMessage; + return false; + } } auto heartbeat_loop( @@ -157,30 +195,10 @@ auto main(int argc, char** argv) -> int { spdlog::set_level(spdlog::level::trace); #endif - boost::program_options::variables_map const args = parse_args(argc, argv); - unsigned short port = 0; std::string scheduler_addr; std::string storage_url; - try { - if (!args.contains("port")) { - spdlog::error("port is required"); - return cCmdArgParseErr; - } - port = args["port"].as(); - if (!args.contains("host")) { - spdlog::error("host is required"); - return cCmdArgParseErr; - } - scheduler_addr = args["host"].as(); - if (!args.contains("storage_url")) { - spdlog::error("storage_url is required"); - return cCmdArgParseErr; - } - storage_url = args["storage_url"].as(); - } catch (boost::bad_any_cast& e) { - return cCmdArgParseErr; - } catch (boost::program_options::error& e) { + if (false == parse_args(argc, argv, scheduler_addr, port, storage_url)) { return cCmdArgParseErr; } diff --git a/src/spider/utils/ProgramOptions.hpp b/src/spider/utils/ProgramOptions.hpp new file mode 100644 index 00000000..b2dfaf1c --- /dev/null +++ b/src/spider/utils/ProgramOptions.hpp @@ -0,0 +1,62 @@ +#ifndef SPIDER_UTILS_PROGRAMOPTIONS_HPP +#define SPIDER_UTILS_PROGRAMOPTIONS_HPP + +#include + +namespace spider::core { +constexpr std::string_view cSchedulerUsage + = {"Usage: spider_scheduler --host --port --storage-url "}; + +constexpr std::string_view cSchedulerHelpMessage + = {"Try 'spider_scheduler --help' for detailed usage instructions.\n"}; + +constexpr std::string_view cWorkerUsage + = {"Usage: spider_worker --host --storage-url --libs "}; + +constexpr std::string_view cWorkerHelpMessage + = {"Try 'spider_worker --help' for detailed usage instructions.\n"}; + +constexpr std::string_view cTaskExecutorUsage + = {"Usage: spider_task_executor --func --task-id --storage-url " + " --libs "}; + +constexpr std::string_view cTaskExecutorHelpMessage + = {"Try 'spider_task_executor --help' for detailed usage instructions.\n"}; + +constexpr std::string_view cHelpOption = {"help"}; + +constexpr std::string_view cHelpMessage = {"Print this help text."}; + +constexpr std::string_view cHostOption = {"host"}; + +constexpr std::string_view cHostMessage = {"The host address to bind to"}; + +constexpr std::string_view cHostEmptyMessage = {"The host address should not be empty"}; + +constexpr std::string_view cPortOption = {"port"}; + +constexpr std::string_view cPortMessage = {"The port to listen on"}; + +constexpr std::string_view cStorageUrlOption = {"storage-url"}; + +constexpr std::string_view cStorageUrlMessage = {"The storage server's URL"}; + +constexpr std::string_view cStorageUrlEmptyMessage + = {"The storage server's URL should not be empty"}; + +constexpr std::string_view cLibsOption = {"libs"}; + +constexpr std::string_view cLibsMessage = {"The tasks libraries to load"}; + +constexpr std::string_view cLibsEmptyMessage = {"The tasks libraries should not be empty"}; + +constexpr std::string_view cFunctionOption = {"func"}; + +constexpr std::string_view cFunctionMessage = {"The function to execute"}; + +constexpr std::string_view cTaskIdOption = {"task-id"}; + +constexpr std::string_view cTaskIdMessage = {"The id of the task to execute"}; +} // namespace spider::core + +#endif diff --git a/src/spider/worker/TaskExecutor.hpp b/src/spider/worker/TaskExecutor.hpp index 3fc19caf..97b99281 100644 --- a/src/spider/worker/TaskExecutor.hpp +++ b/src/spider/worker/TaskExecutor.hpp @@ -20,9 +20,11 @@ #include #include #include +#include #include // IWYU pragma: keep #include // IWYU pragma: keep +#include #include #include #include @@ -53,13 +55,13 @@ class TaskExecutor { : m_read_pipe(context), m_write_pipe(context) { std::vector process_args{ - "--func", + fmt::format("--{}", core::cFunctionOption), func_name, - "--task_id", + fmt::format("--{}", core::cTaskIdOption), to_string(task_id), - "--storage_url", + fmt::format("--{}", core::cStorageUrlOption), storage_url, - "--libs" + fmt::format("--{}", core::cLibsOption), }; process_args.insert(process_args.end(), libs.begin(), libs.end()); boost::filesystem::path const exe = boost::process::v2::environment::find_executable( @@ -106,13 +108,13 @@ class TaskExecutor { : m_read_pipe(context), m_write_pipe(context) { std::vector process_args{ - "--func", + fmt::format("--{}", core::cFunctionOption), func_name, - "--task_id", + fmt::format("--{}", core::cTaskIdOption), to_string(task_id), - "--storage_url", + fmt::format("--{}", core::cStorageUrlOption), storage_url, - "--libs" + fmt::format("--{}", core::cLibsOption), }; process_args.insert(process_args.end(), libs.begin(), libs.end()); boost::filesystem::path const exe = boost::process::v2::environment::find_executable( diff --git a/src/spider/worker/task_executor.cpp b/src/spider/worker/task_executor.cpp index 8382d414..31c1e6d7 100644 --- a/src/spider/worker/task_executor.cpp +++ b/src/spider/worker/task_executor.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -35,22 +36,26 @@ namespace { auto parse_arg(int const argc, char** const& argv) -> boost::program_options::variables_map { boost::program_options::options_description desc; - desc.add_options()("help", "spider task executor"); - desc.add_options()("func", boost::program_options::value(), "function to run"); + desc.add_options()(spider::core::cHelpOption.data(), spider::core::cHelpMessage.data()); desc.add_options()( - "task_id", + spider::core::cFunctionOption.data(), boost::program_options::value(), - "task id of the function" + spider::core::cFunctionMessage.data() ); desc.add_options()( - "libs", + spider::core::cTaskIdOption.data(), + boost::program_options::value(), + spider::core::cTaskIdMessage.data() + ); + desc.add_options()( + spider::core::cLibsOption.data(), boost::program_options::value>(), - "dynamic libraries that include the spider tasks" + spider::core::cLibsMessage.data() ); desc.add_options()( - "storage_url", + spider::core::cStorageUrlOption.data(), boost::program_options::value(), - "storage server url" + spider::core::cStorageUrlMessage.data() ); boost::program_options::variables_map variables; @@ -87,22 +92,23 @@ auto main(int const argc, char** argv) -> int { std::string storage_url; std::string task_id_string; try { - if (!args.contains("func")) { + if (!args.contains(std::string(spider::core::cFunctionOption))) { return cCmdArgParseErr; } - func_name = args["func"].as(); - if (!args.contains("task_id")) { + func_name = args[std::string(spider::core::cFunctionOption)].as(); + if (!args.contains(std::string(spider::core::cTaskIdOption))) { return cCmdArgParseErr; } - task_id_string = args["task_id"].as(); - if (!args.contains("storage_url")) { + task_id_string = args[std::string(spider::core::cTaskIdOption)].as(); + if (!args.contains(std::string(spider::core::cStorageUrlOption))) { return cCmdArgParseErr; } - storage_url = args["storage_url"].as(); - if (!args.contains("libs")) { + storage_url = args[std::string(spider::core::cStorageUrlOption)].as(); + if (!args.contains(std::string(spider::core::cLibsOption))) { return cCmdArgParseErr; } - std::vector const libs = args["libs"].as>(); + std::vector const libs + = args[std::string(spider::core::cLibsOption)].as>(); spider::worker::DllLoader& dll_loader = spider::worker::DllLoader::get_instance(); for (std::string const& lib : libs) { if (false == dll_loader.load_dll(lib)) { diff --git a/src/spider/worker/worker.cpp b/src/spider/worker/worker.cpp index ade6ce6b..d3f09deb 100644 --- a/src/spider/worker/worker.cpp +++ b/src/spider/worker/worker.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,6 @@ #include #include -#include #include #include #include @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -77,29 +78,75 @@ auto stop_task_handler(int signal) -> void { } } -auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map { +auto parse_args( + int const argc, + char** argv, + std::string& host, + std::string& storage_url, + std::vector& libs +) -> bool { boost::program_options::options_description desc; - desc.add_options()("help", "spider scheduler"); - desc.add_options()( - "storage_url", - boost::program_options::value(), - "storage server url" - ); - desc.add_options()( - "libs", - boost::program_options::value>(), - "dynamic libraries that include the spider tasks" - ); - desc.add_options()("host", boost::program_options::value(), "worker host address"); - - boost::program_options::variables_map variables; - boost::program_options::store( - // NOLINTNEXTLINE(misc-include-cleaner) - boost::program_options::parse_command_line(argc, argv, desc), - variables - ); - boost::program_options::notify(variables); - return variables; + // clang-format off + desc.add_options() + (spider::core::cHelpOption.data(), spider::core::cHelpMessage.data()) + ( + spider::core::cHostOption.data(), + boost::program_options::value(&host)->required(), + spider::core::cHostMessage.data() + ) + ( + spider::core::cStorageUrlOption.data(), + boost::program_options::value(&storage_url)->required(), + spider::core::cStorageUrlMessage.data() + ) + ( + spider::core::cLibsOption.data(), + boost::program_options::value>(&libs), + spider::core::cLibsMessage.data() + ); + // clang-format on + + try { + boost::program_options::variables_map variables; + boost::program_options::store( + // NOLINTNEXTLINE(misc-include-cleaner) + boost::program_options::parse_command_line(argc, argv, desc), + variables + ); + + if (!variables.contains(std::string(spider::core::cHostOption)) + && !variables.contains(std::string(spider::core::cStorageUrlOption)) + && !variables.contains(std::string(spider::core::cLibsOption))) + { + std::cout << spider::core::cWorkerUsage << "\n"; + std::cout << desc << "\n"; + return false; + } + + boost::program_options::notify(variables); + + if (host.empty()) { + std::cerr << spider::core::cHostEmptyMessage << "\n"; + return false; + } + + if (storage_url.empty()) { + std::cerr << spider::core::cStorageUrlEmptyMessage << "\n"; + return false; + } + + if (libs.empty()) { + std::cerr << spider::core::cLibsEmptyMessage << "\n"; + return false; + } + + return true; + } catch (boost::program_options::error& e) { + std::cerr << "spider_worker: " << e.what() << "\n"; + std::cerr << spider::core::cWorkerUsage << "\n"; + std::cerr << spider::core::cWorkerHelpMessage; + return false; + } } auto get_environment_variable() -> absl::flat_hash_map< @@ -420,32 +467,10 @@ auto main(int argc, char** argv) -> int { spdlog::set_level(spdlog::level::trace); #endif - boost::program_options::variables_map const args = parse_args(argc, argv); - std::string storage_url; std::vector libs; std::string worker_addr; - try { - if (!args.contains("storage_url")) { - spdlog::error("Missing storage_url"); - return cCmdArgParseErr; - } - storage_url = args["storage_url"].as(); - if (!args.contains("host")) { - spdlog::error("Missing host"); - return cCmdArgParseErr; - } - worker_addr = args["host"].as(); - if (!args.contains("libs") || args["libs"].empty()) { - spdlog::error("Missing libs"); - return cCmdArgParseErr; - } - libs = args["libs"].as>(); - } catch (boost::bad_any_cast const& e) { - spdlog::error("Error: {}", e.what()); - return cCmdArgParseErr; - } catch (boost::program_options::error const& e) { - spdlog::error("Error: {}", e.what()); + if (!parse_args(argc, argv, worker_addr, storage_url, libs)) { return cCmdArgParseErr; } diff --git a/tests/client/client-test.cpp b/tests/client/client-test.cpp index 56c03061..78914218 100644 --- a/tests/client/client-test.cpp +++ b/tests/client/client-test.cpp @@ -16,16 +16,17 @@ #include #include #include +#include #include namespace { auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map { boost::program_options::options_description desc; - desc.add_options()("help", "spider client test"); + desc.add_options()(spider::core::cHelpOption.data(), spider::core::cHelpMessage.data()); desc.add_options()( - "storage_url", + spider::core::cStorageUrlOption.data(), boost::program_options::value(), - "storage server url" + spider::core::cStorageUrlMessage.data() ); boost::program_options::variables_map variables; @@ -220,11 +221,11 @@ auto main(int argc, char** argv) -> int { std::string storage_url; try { - if (!args.contains("storage_url")) { - spdlog::error("storage_url is required"); + if (!args.contains(std::string(spider::core::cStorageUrlOption))) { + spdlog::error("storage-url is required"); return cCmdArgParseErr; } - storage_url = args["storage_url"].as(); + storage_url = args[std::string(spider::core::cStorageUrlOption)].as(); } catch (boost::bad_any_cast& e) { return cCmdArgParseErr; } catch (boost::program_options::error& e) { diff --git a/tests/integration/test_client.py b/tests/integration/test_client.py index 94b48afd..32ca58ca 100644 --- a/tests/integration/test_client.py +++ b/tests/integration/test_client.py @@ -24,7 +24,7 @@ def start_scheduler_workers( "127.0.0.1", "--port", str(scheduler_port), - "--storage_url", + "--storage-url", storage_url, ] scheduler_process = subprocess.Popen(scheduler_cmds) @@ -32,7 +32,7 @@ def start_scheduler_workers( str(dir_path / "spider_worker"), "--host", "127.0.0.1", - "--storage_url", + "--storage-url", storage_url, "--libs", "tests/libworker_test.so", @@ -61,7 +61,7 @@ def test_client(self, scheduler_worker): dir_path = dir_path / ".." client_cmds = [ str(dir_path / "client_test"), - "--storage_url", + "--storage-url", g_storage_url, ] p = subprocess.run(client_cmds, timeout=20) diff --git a/tests/integration/test_scheduler_worker.py b/tests/integration/test_scheduler_worker.py index 9dfc359d..93062c61 100644 --- a/tests/integration/test_scheduler_worker.py +++ b/tests/integration/test_scheduler_worker.py @@ -39,7 +39,7 @@ def start_scheduler_worker( "127.0.0.1", "--port", str(scheduler_port), - "--storage_url", + "--storage-url", storage_url, ] scheduler_process = subprocess.Popen(scheduler_cmds) @@ -47,7 +47,7 @@ def start_scheduler_worker( str(dir_path / "spider_worker"), "--host", "127.0.0.1", - "--storage_url", + "--storage-url", storage_url, "--libs", "tests/libworker_test.so", diff --git a/tests/integration/test_signal.py b/tests/integration/test_signal.py index a32f1c3d..7766616e 100644 --- a/tests/integration/test_signal.py +++ b/tests/integration/test_signal.py @@ -33,7 +33,7 @@ def start_scheduler_worker(storage_url: str, scheduler_port: int, lib: str): "127.0.0.1", "--port", str(scheduler_port), - "--storage_url", + "--storage-url", storage_url, ] scheduler_process = subprocess.Popen(scheduler_cmds, **popen_opts) @@ -41,7 +41,7 @@ def start_scheduler_worker(storage_url: str, scheduler_port: int, lib: str): str(bin_dir / "spider_worker"), "--host", "127.0.0.1", - "--storage_url", + "--storage-url", storage_url, "--libs", lib,